service: storage monitor added (#13082)

* service: storage monitor added

Storage monitor added. It uses `notify` create to get notifications
about any changes to monitored path (which is database path).
Notifications are consumed in essential task which terminates when
available storage space drops below given threshold.

Closes: #12399

* Cargo.lock updated

* misspell

* fs events throttling added

* minor updates

* filter out non mutating events

* misspell

* ".git/.scripts/commands/fmt/fmt.sh"

* Update client/service/src/storage_monitor.rs

Co-authored-by: Anton <anton.kalyaev@gmail.com>

* storage-monitor crate added

* cleanup: configuration + service builder

* storage_monitor in custom service (wip)

* copy-paste bad desc fixed

* notify removed

* storage_monitor added to node

* fix for clippy

* publish = false

* Update bin/node/cli/src/command.rs

Co-authored-by: Dmitry Markin <dmitry@markin.tech>

* Apply suggestions from code review

Co-authored-by: Bastian Köcher <git@kchr.de>

* crate name: storage-monitor -> sc-storage-monitor

* error handling improved

* Apply suggestions from code review

Co-authored-by: Bastian Köcher <git@kchr.de>

* publish=false removed

Co-authored-by: command-bot <>
Co-authored-by: Anton <anton.kalyaev@gmail.com>
Co-authored-by: Dmitry Markin <dmitry@markin.tech>
Co-authored-by: Bastian Köcher <git@kchr.de>
This commit is contained in:
Michal Kucharczyk
2023-01-24 14:26:04 +01:00
committed by GitHub
parent 025f9d9ba3
commit 14a4eed2aa
11 changed files with 227 additions and 10 deletions
+31
View File
@@ -4665,6 +4665,20 @@ dependencies = [
"memoffset 0.6.5",
]
[[package]]
name = "nix"
version = "0.26.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46a58d1d356c6597d08cde02c2f09d785b09e28711837b1ed667dc652c08a694"
dependencies = [
"bitflags",
"cfg-if",
"libc",
"memoffset 0.7.1",
"pin-utils",
"static_assertions",
]
[[package]]
name = "node-bench"
version = "0.9.0-dev"
@@ -4752,6 +4766,7 @@ dependencies = [
"sc-rpc",
"sc-service",
"sc-service-test",
"sc-storage-monitor",
"sc-sync-state-rpc",
"sc-sysinfo",
"sc-telemetry",
@@ -8895,6 +8910,7 @@ dependencies = [
"sc-rpc",
"sc-rpc-server",
"sc-rpc-spec-v2",
"sc-storage-monitor",
"sc-sysinfo",
"sc-telemetry",
"sc-tracing",
@@ -8973,6 +8989,21 @@ dependencies = [
"sp-core",
]
[[package]]
name = "sc-storage-monitor"
version = "0.1.0"
dependencies = [
"clap 4.0.32",
"futures",
"log",
"nix 0.26.1",
"sc-client-db",
"sc-utils",
"sp-core",
"thiserror",
"tokio",
]
[[package]]
name = "sc-sync-state-rpc"
version = "0.10.0-dev"
+1
View File
@@ -64,6 +64,7 @@ members = [
"client/service",
"client/service/test",
"client/state-db",
"client/storage-monitor",
"client/sysinfo",
"client/sync-state-rpc",
"client/telemetry",
+2
View File
@@ -81,6 +81,7 @@ sc-executor = { version = "0.10.0-dev", path = "../../../client/executor" }
sc-authority-discovery = { version = "0.10.0-dev", path = "../../../client/authority-discovery" }
sc-sync-state-rpc = { version = "0.10.0-dev", path = "../../../client/sync-state-rpc" }
sc-sysinfo = { version = "6.0.0-dev", path = "../../../client/sysinfo" }
sc-storage-monitor = { version = "0.1.0", path = "../../../client/storage-monitor" }
# frame dependencies
frame-system = { version = "4.0.0-dev", path = "../../../frame/system" }
@@ -138,6 +139,7 @@ substrate-frame-cli = { version = "4.0.0-dev", optional = true, path = "../../..
try-runtime-cli = { version = "0.10.0-dev", optional = true, path = "../../../utils/frame/try-runtime/cli" }
sc-cli = { version = "0.10.0-dev", path = "../../../client/cli", optional = true }
pallet-balances = { version = "4.0.0-dev", path = "../../../frame/balances" }
sc-storage-monitor = { version = "0.1.0", path = "../../../client/storage-monitor" }
[features]
default = ["cli"]
+4
View File
@@ -36,6 +36,10 @@ pub struct Cli {
/// telemetry, if telemetry is enabled.
#[arg(long)]
pub no_hardware_benchmarks: bool,
#[allow(missing_docs)]
#[clap(flatten)]
pub storage_monitor: sc_storage_monitor::StorageMonitorParams,
}
/// Possible subcommands of the main binary.
+1 -2
View File
@@ -87,8 +87,7 @@ pub fn run() -> Result<()> {
None => {
let runner = cli.create_runner(&cli.run)?;
runner.run_node_until_exit(|config| async move {
service::new_full(config, cli.no_hardware_benchmarks)
.map_err(sc_cli::Error::Service)
service::new_full(config, cli).map_err(sc_cli::Error::Service)
})
},
Some(Subcommand::Inspect(cmd)) => {
+13 -6
View File
@@ -20,6 +20,7 @@
//! Service implementation. Specialized wrapper over substrate service.
use crate::Cli;
use codec::Encode;
use frame_benchmarking_cli::SUBSTRATE_REFERENCE_HARDWARE;
use frame_system_rpc_runtime_api::AccountNonceApi;
@@ -556,12 +557,18 @@ pub fn new_full_base(
}
/// Builds a new service for a full client.
pub fn new_full(
config: Configuration,
disable_hardware_benchmarks: bool,
) -> Result<TaskManager, ServiceError> {
new_full_base(config, disable_hardware_benchmarks, |_, _| ())
.map(|NewFullBase { task_manager, .. }| task_manager)
pub fn new_full(config: Configuration, cli: Cli) -> Result<TaskManager, ServiceError> {
let database_source = config.database.clone();
let task_manager = new_full_base(config, cli.no_hardware_benchmarks, |_, _| ())
.map(|NewFullBase { task_manager, .. }| task_manager)?;
sc_storage_monitor::StorageMonitorService::try_spawn(
cli.storage_monitor,
database_source,
&task_manager.spawn_essential_handle(),
)?;
Ok(task_manager)
}
#[cfg(test)]
@@ -19,7 +19,7 @@
use crate::arg_enums::Database;
use clap::Args;
/// Parameters for block import.
/// Parameters for database
#[derive(Debug, Clone, PartialEq, Args)]
pub struct DatabaseParams {
/// Select database backend to use.
@@ -32,7 +32,7 @@ pub struct DatabaseParams {
}
impl DatabaseParams {
/// Limit the memory the database cache can use.
/// Database backend
pub fn database(&self) -> Option<Database> {
self.database
}
+1
View File
@@ -73,6 +73,7 @@ sc-offchain = { version = "4.0.0-dev", path = "../offchain" }
prometheus-endpoint = { package = "substrate-prometheus-endpoint", path = "../../utils/prometheus", version = "0.10.0-dev" }
sc-tracing = { version = "4.0.0-dev", path = "../tracing" }
sc-sysinfo = { version = "6.0.0-dev", path = "../sysinfo" }
sc-storage-monitor = { version = "0.1.0", path = "../storage-monitor" }
tracing = "0.1.29"
tracing-futures = { version = "0.2.4" }
async-trait = "0.1.57"
+3
View File
@@ -48,6 +48,9 @@ pub enum Error {
#[error(transparent)]
Telemetry(#[from] sc_telemetry::Error),
#[error(transparent)]
Storage(#[from] sc_storage_monitor::Error),
#[error("Best chain selection strategy (SelectChain) is not provided.")]
SelectChainRequired,
@@ -0,0 +1,20 @@
[package]
name = "sc-storage-monitor"
version = "0.1.0"
authors = ["Parity Technologies <admin@parity.io>"]
edition = "2021"
license = "GPL-3.0-or-later WITH Classpath-exception-2.0"
repository = "https://github.com/paritytech/substrate"
description = "Storage monitor service for substrate"
homepage = "https://substrate.io"
[dependencies]
clap = { version = "4.0.9", features = ["derive", "string"] }
futures = "0.3.21"
log = "0.4.17"
nix = { version = "0.26.1", features = ["fs"] }
sc-client-db = { version = "0.10.0-dev", default-features = false, path = "../db" }
sc-utils = { version = "4.0.0-dev", path = "../utils" }
sp-core = { version = "7.0.0", path = "../../primitives/core" }
tokio = "1.22.0"
thiserror = "1.0.30"
+149
View File
@@ -0,0 +1,149 @@
// This file is part of Substrate.
// Copyright (C) 2022 Parity Technologies (UK) Ltd.
// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
use clap::Args;
use nix::{errno::Errno, sys::statvfs::statvfs};
use sc_client_db::DatabaseSource;
use sp_core::traits::SpawnEssentialNamed;
use std::{
path::{Path, PathBuf},
time::Duration,
};
const LOG_TARGET: &str = "storage-monitor";
/// Error type used in this crate.
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("IO Error")]
IOError(#[from] Errno),
#[error("Out of storage space: available {0}MB, required {1}MB")]
StorageOutOfSpace(u64, u64),
}
/// Parameters used to create the storage monitor.
#[derive(Default, Debug, Clone, Args)]
pub struct StorageMonitorParams {
/// Required available space on database storage. If available space for DB storage drops below
/// the given threshold, node will be gracefully terminated. If `0` is given monitoring will be
/// disabled.
#[arg(long = "db-storage-threshold", value_name = "MB", default_value_t = 1000)]
pub threshold: u64,
/// How often available space is polled.
#[arg(long = "db-storage-polling-period", value_name = "SECONDS", default_value_t = 5, value_parser = clap::value_parser!(u32).range(1..))]
pub polling_period: u32,
}
/// Storage monitor service: checks the available space for the filesystem for fiven path.
pub struct StorageMonitorService {
/// watched path
path: PathBuf,
/// number of megabytes that shall be free on the filesystem for watched path
threshold: u64,
/// storage space polling period (seconds)
polling_period: u32,
}
impl StorageMonitorService {
/// Creates new StorageMonitorService for given client config
pub fn try_spawn(
parameters: StorageMonitorParams,
database: DatabaseSource,
spawner: &impl SpawnEssentialNamed,
) -> Result<(), Error> {
Ok(match (parameters.threshold, database.path()) {
(0, _) => {
log::info!(
target: LOG_TARGET,
"StorageMonitorService: threshold `0` given, storage monitoring disabled",
);
},
(_, None) => {
log::warn!(
target: LOG_TARGET,
"StorageMonitorService: no database path to observe",
);
},
(threshold, Some(path)) => {
log::debug!(
target: LOG_TARGET,
"Initializing StorageMonitorService for db path: {:?}",
path,
);
Self::check_free_space(&path, threshold)?;
let storage_monitor_service = StorageMonitorService {
path: path.to_path_buf(),
threshold,
polling_period: parameters.polling_period,
};
spawner.spawn_essential(
"storage-monitor",
None,
Box::pin(storage_monitor_service.run()),
);
},
})
}
/// Main monitoring loop, intended to be spawned as essential task. Quits if free space drop
/// below threshold.
async fn run(self) {
loop {
tokio::time::sleep(Duration::from_secs(self.polling_period.into())).await;
if Self::check_free_space(&self.path, self.threshold).is_err() {
break
};
}
}
/// Returns free space in MB, or error if statvfs failed.
fn free_space(path: &Path) -> Result<u64, Error> {
statvfs(path)
.map(|stats| stats.blocks_available() * stats.block_size() / 1_000_000)
.map_err(Error::from)
}
/// Checks if the amount of free space for given `path` is above given `threshold`.
/// If it dropped below, error is returned.
/// System errors are silently ignored.
fn check_free_space(path: &Path, threshold: u64) -> Result<(), Error> {
match StorageMonitorService::free_space(path) {
Ok(available_space) => {
log::trace!(
target: LOG_TARGET,
"free: {available_space} , threshold: {threshold}.",
);
if available_space < threshold {
log::error!(target: LOG_TARGET, "Available space {available_space}MB for path `{}` dropped below threshold: {threshold}MB , terminating...", path.display());
Err(Error::StorageOutOfSpace(available_space, threshold))
} else {
Ok(())
}
},
Err(e) => {
log::error!(target: LOG_TARGET, "Could not read available space: {:?}.", e);
Err(e)
},
}
}
}