mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-12 17:01:09 +00:00
Additional Metrics collected and exposed via prometheus (#5414)
This PR refactors the component in sc-service that measures metrics and exposes them via Prometheus into its own submodule, and extends the set of exposed parameters with: - system load average (over one, five and fifteen minutes) - the TCP connection state of the process (as reported by lsof), refs #5304 - number of tokio threads - number of known forks - a counter for the items in each unbounded queue (using internal unbounded channels) - number of file descriptors opened by this process (*nix only at this point) - number of system threads (*nix only at this point), refs #4679 Co-authored-by: Max Inden <mail@max-inden.de> Co-authored-by: Ashley <ashley.ruglys@gmail.com>
This commit is contained in:
committed by
GitHub
parent
6847f8452e
commit
247822bb33
@@ -18,6 +18,7 @@ use crate::{Service, NetworkStatus, NetworkState, error::Error, DEFAULT_PROTOCOL
|
||||
use crate::{TaskManagerBuilder, start_rpc_servers, build_network_future, TransactionPoolAdapter};
|
||||
use crate::status_sinks;
|
||||
use crate::config::{Configuration, DatabaseConfig, KeystoreConfig, PrometheusConfig};
|
||||
use crate::metrics::MetricsService;
|
||||
use sc_client_api::{
|
||||
self,
|
||||
BlockchainEvents,
|
||||
@@ -25,12 +26,12 @@ use sc_client_api::{
|
||||
execution_extensions::ExtensionsFactory,
|
||||
ExecutorProvider, CallExecutor
|
||||
};
|
||||
use sp_utils::mpsc::{tracing_unbounded, TracingUnboundedSender};
|
||||
use sc_client::Client;
|
||||
use sc_chain_spec::get_extension;
|
||||
use sp_consensus::import_queue::ImportQueue;
|
||||
use futures::{
|
||||
Future, FutureExt, StreamExt,
|
||||
channel::mpsc,
|
||||
future::ready,
|
||||
};
|
||||
use sc_keystore::{Store as Keystore};
|
||||
@@ -40,7 +41,7 @@ use sc_network::{NetworkService, NetworkStateInfo};
|
||||
use parking_lot::{Mutex, RwLock};
|
||||
use sp_runtime::generic::BlockId;
|
||||
use sp_runtime::traits::{
|
||||
Block as BlockT, NumberFor, SaturatedConversion, HashFor, UniqueSaturatedInto,
|
||||
Block as BlockT, NumberFor, SaturatedConversion, HashFor,
|
||||
};
|
||||
use sp_api::ProvideRuntimeApi;
|
||||
use sc_executor::{NativeExecutor, NativeExecutionDispatch};
|
||||
@@ -49,56 +50,9 @@ use std::{
|
||||
marker::PhantomData, sync::Arc, pin::Pin
|
||||
};
|
||||
use wasm_timer::SystemTime;
|
||||
use sysinfo::{get_current_pid, ProcessExt, System, SystemExt};
|
||||
use sc_telemetry::{telemetry, SUBSTRATE_INFO};
|
||||
use sp_transaction_pool::{MaintainedTransactionPool, ChainEvent};
|
||||
use sp_blockchain;
|
||||
use prometheus_endpoint::{register, Gauge, U64, F64, Registry, PrometheusError, Opts, GaugeVec};
|
||||
|
||||
struct ServiceMetrics {
|
||||
block_height_number: GaugeVec<U64>,
|
||||
ready_transactions_number: Gauge<U64>,
|
||||
memory_usage_bytes: Gauge<U64>,
|
||||
cpu_usage_percentage: Gauge<F64>,
|
||||
network_per_sec_bytes: GaugeVec<U64>,
|
||||
database_cache: Gauge<U64>,
|
||||
state_cache: Gauge<U64>,
|
||||
state_db: GaugeVec<U64>,
|
||||
}
|
||||
|
||||
impl ServiceMetrics {
|
||||
fn register(registry: &Registry) -> Result<Self, PrometheusError> {
|
||||
Ok(Self {
|
||||
block_height_number: register(GaugeVec::new(
|
||||
Opts::new("block_height_number", "Height of the chain"),
|
||||
&["status"]
|
||||
)?, registry)?,
|
||||
ready_transactions_number: register(Gauge::new(
|
||||
"ready_transactions_number", "Number of transactions in the ready queue",
|
||||
)?, registry)?,
|
||||
memory_usage_bytes: register(Gauge::new(
|
||||
"memory_usage_bytes", "Node memory (resident set size) usage",
|
||||
)?, registry)?,
|
||||
cpu_usage_percentage: register(Gauge::new(
|
||||
"cpu_usage_percentage", "Node CPU usage",
|
||||
)?, registry)?,
|
||||
network_per_sec_bytes: register(GaugeVec::new(
|
||||
Opts::new("network_per_sec_bytes", "Networking bytes per second"),
|
||||
&["direction"]
|
||||
)?, registry)?,
|
||||
database_cache: register(Gauge::new(
|
||||
"database_cache_bytes", "RocksDB cache size in bytes",
|
||||
)?, registry)?,
|
||||
state_cache: register(Gauge::new(
|
||||
"state_cache_bytes", "State cache size in bytes",
|
||||
)?, registry)?,
|
||||
state_db: register(GaugeVec::new(
|
||||
Opts::new("state_db_cache_bytes", "State DB cache in bytes"),
|
||||
&["subtype"]
|
||||
)?, registry)?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub type BackgroundTask = Pin<Box<dyn Future<Output=()> + Send>>;
|
||||
|
||||
@@ -820,7 +774,7 @@ ServiceBuilder<
|
||||
)?;
|
||||
|
||||
// A side-channel for essential tasks to communicate shutdown.
|
||||
let (essential_failed_tx, essential_failed_rx) = mpsc::unbounded();
|
||||
let (essential_failed_tx, essential_failed_rx) = tracing_unbounded("mpsc_essential_tasks");
|
||||
|
||||
let import_queue = Box::new(import_queue);
|
||||
let chain_info = client.chain_info();
|
||||
@@ -992,122 +946,44 @@ ServiceBuilder<
|
||||
}
|
||||
|
||||
// Prometheus metrics.
|
||||
let metrics = if let Some(PrometheusConfig { port, registry }) = config.prometheus_config.clone() {
|
||||
let mut metrics_service = if let Some(PrometheusConfig { port, registry }) = config.prometheus_config.clone() {
|
||||
// Set static metrics.
|
||||
register(Gauge::<U64>::with_opts(
|
||||
Opts::new(
|
||||
"build_info",
|
||||
"A metric with a constant '1' value labeled by name, version, and commit."
|
||||
)
|
||||
.const_label("name", config.impl_name)
|
||||
.const_label("version", config.impl_version)
|
||||
.const_label("commit", config.impl_commit),
|
||||
)?, &registry)?.set(1);
|
||||
|
||||
|
||||
let role_bits = match config.role {
|
||||
Role::Full => 1,
|
||||
Role::Light => 2,
|
||||
Role::Sentry { .. } => 3,
|
||||
Role::Authority { .. } => 4,
|
||||
Role::Full => 1u64,
|
||||
Role::Light => 2u64,
|
||||
Role::Sentry { .. } => 3u64,
|
||||
Role::Authority { .. } => 4u64,
|
||||
};
|
||||
register(Gauge::<U64>::new(
|
||||
"node_role", "The role the node is running as",
|
||||
)?, &registry)?.set(role_bits);
|
||||
|
||||
let metrics = ServiceMetrics::register(&registry)?;
|
||||
|
||||
let metrics = MetricsService::with_prometheus(
|
||||
&registry,
|
||||
&config.name,
|
||||
&config.impl_version,
|
||||
role_bits,
|
||||
)?;
|
||||
spawn_handle.spawn(
|
||||
"prometheus-endpoint",
|
||||
prometheus_endpoint::init_prometheus(port, registry).map(drop)
|
||||
);
|
||||
|
||||
Some(metrics)
|
||||
metrics
|
||||
} else {
|
||||
None
|
||||
MetricsService::new()
|
||||
};
|
||||
|
||||
// Periodically notify the telemetry.
|
||||
let transaction_pool_ = transaction_pool.clone();
|
||||
let client_ = client.clone();
|
||||
let mut sys = System::new();
|
||||
let self_pid = get_current_pid().ok();
|
||||
let (state_tx, state_rx) = mpsc::unbounded::<(NetworkStatus<_>, NetworkState)>();
|
||||
let (state_tx, state_rx) = tracing_unbounded::<(NetworkStatus<_>, NetworkState)>("mpsc_netstat1");
|
||||
network_status_sinks.lock().push(std::time::Duration::from_millis(5000), state_tx);
|
||||
let tel_task = state_rx.for_each(move |(net_status, _)| {
|
||||
let info = client_.usage_info();
|
||||
let best_number = info.chain.best_number.saturated_into::<u64>();
|
||||
let best_hash = info.chain.best_hash;
|
||||
let num_peers = net_status.num_connected_peers;
|
||||
let txpool_status = transaction_pool_.status();
|
||||
let finalized_number: u64 = info.chain.finalized_number.saturated_into::<u64>();
|
||||
let bandwidth_download = net_status.average_download_per_sec;
|
||||
let bandwidth_upload = net_status.average_upload_per_sec;
|
||||
let best_seen_block = net_status.best_seen_block
|
||||
.map(|num: NumberFor<TBl>| num.unique_saturated_into() as u64);
|
||||
|
||||
// get cpu usage and memory usage of this process
|
||||
let (cpu_usage, memory) = if let Some(self_pid) = self_pid {
|
||||
if sys.refresh_process(self_pid) {
|
||||
let proc = sys.get_process(self_pid)
|
||||
.expect("Above refresh_process succeeds, this should be Some(), qed");
|
||||
(proc.cpu_usage(), proc.memory())
|
||||
} else { (0.0, 0) }
|
||||
} else { (0.0, 0) };
|
||||
|
||||
telemetry!(
|
||||
SUBSTRATE_INFO;
|
||||
"system.interval";
|
||||
"peers" => num_peers,
|
||||
"height" => best_number,
|
||||
"best" => ?best_hash,
|
||||
"txcount" => txpool_status.ready,
|
||||
"cpu" => cpu_usage,
|
||||
"memory" => memory,
|
||||
"finalized_height" => finalized_number,
|
||||
"finalized_hash" => ?info.chain.finalized_hash,
|
||||
"bandwidth_download" => bandwidth_download,
|
||||
"bandwidth_upload" => bandwidth_upload,
|
||||
"used_state_cache_size" => info.usage.as_ref()
|
||||
.map(|usage| usage.memory.state_cache.as_bytes())
|
||||
.unwrap_or(0),
|
||||
"used_db_cache_size" => info.usage.as_ref()
|
||||
.map(|usage| usage.memory.database_cache.as_bytes())
|
||||
.unwrap_or(0),
|
||||
"disk_read_per_sec" => info.usage.as_ref()
|
||||
.map(|usage| usage.io.bytes_read)
|
||||
.unwrap_or(0),
|
||||
"disk_write_per_sec" => info.usage.as_ref()
|
||||
.map(|usage| usage.io.bytes_written)
|
||||
.unwrap_or(0),
|
||||
metrics_service.tick(
|
||||
&info,
|
||||
&transaction_pool_.status(),
|
||||
&net_status,
|
||||
);
|
||||
if let Some(metrics) = metrics.as_ref() {
|
||||
// `sysinfo::Process::memory` returns memory usage in KiB and not bytes.
|
||||
metrics.memory_usage_bytes.set(memory * 1024);
|
||||
metrics.cpu_usage_percentage.set(f64::from(cpu_usage));
|
||||
metrics.ready_transactions_number.set(txpool_status.ready as u64);
|
||||
|
||||
metrics.network_per_sec_bytes.with_label_values(&["download"]).set(net_status.average_download_per_sec);
|
||||
metrics.network_per_sec_bytes.with_label_values(&["upload"]).set(net_status.average_upload_per_sec);
|
||||
|
||||
metrics.block_height_number.with_label_values(&["finalized"]).set(finalized_number);
|
||||
metrics.block_height_number.with_label_values(&["best"]).set(best_number);
|
||||
|
||||
if let Some(best_seen_block) = best_seen_block {
|
||||
metrics.block_height_number.with_label_values(&["sync_target"]).set(best_seen_block);
|
||||
}
|
||||
|
||||
if let Some(info) = info.usage.as_ref() {
|
||||
metrics.database_cache.set(info.memory.database_cache.as_bytes() as u64);
|
||||
metrics.state_cache.set(info.memory.state_cache.as_bytes() as u64);
|
||||
|
||||
metrics.state_db.with_label_values(&["non_canonical"]).set(info.memory.state_db.non_canonical.as_bytes() as u64);
|
||||
if let Some(pruning) = info.memory.state_db.pruning {
|
||||
metrics.state_db.with_label_values(&["pruning"]).set(pruning.as_bytes() as u64);
|
||||
}
|
||||
metrics.state_db.with_label_values(&["pinned"]).set(info.memory.state_db.pinned.as_bytes() as u64);
|
||||
}
|
||||
}
|
||||
|
||||
ready(())
|
||||
});
|
||||
|
||||
@@ -1117,7 +993,7 @@ ServiceBuilder<
|
||||
);
|
||||
|
||||
// Periodically send the network state to the telemetry.
|
||||
let (netstat_tx, netstat_rx) = mpsc::unbounded::<(NetworkStatus<_>, NetworkState)>();
|
||||
let (netstat_tx, netstat_rx) = tracing_unbounded::<(NetworkStatus<_>, NetworkState)>("mpsc_netstat2");
|
||||
network_status_sinks.lock().push(std::time::Duration::from_secs(30), netstat_tx);
|
||||
let tel_task_2 = netstat_rx.for_each(move |(_, network_state)| {
|
||||
telemetry!(
|
||||
@@ -1133,7 +1009,7 @@ ServiceBuilder<
|
||||
);
|
||||
|
||||
// RPC
|
||||
let (system_rpc_tx, system_rpc_rx) = mpsc::unbounded();
|
||||
let (system_rpc_tx, system_rpc_rx) = tracing_unbounded("mpsc_system_rpc");
|
||||
let gen_handler = || {
|
||||
use sc_rpc::{chain, state, author, system, offchain};
|
||||
|
||||
@@ -1215,7 +1091,7 @@ ServiceBuilder<
|
||||
),
|
||||
);
|
||||
|
||||
let telemetry_connection_sinks: Arc<Mutex<Vec<futures::channel::mpsc::UnboundedSender<()>>>> = Default::default();
|
||||
let telemetry_connection_sinks: Arc<Mutex<Vec<TracingUnboundedSender<()>>>> = Default::default();
|
||||
|
||||
// Telemetry
|
||||
let telemetry = config.telemetry_endpoints.clone().map(|endpoints| {
|
||||
|
||||
@@ -24,6 +24,7 @@ pub mod config;
|
||||
pub mod chain_ops;
|
||||
pub mod error;
|
||||
|
||||
mod metrics;
|
||||
mod builder;
|
||||
mod status_sinks;
|
||||
mod task_manager;
|
||||
@@ -40,7 +41,6 @@ use parking_lot::Mutex;
|
||||
use sc_client::Client;
|
||||
use futures::{
|
||||
Future, FutureExt, Stream, StreamExt,
|
||||
channel::mpsc,
|
||||
compat::*,
|
||||
sink::SinkExt,
|
||||
task::{Spawn, FutureObj, SpawnError},
|
||||
@@ -51,6 +51,7 @@ use codec::{Encode, Decode};
|
||||
use sp_runtime::generic::BlockId;
|
||||
use sp_runtime::traits::{NumberFor, Block as BlockT};
|
||||
use parity_util_mem::MallocSizeOf;
|
||||
use sp_utils::mpsc::{tracing_unbounded, TracingUnboundedReceiver, TracingUnboundedSender};
|
||||
|
||||
pub use self::error::Error;
|
||||
pub use self::builder::{
|
||||
@@ -98,13 +99,13 @@ pub struct Service<TBl, TCl, TSc, TNetStatus, TNet, TTxPool, TOc> {
|
||||
transaction_pool: Arc<TTxPool>,
|
||||
/// Send a signal when a spawned essential task has concluded. The next time
|
||||
/// the service future is polled it should complete with an error.
|
||||
essential_failed_tx: mpsc::UnboundedSender<()>,
|
||||
essential_failed_tx: TracingUnboundedSender<()>,
|
||||
/// A receiver for spawned essential-tasks concluding.
|
||||
essential_failed_rx: mpsc::UnboundedReceiver<()>,
|
||||
essential_failed_rx: TracingUnboundedReceiver<()>,
|
||||
rpc_handlers: sc_rpc_server::RpcHandler<sc_rpc::Metadata>,
|
||||
_rpc: Box<dyn std::any::Any + Send + Sync>,
|
||||
_telemetry: Option<sc_telemetry::Telemetry>,
|
||||
_telemetry_on_connect_sinks: Arc<Mutex<Vec<futures::channel::mpsc::UnboundedSender<()>>>>,
|
||||
_telemetry_on_connect_sinks: Arc<Mutex<Vec<TracingUnboundedSender<()>>>>,
|
||||
_offchain_workers: Option<Arc<TOc>>,
|
||||
keystore: sc_keystore::KeyStorePtr,
|
||||
marker: PhantomData<TBl>,
|
||||
@@ -130,7 +131,7 @@ pub trait AbstractService: 'static + Future<Output = Result<(), Error>> +
|
||||
type TransactionPool: TransactionPool<Block = Self::Block> + MallocSizeOfWasm;
|
||||
|
||||
/// Get event stream for telemetry connection established events.
|
||||
fn telemetry_on_connect_stream(&self) -> futures::channel::mpsc::UnboundedReceiver<()>;
|
||||
fn telemetry_on_connect_stream(&self) -> TracingUnboundedReceiver<()>;
|
||||
|
||||
/// return a shared instance of Telemetry (if enabled)
|
||||
fn telemetry(&self) -> Option<sc_telemetry::Telemetry>;
|
||||
@@ -171,7 +172,7 @@ pub trait AbstractService: 'static + Future<Output = Result<(), Error>> +
|
||||
-> Arc<NetworkService<Self::Block, <Self::Block as BlockT>::Hash>>;
|
||||
|
||||
/// Returns a receiver that periodically receives a status of the network.
|
||||
fn network_status(&self, interval: Duration) -> mpsc::UnboundedReceiver<(NetworkStatus<Self::Block>, NetworkState)>;
|
||||
fn network_status(&self, interval: Duration) -> TracingUnboundedReceiver<(NetworkStatus<Self::Block>, NetworkState)>;
|
||||
|
||||
/// Get shared transaction pool instance.
|
||||
fn transaction_pool(&self) -> Arc<Self::TransactionPool>;
|
||||
@@ -203,8 +204,8 @@ where
|
||||
type SelectChain = TSc;
|
||||
type TransactionPool = TExPool;
|
||||
|
||||
fn telemetry_on_connect_stream(&self) -> futures::channel::mpsc::UnboundedReceiver<()> {
|
||||
let (sink, stream) = futures::channel::mpsc::unbounded();
|
||||
fn telemetry_on_connect_stream(&self) -> TracingUnboundedReceiver<()> {
|
||||
let (sink, stream) = tracing_unbounded("mpsc_telemetry_on_connect");
|
||||
self._telemetry_on_connect_sinks.lock().push(sink);
|
||||
stream
|
||||
}
|
||||
@@ -259,8 +260,8 @@ where
|
||||
self.network.clone()
|
||||
}
|
||||
|
||||
fn network_status(&self, interval: Duration) -> mpsc::UnboundedReceiver<(NetworkStatus<Self::Block>, NetworkState)> {
|
||||
let (sink, stream) = mpsc::unbounded();
|
||||
fn network_status(&self, interval: Duration) -> TracingUnboundedReceiver<(NetworkStatus<Self::Block>, NetworkState)> {
|
||||
let (sink, stream) = tracing_unbounded("mpsc_network_status");
|
||||
self.network_status_sinks.lock().push(interval, sink);
|
||||
stream
|
||||
}
|
||||
@@ -326,7 +327,7 @@ fn build_network_future<
|
||||
mut network: sc_network::NetworkWorker<B, H>,
|
||||
client: Arc<C>,
|
||||
status_sinks: Arc<Mutex<status_sinks::StatusSinks<(NetworkStatus<B>, NetworkState)>>>,
|
||||
mut rpc_rx: mpsc::UnboundedReceiver<sc_rpc::system::Request<B>>,
|
||||
mut rpc_rx: TracingUnboundedReceiver<sc_rpc::system::Request<B>>,
|
||||
should_have_peers: bool,
|
||||
announce_imported_blocks: bool,
|
||||
) -> impl Future<Output = ()> {
|
||||
|
||||
@@ -0,0 +1,428 @@
|
||||
// Copyright 2020 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Substrate.
|
||||
|
||||
// Substrate is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Substrate is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::NetworkStatus;
|
||||
use prometheus_endpoint::{register, Gauge, U64, F64, Registry, PrometheusError, Opts, GaugeVec};
|
||||
use sc_client::ClientInfo;
|
||||
use sc_telemetry::{telemetry, SUBSTRATE_INFO};
|
||||
use std::convert::TryFrom;
|
||||
use sp_runtime::traits::{NumberFor, Block, SaturatedConversion, UniqueSaturatedInto};
|
||||
use sp_transaction_pool::PoolStatus;
|
||||
use sp_utils::metrics::register_globals;
|
||||
|
||||
#[cfg(any(windows, unix))]
|
||||
use sysinfo::{ProcessExt, System, SystemExt};
|
||||
|
||||
#[cfg(any(unix, windows))]
|
||||
use netstat2::{TcpState, ProtocolSocketInfo, iterate_sockets_info, AddressFamilyFlags, ProtocolFlags};
|
||||
|
||||
#[cfg(not(unix))]
|
||||
use sysinfo::get_current_pid;
|
||||
|
||||
#[cfg(unix)]
|
||||
use procfs;
|
||||
|
||||
struct PrometheusMetrics {
|
||||
// system
|
||||
#[cfg(any(unix, windows))]
|
||||
load_avg: GaugeVec<F64>,
|
||||
|
||||
// process
|
||||
cpu_usage_percentage: Gauge<F64>,
|
||||
memory_usage_bytes: Gauge<U64>,
|
||||
threads: Gauge<U64>,
|
||||
open_files: GaugeVec<U64>,
|
||||
|
||||
#[cfg(any(unix, windows))]
|
||||
netstat: GaugeVec<U64>,
|
||||
|
||||
// -- inner counters
|
||||
// generic info
|
||||
block_height: GaugeVec<U64>,
|
||||
number_leaves: Gauge<U64>,
|
||||
ready_transactions_number: Gauge<U64>,
|
||||
|
||||
// I/O
|
||||
network_per_sec_bytes: GaugeVec<U64>,
|
||||
database_cache: Gauge<U64>,
|
||||
state_cache: Gauge<U64>,
|
||||
state_db: GaugeVec<U64>,
|
||||
}
|
||||
|
||||
impl PrometheusMetrics {
|
||||
fn setup(registry: &Registry, name: &str, version: &str, roles: u64)
|
||||
-> Result<Self, PrometheusError>
|
||||
{
|
||||
register(Gauge::<U64>::with_opts(
|
||||
Opts::new(
|
||||
"build_info",
|
||||
"A metric with a constant '1' value labeled by name, version"
|
||||
)
|
||||
.const_label("name", name)
|
||||
.const_label("version", version)
|
||||
)?, ®istry)?.set(1);
|
||||
|
||||
register(Gauge::<U64>::new(
|
||||
"node_roles", "The roles the node is running as",
|
||||
)?, ®istry)?.set(roles);
|
||||
|
||||
register_globals(registry)?;
|
||||
|
||||
Ok(Self {
|
||||
// system
|
||||
#[cfg(any(unix, windows))]
|
||||
load_avg: register(GaugeVec::new(
|
||||
Opts::new("load_avg", "System load average"),
|
||||
&["over"]
|
||||
)?, registry)?,
|
||||
|
||||
// process
|
||||
memory_usage_bytes: register(Gauge::new(
|
||||
"memory_usage_bytes", "Node memory (resident set size) usage",
|
||||
)?, registry)?,
|
||||
|
||||
cpu_usage_percentage: register(Gauge::new(
|
||||
"cpu_usage_percentage", "Node CPU usage",
|
||||
)?, registry)?,
|
||||
|
||||
#[cfg(any(unix, windows))]
|
||||
netstat: register(GaugeVec::new(
|
||||
Opts::new("netstat_tcp", "Current TCP connections "),
|
||||
&["status"]
|
||||
)?, registry)?,
|
||||
|
||||
threads: register(Gauge::new(
|
||||
"threads", "Number of threads used by the process",
|
||||
)?, registry)?,
|
||||
|
||||
open_files: register(GaugeVec::new(
|
||||
Opts::new("open_file_handles", "Open file handlers held by the process"),
|
||||
&["fd_type"]
|
||||
)?, registry)?,
|
||||
|
||||
// --- internal
|
||||
|
||||
// generic internals
|
||||
|
||||
block_height: register(GaugeVec::new(
|
||||
Opts::new("block_height", "Block height info of the chain"),
|
||||
&["status"]
|
||||
)?, registry)?,
|
||||
|
||||
number_leaves: register(Gauge::new(
|
||||
"number_leaves", "Number of known chain leaves (aka forks)",
|
||||
)?, registry)?,
|
||||
|
||||
ready_transactions_number: register(Gauge::new(
|
||||
"ready_transactions_number", "Number of transactions in the ready queue",
|
||||
)?, registry)?,
|
||||
|
||||
// I/ O
|
||||
|
||||
network_per_sec_bytes: register(GaugeVec::new(
|
||||
Opts::new("network_per_sec_bytes", "Networking bytes per second"),
|
||||
&["direction"]
|
||||
)?, registry)?,
|
||||
database_cache: register(Gauge::new(
|
||||
"database_cache_bytes", "RocksDB cache size in bytes",
|
||||
)?, registry)?,
|
||||
state_cache: register(Gauge::new(
|
||||
"state_cache_bytes", "State cache size in bytes",
|
||||
)?, registry)?,
|
||||
state_db: register(GaugeVec::new(
|
||||
Opts::new("state_db_cache_bytes", "State DB cache in bytes"),
|
||||
&["subtype"]
|
||||
)?, registry)?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Number of TCP sockets owned by this process, grouped by connection state.
#[cfg(any(unix, windows))]
#[derive(Default)]
struct ConnectionsCount {
    /// Sockets in the `Listen` state.
    listen: u64,
    /// Sockets in the `Established` state.
    established: u64,
    /// Sockets in the `SynSent` or `SynReceived` state.
    starting: u64,
    /// Sockets in the `FinWait1`, `FinWait2`, `CloseWait`, `Closing` or `LastAck` state.
    closing: u64,
    /// Sockets in the `Closed` state.
    closed: u64,
    /// Sockets in any other state.
    other: u64,
}
|
||||
|
||||
/// Number of open file descriptors of this process, grouped by the kind of target.
#[derive(Default)]
struct FdCounter {
    /// Descriptors whose target is a filesystem path.
    paths: u64,
    /// Descriptors whose target is a socket.
    sockets: u64,
    /// Descriptors whose target is a `net` entry.
    net: u64,
    /// Descriptors whose target is a pipe.
    pipes: u64,
    /// Descriptors whose target is an anonymous inode.
    anon_inode: u64,
    /// Descriptors whose target is a memfd.
    mem: u64,
    /// Descriptors with any other target.
    other: u64,
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct ProcessInfo {
|
||||
cpu_usage: f64,
|
||||
memory: u64,
|
||||
threads: Option<u64>,
|
||||
open_fd: Option<FdCounter>,
|
||||
}
|
||||
|
||||
pub struct MetricsService {
|
||||
metrics: Option<PrometheusMetrics>,
|
||||
#[cfg(any(windows, unix))]
|
||||
system: System,
|
||||
pid: Option<i32>,
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
impl MetricsService {
|
||||
fn inner_new(metrics: Option<PrometheusMetrics>) -> Self {
|
||||
let process = procfs::process::Process::myself()
|
||||
.expect("Procfs doesn't fail on unix. qed");
|
||||
|
||||
Self {
|
||||
metrics,
|
||||
system: System::new(),
|
||||
pid: Some(process.pid),
|
||||
}
|
||||
}
|
||||
fn process_info(&mut self) -> ProcessInfo {
|
||||
let pid = self.pid.clone().expect("unix always has a pid. qed");
|
||||
let mut info = self._process_info_for(&pid);
|
||||
let process = procfs::process::Process::new(pid).expect("Our process exists. qed.");
|
||||
info.threads = process.stat().ok().map(|s|
|
||||
u64::try_from(s.num_threads).expect("There are no negative thread counts. qed"));
|
||||
info.open_fd = process.fd().ok().map(|i|
|
||||
i.into_iter().fold(FdCounter::default(), |mut f, info| {
|
||||
match info.target {
|
||||
procfs::process::FDTarget::Path(_) => f.paths += 1,
|
||||
procfs::process::FDTarget::Socket(_) => f.sockets += 1,
|
||||
procfs::process::FDTarget::Net(_) => f.net += 1,
|
||||
procfs::process::FDTarget::Pipe(_) => f.pipes += 1,
|
||||
procfs::process::FDTarget::AnonInode(_) => f.anon_inode += 1,
|
||||
procfs::process::FDTarget::MemFD(_) => f.mem += 1,
|
||||
procfs::process::FDTarget::Other(_,_) => f.other += 1,
|
||||
};
|
||||
f
|
||||
})
|
||||
);
|
||||
info
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
#[cfg(windows)]
impl MetricsService {
    /// Builds the service for Windows targets; the pid comes from `sysinfo`.
    fn inner_new(metrics: Option<PrometheusMetrics>) -> Self {
        Self {
            metrics,
            system: System::new(),
            // NOTE(review): the field is `Option<i32>`; confirm that `sysinfo`'s `Pid` type on
            // Windows matches, otherwise a conversion is needed here.
            pid: get_current_pid().ok(),
        }
    }

    /// Gathers CPU and memory usage via `sysinfo`.
    ///
    /// Returns the default (all-zero) info when the pid is unknown.
    fn process_info(&mut self) -> ProcessInfo {
        match self.pid {
            Some(pid) => self._process_info_for(&pid),
            None => ProcessInfo::default(),
        }
    }
}
|
||||
|
||||
#[cfg(not(any(unix, windows)))]
impl MetricsService {
    /// Builds the service for targets without process introspection: no pid is recorded.
    fn inner_new(metrics: Option<PrometheusMetrics>) -> Self {
        Self {
            metrics,
            pid: None,
        }
    }

    /// Always returns the default (all-zero) info; nothing is measured on these targets.
    fn process_info(&mut self) -> ProcessInfo {
        ProcessInfo::default()
    }
}
|
||||
|
||||
|
||||
impl MetricsService {
|
||||
|
||||
pub fn with_prometheus(registry: &Registry, name: &str, version: &str, roles: u64)
|
||||
-> Result<Self, PrometheusError>
|
||||
{
|
||||
PrometheusMetrics::setup(registry, name, version, roles).map(|p| {
|
||||
Self::inner_new(Some(p))
|
||||
})
|
||||
}
|
||||
|
||||
pub fn new() -> Self {
|
||||
Self::inner_new(None)
|
||||
}
|
||||
|
||||
#[cfg(any(windows, unix))]
|
||||
fn _process_info_for(&mut self, pid: &i32) -> ProcessInfo {
|
||||
let mut info = ProcessInfo::default();
|
||||
if self.system.refresh_process(*pid) {
|
||||
let prc = self.system.get_process(*pid)
|
||||
.expect("Above refresh_process succeeds, this must be Some(), qed");
|
||||
info.cpu_usage = prc.cpu_usage().into();
|
||||
info.memory = prc.memory();
|
||||
}
|
||||
info
|
||||
}
|
||||
|
||||
#[cfg(any(unix, windows))]
|
||||
fn connections_info(&self) -> Option<ConnectionsCount> {
|
||||
self.pid.as_ref().and_then(|pid| {
|
||||
let af_flags = AddressFamilyFlags::IPV4 | AddressFamilyFlags::IPV6;
|
||||
let proto_flags = ProtocolFlags::TCP;
|
||||
let netstat_pid = *pid as u32;
|
||||
|
||||
iterate_sockets_info(af_flags, proto_flags).ok().map(|iter|
|
||||
iter.filter_map(|r|
|
||||
r.ok().and_then(|s| {
|
||||
if s.associated_pids.contains(&netstat_pid) {
|
||||
match s.protocol_socket_info {
|
||||
ProtocolSocketInfo::Tcp(info) => Some(info.state),
|
||||
_ => None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
).fold(ConnectionsCount::default(), |mut counter, socket_state| {
|
||||
match socket_state {
|
||||
TcpState::Listen => counter.listen += 1,
|
||||
TcpState::Established => counter.established += 1,
|
||||
TcpState::Closed => counter.closed += 1,
|
||||
TcpState::SynSent | TcpState::SynReceived => counter.starting += 1,
|
||||
TcpState::FinWait1 | TcpState::FinWait2 | TcpState::CloseWait
|
||||
| TcpState::Closing | TcpState::LastAck => counter.closing += 1,
|
||||
_ => counter.other += 1
|
||||
}
|
||||
|
||||
counter
|
||||
})
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn tick<T: Block>(
|
||||
&mut self,
|
||||
info: &ClientInfo<T>,
|
||||
txpool_status: &PoolStatus,
|
||||
net_status: &NetworkStatus<T>
|
||||
) {
|
||||
|
||||
let best_number = info.chain.best_number.saturated_into::<u64>();
|
||||
let best_hash = info.chain.best_hash;
|
||||
let num_peers = net_status.num_connected_peers;
|
||||
let finalized_number: u64 = info.chain.finalized_number.saturated_into::<u64>();
|
||||
let bandwidth_download = net_status.average_download_per_sec;
|
||||
let bandwidth_upload = net_status.average_upload_per_sec;
|
||||
let best_seen_block = net_status.best_seen_block
|
||||
.map(|num: NumberFor<T>| num.unique_saturated_into() as u64);
|
||||
let process_info = self.process_info();
|
||||
|
||||
telemetry!(
|
||||
SUBSTRATE_INFO;
|
||||
"system.interval";
|
||||
"peers" => num_peers,
|
||||
"height" => best_number,
|
||||
"best" => ?best_hash,
|
||||
"txcount" => txpool_status.ready,
|
||||
"cpu" => process_info.cpu_usage,
|
||||
"memory" => process_info.memory,
|
||||
"finalized_height" => finalized_number,
|
||||
"finalized_hash" => ?info.chain.finalized_hash,
|
||||
"bandwidth_download" => bandwidth_download,
|
||||
"bandwidth_upload" => bandwidth_upload,
|
||||
"used_state_cache_size" => info.usage.as_ref()
|
||||
.map(|usage| usage.memory.state_cache.as_bytes())
|
||||
.unwrap_or(0),
|
||||
"used_db_cache_size" => info.usage.as_ref()
|
||||
.map(|usage| usage.memory.database_cache.as_bytes())
|
||||
.unwrap_or(0),
|
||||
"disk_read_per_sec" => info.usage.as_ref()
|
||||
.map(|usage| usage.io.bytes_read)
|
||||
.unwrap_or(0),
|
||||
"disk_write_per_sec" => info.usage.as_ref()
|
||||
.map(|usage| usage.io.bytes_written)
|
||||
.unwrap_or(0),
|
||||
);
|
||||
|
||||
if let Some(metrics) = self.metrics.as_ref() {
|
||||
metrics.cpu_usage_percentage.set(process_info.cpu_usage as f64);
|
||||
// `sysinfo::Process::memory` returns memory usage in KiB and not bytes.
|
||||
metrics.memory_usage_bytes.set(process_info.memory * 1024);
|
||||
|
||||
if let Some(threads) = process_info.threads {
|
||||
metrics.threads.set(threads);
|
||||
}
|
||||
|
||||
if let Some(fd_info) = process_info.open_fd {
|
||||
metrics.open_files.with_label_values(&["paths"]).set(fd_info.paths);
|
||||
metrics.open_files.with_label_values(&["mem"]).set(fd_info.mem);
|
||||
metrics.open_files.with_label_values(&["sockets"]).set(fd_info.sockets);
|
||||
metrics.open_files.with_label_values(&["net"]).set(fd_info.net);
|
||||
metrics.open_files.with_label_values(&["pipe"]).set(fd_info.pipes);
|
||||
metrics.open_files.with_label_values(&["anon_inode"]).set(fd_info.anon_inode);
|
||||
metrics.open_files.with_label_values(&["other"]).set(fd_info.other);
|
||||
}
|
||||
|
||||
|
||||
metrics.network_per_sec_bytes.with_label_values(&["download"]).set(net_status.average_download_per_sec);
|
||||
metrics.network_per_sec_bytes.with_label_values(&["upload"]).set(net_status.average_upload_per_sec);
|
||||
|
||||
metrics.block_height.with_label_values(&["finalized"]).set(finalized_number);
|
||||
metrics.block_height.with_label_values(&["best"]).set(best_number);
|
||||
if let Ok(leaves) = u64::try_from(info.chain.number_leaves) {
|
||||
metrics.number_leaves.set(leaves);
|
||||
}
|
||||
|
||||
metrics.ready_transactions_number.set(txpool_status.ready as u64);
|
||||
|
||||
if let Some(best_seen_block) = best_seen_block {
|
||||
metrics.block_height.with_label_values(&["sync_target"]).set(best_seen_block);
|
||||
}
|
||||
|
||||
if let Some(info) = info.usage.as_ref() {
|
||||
metrics.database_cache.set(info.memory.database_cache.as_bytes() as u64);
|
||||
metrics.state_cache.set(info.memory.state_cache.as_bytes() as u64);
|
||||
|
||||
metrics.state_db.with_label_values(&["non_canonical"]).set(info.memory.state_db.non_canonical.as_bytes() as u64);
|
||||
if let Some(pruning) = info.memory.state_db.pruning {
|
||||
metrics.state_db.with_label_values(&["pruning"]).set(pruning.as_bytes() as u64);
|
||||
}
|
||||
metrics.state_db.with_label_values(&["pinned"]).set(info.memory.state_db.pinned.as_bytes() as u64);
|
||||
}
|
||||
|
||||
#[cfg(any(unix, windows))]
|
||||
{
|
||||
let load = self.system.get_load_average();
|
||||
metrics.load_avg.with_label_values(&["1min"]).set(load.one);
|
||||
metrics.load_avg.with_label_values(&["5min"]).set(load.five);
|
||||
metrics.load_avg.with_label_values(&["15min"]).set(load.fifteen);
|
||||
|
||||
if let Some(conns) = self.connections_info() {
|
||||
metrics.netstat.with_label_values(&["listen"]).set(conns.listen);
|
||||
metrics.netstat.with_label_values(&["established"]).set(conns.established);
|
||||
metrics.netstat.with_label_values(&["starting"]).set(conns.starting);
|
||||
metrics.netstat.with_label_values(&["closing"]).set(conns.closing);
|
||||
metrics.netstat.with_label_values(&["closed"]).set(conns.closed);
|
||||
metrics.netstat.with_label_values(&["other"]).set(conns.other);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -14,11 +14,12 @@
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use futures::{Stream, stream::futures_unordered::FuturesUnordered, channel::mpsc};
|
||||
use futures::{Stream, stream::futures_unordered::FuturesUnordered};
|
||||
use std::time::Duration;
|
||||
use std::pin::Pin;
|
||||
use std::task::{Poll, Context};
|
||||
use futures_timer::Delay;
|
||||
use sp_utils::mpsc::TracingUnboundedSender;
|
||||
|
||||
/// Holds a list of `UnboundedSender`s, each associated with a certain time period. Every time the
|
||||
/// period elapses, we push an element on the sender.
|
||||
@@ -31,7 +32,7 @@ pub struct StatusSinks<T> {
|
||||
struct YieldAfter<T> {
|
||||
delay: Delay,
|
||||
interval: Duration,
|
||||
sender: Option<mpsc::UnboundedSender<T>>,
|
||||
sender: Option<TracingUnboundedSender<T>>,
|
||||
}
|
||||
|
||||
impl<T> StatusSinks<T> {
|
||||
@@ -45,7 +46,7 @@ impl<T> StatusSinks<T> {
|
||||
/// Adds a sender to the collection.
|
||||
///
|
||||
/// The `interval` is the time period between two pushes on the sender.
|
||||
pub fn push(&mut self, interval: Duration, sender: mpsc::UnboundedSender<T>) {
|
||||
pub fn push(&mut self, interval: Duration, sender: TracingUnboundedSender<T>) {
|
||||
self.entries.push(YieldAfter {
|
||||
delay: Delay::new(interval),
|
||||
interval,
|
||||
@@ -88,7 +89,7 @@ impl<T> StatusSinks<T> {
|
||||
}
|
||||
|
||||
impl<T> futures::Future for YieldAfter<T> {
|
||||
type Output = (mpsc::UnboundedSender<T>, Duration);
|
||||
type Output = (TracingUnboundedSender<T>, Duration);
|
||||
|
||||
fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
|
||||
let this = Pin::into_inner(self);
|
||||
|
||||
@@ -22,17 +22,18 @@ use exit_future::Signal;
|
||||
use log::{debug, error};
|
||||
use futures::{
|
||||
Future, FutureExt, Stream,
|
||||
future::select, channel::mpsc,
|
||||
future::select,
|
||||
compat::*,
|
||||
task::{Spawn, FutureObj, SpawnError},
|
||||
};
|
||||
use sc_client_api::CloneableSpawn;
|
||||
use sp_utils::mpsc::{tracing_unbounded, TracingUnboundedSender, TracingUnboundedReceiver};
|
||||
|
||||
/// Type alias for service task executor (usually runtime).
|
||||
pub type ServiceTaskExecutor = Arc<dyn Fn(Pin<Box<dyn Future<Output = ()> + Send>>) + Send + Sync>;
|
||||
|
||||
/// Type alias for the task scheduler.
|
||||
pub type TaskScheduler = mpsc::UnboundedSender<(Pin<Box<dyn Future<Output = ()> + Send>>, Cow<'static, str>)>;
|
||||
pub type TaskScheduler = TracingUnboundedSender<(Pin<Box<dyn Future<Output = ()> + Send>>, Cow<'static, str>)>;
|
||||
|
||||
/// Helper struct to setup background tasks execution for service.
|
||||
pub struct TaskManagerBuilder {
|
||||
@@ -44,14 +45,14 @@ pub struct TaskManagerBuilder {
|
||||
/// Sender for futures that must be spawned as background tasks.
|
||||
to_spawn_tx: TaskScheduler,
|
||||
/// Receiver for futures that must be spawned as background tasks.
|
||||
to_spawn_rx: mpsc::UnboundedReceiver<(Pin<Box<dyn Future<Output = ()> + Send>>, Cow<'static, str>)>,
|
||||
to_spawn_rx: TracingUnboundedReceiver<(Pin<Box<dyn Future<Output = ()> + Send>>, Cow<'static, str>)>,
|
||||
}
|
||||
|
||||
impl TaskManagerBuilder {
|
||||
/// New asynchronous task manager setup.
|
||||
pub fn new() -> Self {
|
||||
let (signal, on_exit) = exit_future::signal();
|
||||
let (to_spawn_tx, to_spawn_rx) = mpsc::unbounded();
|
||||
let (to_spawn_tx, to_spawn_rx) = tracing_unbounded("mpsc_task_manager");
|
||||
Self {
|
||||
on_exit,
|
||||
signal: Some(signal),
|
||||
@@ -144,7 +145,7 @@ pub struct TaskManager {
|
||||
/// Sender for futures that must be spawned as background tasks.
|
||||
to_spawn_tx: TaskScheduler,
|
||||
/// Receiver for futures that must be spawned as background tasks.
|
||||
to_spawn_rx: mpsc::UnboundedReceiver<(Pin<Box<dyn Future<Output = ()> + Send>>, Cow<'static, str>)>,
|
||||
to_spawn_rx: TracingUnboundedReceiver<(Pin<Box<dyn Future<Output = ()> + Send>>, Cow<'static, str>)>,
|
||||
/// How to spawn background tasks.
|
||||
executor: ServiceTaskExecutor,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user