Add more metrics to Prometheus (#5034)

* Add a few things

* Add finality_grandpa_round

* Fix finality-grandpa (fg) tests

* Nitpicks

* Nitpicks

* Fix name of prometheus crate
This commit is contained in:
Ashley
2020-03-03 11:36:58 +01:00
committed by GitHub
parent b27a820032
commit 883ddfc897
16 changed files with 135 additions and 24 deletions
+21 -15
View File
@@ -53,15 +53,15 @@ use sysinfo::{get_current_pid, ProcessExt, System, SystemExt};
use sc_telemetry::{telemetry, SUBSTRATE_INFO};
use sp_transaction_pool::{MaintainedTransactionPool, ChainEvent};
use sp_blockchain;
use substrate_prometheus_endpoint::{register, Gauge, U64, F64, Registry, PrometheusError, Opts, GaugeVec};
use prometheus_endpoint::{register, Gauge, U64, F64, Registry, PrometheusError, Opts, GaugeVec};
/// Prometheus gauges published by the service.
///
/// NOTE(review): this listing comes from a diff view, so fields removed and fields added by
/// the change may both appear here — confirm the field set against the actual file.
struct ServiceMetrics {
/// "Height of the chain"; a gauge vector keyed by the `status` label.
block_height_number: GaugeVec<U64>,
/// "Number of network gossip peers"; set from `num_peers`.
peers_count: Gauge<U64>,
/// "Number of transactions in the ready queue"; set from the transaction pool status.
ready_transactions_number: Gauge<U64>,
/// Set from the sampled `memory` value (presumably memory used by this process — confirm).
memory_usage_bytes: Gauge<U64>,
/// Set from the sampled `cpu_usage` value, widened to `f64`.
cpu_usage_percentage: Gauge<F64>,
/// "Networking bytes per second"; keyed by the `direction` label (`download` / `upload`).
network_per_sec_bytes: GaugeVec<U64>,
/// "The roles the node is running as"; set once from `config.roles.bits()`.
node_roles: Gauge<U64>,
}
impl ServiceMetrics {
@@ -71,9 +71,6 @@ impl ServiceMetrics {
Opts::new("block_height_number", "Height of the chain"),
&["status"]
)?, registry)?,
peers_count: register(Gauge::new(
"peers_count", "Number of network gossip peers",
)?, registry)?,
ready_transactions_number: register(Gauge::new(
"ready_transactions_number", "Number of transactions in the ready queue",
)?, registry)?,
@@ -87,6 +84,10 @@ impl ServiceMetrics {
Opts::new("network_per_sec_bytes", "Networking bytes per second"),
&["direction"]
)?, registry)?,
node_roles: register(Gauge::new(
"node_roles", "The roles the node is running as",
)?, registry)?,
})
}
}
@@ -887,6 +888,14 @@ ServiceBuilder<
let block_announce_validator =
Box::new(sp_consensus::block_validation::DefaultBlockAnnounceValidator::new(client.clone()));
let prometheus_registry_and_port = match config.prometheus_port {
Some(port) => match prometheus_registry {
Some(registry) => Some((registry, port)),
None => Some((Registry::new_custom(Some("substrate".into()), None)?, port))
},
None => None
};
let network_params = sc_network::config::Params {
roles: config.roles,
executor: {
@@ -906,6 +915,7 @@ ServiceBuilder<
import_queue,
protocol_id,
block_announce_validator,
metrics_registry: prometheus_registry_and_port.as_ref().map(|(r, _)| r.clone())
};
let has_bootnodes = !network_params.network_config.boot_nodes.is_empty();
@@ -1020,17 +1030,14 @@ ServiceBuilder<
));
}
// Prometheus endpoint and metrics
let metrics = if let Some(port) = config.prometheus_port {
let registry = match prometheus_registry {
Some(registry) => registry,
None => Registry::new_custom(Some("substrate".into()), None)?
};
// Prometheus metrics
let metrics = if let Some((registry, port)) = prometheus_registry_and_port.clone() {
let metrics = ServiceMetrics::register(&registry)?;
metrics.node_roles.set(u64::from(config.roles.bits()));
let future = select(
substrate_prometheus_endpoint::init_prometheus(port, registry).boxed(),
prometheus_endpoint::init_prometheus(port, registry).boxed(),
exit.clone()
).map(drop);
@@ -1043,7 +1050,6 @@ ServiceBuilder<
} else {
None
};
// Periodically notify the telemetry.
let transaction_pool_ = transaction_pool.clone();
let client_ = client.clone();
@@ -1102,7 +1108,6 @@ ServiceBuilder<
metrics.memory_usage_bytes.set(memory);
metrics.cpu_usage_percentage.set(f64::from(cpu_usage));
metrics.ready_transactions_number.set(txpool_status.ready as u64);
metrics.peers_count.set(num_peers as u64);
metrics.network_per_sec_bytes.with_label_values(&["download"]).set(net_status.average_download_per_sec);
metrics.network_per_sec_bytes.with_label_values(&["upload"]).set(net_status.average_upload_per_sec);
@@ -1305,6 +1310,7 @@ ServiceBuilder<
_telemetry_on_connect_sinks: telemetry_connection_sinks.clone(),
keystore,
marker: PhantomData::<TBl>,
prometheus_registry: prometheus_registry_and_port.map(|(r, _)| r)
})
}
}
+2 -2
View File
@@ -53,8 +53,8 @@ impl<'a> From<&'a str> for Error {
}
}
impl From<substrate_prometheus_endpoint::PrometheusError> for Error {
fn from(e: substrate_prometheus_endpoint::PrometheusError) -> Self {
impl From<prometheus_endpoint::PrometheusError> for Error {
fn from(e: prometheus_endpoint::PrometheusError) -> Self {
Error::Other(format!("Prometheus error: {}", e))
}
}
+8
View File
@@ -114,6 +114,7 @@ pub struct Service<TBl, TCl, TSc, TNetStatus, TNet, TTxPool, TOc> {
_offchain_workers: Option<Arc<TOc>>,
keystore: sc_keystore::KeyStorePtr,
marker: PhantomData<TBl>,
prometheus_registry: Option<prometheus_endpoint::Registry>,
}
impl<TBl, TCl, TSc, TNetStatus, TNet, TTxPool, TOc> Unpin for Service<TBl, TCl, TSc, TNetStatus, TNet, TTxPool, TOc> {}
@@ -225,6 +226,9 @@ pub trait AbstractService: 'static + Future<Output = Result<(), Error>> +
/// Get a handle to a future that will resolve on exit.
fn on_exit(&self) -> ::exit_future::Exit;
/// Get the prometheus metrics registry, if available.
fn prometheus_registry(&self) -> Option<prometheus_endpoint::Registry>;
}
impl<TBl, TBackend, TExec, TRtApi, TSc, TExPool, TOc> AbstractService for
@@ -328,6 +332,10 @@ where
fn on_exit(&self) -> exit_future::Exit {
self.exit.clone()
}
/// Returns a clone of the Prometheus registry held by this service, or `None` if it has none.
fn prometheus_registry(&self) -> Option<prometheus_endpoint::Registry> {
	self.prometheus_registry.as_ref().cloned()
}
}
impl<TBl, TCl, TSc, TNetStatus, TNet, TTxPool, TOc> Future for