Introduce Prometheus metric endpoint replacing Grafana endpoint (#4981)

* Refactor rebase master prometheus_v0.3

* Milestone1: Final Version of v0.3

* no-std or wasm compatibility issues, grafana-data-source code reference and correction, applicable

* Cargo.lock paritytech/master rebase

* prometheus networking.rs del, grafana-data-source networking.rs pub edit and note

* chore: reflect various feedback

* Spaces to tabs.

* Replace grafana and tidy

* Add generics

* Add photo back

* Re-fix spaces in primitives/consensus/babe/src/inherents.rs

* Refactor rebase master prometheus_v0.3

* Milestone1: Final Version of v0.3

* no-std or wasm compatibility issues, grafana-data-source code reference and correction, applicable

* prometheus networking.rs del, grafana-data-source networking.rs pub edit and note

* chore: reflect various feedback

* Replace grafana and tidy

* Add generics

* Add photo back

* Re-fix spaces in primitives/consensus/babe/src/inherents.rs

* chore: revert this file back to paritytech/master inherents.rs.

* Add newline at EOF

* Tidy

* Use local registry

* fix typo

Co-Authored-By: Max Inden <mail@max-inden.de>

* chore:  Apply review feedback

* endpoint -> exporter

* fix readme

* Remove lazy_static, use ServiceMetrics struct instead

* Switch to using GaugeVecs

* chore: without nightly, edit README

* block_height -> block_height_number

* Switch to a ready_transactions_number gauge

* Update utils/prometheus/src/lib.rs

Co-Authored-By: Max Inden <mail@max-inden.de>

* no-prometheus flag add

* /metrics url Input check

* remove prometheus in Tracing

* remove prometheus in Tracing

* chore: master code rebase edit

* gitlab-check-web-wasm edit code

* From::from and Cargo.lock update

* with_prometheus_registry add background_tasks

* utils/prometheus/src/lib.rs: Restructure #[cfg] for wasm without hyper

Given that Hyper is not compatible with WASM targets it needs to be
excluded from WASM builds. Instead of introducing #[cfg] lines
throughout the crate, this patch splits the crate into two: known_os and
unknown_os (WASM).

* utils/prometheus/src/lib.rs: Feature gate known_os module

* client/cli/src/lib.rs: Re-add newline at end of file

Co-authored-by: JeseonLEE <zeroday26@gmail.com>
Co-authored-by: Gavin Wood <github@gavwood.com>
Co-authored-by: Ashley <ashley.ruglys@gmail.com>
Co-authored-by: Hyungsuk Kang <hskang9@gmail.com>
This commit is contained in:
Max Inden
2020-02-19 15:36:24 +01:00
committed by GitHub
parent e417f986be
commit d8230ecf4b
22 changed files with 409 additions and 648 deletions
+1
View File
@@ -31,6 +31,7 @@ sp-core = { version = "2.0.0", path = "../../primitives/core" }
sc-service = { version = "0.8", default-features = false, path = "../service" }
sp-state-machine = { version = "0.8", path = "../../primitives/state-machine" }
sc-telemetry = { version = "2.0.0", path = "../telemetry" }
prometheus-exporter = { path = "../../utils/prometheus" }
sp-keyring = { version = "2.0.0", path = "../../primitives/keyring" }
names = "0.11.0"
structopt = "0.3.8"
+8 -7
View File
@@ -623,13 +623,6 @@ where
config.rpc_ws = Some(parse_address(&format!("{}:{}", ws_interface, 9944), cli.ws_port)?);
}
if config.grafana_port.is_none() || cli.grafana_port.is_some() {
let grafana_interface: &str = if cli.grafana_external { "0.0.0.0" } else { "127.0.0.1" };
config.grafana_port = Some(
parse_address(&format!("{}:{}", grafana_interface, 9955), cli.grafana_port)?
);
}
config.rpc_ws_max_connections = cli.ws_max_connections;
config.rpc_cors = cli.rpc_cors.unwrap_or_else(|| if is_dev {
log::warn!("Running in --dev mode, RPC CORS has been disabled.");
@@ -651,6 +644,14 @@ where
} else if !cli.telemetry_endpoints.is_empty() {
config.telemetry_endpoints = Some(TelemetryEndpoints::new(cli.telemetry_endpoints));
}
// Override prometheus
if cli.no_prometheus {
config.prometheus_port = None;
} else {
let prometheus_interface: &str = if cli.prometheus_external { "0.0.0.0" } else { "127.0.0.1" };
config.prometheus_port = Some(
parse_address(&format!("{}:{}", prometheus_interface, 9615), cli.prometheus_port)?);
}
config.tracing_targets = cli.import_params.tracing_targets.into();
config.tracing_receiver = cli.import_params.tracing_receiver.into();
+12 -8
View File
@@ -337,7 +337,6 @@ arg_enum! {
pub enum TracingReceiver {
Log,
Telemetry,
Grafana,
}
}
@@ -346,7 +345,6 @@ impl Into<sc_tracing::TracingReceiver> for TracingReceiver {
match self {
TracingReceiver::Log => sc_tracing::TracingReceiver::Log,
TracingReceiver::Telemetry => sc_tracing::TracingReceiver::Telemetry,
TracingReceiver::Grafana => sc_tracing::TracingReceiver::Grafana,
}
}
}
@@ -486,11 +484,11 @@ pub struct RunCmd {
#[structopt(long = "unsafe-ws-external")]
pub unsafe_ws_external: bool,
/// Listen to all Grafana data source interfaces.
/// Listen to all Prometheus endpoint interfaces.
///
/// Default is local.
#[structopt(long = "grafana-external")]
pub grafana_external: bool,
#[structopt(long = "prometheus-external")]
pub prometheus_external: bool,
/// Specify HTTP RPC server TCP port.
#[structopt(long = "rpc-port", value_name = "PORT")]
@@ -514,9 +512,15 @@ pub struct RunCmd {
#[structopt(long = "rpc-cors", value_name = "ORIGINS", parse(try_from_str = parse_cors))]
pub rpc_cors: Option<Cors>,
/// Specify Grafana data source server TCP Port.
#[structopt(long = "grafana-port", value_name = "PORT")]
pub grafana_port: Option<u16>,
/// Specify Prometheus endpoint TCP Port.
#[structopt(long = "prometheus-port", value_name = "PORT")]
pub prometheus_port: Option<u16>,
/// Do not expose a Prometheus metric endpoint.
///
/// Prometheus metric endpoint is enabled by default.
#[structopt(long = "no-prometheus")]
pub no_prometheus: bool,
/// The human-readable name for this node.
///
+2 -2
View File
@@ -52,8 +52,8 @@ sc-rpc-server = { version = "2.0.0", path = "../rpc-servers" }
sc-rpc = { version = "2.0.0", path = "../rpc" }
sc-telemetry = { version = "2.0.0", path = "../telemetry" }
sc-offchain = { version = "2.0.0", path = "../offchain" }
parity-multiaddr = { package = "parity-multiaddr", version = "0.7.1" }
grafana-data-source = { version = "0.8", path = "../../utils/grafana-data-source" }
parity-multiaddr = { package = "parity-multiaddr", version = "0.5.0" }
prometheus-exporter = { path = "../../utils/prometheus" }
sc-tracing = { version = "2.0.0", path = "../tracing" }
tracing = "0.1.10"
parity-util-mem = { version = "0.5.1", default-features = false, features = ["primitive-types"] }
+113 -31
View File
@@ -39,7 +39,7 @@ use sc_network::{config::BoxFinalityProofRequestBuilder, specialization::Network
use parking_lot::{Mutex, RwLock};
use sp_runtime::generic::BlockId;
use sp_runtime::traits::{
Block as BlockT, NumberFor, SaturatedConversion, HasherFor,
Block as BlockT, NumberFor, SaturatedConversion, HasherFor, UniqueSaturatedInto,
};
use sp_api::ProvideRuntimeApi;
use sc_executor::{NativeExecutor, NativeExecutionDispatch};
@@ -53,7 +53,43 @@ use sysinfo::{get_current_pid, ProcessExt, System, SystemExt};
use sc_telemetry::{telemetry, SUBSTRATE_INFO};
use sp_transaction_pool::{MaintainedTransactionPool, ChainEvent};
use sp_blockchain;
use grafana_data_source::{self, record_metrics};
use prometheus_exporter::{register, Gauge, U64, F64, Registry, PrometheusError, Opts, GaugeVec};
/// Prometheus gauges published by the service.
///
/// Every field is created and registered in `ServiceMetrics::register` and
/// refreshed from the periodic status task whenever metrics are enabled.
struct ServiceMetrics {
/// Chain height, one series per `status` label. The status task writes the
/// `finalized` and `best` series, and `sync_target` when a best seen block is known.
block_height_number: GaugeVec<U64>,
/// Number of network gossip peers.
peers_count: Gauge<U64>,
/// Number of transactions in the ready queue.
ready_transactions_number: Gauge<U64>,
/// Node memory usage.
// NOTE(review): the name says bytes, but the unit of the value passed to `set` is not
// visible here; confirm it against the process-info source.
memory_usage_bytes: Gauge<U64>,
/// Node CPU usage.
cpu_usage_percentage: Gauge<F64>,
/// Networking bytes per second, one series per `direction` label
/// (`download` and `upload`).
network_per_sec_bytes: GaugeVec<U64>,
}
impl ServiceMetrics {
    /// Create every service gauge and register it with `registry`.
    ///
    /// Gauges are built and registered one at a time, in field order. The first
    /// construction or registration error is returned through `?`, and the gauges
    /// after it are not attempted.
    fn register(registry: &Registry) -> Result<Self, PrometheusError> {
        // Labelled by "status" so several heights share one metric name.
        let block_height_number = {
            let opts = Opts::new("block_height_number", "Height of the chain");
            register(GaugeVec::new(opts, &["status"])?, registry)?
        };

        let peers_count = {
            let gauge = Gauge::new("peers_count", "Number of network gossip peers")?;
            register(gauge, registry)?
        };

        let ready_transactions_number = {
            let gauge = Gauge::new(
                "ready_transactions_number",
                "Number of transactions in the ready queue",
            )?;
            register(gauge, registry)?
        };

        let memory_usage_bytes = {
            let gauge = Gauge::new("memory_usage_bytes", "Node memory usage")?;
            register(gauge, registry)?
        };

        let cpu_usage_percentage = {
            let gauge = Gauge::new("cpu_usage_percentage", "Node CPU usage")?;
            register(gauge, registry)?
        };

        // Labelled by "direction" so upload and download share one metric name.
        let network_per_sec_bytes = {
            let opts = Opts::new("network_per_sec_bytes", "Networking bytes per second");
            register(GaugeVec::new(opts, &["direction"])?, registry)?
        };

        Ok(Self {
            block_height_number,
            peers_count,
            ready_transactions_number,
            memory_usage_bytes,
            cpu_usage_percentage,
            network_per_sec_bytes,
        })
    }
}
pub type BackgroundTask = Pin<Box<dyn Future<Output=()> + Send>>;
@@ -93,6 +129,7 @@ pub struct ServiceBuilder<TBl, TRtApi, TGen, TCSExt, TCl, TFchr, TSc, TImpQu, TF
remote_backend: Option<Arc<dyn RemoteBlockchain<TBl>>>,
marker: PhantomData<(TBl, TRtApi)>,
background_tasks: Vec<(&'static str, BackgroundTask)>,
prometheus_registry: Option<Registry>
}
/// Full client type.
@@ -270,6 +307,7 @@ where TGen: RuntimeGenesis, TCSExt: Extension {
remote_backend: None,
background_tasks: Default::default(),
marker: PhantomData,
prometheus_registry: None,
})
}
@@ -356,6 +394,7 @@ where TGen: RuntimeGenesis, TCSExt: Extension {
remote_backend: Some(remote_blockchain),
background_tasks: Default::default(),
marker: PhantomData,
prometheus_registry: None,
})
}
}
@@ -429,6 +468,7 @@ impl<TBl, TRtApi, TGen, TCSExt, TCl, TFchr, TSc, TImpQu, TFprb, TFpp, TNetP, TEx
remote_backend: self.remote_backend,
background_tasks: self.background_tasks,
marker: self.marker,
prometheus_registry: self.prometheus_registry,
})
}
@@ -472,6 +512,7 @@ impl<TBl, TRtApi, TGen, TCSExt, TCl, TFchr, TSc, TImpQu, TFprb, TFpp, TNetP, TEx
remote_backend: self.remote_backend,
background_tasks: self.background_tasks,
marker: self.marker,
prometheus_registry: self.prometheus_registry,
})
}
@@ -499,6 +540,7 @@ impl<TBl, TRtApi, TGen, TCSExt, TCl, TFchr, TSc, TImpQu, TFprb, TFpp, TNetP, TEx
remote_backend: self.remote_backend,
background_tasks: self.background_tasks,
marker: self.marker,
prometheus_registry: self.prometheus_registry,
})
}
@@ -540,6 +582,7 @@ impl<TBl, TRtApi, TGen, TCSExt, TCl, TFchr, TSc, TImpQu, TFprb, TFpp, TNetP, TEx
remote_backend: self.remote_backend,
background_tasks: self.background_tasks,
marker: self.marker,
prometheus_registry: self.prometheus_registry,
})
}
@@ -605,6 +648,7 @@ impl<TBl, TRtApi, TGen, TCSExt, TCl, TFchr, TSc, TImpQu, TFprb, TFpp, TNetP, TEx
remote_backend: self.remote_backend,
background_tasks: self.background_tasks,
marker: self.marker,
prometheus_registry: self.prometheus_registry,
})
}
@@ -665,6 +709,7 @@ impl<TBl, TRtApi, TGen, TCSExt, TCl, TFchr, TSc, TImpQu, TFprb, TFpp, TNetP, TEx
remote_backend: self.remote_backend,
background_tasks: self.background_tasks,
marker: self.marker,
prometheus_registry: self.prometheus_registry,
})
}
@@ -693,8 +738,31 @@ impl<TBl, TRtApi, TGen, TCSExt, TCl, TFchr, TSc, TImpQu, TFprb, TFpp, TNetP, TEx
remote_backend: self.remote_backend,
background_tasks: self.background_tasks,
marker: self.marker,
prometheus_registry: self.prometheus_registry,
})
}
/// Use an existing prometheus `Registry` to record metrics into.
///
/// Stores `registry` in the builder's `prometheus_registry` field, replacing any
/// value set earlier. Every other field of the builder is carried over unchanged.
pub fn with_prometheus_registry(self, registry: Registry) -> Self {
    // The return type is `Self`, so the builder's type parameters do not change and
    // struct update syntax can move the remaining fields across. A field added to
    // the builder later is then picked up here automatically.
    Self {
        prometheus_registry: Some(registry),
        ..self
    }
}
}
/// Implemented on `ServiceBuilder`. Allows running block commands, such as import/export/validate
@@ -807,6 +875,7 @@ ServiceBuilder<
rpc_extensions,
remote_backend,
background_tasks,
prometheus_registry,
} = self;
sp_session::generate_initial_session_keys(
@@ -998,6 +1067,30 @@ ServiceBuilder<
));
}
// Prometheus exporter and metrics
let metrics = if let Some(port) = config.prometheus_port {
let registry = match prometheus_registry {
Some(registry) => registry,
None => Registry::new_custom(Some("substrate".into()), None)?
};
let metrics = ServiceMetrics::register(&registry)?;
let future = select(
prometheus_exporter::init_prometheus(port, registry).boxed(),
exit.clone()
).map(drop);
let _ = to_spawn_tx.unbounded_send((
Box::pin(future),
From::from("prometheus-endpoint")
));
Some(metrics)
} else {
None
};
// Periodically notify the telemetry.
let transaction_pool_ = transaction_pool.clone();
let client_ = client.clone();
@@ -1014,6 +1107,8 @@ ServiceBuilder<
let finalized_number: u64 = info.chain.finalized_number.saturated_into::<u64>();
let bandwidth_download = net_status.average_download_per_sec;
let bandwidth_upload = net_status.average_upload_per_sec;
let best_seen_block = net_status.best_seen_block
.map(|num: NumberFor<TBl>| num.unique_saturated_into() as u64);
// get cpu usage and memory usage of this process
let (cpu_usage, memory) = if let Some(self_pid) = self_pid {
@@ -1042,25 +1137,22 @@ ServiceBuilder<
"disk_read_per_sec" => info.usage.as_ref().map(|usage| usage.io.bytes_read).unwrap_or(0),
"disk_write_per_sec" => info.usage.as_ref().map(|usage| usage.io.bytes_written).unwrap_or(0),
);
#[cfg(not(target_os = "unknown"))]
let memory_transaction_pool = parity_util_mem::malloc_size(&*transaction_pool_);
#[cfg(target_os = "unknown")]
let memory_transaction_pool = 0;
let _ = record_metrics!(
"peers" => num_peers,
"height" => best_number,
"txcount" => txpool_status.ready,
"cpu" => cpu_usage,
"memory" => memory,
"finalized_height" => finalized_number,
"bandwidth_download" => bandwidth_download,
"bandwidth_upload" => bandwidth_upload,
"used_state_cache_size" => info.usage.as_ref().map(|usage| usage.memory.state_cache).unwrap_or(0),
"used_db_cache_size" => info.usage.as_ref().map(|usage| usage.memory.database_cache).unwrap_or(0),
"disk_read_per_sec" => info.usage.as_ref().map(|usage| usage.io.bytes_read).unwrap_or(0),
"disk_write_per_sec" => info.usage.as_ref().map(|usage| usage.io.bytes_written).unwrap_or(0),
"memory_transaction_pool" => memory_transaction_pool,
);
if let Some(metrics) = metrics.as_ref() {
metrics.memory_usage_bytes.set(memory);
metrics.cpu_usage_percentage.set(f64::from(cpu_usage));
metrics.ready_transactions_number.set(txpool_status.ready as u64);
metrics.peers_count.set(num_peers as u64);
metrics.network_per_sec_bytes.with_label_values(&["download"]).set(net_status.average_download_per_sec);
metrics.network_per_sec_bytes.with_label_values(&["upload"]).set(net_status.average_upload_per_sec);
metrics.block_height_number.with_label_values(&["finalized"]).set(finalized_number);
metrics.block_height_number.with_label_values(&["best"]).set(best_number);
if let Some(best_seen_block) = best_seen_block {
metrics.block_height_number.with_label_values(&["sync_target"]).set(best_seen_block);
}
}
ready(())
});
@@ -1217,16 +1309,6 @@ ServiceBuilder<
telemetry
});
// Grafana data source
if let Some(port) = config.grafana_port {
let future = select(
grafana_data_source::run_server(port).boxed(),
exit.clone()
).map(drop);
let _ = to_spawn_tx.unbounded_send((Box::pin(future), From::from("grafana-server")));
}
// Instrumentation
if let Some(tracing_targets) = config.tracing_targets.as_ref() {
let subscriber = sc_tracing::ProfilingSubscriber::new(
+3 -3
View File
@@ -93,8 +93,8 @@ pub struct Configuration<G, E = NoExtension> {
pub rpc_ws_max_connections: Option<usize>,
/// CORS settings for HTTP & WS servers. `None` if all origins are allowed.
pub rpc_cors: Option<Vec<String>>,
/// Grafana data source http port. `None` if disabled.
pub grafana_port: Option<SocketAddr>,
/// Prometheus exporter socket address (interface and port). `None` if disabled.
pub prometheus_port: Option<SocketAddr>,
/// Telemetry service URL. `None` if disabled.
pub telemetry_endpoints: Option<TelemetryEndpoints>,
/// External WASM transport for the telemetry. If `Some`, when connection to a telemetry
@@ -190,7 +190,7 @@ impl<G, E> Default for Configuration<G, E> {
rpc_ws: None,
rpc_ws_max_connections: None,
rpc_cors: Some(vec![]),
grafana_port: None,
prometheus_port: None,
telemetry_endpoints: None,
telemetry_external_transport: None,
default_heap_pages: None,
+6
View File
@@ -53,6 +53,12 @@ impl<'a> From<&'a str> for Error {
}
}
impl From<prometheus_exporter::PrometheusError> for Error {
fn from(e: prometheus_exporter::PrometheusError) -> Self {
Error::Other(format!("Prometheus error: {}", e))
}
}
impl std::error::Error for Error {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
+1 -1
View File
@@ -199,7 +199,7 @@ fn node_config<G, E: Clone> (
rpc_ws: None,
rpc_ws_max_connections: None,
rpc_cors: None,
grafana_port: None,
prometheus_port: None,
telemetry_endpoints: None,
telemetry_external_transport: None,
default_heap_pages: None,
-1
View File
@@ -15,7 +15,6 @@ slog = { version = "2.5.2", features = ["nested-values"] }
tracing-core = "0.1.7"
sc-telemetry = { version = "2.0.0", path = "../telemetry" }
grafana-data-source = { version = "0.8", path = "../../utils/grafana-data-source" }
[dev-dependencies]
tracing = "0.1.10"
+1 -11
View File
@@ -34,7 +34,7 @@
//! let span = tracing::span!(tracing::Level::INFO, "my_span_name", my_number = 10, a_key = "a value");
//! let _guard = span.enter();
//! ```
//! Currently we provide `Log` (default), `Telemetry` and `Grafana` variants for `Receiver`
//! Currently we provide `Log` (default) and `Telemetry` variants for `Receiver`
use std::collections::HashMap;
use std::fmt;
@@ -53,7 +53,6 @@ use tracing_core::{
subscriber::Subscriber
};
use grafana_data_source::{self, record_metrics};
use sc_telemetry::{telemetry, SUBSTRATE_INFO};
/// Used to configure how to receive the metrics
@@ -63,8 +62,6 @@ pub enum TracingReceiver {
Log,
/// Output to telemetry
Telemetry,
/// Output to Grafana
Grafana,
}
impl Default for TracingReceiver {
@@ -255,7 +252,6 @@ impl ProfilingSubscriber {
match self.receiver {
TracingReceiver::Log => print_log(span_datum),
TracingReceiver::Telemetry => send_telemetry(span_datum),
TracingReceiver::Grafana => send_grafana(span_datum),
}
}
}
@@ -291,9 +287,3 @@ fn send_telemetry(span_datum: SpanDatum) {
);
}
/// Record a span's overall time, in nanoseconds, under the key `<target>::<name>`
/// via `record_metrics!`. A failure is logged as a warning and otherwise ignored.
fn send_grafana(span_datum: SpanDatum) {
    let metric_key = format!("{}::{}", span_datum.target, span_datum.name);
    let outcome = record_metrics!(&metric_key => span_datum.overall_time.as_nanos(),);
    if let Err(err) = outcome {
        log::warn!("Unable to send metrics to grafana: {:?}", err);
    }
}