Histogram support in runtime metrics (#6935)

* Histogram support in runtime metrics

Add support for histograms to the runtime metrics. Additionally add
`polkadot_parachain_verify_dispute_signature` histogram which tracks the
time needed from the runtime to verify a single validator signature of a
dispute statement.

* Add noops

* u64 instead of f64

* Update buckets

* Wrap `get_current_time()` in runtime metrics

* Change the dimension of the Histogram from usec to sec

* Fix a compilation error

* Update buckets

* Fix `on_signature_check_complete` calculation

* Update buckets

* Update buckets

* formatting

* Another weights update

* Adjust buckets again

* Final buckets adjustment

* Revert "Fix a compilation error"

This reverts commit 06290b40a39eeb78de2602d8916a39edf7a8b714.

* Update primitives/src/v4/metrics.rs

Co-authored-by: Andrei Sandu <54316454+sandreim@users.noreply.github.com>

* Use `saturating_sub` for time difference calculation

* Pass nanoseconds to client instead of seconds (using f64 in runtime is dangerous)

---------

Co-authored-by: Andrei Sandu <54316454+sandreim@users.noreply.github.com>
This commit is contained in:
Tsvetomir Dimitrov
2023-03-28 16:37:19 +03:00
committed by GitHub
parent 4903859544
commit 22f404f092
9 changed files with 153 additions and 39 deletions
+1
View File
@@ -7686,6 +7686,7 @@ name = "polkadot-runtime-metrics"
version = "0.9.39"
dependencies = [
"bs58",
"frame-benchmarking",
"parity-scale-codec",
"polkadot-primitives",
"sp-std",
+44 -20
View File
@@ -20,12 +20,16 @@
//! tracing support. This requires that the custom profiler (`TraceHandler`) to be
//! registered in substrate via a `logger_hook()`. Events emitted from runtime are
//! then captured/processed by the `TraceHandler` implementation.
//!
//! Don't add logs in this file because it gets executed before the logger is
//! initialized and they won't be delivered. Add println! statements if you need
//! to debug this code.
#![cfg(feature = "runtime-metrics")]
use codec::Decode;
use primitives::{
metric_definitions::{CounterDefinition, CounterVecDefinition},
metric_definitions::{CounterDefinition, CounterVecDefinition, HistogramDefinition},
RuntimeMetricLabelValues, RuntimeMetricOp, RuntimeMetricUpdate,
};
use std::{
@@ -33,17 +37,16 @@ use std::{
sync::{Arc, Mutex, MutexGuard},
};
use substrate_prometheus_endpoint::{
register, Counter, CounterVec, Opts, PrometheusError, Registry, U64,
register, Counter, CounterVec, Histogram, HistogramOpts, Opts, PrometheusError, Registry, U64,
};
mod parachain;
const LOG_TARGET: &'static str = "metrics::runtime";
/// Holds the registered Prometheus metric collections.
#[derive(Clone, Default)]
pub struct Metrics {
counter_vecs: Arc<Mutex<HashMap<String, CounterVec<U64>>>>,
counters: Arc<Mutex<HashMap<String, Counter<U64>>>>,
histograms: Arc<Mutex<HashMap<String, Histogram>>>,
}
/// Runtime metrics wrapper.
@@ -80,7 +83,20 @@ impl RuntimeMetricsProvider {
})
}
/// Increment a counter with labels by a value.
/// Register a histogram metric
pub fn register_histogram(&self, hist: HistogramDefinition) {
self.with_histograms_lock_held(|mut hashmap| {
hashmap.entry(hist.name.to_owned()).or_insert(register(
Histogram::with_opts(
HistogramOpts::new(hist.name, hist.description).buckets(hist.buckets.to_vec()),
)?,
&self.0,
)?);
return Ok(())
})
}
/// Increment a counter with labels by a value
pub fn inc_counter_vec_by(&self, name: &str, value: u64, labels: &RuntimeMetricLabelValues) {
self.with_counter_vecs_lock_held(|mut hashmap| {
hashmap.entry(name.to_owned()).and_modify(|counter_vec| {
@@ -101,28 +117,35 @@ impl RuntimeMetricsProvider {
})
}
/// Observe a histogram. `value` should be in `ns`.
pub fn observe_histogram(&self, name: &str, value: u128) {
self.with_histograms_lock_held(|mut hashmap| {
hashmap
.entry(name.to_owned())
.and_modify(|histogram| histogram.observe(value as f64 / 1_000_000_000.0)); // ns to sec
Ok(())
})
}
fn with_counters_lock_held<F>(&self, do_something: F)
where
F: FnOnce(MutexGuard<'_, HashMap<String, Counter<U64>>>) -> Result<(), PrometheusError>,
{
let _ = self.1.counters.lock().map(do_something).or_else(|error| {
gum::error!(target: LOG_TARGET, "Cannot acquire the counter hashmap lock: {:?}", error);
Err(error)
});
let _ = self.1.counters.lock().map(do_something).or_else(|error| Err(error));
}
fn with_counter_vecs_lock_held<F>(&self, do_something: F)
where
F: FnOnce(MutexGuard<'_, HashMap<String, CounterVec<U64>>>) -> Result<(), PrometheusError>,
{
let _ = self.1.counter_vecs.lock().map(do_something).or_else(|error| {
gum::error!(
target: LOG_TARGET,
"Cannot acquire the countervec hashmap lock: {:?}",
error
);
Err(error)
});
let _ = self.1.counter_vecs.lock().map(do_something).or_else(|error| Err(error));
}
fn with_histograms_lock_held<F>(&self, do_something: F)
where
F: FnOnce(MutexGuard<'_, HashMap<String, Histogram>>) -> Result<(), PrometheusError>,
{
let _ = self.1.histograms.lock().map(do_something).or_else(|error| Err(error));
}
}
@@ -149,8 +172,8 @@ impl sc_tracing::TraceHandler for RuntimeMetricsProvider {
Ok(update_op) => {
self.parse_metric_update(update_op);
},
Err(e) => {
gum::error!(target: LOG_TARGET, "TraceEvent decode failed: {:?}", e);
Err(_) => {
// do nothing
},
}
}
@@ -165,6 +188,8 @@ impl RuntimeMetricsProvider {
self.inc_counter_vec_by(update.metric_name(), value, labels),
RuntimeMetricOp::IncrementCounter(value) =>
self.inc_counter_by(update.metric_name(), value),
RuntimeMetricOp::ObserveHistogram(value) =>
self.observe_histogram(update.metric_name(), value),
}
}
@@ -191,7 +216,6 @@ impl RuntimeMetricsProvider {
pub fn logger_hook() -> impl FnOnce(&mut sc_cli::LoggerBuilder, &sc_service::Configuration) -> () {
|logger_builder, config| {
if config.prometheus_registry().is_none() {
gum::debug!(target: LOG_TARGET, "Prometheus registry is not configured.",);
return
}
let registry = config.prometheus_registry().cloned().unwrap();
@@ -22,7 +22,7 @@ use primitives::metric_definitions::{
PARACHAIN_CREATE_INHERENT_BITFIELDS_SIGNATURE_CHECKS,
PARACHAIN_INHERENT_DATA_BITFIELDS_PROCESSED, PARACHAIN_INHERENT_DATA_CANDIDATES_PROCESSED,
PARACHAIN_INHERENT_DATA_DISPUTE_SETS_INCLUDED, PARACHAIN_INHERENT_DATA_DISPUTE_SETS_PROCESSED,
PARACHAIN_INHERENT_DATA_WEIGHT,
PARACHAIN_INHERENT_DATA_WEIGHT, PARACHAIN_VERIFY_DISPUTE_SIGNATURE,
};
/// Register the parachain runtime metrics.
@@ -35,4 +35,5 @@ pub fn register_metrics(runtime_metrics_provider: &RuntimeMetricsProvider) {
runtime_metrics_provider.register_countervec(PARACHAIN_INHERENT_DATA_CANDIDATES_PROCESSED);
runtime_metrics_provider
.register_countervec(PARACHAIN_CREATE_INHERENT_BITFIELDS_SIGNATURE_CHECKS);
runtime_metrics_provider.register_histogram(PARACHAIN_VERIFY_DISPUTE_SIGNATURE);
}
+20
View File
@@ -27,6 +27,8 @@ pub enum RuntimeMetricOp {
IncrementCounterVec(u64, RuntimeMetricLabelValues),
/// Increment a counter metric by value.
IncrementCounter(u64),
/// Observe histogram value
ObserveHistogram(u128),
}
/// Runtime metric update event.
@@ -127,6 +129,16 @@ pub mod metric_definitions {
pub labels: &'a [&'static str],
}
/// `Histogram` metric definition
pub struct HistogramDefinition<'a> {
/// The name of the metric.
pub name: &'static str,
/// The description of the metric.
pub description: &'static str,
/// The buckets for the histogram
pub buckets: &'a [f64],
}
/// Counts parachain inherent data weights. Use `before` and `after` labels to differentiate
/// between the weight before and after filtering.
pub const PARACHAIN_INHERENT_DATA_WEIGHT: CounterVecDefinition = CounterVecDefinition {
@@ -176,4 +188,12 @@ pub mod metric_definitions {
description: "Counts the number of bitfields signature checked in `enter_inner`.",
labels: &["validity"],
};
/// Measures how much time does it take to verify a single validator signature of a dispute statement
pub const PARACHAIN_VERIFY_DISPUTE_SIGNATURE: HistogramDefinition =
HistogramDefinition {
name: "polkadot_parachain_verify_dispute_signature",
description: "How much time does it take to verify a single validator signature of a dispute statement, in seconds",
buckets: &[0.0, 0.00005, 0.00006, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 1.0],
};
}
+2 -1
View File
@@ -9,6 +9,7 @@ sp-std = { package = "sp-std", git = "https://github.com/paritytech/substrate",
sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master", default-features = false }
parity-scale-codec = { version = "3.4.0", default-features = false }
primitives = { package = "polkadot-primitives", path = "../../primitives", default-features = false }
frame-benchmarking = { git = "https://github.com/paritytech/substrate", branch = "master", default-features = false, optional = true }
bs58 = { version = "0.4.0", default-features = false, features = ["alloc"] }
@@ -21,4 +22,4 @@ std = [
"primitives/std",
"bs58/std"
]
runtime-metrics = ["sp-tracing/with-tracing"]
runtime-metrics = ["sp-tracing/with-tracing", "frame-benchmarking"]
@@ -14,17 +14,17 @@
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! This module provides an implementation for the runtime metrics types: `Counter`
//! and `CounterVec`. These types expose a Prometheus like interface and same functionality.
//! Each instance of a runtime metric is mapped to a Prometheus metric on the client side.
//! The runtime metrics must be registered with the registry in the client, otherwise
//! they will not be published.
//! This module provides an implementation for the runtime metrics types: `Counter`,
//! `CounterVec` and `Histogram`. These types expose a Prometheus like interface and
//! same functionality. Each instance of a runtime metric is mapped to a Prometheus
//! metric on the client side. The runtime metrics must be registered with the registry
//! in the client, otherwise they will not be published.
const TRACING_TARGET: &'static str = "metrics";
use parity_scale_codec::Encode;
use primitives::{
metric_definitions::{CounterDefinition, CounterVecDefinition},
metric_definitions::{CounterDefinition, CounterVecDefinition, HistogramDefinition},
RuntimeMetricLabelValues, RuntimeMetricOp, RuntimeMetricUpdate,
};
@@ -41,6 +41,10 @@ pub struct Counter {
name: &'static str,
}
pub struct Histogram {
name: &'static str,
}
/// Convenience trait implemented for all metric types.
trait MetricEmitter {
fn emit(metric_op: &RuntimeMetricUpdate) {
@@ -77,6 +81,7 @@ impl LabeledMetric {
impl MetricEmitter for LabeledMetric {}
impl MetricEmitter for Counter {}
impl MetricEmitter for Histogram {}
impl CounterVec {
/// Create a new counter as specified by `definition`. This metric needs to be registered
@@ -116,3 +121,27 @@ impl Counter {
self.inc_by(1);
}
}
impl Histogram {
/// Create a new histogram as specified by `definition`. This metric needs to be registered
/// in the client before it can be used.
pub const fn new(definition: HistogramDefinition) -> Self {
// No register op is emitted since the metric is supposed to be registered
// on the client by the time `inc()` is called.
Histogram { name: definition.name }
}
// Observe a value in the histogram
pub fn observe(&self, value: u128) {
let metric_update = RuntimeMetricUpdate {
metric_name: Vec::from(self.name),
op: RuntimeMetricOp::ObserveHistogram(value),
};
Self::emit(&metric_update);
}
}
/// Returns current time in ns
pub fn get_current_time() -> u128 {
frame_benchmarking::benchmarking::current_time()
}
@@ -18,13 +18,18 @@
//! provide a dummy implementation for the native runtime to avoid cluttering the runtime code
//! with `#[cfg(feature = "runtime-metrics")]`.
use primitives::metric_definitions::{CounterDefinition, CounterVecDefinition};
use primitives::metric_definitions::{
CounterDefinition, CounterVecDefinition, HistogramDefinition,
};
/// A dummy `Counter`.
pub struct Counter;
/// A dummy `CounterVec`.
pub struct CounterVec;
/// A dummy `Histogram`
pub struct Histogram;
/// Dummy implementation.
impl CounterVec {
/// Constructor.
@@ -52,3 +57,20 @@ impl Counter {
/// Increment counter, implementation is a `no op`.
pub fn inc(&self) {}
}
/// Dummy implementation
impl Histogram {
/// Create a new histogram as specified by `definition`. This metric needs to be registered
/// in the client before it can be used.
pub const fn new(_definition: HistogramDefinition) -> Self {
Histogram
}
// Observe a value in the histogram
pub fn observe(&self, _value: u128) {}
}
/// Dummy implementation - always 0
pub fn get_current_time() -> u128 {
0
}
+14 -6
View File
@@ -16,11 +16,14 @@
//! Runtime component for handling disputes of parachain candidates.
use crate::{configuration, initializer::SessionChangeNotification, session_info};
use crate::{
configuration, initializer::SessionChangeNotification, metrics::METRICS, session_info,
};
use bitvec::{bitvec, order::Lsb0 as BitOrderLsb0};
use frame_support::{ensure, weights::Weight};
use frame_system::pallet_prelude::*;
use parity_scale_codec::{Decode, Encode};
use polkadot_runtime_metrics::get_current_time;
use primitives::{
byzantine_threshold, supermajority_threshold, ApprovalVote, CandidateHash,
CheckedDisputeStatementSet, CheckedMultiDisputeStatementSet, CompactStatement, ConsensusLog,
@@ -1336,9 +1339,14 @@ fn check_signature(
ExplicitDisputeStatement { valid: false, candidate_hash, session }.signing_payload(),
};
if validator_signature.verify(&payload[..], &validator_public) {
Ok(())
} else {
Err(())
}
let start = get_current_time();
let res =
if validator_signature.verify(&payload[..], &validator_public) { Ok(()) } else { Err(()) };
let end = get_current_time();
METRICS.on_signature_check_complete(end.saturating_sub(start)); // ns
res
}
+12 -4
View File
@@ -16,12 +16,12 @@
//! Runtime declaration of the parachain metrics.
use polkadot_runtime_metrics::{Counter, CounterVec};
use polkadot_runtime_metrics::{Counter, CounterVec, Histogram};
use primitives::metric_definitions::{
PARACHAIN_CREATE_INHERENT_BITFIELDS_SIGNATURE_CHECKS,
PARACHAIN_INHERENT_DATA_BITFIELDS_PROCESSED, PARACHAIN_INHERENT_DATA_CANDIDATES_PROCESSED,
PARACHAIN_INHERENT_DATA_DISPUTE_SETS_INCLUDED, PARACHAIN_INHERENT_DATA_DISPUTE_SETS_PROCESSED,
PARACHAIN_INHERENT_DATA_WEIGHT,
PARACHAIN_INHERENT_DATA_WEIGHT, PARACHAIN_VERIFY_DISPUTE_SIGNATURE,
};
pub struct Metrics {
@@ -37,6 +37,9 @@ pub struct Metrics {
disputes_included: Counter,
/// Counts bitfield signature checks in `enter_inner`.
bitfields_signature_checks: CounterVec,
/// Histogram with the time spent checking a validator signature of a dispute statement
signature_timings: Histogram,
}
impl Metrics {
@@ -98,11 +101,15 @@ impl Metrics {
}
pub fn on_valid_bitfield_signature(&self) {
self.bitfields_signature_checks.with_label_values(&["valid"]).inc();
self.bitfields_signature_checks.with_label_values(&["valid"]).inc_by(1);
}
pub fn on_invalid_bitfield_signature(&self) {
self.bitfields_signature_checks.with_label_values(&["invalid"]).inc();
self.bitfields_signature_checks.with_label_values(&["invalid"]).inc_by(1);
}
pub fn on_signature_check_complete(&self, val: u128) {
self.signature_timings.observe(val);
}
}
@@ -115,4 +122,5 @@ pub const METRICS: Metrics = Metrics {
bitfields_signature_checks: CounterVec::new(
PARACHAIN_CREATE_INHERENT_BITFIELDS_SIGNATURE_CHECKS,
),
signature_timings: Histogram::new(PARACHAIN_VERIFY_DISPUTE_SIGNATURE),
};