Files
pezkuwi-subxt/polkadot/node/core/pvf/src/metrics.rs
T
Oliver Tale-Yazdi 342d720573 Use same fmt and clippy configs as in Substrate (#7611)
* Use same rustfmt.toml as Substrate

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* format format file

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Format with new config

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Add Substrate Clippy config

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Print Clippy version in CI

Otherwise its difficult to reproduce locally.

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Make fmt happy

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Update node/core/pvf/src/error.rs

Co-authored-by: Tsvetomir Dimitrov <tsvetomir@parity.io>

* Update node/core/pvf/src/error.rs

Co-authored-by: Tsvetomir Dimitrov <tsvetomir@parity.io>

---------

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>
Co-authored-by: Tsvetomir Dimitrov <tsvetomir@parity.io>
2023-08-14 14:29:29 +00:00

320 lines
9.5 KiB
Rust

// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Prometheus metrics related to the validation host.
use polkadot_node_core_pvf_common::prepare::MemoryStats;
use polkadot_node_metrics::metrics::{self, prometheus};
/// Validation host metrics.
#[derive(Default, Clone)]
pub struct Metrics(Option<MetricsInner>);
impl Metrics {
/// Returns a handle to submit prepare workers metrics.
pub(crate) fn prepare_worker(&'_ self) -> WorkerRelatedMetrics<'_> {
WorkerRelatedMetrics { metrics: self, flavor: WorkerFlavor::Prepare }
}
/// Returns a handle to submit execute workers metrics.
pub(crate) fn execute_worker(&'_ self) -> WorkerRelatedMetrics<'_> {
WorkerRelatedMetrics { metrics: self, flavor: WorkerFlavor::Execute }
}
/// When preparation pipeline had a new item enqueued.
pub(crate) fn prepare_enqueued(&self) {
if let Some(metrics) = &self.0 {
metrics.prepare_enqueued.inc();
}
}
/// When preparation pipeline concluded working on an item.
pub(crate) fn prepare_concluded(&self) {
if let Some(metrics) = &self.0 {
metrics.prepare_concluded.inc();
}
}
/// When execution pipeline had a new item enqueued.
pub(crate) fn execute_enqueued(&self) {
if let Some(metrics) = &self.0 {
metrics.execute_enqueued.inc();
}
}
/// When execution pipeline finished executing a request.
pub(crate) fn execute_finished(&self) {
if let Some(metrics) = &self.0 {
metrics.execute_finished.inc();
}
}
/// Time between sending preparation request to a worker to having the response.
pub(crate) fn time_preparation(
&self,
) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
self.0.as_ref().map(|metrics| metrics.preparation_time.start_timer())
}
/// Time between sending execution request to a worker to having the response.
pub(crate) fn time_execution(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
self.0.as_ref().map(|metrics| metrics.execution_time.start_timer())
}
/// Observe memory stats for preparation.
#[allow(unused_variables)]
pub(crate) fn observe_preparation_memory_metrics(&self, memory_stats: MemoryStats) {
if let Some(metrics) = &self.0 {
#[cfg(target_os = "linux")]
if let Some(max_rss) = memory_stats.max_rss {
metrics.preparation_max_rss.observe(max_rss as f64);
}
#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
if let Some(tracker_stats) = memory_stats.memory_tracker_stats {
// We convert these stats from B to KB to match the unit of `ru_maxrss` from
// `getrusage`.
let max_resident_kb = (tracker_stats.resident / 1024) as f64;
let max_allocated_kb = (tracker_stats.allocated / 1024) as f64;
metrics.preparation_max_resident.observe(max_resident_kb);
metrics.preparation_max_allocated.observe(max_allocated_kb);
}
}
}
}
#[derive(Clone)]
struct MetricsInner {
worker_spawning: prometheus::CounterVec<prometheus::U64>,
worker_spawned: prometheus::CounterVec<prometheus::U64>,
worker_retired: prometheus::CounterVec<prometheus::U64>,
prepare_enqueued: prometheus::Counter<prometheus::U64>,
prepare_concluded: prometheus::Counter<prometheus::U64>,
execute_enqueued: prometheus::Counter<prometheus::U64>,
execute_finished: prometheus::Counter<prometheus::U64>,
preparation_time: prometheus::Histogram,
execution_time: prometheus::Histogram,
#[cfg(target_os = "linux")]
preparation_max_rss: prometheus::Histogram,
#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
preparation_max_allocated: prometheus::Histogram,
#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
preparation_max_resident: prometheus::Histogram,
}
impl metrics::Metrics for Metrics {
fn try_register(registry: &prometheus::Registry) -> Result<Self, prometheus::PrometheusError> {
let inner = MetricsInner {
worker_spawning: prometheus::register(
prometheus::CounterVec::new(
prometheus::Opts::new(
"polkadot_pvf_worker_spawning",
"The total number of workers began to spawn",
),
&["flavor"],
)?,
registry,
)?,
worker_spawned: prometheus::register(
prometheus::CounterVec::new(
prometheus::Opts::new(
"polkadot_pvf_worker_spawned",
"The total number of workers spawned successfully",
),
&["flavor"],
)?,
registry,
)?,
worker_retired: prometheus::register(
prometheus::CounterVec::new(
prometheus::Opts::new(
"polkadot_pvf_worker_retired",
"The total number of workers retired, either killed by the host or died on duty",
),
&["flavor"],
)?,
registry,
)?,
prepare_enqueued: prometheus::register(
prometheus::Counter::new(
"polkadot_pvf_prepare_enqueued",
"The total number of jobs enqueued into the preparation pipeline"
)?,
registry,
)?,
prepare_concluded: prometheus::register(
prometheus::Counter::new(
"polkadot_pvf_prepare_concluded",
"The total number of jobs concluded in the preparation pipeline"
)?,
registry,
)?,
execute_enqueued: prometheus::register(
prometheus::Counter::new(
"polkadot_pvf_execute_enqueued",
"The total number of jobs enqueued into the execution pipeline"
)?,
registry,
)?,
execute_finished: prometheus::register(
prometheus::Counter::new(
"polkadot_pvf_execute_finished",
"The total number of jobs done in the execution pipeline"
)?,
registry,
)?,
preparation_time: prometheus::register(
prometheus::Histogram::with_opts(
prometheus::HistogramOpts::new(
"polkadot_pvf_preparation_time",
"Time spent in preparing PVF artifacts in seconds",
)
.buckets(vec![
// This is synchronized with the `DEFAULT_PRECHECK_PREPARATION_TIMEOUT=60s`
// and `DEFAULT_LENIENT_PREPARATION_TIMEOUT=360s` constants found in
// node/core/candidate-validation/src/lib.rs
0.1,
0.5,
1.0,
2.0,
3.0,
10.0,
20.0,
30.0,
60.0,
120.0,
240.0,
360.0,
480.0,
]),
)?,
registry,
)?,
execution_time: prometheus::register(
prometheus::Histogram::with_opts(
prometheus::HistogramOpts::new(
"polkadot_pvf_execution_time",
"Time spent in executing PVFs",
).buckets(vec![
// This is synchronized with `DEFAULT_APPROVAL_EXECUTION_TIMEOUT` and
// `DEFAULT_BACKING_EXECUTION_TIMEOUT` constants in
// node/core/candidate-validation/src/lib.rs
0.01,
0.025,
0.05,
0.1,
0.25,
0.5,
1.0,
2.0,
3.0,
4.0,
5.0,
6.0,
8.0,
10.0,
12.0,
]),
)?,
registry,
)?,
#[cfg(target_os = "linux")]
preparation_max_rss: prometheus::register(
prometheus::Histogram::with_opts(
prometheus::HistogramOpts::new(
"polkadot_pvf_preparation_max_rss",
"ru_maxrss (maximum resident set size) observed for preparation (in kilobytes)",
).buckets(
prometheus::exponential_buckets(8192.0, 2.0, 10)
.expect("arguments are always valid; qed"),
),
)?,
registry,
)?,
#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
preparation_max_resident: prometheus::register(
prometheus::Histogram::with_opts(
prometheus::HistogramOpts::new(
"polkadot_pvf_preparation_max_resident",
"max resident memory observed for preparation (in kilobytes)",
).buckets(
prometheus::exponential_buckets(8192.0, 2.0, 10)
.expect("arguments are always valid; qed"),
),
)?,
registry,
)?,
#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
preparation_max_allocated: prometheus::register(
prometheus::Histogram::with_opts(
prometheus::HistogramOpts::new(
"polkadot_pvf_preparation_max_allocated",
"max allocated memory observed for preparation (in kilobytes)",
).buckets(
prometheus::exponential_buckets(8192.0, 2.0, 10)
.expect("arguments are always valid; qed"),
),
)?,
registry,
)?,
};
Ok(Metrics(Some(inner)))
}
}
enum WorkerFlavor {
Prepare,
Execute,
}
impl WorkerFlavor {
fn as_label(&self) -> &'static str {
match *self {
WorkerFlavor::Prepare => "prepare",
WorkerFlavor::Execute => "execute",
}
}
}
pub(crate) struct WorkerRelatedMetrics<'a> {
metrics: &'a Metrics,
flavor: WorkerFlavor,
}
impl<'a> WorkerRelatedMetrics<'a> {
/// When the spawning of a worker started.
pub(crate) fn on_begin_spawn(&self) {
if let Some(metrics) = &self.metrics.0 {
metrics.worker_spawning.with_label_values(&[self.flavor.as_label()]).inc();
}
}
/// When the worker successfully spawned.
pub(crate) fn on_spawned(&self) {
if let Some(metrics) = &self.metrics.0 {
metrics.worker_spawned.with_label_values(&[self.flavor.as_label()]).inc();
}
}
/// When the worker was killed or died.
pub(crate) fn on_retired(&self) {
if let Some(metrics) = &self.metrics.0 {
metrics.worker_retired.with_label_values(&[self.flavor.as_label()]).inc();
}
}
}