// Copyright (C) Parity Technologies (UK) Ltd. // This file is part of Pezkuwi. // Pezkuwi is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // Pezkuwi is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // You should have received a copy of the GNU General Public License // along with Pezkuwi. If not, see . //! Prometheus metrics related to the validation host. use pezkuwi_node_core_pvf_common::prepare::MemoryStats; use pezkuwi_node_metrics::metrics::{self, prometheus}; use pezkuwi_node_subsystem::messages::PvfExecKind; /// Validation host metrics. #[derive(Default, Clone)] pub struct Metrics(Option); impl Metrics { /// Returns a handle to submit prepare workers metrics. pub(crate) fn prepare_worker(&'_ self) -> WorkerRelatedMetrics<'_> { WorkerRelatedMetrics { metrics: self, flavor: WorkerFlavor::Prepare } } /// Returns a handle to submit execute workers metrics. pub(crate) fn execute_worker(&'_ self) -> WorkerRelatedMetrics<'_> { WorkerRelatedMetrics { metrics: self, flavor: WorkerFlavor::Execute } } /// When preparation pipeline had a new item enqueued. pub(crate) fn prepare_enqueued(&self) { if let Some(metrics) = &self.0 { metrics.prepare_enqueued.inc(); } } /// When preparation pipeline concluded working on an item. pub(crate) fn prepare_concluded(&self) { if let Some(metrics) = &self.0 { metrics.prepare_concluded.inc(); } } /// When execution pipeline had a new item enqueued. pub(crate) fn execute_enqueued(&self) { if let Some(metrics) = &self.0 { metrics.execute_enqueued.inc(); } } /// When execution pipeline finished executing a request. pub(crate) fn execute_finished(&self) { if let Some(metrics) = &self.0 { metrics.execute_finished.inc(); } } /// Time between sending preparation request to a worker to having the response. pub(crate) fn time_preparation( &self, ) -> Option { self.0.as_ref().map(|metrics| metrics.preparation_time.start_timer()) } /// Time between sending execution request to a worker to having the response. pub(crate) fn time_execution(&self) -> Option { self.0.as_ref().map(|metrics| metrics.execution_time.start_timer()) } pub(crate) fn observe_execution_queued_time(&self, queued_for_millis: u32) { self.0.as_ref().map(|metrics| { metrics.execution_queued_time.observe(queued_for_millis as f64 / 1000 as f64) }); } /// Observe memory stats for preparation. #[allow(unused_variables)] pub(crate) fn observe_preparation_memory_metrics(&self, memory_stats: MemoryStats) { if let Some(metrics) = &self.0 { #[cfg(target_os = "linux")] if let Some(max_rss) = memory_stats.max_rss { metrics.preparation_max_rss.observe(max_rss as f64); } #[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] if let Some(tracker_stats) = memory_stats.memory_tracker_stats { // We convert these stats from B to KB to match the unit of `ru_maxrss` from // `getrusage`. let max_resident_kb = (tracker_stats.resident / 1024) as f64; let max_allocated_kb = (tracker_stats.allocated / 1024) as f64; metrics.preparation_max_resident.observe(max_resident_kb); metrics.preparation_max_allocated.observe(max_allocated_kb); } metrics .preparation_peak_tracked_allocation .observe((memory_stats.peak_tracked_alloc / 1024) as f64); } } pub(crate) fn observe_code_size(&self, code_size: usize) { if let Some(metrics) = &self.0 { metrics.code_size.observe(code_size as f64); } } pub(crate) fn observe_pov_size(&self, pov_size: usize, compressed: bool) { if let Some(metrics) = &self.0 { metrics .pov_size .with_label_values(&[if compressed { "true" } else { "false" }]) .observe(pov_size as f64); } } /// When preparation pipeline concluded working on an item. pub(crate) fn on_execute_kind(&self, kind: PvfExecKind) { if let Some(metrics) = &self.0 { metrics.exec_kind_selected.with_label_values(&[kind.as_str()]).inc(); } } } #[derive(Clone)] struct MetricsInner { worker_spawning: prometheus::CounterVec, worker_spawned: prometheus::CounterVec, worker_retired: prometheus::CounterVec, prepare_enqueued: prometheus::Counter, prepare_concluded: prometheus::Counter, execute_enqueued: prometheus::Counter, execute_finished: prometheus::Counter, preparation_time: prometheus::Histogram, execution_time: prometheus::Histogram, execution_queued_time: prometheus::Histogram, #[cfg(target_os = "linux")] preparation_max_rss: prometheus::Histogram, // Max. allocated memory, tracked by Jemallocator, polling-based #[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] preparation_max_allocated: prometheus::Histogram, // Max. resident memory, tracked by Jemallocator, polling-based #[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] preparation_max_resident: prometheus::Histogram, // Peak allocation value, tracked by tracking-allocator preparation_peak_tracked_allocation: prometheus::Histogram, pov_size: prometheus::HistogramVec, code_size: prometheus::Histogram, exec_kind_selected: prometheus::CounterVec, } impl metrics::Metrics for Metrics { fn try_register(registry: &prometheus::Registry) -> Result { let inner = MetricsInner { worker_spawning: prometheus::register( prometheus::CounterVec::new( prometheus::Opts::new( "pezkuwi_pvf_worker_spawning", "The total number of workers began to spawn", ), &["flavor"], )?, registry, )?, worker_spawned: prometheus::register( prometheus::CounterVec::new( prometheus::Opts::new( "pezkuwi_pvf_worker_spawned", "The total number of workers spawned successfully", ), &["flavor"], )?, registry, )?, worker_retired: prometheus::register( prometheus::CounterVec::new( prometheus::Opts::new( "pezkuwi_pvf_worker_retired", "The total number of workers retired, either killed by the host or died on duty", ), &["flavor"], )?, registry, )?, prepare_enqueued: prometheus::register( prometheus::Counter::new( "pezkuwi_pvf_prepare_enqueued", "The total number of jobs enqueued into the preparation pipeline" )?, registry, )?, prepare_concluded: prometheus::register( prometheus::Counter::new( "pezkuwi_pvf_prepare_concluded", "The total number of jobs concluded in the preparation pipeline" )?, registry, )?, execute_enqueued: prometheus::register( prometheus::Counter::new( "pezkuwi_pvf_execute_enqueued", "The total number of jobs enqueued into the execution pipeline" )?, registry, )?, execute_finished: prometheus::register( prometheus::Counter::new( "pezkuwi_pvf_execute_finished", "The total number of jobs done in the execution pipeline" )?, registry, )?, preparation_time: prometheus::register( prometheus::Histogram::with_opts( prometheus::HistogramOpts::new( "pezkuwi_pvf_preparation_time", "Time spent in preparing PVF artifacts in seconds", ) .buckets(vec![ // This is synchronized with the `DEFAULT_PRECHECK_PREPARATION_TIMEOUT=60s` // and `DEFAULT_LENIENT_PREPARATION_TIMEOUT=360s` constants found in // node/core/candidate-validation/src/lib.rs 0.1, 0.5, 1.0, 2.0, 3.0, 10.0, 20.0, 30.0, 60.0, 120.0, 240.0, 360.0, 480.0, ]), )?, registry, )?, execution_time: prometheus::register( prometheus::Histogram::with_opts( prometheus::HistogramOpts::new( "pezkuwi_pvf_execution_time", "Time spent in executing PVFs", ).buckets(vec![ // This is synchronized with `DEFAULT_APPROVAL_EXECUTION_TIMEOUT` and // `DEFAULT_BACKING_EXECUTION_TIMEOUT` constants in // node/core/candidate-validation/src/lib.rs 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 10.0, 12.0, ]), )?, registry, )?, execution_queued_time: prometheus::register( prometheus::Histogram::with_opts( prometheus::HistogramOpts::new( "pezkuwi_pvf_execution_queued_time", "Time spent in queue waiting for PVFs execution job to be assigned", ).buckets(vec![ 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 12.0, 24.0, 48.0, ]), )?, registry, )?, #[cfg(target_os = "linux")] preparation_max_rss: prometheus::register( prometheus::Histogram::with_opts( prometheus::HistogramOpts::new( "pezkuwi_pvf_preparation_max_rss", "ru_maxrss (maximum resident set size) observed for preparation (in kilobytes)", ).buckets( prometheus::exponential_buckets(8192.0, 2.0, 10) .expect("arguments are always valid; qed"), ), )?, registry, )?, #[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] preparation_max_resident: prometheus::register( prometheus::Histogram::with_opts( prometheus::HistogramOpts::new( "pezkuwi_pvf_preparation_max_resident", "max resident memory observed for preparation (in kilobytes)", ).buckets( prometheus::exponential_buckets(8192.0, 2.0, 10) .expect("arguments are always valid; qed"), ), )?, registry, )?, #[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] preparation_max_allocated: prometheus::register( prometheus::Histogram::with_opts( prometheus::HistogramOpts::new( "pezkuwi_pvf_preparation_max_allocated", "max allocated memory observed for preparation (in kilobytes)", ).buckets( prometheus::exponential_buckets(8192.0, 2.0, 10) .expect("arguments are always valid; qed"), ), )?, registry, )?, preparation_peak_tracked_allocation: prometheus::register( prometheus::Histogram::with_opts( prometheus::HistogramOpts::new( "pezkuwi_pvf_preparation_peak_tracked_allocation", "peak allocation observed for preparation (in kilobytes)", ).buckets( prometheus::exponential_buckets(8192.0, 2.0, 10) .expect("arguments are always valid; qed"), ), )?, registry, )?, // The following metrics was moved here from the candidate valiidation subsystem. // Names are kept to avoid breaking dashboards and stuff. pov_size: prometheus::register( prometheus::HistogramVec::new( prometheus::HistogramOpts::new( "pezkuwi_teyrchain_candidate_validation_pov_size", "The compressed and decompressed size of the proof of validity of a candidate", ) .buckets( prometheus::exponential_buckets(16384.0, 2.0, 10) .expect("arguments are always valid; qed"), ), &["compressed"], )?, registry, )?, code_size: prometheus::register( prometheus::Histogram::with_opts( prometheus::HistogramOpts::new( "pezkuwi_teyrchain_candidate_validation_code_size", "The size of the decompressed WASM validation blob used for checking a candidate", ) .buckets( prometheus::exponential_buckets(16384.0, 2.0, 10) .expect("arguments are always valid; qed"), ), )?, registry, )?, exec_kind_selected: prometheus::register( prometheus::CounterVec::new( prometheus::Opts::new( "pezkuwi_pvf_exec_kind_selected", "The total number of selected execute kinds", ), &["priority"], )?, registry, )?, }; Ok(Metrics(Some(inner))) } } enum WorkerFlavor { Prepare, Execute, } impl WorkerFlavor { fn as_label(&self) -> &'static str { match *self { WorkerFlavor::Prepare => "prepare", WorkerFlavor::Execute => "execute", } } } pub(crate) struct WorkerRelatedMetrics<'a> { metrics: &'a Metrics, flavor: WorkerFlavor, } impl<'a> WorkerRelatedMetrics<'a> { /// When the spawning of a worker started. pub(crate) fn on_begin_spawn(&self) { if let Some(metrics) = &self.metrics.0 { metrics.worker_spawning.with_label_values(&[self.flavor.as_label()]).inc(); } } /// When the worker successfully spawned. pub(crate) fn on_spawned(&self) { if let Some(metrics) = &self.metrics.0 { metrics.worker_spawned.with_label_values(&[self.flavor.as_label()]).inc(); } } /// When the worker was killed or died. pub(crate) fn on_retired(&self) { if let Some(metrics) = &self.metrics.0 { metrics.worker_retired.with_label_values(&[self.flavor.as_label()]).inc(); } } }