From fb0258e7351d2090d35c80726068fa01955b1e1b Mon Sep 17 00:00:00 2001 From: Sergei Shulepov Date: Tue, 18 Jan 2022 17:44:11 +0100 Subject: [PATCH] pvf-checker-subsystem: metrics (#4741) * pvf-checker-subsystem: metrics This commits adds metrics to the PVF pre-checking subsystem. * Apply suggestions from code review Co-authored-by: sandreim <54316454+sandreim@users.noreply.github.com> Co-authored-by: sandreim <54316454+sandreim@users.noreply.github.com> --- .../core/pvf-checker/src/interest_view.rs | 14 +- polkadot/node/core/pvf-checker/src/lib.rs | 48 +++++-- polkadot/node/core/pvf-checker/src/metrics.rs | 130 ++++++++++++++++++ polkadot/node/core/pvf-checker/src/tests.rs | 2 +- polkadot/node/service/src/overseer.rs | 6 +- 5 files changed, 186 insertions(+), 14 deletions(-) create mode 100644 polkadot/node/core/pvf-checker/src/metrics.rs diff --git a/polkadot/node/core/pvf-checker/src/interest_view.rs b/polkadot/node/core/pvf-checker/src/interest_view.rs index 528af6fd22..6ca07fe805 100644 --- a/polkadot/node/core/pvf-checker/src/interest_view.rs +++ b/polkadot/node/core/pvf-checker/src/interest_view.rs @@ -70,6 +70,14 @@ impl PvfData { } } +/// The result of [`InterestView::on_leaves_update`]. +pub struct OnLeavesUpdateOutcome { + /// The list of PVFs that we first seen in the activated block. + pub newcomers: Vec, + /// The number of PVFs that were removed from the view. + pub left_num: usize, +} + /// A structure that keeps track of relevant PVFs and judgements about them. A relevant PVF is one /// that resides in at least a single active leaf. #[derive(Debug)] @@ -87,7 +95,7 @@ impl InterestView { &mut self, activated: Option<(Hash, Vec)>, deactivated: &[Hash], - ) -> Vec { + ) -> OnLeavesUpdateOutcome { let mut newcomers = Vec::new(); if let Some((leaf, pending_pvfs)) = activated { @@ -105,19 +113,21 @@ impl InterestView { self.active_leaves.entry(leaf).or_default().extend(pending_pvfs); } + let mut left_num = 0; for leaf in deactivated { let pvfs = self.active_leaves.remove(leaf); for pvf in pvfs.into_iter().flatten() { if let btree_map::Entry::Occupied(mut o) = self.pvfs.entry(pvf) { let now_empty = o.get_mut().remove_origin(leaf); if now_empty { + left_num += 1; o.remove(); } } } } - newcomers + OnLeavesUpdateOutcome { newcomers, left_num } } /// Handles a new judgement for the given `pvf`. diff --git a/polkadot/node/core/pvf-checker/src/lib.rs b/polkadot/node/core/pvf-checker/src/lib.rs index 6256ebbc37..34c7a2bbd0 100644 --- a/polkadot/node/core/pvf-checker/src/lib.rs +++ b/polkadot/node/core/pvf-checker/src/lib.rs @@ -36,22 +36,27 @@ use std::collections::HashSet; const LOG_TARGET: &str = "parachain::pvf-checker"; mod interest_view; +mod metrics; mod runtime_api; #[cfg(test)] mod tests; -use self::interest_view::{InterestView, Judgement}; +use self::{ + interest_view::{InterestView, Judgement}, + metrics::Metrics, +}; /// PVF pre-checking subsystem. pub struct PvfCheckerSubsystem { enabled: bool, keystore: SyncCryptoStorePtr, + metrics: Metrics, } impl PvfCheckerSubsystem { - pub fn new(enabled: bool, keystore: SyncCryptoStorePtr) -> Self { - PvfCheckerSubsystem { enabled, keystore } + pub fn new(enabled: bool, keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self { + PvfCheckerSubsystem { enabled, keystore, metrics } } } @@ -62,7 +67,7 @@ where { fn start(self, ctx: Context) -> SpawnedSubsystem { if self.enabled { - let future = run(ctx, self.keystore) + let future = run(ctx, self.keystore, self.metrics) .map_err(|e| SubsystemError::with_origin("pvf-checker", e)) .boxed(); @@ -118,7 +123,11 @@ struct State { FuturesUnordered>>, } -async fn run(mut ctx: Context, keystore: SyncCryptoStorePtr) -> SubsystemResult<()> +async fn run( + mut ctx: Context, + keystore: SyncCryptoStorePtr, + metrics: Metrics, +) -> SubsystemResult<()> where Context: SubsystemContext, Context: overseer::SubsystemContext, @@ -141,6 +150,7 @@ where &mut state, &mut sender, &keystore, + &metrics, outcome, validation_code_hash, ).await; @@ -154,6 +164,7 @@ where &mut state, &mut sender, &keystore, + &metrics, from_overseer?, ) .await; @@ -170,6 +181,7 @@ async fn handle_pvf_check( state: &mut State, sender: &mut impl SubsystemSender, keystore: &SyncCryptoStorePtr, + metrics: &Metrics, outcome: PreCheckOutcome, validation_code_hash: ValidationCodeHash, ) { @@ -218,6 +230,7 @@ async fn handle_pvf_check( keystore, &mut state.voted, credentials, + metrics, recent_block.1, session_index, judgement, @@ -236,6 +249,7 @@ async fn handle_from_overseer( state: &mut State, sender: &mut impl SubsystemSender, keystore: &SyncCryptoStorePtr, + metrics: &Metrics, from_overseer: FromOverseer, ) -> Option { match from_overseer { @@ -248,7 +262,7 @@ async fn handle_from_overseer( None }, FromOverseer::Signal(OverseerSignal::ActiveLeaves(update)) => { - handle_leaves_update(state, sender, keystore, update).await; + handle_leaves_update(state, sender, keystore, metrics, update).await; None }, FromOverseer::Communication { msg } => match msg { @@ -261,6 +275,7 @@ async fn handle_leaves_update( state: &mut State, sender: &mut impl SubsystemSender, keystore: &SyncCryptoStorePtr, + metrics: &Metrics, update: ActiveLeavesUpdate, ) { if let Some(activated) = update.activated { @@ -280,11 +295,13 @@ async fn handle_leaves_update( state.recent_block = Some(recent_block); // Update the PVF view and get the previously unseen PVFs and start working on them. - let newcomers = state + let outcome = state .view .on_leaves_update(Some((activated.hash, pending_pvfs)), &update.deactivated); - for newcomer in newcomers { - initiate_precheck(state, sender, recent_block_hash, newcomer).await; + metrics.on_pvf_observed(outcome.newcomers.len()); + metrics.on_pvf_left(outcome.left_num); + for newcomer in outcome.newcomers { + initiate_precheck(state, sender, recent_block_hash, newcomer, metrics).await; } if let Some((new_session_index, credentials)) = new_session_index { @@ -305,6 +322,7 @@ async fn handle_leaves_update( keystore, &mut state.voted, credentials, + metrics, recent_block_hash, new_session_index, judgement, @@ -429,6 +447,7 @@ async fn sign_and_submit_pvf_check_statement( keystore: &SyncCryptoStorePtr, voted: &mut HashSet, credentials: &SigningCredentials, + metrics: &Metrics, relay_parent: Hash, session_index: SessionIndex, judgement: Judgement, @@ -442,6 +461,8 @@ async fn sign_and_submit_pvf_check_statement( judgement, ); + metrics.on_vote_submission_started(); + if voted.contains(&validation_code_hash) { tracing::trace!( target: LOG_TARGET, @@ -449,6 +470,7 @@ async fn sign_and_submit_pvf_check_statement( ?validation_code_hash, "already voted for this validation code", ); + metrics.on_vote_duplicate(); return } @@ -492,7 +514,9 @@ async fn sign_and_submit_pvf_check_statement( }; match runtime_api::submit_pvf_check_statement(sender, relay_parent, stmt, signature).await { - Ok(()) => (), + Ok(()) => { + metrics.on_vote_submitted(); + }, Err(e) => { tracing::warn!( target: LOG_TARGET, @@ -514,6 +538,7 @@ async fn initiate_precheck( sender: &mut impl SubsystemSender, relay_parent: Hash, validation_code_hash: ValidationCodeHash, + metrics: &Metrics, ) { tracing::debug!( target: LOG_TARGET, @@ -528,7 +553,10 @@ async fn initiate_precheck( CandidateValidationMessage::PreCheck(relay_parent, validation_code_hash, tx).into(), ) .await; + + let timer = metrics.time_pre_check_judgement(); state.currently_checking.push(Box::pin(async move { + let _timer = timer; match rx.await { Ok(accept) => Some((accept, validation_code_hash)), Err(oneshot::Canceled) => { diff --git a/polkadot/node/core/pvf-checker/src/metrics.rs b/polkadot/node/core/pvf-checker/src/metrics.rs new file mode 100644 index 0000000000..7241e9f14d --- /dev/null +++ b/polkadot/node/core/pvf-checker/src/metrics.rs @@ -0,0 +1,130 @@ +// Copyright 2022 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! Metrics definitions for the PVF pre-checking subsystem. + +use polkadot_node_subsystem_util::metrics::{self, prometheus}; + +#[derive(Clone)] +struct MetricsInner { + pre_check_judgement: prometheus::Histogram, + votes_total: prometheus::Counter, + votes_started: prometheus::Counter, + votes_duplicate: prometheus::Counter, + pvfs_observed: prometheus::Counter, + pvfs_left: prometheus::Counter, +} + +#[derive(Default, Clone)] +pub struct Metrics(Option); + +impl Metrics { + /// Time between sending the pre-check request to receiving the response. + pub(crate) fn time_pre_check_judgement( + &self, + ) -> Option { + self.0.as_ref().map(|metrics| metrics.pre_check_judgement.start_timer()) + } + + /// Called when a PVF vote/statement is submitted. + pub(crate) fn on_vote_submitted(&self) { + if let Some(metrics) = &self.0 { + metrics.votes_total.inc(); + } + } + + /// Called when a PVF vote/statement is started submission. + pub(crate) fn on_vote_submission_started(&self) { + if let Some(metrics) = &self.0 { + metrics.votes_started.inc(); + } + } + + /// Called when the vote is a duplicate. + pub(crate) fn on_vote_duplicate(&self) { + if let Some(metrics) = &self.0 { + metrics.votes_duplicate.inc(); + } + } + + /// Called when a new PVF is observed. + pub(crate) fn on_pvf_observed(&self, num: usize) { + if let Some(metrics) = &self.0 { + metrics.pvfs_observed.inc_by(num as u64); + } + } + + /// Called when a PVF left the view. + pub(crate) fn on_pvf_left(&self, num: usize) { + if let Some(metrics) = &self.0 { + metrics.pvfs_left.inc_by(num as u64); + } + } +} + +impl metrics::Metrics for Metrics { + fn try_register(registry: &prometheus::Registry) -> Result { + let metrics = MetricsInner { + pre_check_judgement: prometheus::register( + prometheus::Histogram::with_opts( + prometheus::HistogramOpts::new( + "polkadot_pvf_precheck_judgement", + "Time between sending the pre-check request to receiving the response.", + ) + .buckets(vec![0.1, 0.5, 1.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0]), + )?, + registry, + )?, + votes_total: prometheus::register( + prometheus::Counter::new( + "polkadot_pvf_precheck_votes_total", + "The total number of votes submitted.", + )?, + registry, + )?, + votes_started: prometheus::register( + prometheus::Counter::new( + "polkadot_pvf_precheck_votes_started", + "The number of votes that are pending submission", + )?, + registry, + )?, + votes_duplicate: prometheus::register( + prometheus::Counter::new( + "polkadot_pvf_precheck_votes_duplicate", + "The number of votes that are submitted more than once for the same code within\ +the same session.", + )?, + registry, + )?, + pvfs_observed: prometheus::register( + prometheus::Counter::new( + "polkadot_pvf_precheck_pvfs_observed", + "The number of new PVFs observed.", + )?, + registry, + )?, + pvfs_left: prometheus::register( + prometheus::Counter::new( + "polkadot_pvf_precheck_pvfs_left", + "The number of PVFs removed from the view.", + )?, + registry, + )?, + }; + Ok(Self(Some(metrics))) + } +} diff --git a/polkadot/node/core/pvf-checker/src/tests.rs b/polkadot/node/core/pvf-checker/src/tests.rs index ee9792cf0d..80a7601ead 100644 --- a/polkadot/node/core/pvf-checker/src/tests.rs +++ b/polkadot/node/core/pvf-checker/src/tests.rs @@ -372,7 +372,7 @@ fn test_harness(test: impl FnOnce(TestState, VirtualOverseer) -> BoxFuture<'stat ) .expect("Generating keys for our node failed"); - let subsystem_task = crate::run(ctx, keystore).map(|x| x.unwrap()); + let subsystem_task = crate::run(ctx, keystore, crate::Metrics::default()).map(|x| x.unwrap()); let test_state = TestState::new(); let test_task = test(test_state, handle); diff --git a/polkadot/node/service/src/overseer.rs b/polkadot/node/service/src/overseer.rs index 58be883fa1..2b673e894c 100644 --- a/polkadot/node/service/src/overseer.rs +++ b/polkadot/node/service/src/overseer.rs @@ -213,7 +213,11 @@ where Metrics::register(registry)?, // candidate-validation metrics Metrics::register(registry)?, // validation host metrics )) - .pvf_checker(PvfCheckerSubsystem::new(pvf_checker_enabled, keystore.clone())) + .pvf_checker(PvfCheckerSubsystem::new( + pvf_checker_enabled, + keystore.clone(), + Metrics::register(registry)?, + )) .chain_api(ChainApiSubsystem::new(runtime_client.clone(), Metrics::register(registry)?)) .collation_generation(CollationGenerationSubsystem::new(Metrics::register(registry)?)) .collator_protocol({