diff --git a/polkadot/node/network/collator-protocol/src/collator_side/mod.rs b/polkadot/node/network/collator-protocol/src/collator_side/mod.rs index 8cea56d846..bb972598ab 100644 --- a/polkadot/node/network/collator-protocol/src/collator_side/mod.rs +++ b/polkadot/node/network/collator-protocol/src/collator_side/mod.rs @@ -96,6 +96,16 @@ impl Metrics { fn time_process_msg(&self) -> Option { self.0.as_ref().map(|metrics| metrics.process_msg.start_timer()) } + + /// Provide a timer for `distribute_collation` which observes on drop. + fn time_collation_distribution( + &self, + label: &'static str, + ) -> Option { + self.0.as_ref().map(|metrics| { + metrics.collation_distribution_time.with_label_values(&[label]).start_timer() + }) + } } #[derive(Clone)] @@ -104,6 +114,7 @@ struct MetricsInner { collations_sent: prometheus::Counter, collations_send_requested: prometheus::Counter, process_msg: prometheus::Histogram, + collation_distribution_time: prometheus::HistogramVec, } impl metrics::Metrics for Metrics { @@ -133,10 +144,30 @@ impl metrics::Metrics for Metrics { registry, )?, process_msg: prometheus::register( - prometheus::Histogram::with_opts(prometheus::HistogramOpts::new( - "polkadot_parachain_collator_protocol_collator_process_msg", - "Time spent within `collator_protocol_collator::process_msg`", - ))?, + prometheus::Histogram::with_opts( + prometheus::HistogramOpts::new( + "polkadot_parachain_collator_protocol_collator_process_msg", + "Time spent within `collator_protocol_collator::process_msg`", + ) + .buckets(vec![ + 0.001, 0.002, 0.005, 0.01, 0.025, 0.05, 0.1, 0.15, 0.25, 0.35, 0.5, 0.75, + 1.0, + ]), + )?, + registry, + )?, + collation_distribution_time: prometheus::register( + prometheus::HistogramVec::new( + prometheus::HistogramOpts::new( + "polkadot_parachain_collator_protocol_collator_distribution_time", + "Time spent within `collator_protocol_collator::distribute_collation`", + ) + .buckets(vec![ + 0.001, 0.002, 0.005, 0.01, 0.025, 0.05, 0.1, 0.15, 0.25, 0.35, 0.5, 0.75, + 1.0, + ]), + &["state"], + )?, registry, )?, }; @@ -356,6 +387,11 @@ where // We have already seen collation for this relay parent. if state.collations.contains_key(&relay_parent) { + gum::debug!( + target: LOG_TARGET, + ?relay_parent, + "Already seen collation for this relay parent", + ); return Ok(()) } @@ -610,8 +646,6 @@ where { use CollatorProtocolMessage::*; - let _timer = state.metrics.time_process_msg(); - match msg { CollateOn(id) => { state.collating_on = Some(id); @@ -634,6 +668,7 @@ where ); }, Some(id) => { + let _ = state.metrics.time_collation_distribution("distribute"); distribute_collation(ctx, runtime, state, id, receipt, pov, result_sender) .await?; }, @@ -653,6 +688,8 @@ where ); }, NetworkBridgeUpdateV1(event) => { + // We should count only this shoulder in the histogram, as other shoulders are just introducing noise + let _ = state.metrics.time_process_msg(); if let Err(e) = handle_network_msg(ctx, runtime, state, event).await { gum::warn!( target: LOG_TARGET, @@ -773,9 +810,16 @@ where target: LOG_TARGET, ?statement, ?origin, - "received a `CollationSeconded`", + "received a valid `CollationSeconded`", ); let _ = sender.send(CollationSecondedSignal { statement, relay_parent }); + } else { + gum::warn!( + target: LOG_TARGET, + ?statement, + ?origin, + "received an unexpected `CollationSeconded`: unknown statement", + ); } } }, @@ -836,6 +880,8 @@ where waiting.waiting.push_back(req); } else { waiting.collation_fetch_active = true; + // Obtain a timer for sending collation + let _ = state.metrics.time_collation_distribution("send"); send_collation(state, req, receipt, pov).await; } },