Add Prometheus timers to the subsystems (#1923)

* reexport prometheus-super for ease of use of other subsystems
* add some prometheus timers for collation generation subsystem
* add timing metrics to av-store
* add metrics to candidate backing
* add timing metric to bitfield signing
* add timing metrics to candidate selection
* add timing metrics to candidate-validation
* add timing metrics to chain-api
* add timing metrics to provisioner
* add timing metrics to runtime-api
* add timing metrics to availability-distribution
* add timing metrics to bitfield-distribution
* add timing metrics to collator protocol: collator side
* add timing metrics to collator protocol: validator side
* fix candidate validation test failures
* add timing metrics to pov distribution
* add timing metrics to statement-distribution
* use substrate_prometheus_endpoint prometheus reexport instead of prometheus_super
* don't include JOB_DELAY in bitfield-signing metrics
* give adder-collator ability to easily export its genesis-state and validation code
* wip: adder-collator pushbutton script
* don't attempt to register the adder-collator automatically

  Instead, get these values with

  ```sh
  target/release/adder-collator export-genesis-state
  target/release/adder-collator export-genesis-wasm
  ```

  And then register the parachain on https://polkadot.js.org/apps/?rpc=ws%3A%2F%2F127.0.0.1%3A9944#/explorer

  To collect prometheus data, after running the script, create `prometheus.yml` per the instructions at https://www.notion.so/paritytechnologies/Setting-up-Prometheus-locally-835cb3a9df7541a781c381006252b5ff and then run:

  ```sh
  docker run -v `pwd`/prometheus.yml:/etc/prometheus/prometheus.yml:z --network host prom/prometheus
  ```

  Demonstrates that data makes it across to prometheus, though it is likely to be useful in the future to tweak the buckets.
* Update parachain/test-parachains/adder/collator/src/cli.rs

  Co-authored-by: Andronik Ordian <write@reusable.software>
* use the grandpa-pause parameter
* skip metrics in tracing instrumentation
* remove unnecessary grandpa_pause cli param

Co-authored-by: Andronik Ordian <write@reusable.software>
2026-06-14 16:51:03 +00:00 · 2020-11-20 15:04:51 +01:00
parent e49989971d
commit 0a5bc82529
23 changed files with 1199 additions and 87 deletions
@@ -183,6 +183,8 @@ impl BitfieldDistribution {
 					}
 				}
 				FromOverseer::Signal(OverseerSignal::ActiveLeaves(ActiveLeavesUpdate { activated, deactivated })) => {
+					let _timer = self.metrics.time_active_leaves_update();
+
 					for relay_parent in activated {
 						tracing::trace!(target: LOG_TARGET, relay_parent = %relay_parent, "activated");
 						// query basic system parameters once
@@ -257,6 +259,8 @@ async fn handle_bitfield_distribution<Context>(
 where
 	Context: SubsystemContext<Message = BitfieldDistributionMessage>,
 {
+	let _timer = metrics.time_handle_bitfield_distribution();
+
 	// Ignore anything the overseer did not tell this subsystem to work on
 	let mut job_data = state.per_relay_parent.get_mut(&relay_parent);
 	let job_data: &mut _ = if let Some(ref mut job_data) = job_data {
@@ -461,6 +465,8 @@ async fn handle_network_msg<Context>(
 where
 	Context: SubsystemContext<Message = BitfieldDistributionMessage>,
 {
+	let _timer = metrics.time_handle_network_msg();
+
 	match bridge_message {
 		NetworkBridgeEvent::PeerConnected(peerid, _role) => {
 			// insert if none already present
@@ -659,6 +665,9 @@ where
 struct MetricsInner {
 	gossipped_own_availability_bitfields: prometheus::Counter<prometheus::U64>,
 	received_availability_bitfields: prometheus::Counter<prometheus::U64>,
+	active_leaves_update: prometheus::Histogram, // timed while handling an `ActiveLeaves` overseer signal
+	handle_bitfield_distribution: prometheus::Histogram, // timed for each call to `handle_bitfield_distribution`
+	handle_network_msg: prometheus::Histogram, // timed for each call to `handle_network_msg`
 }

 /// Bitfield Distribution metrics.
@@ -677,6 +686,21 @@ impl Metrics {
 			metrics.received_availability_bitfields.inc();
 		}
 	}
+
+	/// Provide a timer for the `active_leaves_update` histogram which observes on drop; yields `None` (nothing is timed) when `self.0` holds no metrics.
+	fn time_active_leaves_update(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
+		self.0.as_ref().map(|metrics| metrics.active_leaves_update.start_timer())
+	}
+
+	/// Provide a timer for the `handle_bitfield_distribution` histogram which observes on drop; yields `None` (nothing is timed) when `self.0` holds no metrics.
+	fn time_handle_bitfield_distribution(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
+		self.0.as_ref().map(|metrics| metrics.handle_bitfield_distribution.start_timer())
+	}
+
+	/// Provide a timer for the `handle_network_msg` histogram which observes on drop; yields `None` (nothing is timed) when `self.0` holds no metrics.
+	fn time_handle_network_msg(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
+		self.0.as_ref().map(|metrics| metrics.handle_network_msg.start_timer())
+	}
 }

 impl metrics::Metrics for Metrics {
@@ -696,6 +720,33 @@ impl metrics::Metrics for Metrics {
 				)?,
 				registry,
 			)?,
+			active_leaves_update: prometheus::register(
+				prometheus::Histogram::with_opts(
+					prometheus::HistogramOpts::new(
+						"parachain_bitfield_distribution_active_leaves_update",
+						"Time spent within `bitfield_distribution::active_leaves_update`",
+					)
+				)?,
+				registry,
+			)?,
+			handle_bitfield_distribution: prometheus::register(
+				prometheus::Histogram::with_opts(
+					prometheus::HistogramOpts::new(
+						"parachain_bitfield_distribution_handle_bitfield_distribution",
+						"Time spent within `bitfield_distribution::handle_bitfield_distribution`",
+					)
+				)?,
+				registry,
+			)?,
+			handle_network_msg: prometheus::register(
+				prometheus::Histogram::with_opts(
+					prometheus::HistogramOpts::new(
+						"parachain_bitfield_distribution_handle_network_msg",
+						"Time spent within `bitfield_distribution::handle_network_msg`",
+					)
+				)?,
+				registry,
+			)?,
 		};
 		Ok(Metrics(Some(metrics)))
 	}