Rework telemetry to replace the use of tracing with an object we pass around (#8143)

polkadot companion: paritytech/polkadot#2535
2026-06-15 12:41:07 +00:00 · 2021-03-11 11:05:45 +01:00
parent 7aaba0c154
commit 8031b6eacb
55 changed files with 1028 additions and 838 deletions
@@ -45,7 +45,7 @@ use sp_api::ProvideRuntimeApi;
 use sp_core::crypto::Pair;
 use sp_inherents::{InherentDataProviders, InherentData};
 use sp_timestamp::InherentError as TIError;
-use sc_telemetry::{telemetry, CONSENSUS_TRACE, CONSENSUS_DEBUG, CONSENSUS_INFO};
+use sc_telemetry::{telemetry, TelemetryHandle, CONSENSUS_TRACE, CONSENSUS_DEBUG, CONSENSUS_INFO};
 use sc_consensus_slots::{CheckedHeader, SlotCompatible, check_equivocation};
 use sp_consensus_slots::Slot;
 use sp_api::ApiExt;
@@ -129,6 +129,7 @@ pub struct AuraVerifier<C, P, CAW> {
 	inherent_data_providers: InherentDataProviders,
 	can_author_with: CAW,
 	check_for_equivocation: CheckForEquivocation,
+	telemetry: Option<TelemetryHandle>,
 }

 impl<C, P, CAW> AuraVerifier<C, P, CAW> {
@@ -137,12 +138,14 @@ impl<C, P, CAW> AuraVerifier<C, P, CAW> {
 		inherent_data_providers: InherentDataProviders,
 		can_author_with: CAW,
 		check_for_equivocation: CheckForEquivocation,
+		telemetry: Option<TelemetryHandle>,
 	) -> Self {
 		Self {
 			client,
 			inherent_data_providers,
 			can_author_with,
 			check_for_equivocation,
+			telemetry,
 			phantom: PhantomData,
 		}
 	}
@@ -197,7 +200,10 @@ impl<C, P, CAW> AuraVerifier<C, P, CAW> where
 							"halting for block {} seconds in the future",
 							diff
 						);
-						telemetry!(CONSENSUS_INFO; "aura.halting_for_future_block";
+						telemetry!(
+							self.telemetry;
+							CONSENSUS_INFO;
+							"aura.halting_for_future_block";
 							"diff" => ?diff
 						);
 						thread::sleep(Duration::from_secs(diff));
@@ -287,7 +293,12 @@ impl<B: BlockT, C, P, CAW> Verifier<B> for AuraVerifier<C, P, CAW> where
 				}

 				trace!(target: "aura", "Checked {:?}; importing.", pre_header);
-				telemetry!(CONSENSUS_TRACE; "aura.checked_and_importing"; "pre_header" => ?pre_header);
+				telemetry!(
+					self.telemetry;
+					CONSENSUS_TRACE;
+					"aura.checked_and_importing";
+					"pre_header" => ?pre_header,
+				);

 				// Look for an authorities-change log.
 				let maybe_keys = pre_header.digest()
@@ -314,8 +325,13 @@ impl<B: BlockT, C, P, CAW> Verifier<B> for AuraVerifier<C, P, CAW> where
 			}
 			CheckedHeader::Deferred(a, b) => {
 				debug!(target: "aura", "Checking {:?} failed; {:?}, {:?}.", hash, a, b);
-				telemetry!(CONSENSUS_DEBUG; "aura.header_too_far_in_future";
-					"hash" => ?hash, "a" => ?a, "b" => ?b
+				telemetry!(
+					self.telemetry;
+					CONSENSUS_DEBUG;
+					"aura.header_too_far_in_future";
+					"hash" => ?hash,
+					"a" => ?a,
+					"b" => ?b,
 				);
 				Err(format!("Header {:?} rejected: too far in the future", hash))
 			}
@@ -485,6 +501,8 @@ pub struct ImportQueueParams<'a, Block, I, C, S, CAW> {
 	pub check_for_equivocation: CheckForEquivocation,
 	/// The duration of one slot.
 	pub slot_duration: SlotDuration,
+	/// Telemetry instance used to report telemetry metrics.
+	pub telemetry: Option<TelemetryHandle>,
 }

 /// Start an import queue for the Aura consensus algorithm.
@@ -499,6 +517,7 @@ pub fn import_queue<'a, P, Block, I, C, S, CAW>(
 		can_author_with,
 		check_for_equivocation,
 		slot_duration,
+		telemetry,
 	}: ImportQueueParams<'a, Block, I, C, S, CAW>
 ) -> Result<DefaultImportQueue<Block, C>, sp_consensus::Error> where
 	Block: BlockT,
@@ -530,6 +549,7 @@ pub fn import_queue<'a, P, Block, I, C, S, CAW>(
 		inherent_data_providers,
 		can_author_with,
 		check_for_equivocation,
+		telemetry,
 	);

 	Ok(BasicQueue::new(
@@ -56,6 +56,7 @@ use sp_keystore::{SyncCryptoStorePtr, SyncCryptoStore};
 use sp_inherents::{InherentDataProviders, InherentData};
 use sp_timestamp::{TimestampInherentData, InherentType as TimestampInherent};
 use sc_consensus_slots::{SlotInfo, SlotCompatible, StorageChanges, BackoffAuthoringBlocksStrategy};
+use sc_telemetry::TelemetryHandle;
 use sp_consensus_slots::Slot;

 mod import_queue;
@@ -149,6 +150,8 @@ pub struct StartAuraParams<C, SC, I, PF, SO, BS, CAW> {
 	/// slot. However, the proposing can still take longer when there is some lenience factor applied,
 	/// because there were no blocks produced for some slots.
 	pub block_proposal_slot_portion: SlotProportion,
+	/// Telemetry instance used to report telemetry metrics.
+	pub telemetry: Option<TelemetryHandle>,
 }

 /// Start the aura worker. The returned future should be run in a futures executor.
@@ -166,6 +169,7 @@ pub fn start_aura<P, B, C, SC, PF, I, SO, CAW, BS, Error>(
 		keystore,
 		can_author_with,
 		block_proposal_slot_portion,
+		telemetry,
 	}: StartAuraParams<C, SC, I, PF, SO, BS, CAW>,
 ) -> Result<impl Future<Output = ()>, sp_consensus::Error> where
 	B: BlockT,
@@ -184,13 +188,14 @@ pub fn start_aura<P, B, C, SC, PF, I, SO, CAW, BS, Error>(
 	BS: BackoffAuthoringBlocksStrategy<NumberFor<B>> + Send + 'static,
 {
 	let worker = AuraWorker {
-		client,
+		client: client.clone(),
 		block_import: Arc::new(Mutex::new(block_import)),
 		env,
 		keystore,
 		sync_oracle: sync_oracle.clone(),
 		force_authoring,
 		backoff_authoring_blocks,
+		telemetry,
 		_key_type: PhantomData::<P>,
 		block_proposal_slot_portion,
 	};
@@ -218,6 +223,7 @@ struct AuraWorker<C, E, I, P, SO, BS> {
 	force_authoring: bool,
 	backoff_authoring_blocks: Option<BS>,
 	block_proposal_slot_portion: SlotProportion,
+	telemetry: Option<TelemetryHandle>,
 	_key_type: PhantomData<P>,
 }

@@ -371,6 +377,10 @@ where
 		}))
 	}

+	fn telemetry(&self) -> Option<TelemetryHandle> {
+		self.telemetry.clone()
+	}
+
 	fn proposing_remaining_duration(
 		&self,
 		head: &B::Header,
@@ -595,6 +605,7 @@ mod tests {
 						inherent_data_providers,
 						AlwaysCanAuthor,
 						CheckForEquivocation::Yes,
+						None,
 					)
 				},
 				PeersClient::Light(_, _) => unreachable!("No (yet) tests for light client + Aura"),
@@ -670,6 +681,7 @@ mod tests {
 				keystore,
 				can_author_with: sp_consensus::AlwaysCanAuthor,
 				block_proposal_slot_portion: SlotProportion::new(0.5),
+				telemetry: None,
 			}).expect("Starts aura"));
 		}

@@ -729,6 +741,7 @@ mod tests {
 			sync_oracle: DummyOracle.clone(),
 			force_authoring: false,
 			backoff_authoring_blocks: Some(BackoffAuthoringOnFinalizedHeadLagging::default()),
+			telemetry: None,
 			_key_type: PhantomData::<AuthorityPair>,
 			block_proposal_slot_portion: SlotProportion::new(0.5),
 		};
@@ -777,6 +790,7 @@ mod tests {
 			sync_oracle: DummyOracle.clone(),
 			force_authoring: false,
 			backoff_authoring_blocks: Option::<()>::None,
+			telemetry: None,
 			_key_type: PhantomData::<AuthorityPair>,
 			block_proposal_slot_portion: SlotProportion::new(0.5),
 		};
@@ -90,7 +90,7 @@ use sp_runtime::{
 use sp_api::{ProvideRuntimeApi, NumberFor};
 use parking_lot::Mutex;
 use sp_inherents::{InherentDataProviders, InherentData};
-use sc_telemetry::{telemetry, CONSENSUS_TRACE, CONSENSUS_DEBUG};
+use sc_telemetry::{telemetry, TelemetryHandle, CONSENSUS_TRACE, CONSENSUS_DEBUG};
 use sp_consensus::{
 	BlockImport, Environment, Proposer, BlockCheckParams,
 	ForkChoiceStrategy, BlockImportParams, BlockOrigin, Error as ConsensusError,
@@ -402,6 +402,9 @@ pub struct BabeParams<B: BlockT, C, E, I, SO, SC, CAW, BS> {
 	/// slot. However, the proposing can still take longer when there is some lenience factor applied,
 	/// because there were no blocks produced for some slots.
 	pub block_proposal_slot_portion: SlotProportion,
+
+	/// Handle use to report telemetries.
+	pub telemetry: Option<TelemetryHandle>,
 }

 /// Start the babe worker.
@@ -418,13 +421,15 @@ pub fn start_babe<B, C, SC, E, I, SO, CAW, BS, Error>(BabeParams {
 	babe_link,
 	can_author_with,
 	block_proposal_slot_portion,
+	telemetry,
 }: BabeParams<B, C, E, I, SO, SC, CAW, BS>) -> Result<
 	BabeWorker<B>,
 	sp_consensus::Error,
 > where
 	B: BlockT,
 	C: ProvideRuntimeApi<B> + ProvideCache<B> + ProvideUncles<B> + BlockchainEvents<B>
-		+ HeaderBackend<B> + HeaderMetadata<B, Error = ClientError> + Send + Sync + 'static,
+		+ HeaderBackend<B> + HeaderMetadata<B, Error = ClientError>
+		+ Send + Sync + 'static,
 	C::Api: BabeApi<B>,
 	SC: SelectChain<B> + 'static,
 	E: Environment<B, Error = Error> + Send + Sync + 'static,
@@ -453,6 +458,7 @@ pub fn start_babe<B, C, SC, E, I, SO, CAW, BS, Error>(BabeParams {
 		slot_notification_sinks: slot_notification_sinks.clone(),
 		config: config.clone(),
 		block_proposal_slot_portion,
+		telemetry,
 	};

 	register_babe_inherent_data_provider(&inherent_data_providers, config.slot_duration())?;
@@ -609,6 +615,7 @@ struct BabeSlotWorker<B: BlockT, C, E, I, SO, BS> {
 	slot_notification_sinks: SlotNotificationSinks<B>,
 	config: Config,
 	block_proposal_slot_portion: SlotProportion,
+	telemetry: Option<TelemetryHandle>,
 }

 impl<B, C, E, I, Error, SO, BS> sc_consensus_slots::SimpleSlotWorker<B>
@@ -799,6 +806,10 @@ where
 		}))
 	}

+	fn telemetry(&self) -> Option<TelemetryHandle> {
+		self.telemetry.clone()
+	}
+
 	fn proposing_remaining_duration(
 		&self,
 		parent_head: &B::Header,
@@ -947,6 +958,7 @@ pub struct BabeVerifier<Block: BlockT, Client, SelectChain, CAW> {
 	epoch_changes: SharedEpochChanges<Block, Epoch>,
 	time_source: TimeSource,
 	can_author_with: CAW,
+	telemetry: Option<TelemetryHandle>,
 }

 impl<Block, Client, SelectChain, CAW> BabeVerifier<Block, Client, SelectChain, CAW>
@@ -1174,6 +1186,7 @@ where

 				trace!(target: "babe", "Checked {:?}; importing.", pre_header);
 				telemetry!(
+					self.telemetry;
 					CONSENSUS_TRACE;
 					"babe.checked_and_importing";
 					"pre_header" => ?pre_header);
@@ -1192,7 +1205,10 @@ where
 			}
 			CheckedHeader::Deferred(a, b) => {
 				debug!(target: "babe", "Checking {:?} failed; {:?}, {:?}.", hash, a, b);
-				telemetry!(CONSENSUS_DEBUG; "babe.header_too_far_in_future";
+				telemetry!(
+					self.telemetry;
+					CONSENSUS_DEBUG;
+					"babe.header_too_far_in_future";
 					"hash" => ?hash, "a" => ?a, "b" => ?b
 				);
 				Err(Error::<Block>::TooFarInFuture(hash).into())
@@ -1599,11 +1615,13 @@ pub fn import_queue<Block: BlockT, Client, SelectChain, Inner, CAW>(
 	spawner: &impl sp_core::traits::SpawnEssentialNamed,
 	registry: Option<&Registry>,
 	can_author_with: CAW,
+	telemetry: Option<TelemetryHandle>,
 ) -> ClientResult<DefaultImportQueue<Block, Client>> where
 	Inner: BlockImport<Block, Error = ConsensusError, Transaction = sp_api::TransactionFor<Client, Block>>
 		+ Send + Sync + 'static,
-	Client: ProvideRuntimeApi<Block> + ProvideCache<Block> + Send + Sync + AuxStore + 'static,
-	Client: HeaderBackend<Block> + HeaderMetadata<Block, Error = sp_blockchain::Error>,
+	Client: ProvideRuntimeApi<Block> + ProvideCache<Block> + HeaderBackend<Block>
+		+ HeaderMetadata<Block, Error = sp_blockchain::Error> + AuxStore
+		+ Send + Sync + 'static,
 	Client::Api: BlockBuilderApi<Block> + BabeApi<Block> + ApiExt<Block>,
 	SelectChain: sp_consensus::SelectChain<Block> + 'static,
 	CAW: CanAuthorWith<Block> + Send + Sync + 'static,
@@ -1611,13 +1629,14 @@ pub fn import_queue<Block: BlockT, Client, SelectChain, Inner, CAW>(
 	register_babe_inherent_data_provider(&inherent_data_providers, babe_link.config.slot_duration)?;

 	let verifier = BabeVerifier {
-		client,
 		select_chain,
 		inherent_data_providers,
 		config: babe_link.config,
 		epoch_changes: babe_link.epoch_changes,
 		time_source: babe_link.time_source,
 		can_author_with,
+		telemetry,
+		client,
 	};

 	Ok(BasicQueue::new(
@@ -320,6 +320,7 @@ impl TestNetFactory for BabeTestNet {
 				epoch_changes: data.link.epoch_changes.clone(),
 				time_source: data.link.time_source.clone(),
 				can_author_with: AlwaysCanAuthor,
+				telemetry: None,
 			},
 			mutator: MUTATOR.with(|m| m.borrow().clone()),
 		}
@@ -432,6 +433,7 @@ fn run_one_test(
 			keystore,
 			can_author_with: sp_consensus::AlwaysCanAuthor,
 			block_proposal_slot_portion: SlotProportion::new(0.5),
+			telemetry: None,
 		}).expect("Starts babe"));
 	}
 	futures::executor::block_on(future::select(
@@ -300,6 +300,7 @@ mod tests {
 			client.clone(),
 			pool.clone(),
 			None,
+			None,
 		);
 		// this test checks that blocks are created as soon as transactions are imported into the pool.
 		let (sender, receiver) = futures::channel::oneshot::channel();
@@ -371,6 +372,7 @@ mod tests {
 			client.clone(),
 			pool.clone(),
 			None,
+			None,
 		);
 		// this test checks that blocks are created as soon as an engine command is sent over the stream.
 		let (mut sink, commands_stream) = futures::channel::mpsc::channel(1024);
@@ -446,6 +448,7 @@ mod tests {
 			client.clone(),
 			pool.clone(),
 			None,
+			None,
 		);
 		// this test checks that blocks are created as soon as an engine command is sent over the stream.
 		let (mut sink, commands_stream) = futures::channel::mpsc::channel(1024);
@@ -47,7 +47,7 @@ use sp_runtime::{
 	generic::BlockId,
 	traits::{Block as BlockT, Header, HashFor, NumberFor}
 };
-use sc_telemetry::{telemetry, CONSENSUS_DEBUG, CONSENSUS_WARN, CONSENSUS_INFO};
+use sc_telemetry::{telemetry, TelemetryHandle, CONSENSUS_DEBUG, CONSENSUS_WARN, CONSENSUS_INFO};

 /// The changes that need to applied to the storage to create the state for a block.
 ///
@@ -180,6 +180,9 @@ pub trait SimpleSlotWorker<B: BlockT> {
 	/// Returns a `Proposer` to author on top of the given block.
 	fn proposer(&mut self, block: &B::Header) -> Self::CreateProposer;

+	/// Returns a [`TelemetryHandle`] if any.
+	fn telemetry(&self) -> Option<TelemetryHandle>;
+
 	/// Remaining duration for proposing.
 	fn proposing_remaining_duration(
 		&self,
@@ -197,6 +200,7 @@ pub trait SimpleSlotWorker<B: BlockT> {
 		<Self::Proposer as Proposer<B>>::Proposal: Unpin + Send + 'static,
 	{
 		let (timestamp, slot) = (slot_info.timestamp, slot_info.slot);
+		let telemetry = self.telemetry();

 		let proposing_remaining_duration = self.proposing_remaining_duration(&chain_head, &slot_info);

@@ -219,7 +223,9 @@ pub trait SimpleSlotWorker<B: BlockT> {
 				warn!("Unable to fetch epoch data at block {:?}: {:?}", chain_head.hash(), err);

 				telemetry!(
-					CONSENSUS_WARN; "slots.unable_fetching_authorities";
+					telemetry;
+					CONSENSUS_WARN;
+					"slots.unable_fetching_authorities";
 					"slot" => ?chain_head.hash(),
 					"err" => ?err,
 				);
@@ -238,6 +244,7 @@ pub trait SimpleSlotWorker<B: BlockT> {
 		{
 			debug!(target: self.logging_target(), "Skipping proposal slot. Waiting for the network.");
 			telemetry!(
+				telemetry;
 				CONSENSUS_DEBUG;
 				"slots.skipping_proposal_slot";
 				"authorities_len" => authorities_len,
@@ -263,24 +270,29 @@ pub trait SimpleSlotWorker<B: BlockT> {
 		);

 		telemetry!(
+			telemetry;
 			CONSENSUS_DEBUG;
 			"slots.starting_authorship";
 			"slot_num" => *slot,
 			"timestamp" => timestamp,
 		);

-		let awaiting_proposer = self.proposer(&chain_head).map_err(move |err| {
-			warn!("Unable to author block in slot {:?}: {:?}", slot, err);
+		let awaiting_proposer = {
+			let telemetry = telemetry.clone();
+			self.proposer(&chain_head).map_err(move |err| {
+				warn!("Unable to author block in slot {:?}: {:?}", slot, err);

-			telemetry!(
-				CONSENSUS_WARN;
-				"slots.unable_authoring_block";
-				"slot" => *slot,
-				"err" => ?err
-			);
+				telemetry!(
+					telemetry;
+					CONSENSUS_WARN;
+					"slots.unable_authoring_block";
+					"slot" => *slot,
+					"err" => ?err
+				);

-			err
-		});
+				err
+			})
+		};

 		let logs = self.pre_digest_data(slot, &claim);

@@ -295,7 +307,8 @@ pub trait SimpleSlotWorker<B: BlockT> {
 			proposing_remaining_duration.mul_f32(0.98),
 		).map_err(|e| sp_consensus::Error::ClientImport(format!("{:?}", e))));

-		let proposal_work =
+		let proposal_work = {
+			let telemetry = telemetry.clone();
 			futures::future::select(proposing, proposing_remaining).map(move |v| match v {
 				Either::Left((b, _)) => b.map(|b| (b, claim)),
 				Either::Right(_) => {
@@ -307,6 +320,7 @@ pub trait SimpleSlotWorker<B: BlockT> {
 					#[cfg(build_type="debug")]
 					info!("👉 Recompile your node in `--release` mode to mitigate this problem.");
 					telemetry!(
+						telemetry;
 						CONSENSUS_INFO;
 						"slots.discarding_proposal_took_too_long";
 						"slot" => *slot,
@@ -314,7 +328,8 @@ pub trait SimpleSlotWorker<B: BlockT> {

 					Err(sp_consensus::Error::ClientImport("Timeout in the Slots proposer".into()))
 				},
-			});
+			})
+		};

 		let block_import_params_maker = self.block_import_params();
 		let block_import = self.block_import();
@@ -343,7 +358,10 @@ pub trait SimpleSlotWorker<B: BlockT> {
 				header_hash,
 			);

-			telemetry!(CONSENSUS_INFO; "slots.pre_sealed_block";
+			telemetry!(
+				telemetry;
+				CONSENSUS_INFO;
+				"slots.pre_sealed_block";
 				"header_num" => ?header_num,
 				"hash_now" => ?block_import_params.post_hash(),
 				"hash_previously" => ?header_hash,
@@ -359,7 +377,9 @@ pub trait SimpleSlotWorker<B: BlockT> {
 				);

 				telemetry!(
-					CONSENSUS_WARN; "slots.err_with_block_built_on";
+					telemetry;
+					CONSENSUS_WARN;
+					"slots.err_with_block_built_on";
 					"hash" => ?parent_hash,
 					"err" => ?err,
 				);
@@ -449,7 +469,8 @@ where
 				Either::Right(future::ready(Ok(())))
 			} else {
 				Either::Left(
-					worker.on_slot(chain_head, slot_info).then(|_| future::ready(Ok(())))
+					worker.on_slot(chain_head, slot_info)
+						.then(|_| future::ready(Ok(())))
 				)
 			}
 		}).then(|res| {