Rework telemetry to replace the use of tracing with an object we pass around (#8143)

polkadot companion: paritytech/polkadot#2535
This commit is contained in:
Cecile Tonglet
2021-03-11 11:05:45 +01:00
committed by GitHub
parent 7aaba0c154
commit 8031b6eacb
55 changed files with 1028 additions and 838 deletions
@@ -45,7 +45,7 @@ use sp_api::ProvideRuntimeApi;
use sp_core::crypto::Pair;
use sp_inherents::{InherentDataProviders, InherentData};
use sp_timestamp::InherentError as TIError;
use sc_telemetry::{telemetry, CONSENSUS_TRACE, CONSENSUS_DEBUG, CONSENSUS_INFO};
use sc_telemetry::{telemetry, TelemetryHandle, CONSENSUS_TRACE, CONSENSUS_DEBUG, CONSENSUS_INFO};
use sc_consensus_slots::{CheckedHeader, SlotCompatible, check_equivocation};
use sp_consensus_slots::Slot;
use sp_api::ApiExt;
@@ -129,6 +129,7 @@ pub struct AuraVerifier<C, P, CAW> {
inherent_data_providers: InherentDataProviders,
can_author_with: CAW,
check_for_equivocation: CheckForEquivocation,
telemetry: Option<TelemetryHandle>,
}
impl<C, P, CAW> AuraVerifier<C, P, CAW> {
@@ -137,12 +138,14 @@ impl<C, P, CAW> AuraVerifier<C, P, CAW> {
inherent_data_providers: InherentDataProviders,
can_author_with: CAW,
check_for_equivocation: CheckForEquivocation,
telemetry: Option<TelemetryHandle>,
) -> Self {
Self {
client,
inherent_data_providers,
can_author_with,
check_for_equivocation,
telemetry,
phantom: PhantomData,
}
}
@@ -197,7 +200,10 @@ impl<C, P, CAW> AuraVerifier<C, P, CAW> where
"halting for block {} seconds in the future",
diff
);
telemetry!(CONSENSUS_INFO; "aura.halting_for_future_block";
telemetry!(
self.telemetry;
CONSENSUS_INFO;
"aura.halting_for_future_block";
"diff" => ?diff
);
thread::sleep(Duration::from_secs(diff));
@@ -287,7 +293,12 @@ impl<B: BlockT, C, P, CAW> Verifier<B> for AuraVerifier<C, P, CAW> where
}
trace!(target: "aura", "Checked {:?}; importing.", pre_header);
telemetry!(CONSENSUS_TRACE; "aura.checked_and_importing"; "pre_header" => ?pre_header);
telemetry!(
self.telemetry;
CONSENSUS_TRACE;
"aura.checked_and_importing";
"pre_header" => ?pre_header,
);
// Look for an authorities-change log.
let maybe_keys = pre_header.digest()
@@ -314,8 +325,13 @@ impl<B: BlockT, C, P, CAW> Verifier<B> for AuraVerifier<C, P, CAW> where
}
CheckedHeader::Deferred(a, b) => {
debug!(target: "aura", "Checking {:?} failed; {:?}, {:?}.", hash, a, b);
telemetry!(CONSENSUS_DEBUG; "aura.header_too_far_in_future";
"hash" => ?hash, "a" => ?a, "b" => ?b
telemetry!(
self.telemetry;
CONSENSUS_DEBUG;
"aura.header_too_far_in_future";
"hash" => ?hash,
"a" => ?a,
"b" => ?b,
);
Err(format!("Header {:?} rejected: too far in the future", hash))
}
@@ -485,6 +501,8 @@ pub struct ImportQueueParams<'a, Block, I, C, S, CAW> {
pub check_for_equivocation: CheckForEquivocation,
/// The duration of one slot.
pub slot_duration: SlotDuration,
/// Telemetry instance used to report telemetry metrics.
pub telemetry: Option<TelemetryHandle>,
}
/// Start an import queue for the Aura consensus algorithm.
@@ -499,6 +517,7 @@ pub fn import_queue<'a, P, Block, I, C, S, CAW>(
can_author_with,
check_for_equivocation,
slot_duration,
telemetry,
}: ImportQueueParams<'a, Block, I, C, S, CAW>
) -> Result<DefaultImportQueue<Block, C>, sp_consensus::Error> where
Block: BlockT,
@@ -530,6 +549,7 @@ pub fn import_queue<'a, P, Block, I, C, S, CAW>(
inherent_data_providers,
can_author_with,
check_for_equivocation,
telemetry,
);
Ok(BasicQueue::new(
+15 -1
View File
@@ -56,6 +56,7 @@ use sp_keystore::{SyncCryptoStorePtr, SyncCryptoStore};
use sp_inherents::{InherentDataProviders, InherentData};
use sp_timestamp::{TimestampInherentData, InherentType as TimestampInherent};
use sc_consensus_slots::{SlotInfo, SlotCompatible, StorageChanges, BackoffAuthoringBlocksStrategy};
use sc_telemetry::TelemetryHandle;
use sp_consensus_slots::Slot;
mod import_queue;
@@ -149,6 +150,8 @@ pub struct StartAuraParams<C, SC, I, PF, SO, BS, CAW> {
/// slot. However, the proposing can still take longer when there is some lenience factor applied,
/// because there were no blocks produced for some slots.
pub block_proposal_slot_portion: SlotProportion,
/// Telemetry instance used to report telemetry metrics.
pub telemetry: Option<TelemetryHandle>,
}
/// Start the aura worker. The returned future should be run in a futures executor.
@@ -166,6 +169,7 @@ pub fn start_aura<P, B, C, SC, PF, I, SO, CAW, BS, Error>(
keystore,
can_author_with,
block_proposal_slot_portion,
telemetry,
}: StartAuraParams<C, SC, I, PF, SO, BS, CAW>,
) -> Result<impl Future<Output = ()>, sp_consensus::Error> where
B: BlockT,
@@ -184,13 +188,14 @@ pub fn start_aura<P, B, C, SC, PF, I, SO, CAW, BS, Error>(
BS: BackoffAuthoringBlocksStrategy<NumberFor<B>> + Send + 'static,
{
let worker = AuraWorker {
client,
client: client.clone(),
block_import: Arc::new(Mutex::new(block_import)),
env,
keystore,
sync_oracle: sync_oracle.clone(),
force_authoring,
backoff_authoring_blocks,
telemetry,
_key_type: PhantomData::<P>,
block_proposal_slot_portion,
};
@@ -218,6 +223,7 @@ struct AuraWorker<C, E, I, P, SO, BS> {
force_authoring: bool,
backoff_authoring_blocks: Option<BS>,
block_proposal_slot_portion: SlotProportion,
telemetry: Option<TelemetryHandle>,
_key_type: PhantomData<P>,
}
@@ -371,6 +377,10 @@ where
}))
}
fn telemetry(&self) -> Option<TelemetryHandle> {
self.telemetry.clone()
}
fn proposing_remaining_duration(
&self,
head: &B::Header,
@@ -595,6 +605,7 @@ mod tests {
inherent_data_providers,
AlwaysCanAuthor,
CheckForEquivocation::Yes,
None,
)
},
PeersClient::Light(_, _) => unreachable!("No (yet) tests for light client + Aura"),
@@ -670,6 +681,7 @@ mod tests {
keystore,
can_author_with: sp_consensus::AlwaysCanAuthor,
block_proposal_slot_portion: SlotProportion::new(0.5),
telemetry: None,
}).expect("Starts aura"));
}
@@ -729,6 +741,7 @@ mod tests {
sync_oracle: DummyOracle.clone(),
force_authoring: false,
backoff_authoring_blocks: Some(BackoffAuthoringOnFinalizedHeadLagging::default()),
telemetry: None,
_key_type: PhantomData::<AuthorityPair>,
block_proposal_slot_portion: SlotProportion::new(0.5),
};
@@ -777,6 +790,7 @@ mod tests {
sync_oracle: DummyOracle.clone(),
force_authoring: false,
backoff_authoring_blocks: Option::<()>::None,
telemetry: None,
_key_type: PhantomData::<AuthorityPair>,
block_proposal_slot_portion: SlotProportion::new(0.5),
};
+25 -6
View File
@@ -90,7 +90,7 @@ use sp_runtime::{
use sp_api::{ProvideRuntimeApi, NumberFor};
use parking_lot::Mutex;
use sp_inherents::{InherentDataProviders, InherentData};
use sc_telemetry::{telemetry, CONSENSUS_TRACE, CONSENSUS_DEBUG};
use sc_telemetry::{telemetry, TelemetryHandle, CONSENSUS_TRACE, CONSENSUS_DEBUG};
use sp_consensus::{
BlockImport, Environment, Proposer, BlockCheckParams,
ForkChoiceStrategy, BlockImportParams, BlockOrigin, Error as ConsensusError,
@@ -402,6 +402,9 @@ pub struct BabeParams<B: BlockT, C, E, I, SO, SC, CAW, BS> {
/// slot. However, the proposing can still take longer when there is some lenience factor applied,
/// because there were no blocks produced for some slots.
pub block_proposal_slot_portion: SlotProportion,
/// Handle use to report telemetries.
pub telemetry: Option<TelemetryHandle>,
}
/// Start the babe worker.
@@ -418,13 +421,15 @@ pub fn start_babe<B, C, SC, E, I, SO, CAW, BS, Error>(BabeParams {
babe_link,
can_author_with,
block_proposal_slot_portion,
telemetry,
}: BabeParams<B, C, E, I, SO, SC, CAW, BS>) -> Result<
BabeWorker<B>,
sp_consensus::Error,
> where
B: BlockT,
C: ProvideRuntimeApi<B> + ProvideCache<B> + ProvideUncles<B> + BlockchainEvents<B>
+ HeaderBackend<B> + HeaderMetadata<B, Error = ClientError> + Send + Sync + 'static,
+ HeaderBackend<B> + HeaderMetadata<B, Error = ClientError>
+ Send + Sync + 'static,
C::Api: BabeApi<B>,
SC: SelectChain<B> + 'static,
E: Environment<B, Error = Error> + Send + Sync + 'static,
@@ -453,6 +458,7 @@ pub fn start_babe<B, C, SC, E, I, SO, CAW, BS, Error>(BabeParams {
slot_notification_sinks: slot_notification_sinks.clone(),
config: config.clone(),
block_proposal_slot_portion,
telemetry,
};
register_babe_inherent_data_provider(&inherent_data_providers, config.slot_duration())?;
@@ -609,6 +615,7 @@ struct BabeSlotWorker<B: BlockT, C, E, I, SO, BS> {
slot_notification_sinks: SlotNotificationSinks<B>,
config: Config,
block_proposal_slot_portion: SlotProportion,
telemetry: Option<TelemetryHandle>,
}
impl<B, C, E, I, Error, SO, BS> sc_consensus_slots::SimpleSlotWorker<B>
@@ -799,6 +806,10 @@ where
}))
}
fn telemetry(&self) -> Option<TelemetryHandle> {
self.telemetry.clone()
}
fn proposing_remaining_duration(
&self,
parent_head: &B::Header,
@@ -947,6 +958,7 @@ pub struct BabeVerifier<Block: BlockT, Client, SelectChain, CAW> {
epoch_changes: SharedEpochChanges<Block, Epoch>,
time_source: TimeSource,
can_author_with: CAW,
telemetry: Option<TelemetryHandle>,
}
impl<Block, Client, SelectChain, CAW> BabeVerifier<Block, Client, SelectChain, CAW>
@@ -1174,6 +1186,7 @@ where
trace!(target: "babe", "Checked {:?}; importing.", pre_header);
telemetry!(
self.telemetry;
CONSENSUS_TRACE;
"babe.checked_and_importing";
"pre_header" => ?pre_header);
@@ -1192,7 +1205,10 @@ where
}
CheckedHeader::Deferred(a, b) => {
debug!(target: "babe", "Checking {:?} failed; {:?}, {:?}.", hash, a, b);
telemetry!(CONSENSUS_DEBUG; "babe.header_too_far_in_future";
telemetry!(
self.telemetry;
CONSENSUS_DEBUG;
"babe.header_too_far_in_future";
"hash" => ?hash, "a" => ?a, "b" => ?b
);
Err(Error::<Block>::TooFarInFuture(hash).into())
@@ -1599,11 +1615,13 @@ pub fn import_queue<Block: BlockT, Client, SelectChain, Inner, CAW>(
spawner: &impl sp_core::traits::SpawnEssentialNamed,
registry: Option<&Registry>,
can_author_with: CAW,
telemetry: Option<TelemetryHandle>,
) -> ClientResult<DefaultImportQueue<Block, Client>> where
Inner: BlockImport<Block, Error = ConsensusError, Transaction = sp_api::TransactionFor<Client, Block>>
+ Send + Sync + 'static,
Client: ProvideRuntimeApi<Block> + ProvideCache<Block> + Send + Sync + AuxStore + 'static,
Client: HeaderBackend<Block> + HeaderMetadata<Block, Error = sp_blockchain::Error>,
Client: ProvideRuntimeApi<Block> + ProvideCache<Block> + HeaderBackend<Block>
+ HeaderMetadata<Block, Error = sp_blockchain::Error> + AuxStore
+ Send + Sync + 'static,
Client::Api: BlockBuilderApi<Block> + BabeApi<Block> + ApiExt<Block>,
SelectChain: sp_consensus::SelectChain<Block> + 'static,
CAW: CanAuthorWith<Block> + Send + Sync + 'static,
@@ -1611,13 +1629,14 @@ pub fn import_queue<Block: BlockT, Client, SelectChain, Inner, CAW>(
register_babe_inherent_data_provider(&inherent_data_providers, babe_link.config.slot_duration)?;
let verifier = BabeVerifier {
client,
select_chain,
inherent_data_providers,
config: babe_link.config,
epoch_changes: babe_link.epoch_changes,
time_source: babe_link.time_source,
can_author_with,
telemetry,
client,
};
Ok(BasicQueue::new(
@@ -320,6 +320,7 @@ impl TestNetFactory for BabeTestNet {
epoch_changes: data.link.epoch_changes.clone(),
time_source: data.link.time_source.clone(),
can_author_with: AlwaysCanAuthor,
telemetry: None,
},
mutator: MUTATOR.with(|m| m.borrow().clone()),
}
@@ -432,6 +433,7 @@ fn run_one_test(
keystore,
can_author_with: sp_consensus::AlwaysCanAuthor,
block_proposal_slot_portion: SlotProportion::new(0.5),
telemetry: None,
}).expect("Starts babe"));
}
futures::executor::block_on(future::select(
@@ -300,6 +300,7 @@ mod tests {
client.clone(),
pool.clone(),
None,
None,
);
// this test checks that blocks are created as soon as transactions are imported into the pool.
let (sender, receiver) = futures::channel::oneshot::channel();
@@ -371,6 +372,7 @@ mod tests {
client.clone(),
pool.clone(),
None,
None,
);
// this test checks that blocks are created as soon as an engine command is sent over the stream.
let (mut sink, commands_stream) = futures::channel::mpsc::channel(1024);
@@ -446,6 +448,7 @@ mod tests {
client.clone(),
pool.clone(),
None,
None,
);
// this test checks that blocks are created as soon as an engine command is sent over the stream.
let (mut sink, commands_stream) = futures::channel::mpsc::channel(1024);
+38 -17
View File
@@ -47,7 +47,7 @@ use sp_runtime::{
generic::BlockId,
traits::{Block as BlockT, Header, HashFor, NumberFor}
};
use sc_telemetry::{telemetry, CONSENSUS_DEBUG, CONSENSUS_WARN, CONSENSUS_INFO};
use sc_telemetry::{telemetry, TelemetryHandle, CONSENSUS_DEBUG, CONSENSUS_WARN, CONSENSUS_INFO};
/// The changes that need to applied to the storage to create the state for a block.
///
@@ -180,6 +180,9 @@ pub trait SimpleSlotWorker<B: BlockT> {
/// Returns a `Proposer` to author on top of the given block.
fn proposer(&mut self, block: &B::Header) -> Self::CreateProposer;
/// Returns a [`TelemetryHandle`] if any.
fn telemetry(&self) -> Option<TelemetryHandle>;
/// Remaining duration for proposing.
fn proposing_remaining_duration(
&self,
@@ -197,6 +200,7 @@ pub trait SimpleSlotWorker<B: BlockT> {
<Self::Proposer as Proposer<B>>::Proposal: Unpin + Send + 'static,
{
let (timestamp, slot) = (slot_info.timestamp, slot_info.slot);
let telemetry = self.telemetry();
let proposing_remaining_duration = self.proposing_remaining_duration(&chain_head, &slot_info);
@@ -219,7 +223,9 @@ pub trait SimpleSlotWorker<B: BlockT> {
warn!("Unable to fetch epoch data at block {:?}: {:?}", chain_head.hash(), err);
telemetry!(
CONSENSUS_WARN; "slots.unable_fetching_authorities";
telemetry;
CONSENSUS_WARN;
"slots.unable_fetching_authorities";
"slot" => ?chain_head.hash(),
"err" => ?err,
);
@@ -238,6 +244,7 @@ pub trait SimpleSlotWorker<B: BlockT> {
{
debug!(target: self.logging_target(), "Skipping proposal slot. Waiting for the network.");
telemetry!(
telemetry;
CONSENSUS_DEBUG;
"slots.skipping_proposal_slot";
"authorities_len" => authorities_len,
@@ -263,24 +270,29 @@ pub trait SimpleSlotWorker<B: BlockT> {
);
telemetry!(
telemetry;
CONSENSUS_DEBUG;
"slots.starting_authorship";
"slot_num" => *slot,
"timestamp" => timestamp,
);
let awaiting_proposer = self.proposer(&chain_head).map_err(move |err| {
warn!("Unable to author block in slot {:?}: {:?}", slot, err);
let awaiting_proposer = {
let telemetry = telemetry.clone();
self.proposer(&chain_head).map_err(move |err| {
warn!("Unable to author block in slot {:?}: {:?}", slot, err);
telemetry!(
CONSENSUS_WARN;
"slots.unable_authoring_block";
"slot" => *slot,
"err" => ?err
);
telemetry!(
telemetry;
CONSENSUS_WARN;
"slots.unable_authoring_block";
"slot" => *slot,
"err" => ?err
);
err
});
err
})
};
let logs = self.pre_digest_data(slot, &claim);
@@ -295,7 +307,8 @@ pub trait SimpleSlotWorker<B: BlockT> {
proposing_remaining_duration.mul_f32(0.98),
).map_err(|e| sp_consensus::Error::ClientImport(format!("{:?}", e))));
let proposal_work =
let proposal_work = {
let telemetry = telemetry.clone();
futures::future::select(proposing, proposing_remaining).map(move |v| match v {
Either::Left((b, _)) => b.map(|b| (b, claim)),
Either::Right(_) => {
@@ -307,6 +320,7 @@ pub trait SimpleSlotWorker<B: BlockT> {
#[cfg(build_type="debug")]
info!("👉 Recompile your node in `--release` mode to mitigate this problem.");
telemetry!(
telemetry;
CONSENSUS_INFO;
"slots.discarding_proposal_took_too_long";
"slot" => *slot,
@@ -314,7 +328,8 @@ pub trait SimpleSlotWorker<B: BlockT> {
Err(sp_consensus::Error::ClientImport("Timeout in the Slots proposer".into()))
},
});
})
};
let block_import_params_maker = self.block_import_params();
let block_import = self.block_import();
@@ -343,7 +358,10 @@ pub trait SimpleSlotWorker<B: BlockT> {
header_hash,
);
telemetry!(CONSENSUS_INFO; "slots.pre_sealed_block";
telemetry!(
telemetry;
CONSENSUS_INFO;
"slots.pre_sealed_block";
"header_num" => ?header_num,
"hash_now" => ?block_import_params.post_hash(),
"hash_previously" => ?header_hash,
@@ -359,7 +377,9 @@ pub trait SimpleSlotWorker<B: BlockT> {
);
telemetry!(
CONSENSUS_WARN; "slots.err_with_block_built_on";
telemetry;
CONSENSUS_WARN;
"slots.err_with_block_built_on";
"hash" => ?parent_hash,
"err" => ?err,
);
@@ -449,7 +469,8 @@ where
Either::Right(future::ready(Ok(())))
} else {
Either::Left(
worker.on_slot(chain_head, slot_info).then(|_| future::ready(Ok(())))
worker.on_slot(chain_head, slot_info)
.then(|_| future::ready(Ok(())))
)
}
}).then(|res| {