Add tracing support to node (#1940)

* drop in tracing to replace log

* add structured logging to trace messages

* add structured logging to debug messages

* add structured logging to info messages

* add structured logging to warn messages

* add structured logging to error messages

* normalize spacing and Display vs Debug

* add instrumentation to the various 'fn run'

* use explicit tracing module throughout

* fix availability distribution test

* don't double-print errors

* remove further redundancy from logs

* fix test errors

* fix more test errors

* remove unused kv_log_macro

* fix unused variable

* add tracing spans to collation generation

* add tracing spans to av-store

* add tracing spans to backing

* add tracing spans to bitfield-signing

* add tracing spans to candidate-selection

* add tracing spans to candidate-validation

* add tracing spans to chain-api

* add tracing spans to provisioner

* add tracing spans to runtime-api

* add tracing spans to availability-distribution

* add tracing spans to bitfield-distribution

* add tracing spans to network-bridge

* add tracing spans to collator-protocol

* add tracing spans to pov-distribution

* add tracing spans to statement-distribution

* add tracing spans to overseer

* cleanup
This commit is contained in:
Peter Goodspeed-Niklaus
2020-11-20 12:02:04 +01:00
committed by GitHub
parent 94670d8082
commit e49989971d
53 changed files with 564 additions and 280 deletions
@@ -111,7 +111,7 @@ impl VcPerPeerTracker {
/// based on a message that we have sent it from our local pool.
fn note_local(&mut self, h: CandidateHash) {
if !note_hash(&mut self.local_observed, h) {
log::warn!("Statement distribution is erroneously attempting to distribute more \
tracing::warn!("Statement distribution is erroneously attempting to distribute more \
than {} candidate(s) per validator index. Ignoring", VC_THRESHOLD);
}
}
@@ -164,6 +164,7 @@ impl PeerRelayParentKnowledge {
///
/// This returns `Some(true)` if this is the first time the peer has become aware of a
/// candidate with the given hash.
#[tracing::instrument(level = "trace", skip(self), fields(subsystem = LOG_TARGET))]
fn send(&mut self, fingerprint: &(CompactStatement, ValidatorIndex)) -> Option<bool> {
let already_known = self.sent_statements.contains(fingerprint)
|| self.received_statements.contains(fingerprint);
@@ -212,6 +213,7 @@ impl PeerRelayParentKnowledge {
///
/// This returns `Ok(true)` if this is the first time the peer has become aware of a
/// candidate with given hash.
#[tracing::instrument(level = "trace", skip(self), fields(subsystem = LOG_TARGET))]
fn receive(
&mut self,
fingerprint: &(CompactStatement, ValidatorIndex),
@@ -278,6 +280,7 @@ impl PeerData {
///
/// This returns `Some(true)` if this is the first time the peer has become aware of a
/// candidate with the given hash.
#[tracing::instrument(level = "trace", skip(self), fields(subsystem = LOG_TARGET))]
fn send(
&mut self,
relay_parent: &Hash,
@@ -302,6 +305,7 @@ impl PeerData {
///
/// This returns `Ok(true)` if this is the first time the peer has become aware of a
/// candidate with given hash.
#[tracing::instrument(level = "trace", skip(self), fields(subsystem = LOG_TARGET))]
fn receive(
&mut self,
relay_parent: &Hash,
@@ -411,6 +415,7 @@ impl ActiveHeadData {
///
/// Any other statements or those that reference a candidate we are not aware of cannot be accepted
/// and will return `NotedStatement::NotUseful`.
#[tracing::instrument(level = "trace", skip(self), fields(subsystem = LOG_TARGET))]
fn note_statement(&mut self, statement: SignedFullStatement) -> NotedStatement {
let validator_index = statement.validator_index();
let comparator = StoredStatementComparator {
@@ -490,6 +495,7 @@ fn check_statement_signature(
/// Informs all registered listeners about a newly received statement.
///
/// Removes all closed listeners.
#[tracing::instrument(level = "trace", skip(listeners), fields(subsystem = LOG_TARGET))]
async fn inform_statement_listeners(
statement: &SignedFullStatement,
listeners: &mut Vec<mpsc::Sender<SignedFullStatement>>,
@@ -509,6 +515,7 @@ async fn inform_statement_listeners(
/// circulates the statement to all peers who have not seen it yet, and
/// sends all statements dependent on that statement to peers who could previously not receive
/// them but now can.
#[tracing::instrument(level = "trace", skip(peers, ctx, active_heads, metrics), fields(subsystem = LOG_TARGET))]
async fn circulate_statement_and_dependents(
peers: &mut HashMap<PeerId, PeerData>,
active_heads: &mut HashMap<Hash, ActiveHeadData>,
@@ -564,6 +571,7 @@ fn statement_message(relay_parent: Hash, statement: SignedFullStatement)
/// Circulates a statement to all peers who have not seen it yet, and returns
/// an iterator over peers who need to have dependent statements sent.
#[tracing::instrument(level = "trace", skip(peers, ctx), fields(subsystem = LOG_TARGET))]
async fn circulate_statement(
peers: &mut HashMap<PeerId, PeerData>,
ctx: &mut impl SubsystemContext<Message = StatementDistributionMessage>,
@@ -597,6 +605,7 @@ async fn circulate_statement(
}
/// Send all statements about a given candidate hash to a peer.
#[tracing::instrument(level = "trace", skip(peer_data, ctx, active_head, metrics), fields(subsystem = LOG_TARGET))]
async fn send_statements_about(
peer: PeerId,
peer_data: &mut PeerData,
@@ -625,6 +634,7 @@ async fn send_statements_about(
}
/// Send all statements at a given relay-parent to a peer.
#[tracing::instrument(level = "trace", skip(peer_data, ctx, active_head, metrics), fields(subsystem = LOG_TARGET))]
async fn send_statements(
peer: PeerId,
peer_data: &mut PeerData,
@@ -666,6 +676,7 @@ async fn report_peer(
//
// This function checks the signature and ensures the statement is compatible with our
// view.
#[tracing::instrument(level = "trace", skip(peer_data, ctx, active_heads, metrics), fields(subsystem = LOG_TARGET))]
async fn handle_incoming_message<'a>(
peer: PeerId,
peer_data: &mut PeerData,
@@ -688,7 +699,10 @@ async fn handle_incoming_message<'a>(
None => {
// This should never be out-of-sync with our view if the view updates
// correspond to actual `StartWork` messages. So we just log and ignore.
log::warn!("Our view out-of-sync with active heads. Head {} not found", relay_parent);
tracing::warn!(
requested_relay_parent = %relay_parent,
"our view out-of-sync with active heads; head not found",
);
return Ok(None);
}
};
@@ -741,6 +755,7 @@ async fn handle_incoming_message<'a>(
}
/// Update a peer's view. Sends all newly unlocked statements based on the previous
#[tracing::instrument(level = "trace", skip(peer_data, ctx, active_heads, metrics), fields(subsystem = LOG_TARGET))]
async fn update_peer_view_and_send_unlocked(
peer: PeerId,
peer_data: &mut PeerData,
@@ -777,6 +792,7 @@ async fn update_peer_view_and_send_unlocked(
Ok(())
}
#[tracing::instrument(level = "trace", skip(peers, active_heads, ctx, metrics), fields(subsystem = LOG_TARGET))]
async fn handle_network_update(
peers: &mut HashMap<PeerId, PeerData>,
active_heads: &mut HashMap<Hash, ActiveHeadData>,
@@ -847,9 +863,13 @@ async fn handle_network_update(
for new in our_view.difference(&old_view) {
if !active_heads.contains_key(&new) {
log::warn!(target: LOG_TARGET, "Our network bridge view update \
tracing::warn!(
target: LOG_TARGET,
unknown_hash = %new,
"Our network bridge view update \
inconsistent with `StartWork` messages we have received from overseer. \
Contains unknown hash {}", new);
Contains unknown hash.",
);
}
}
@@ -860,6 +880,7 @@ async fn handle_network_update(
}
impl StatementDistribution {
#[tracing::instrument(skip(self, ctx), fields(subsystem = LOG_TARGET))]
async fn run(
self,
mut ctx: impl SubsystemContext<Message = StatementDistributionMessage>,
@@ -899,10 +920,10 @@ impl StatementDistribution {
match (val_rx.await?, session_rx.await?) {
(Ok(v), Ok(s)) => (v, s),
(Err(e), _) | (_, Err(e)) => {
log::warn!(
tracing::warn!(
target: LOG_TARGET,
"Failed to fetch runtime API data for active leaf: {:?}",
e,
err = ?e,
"Failed to fetch runtime API data for active leaf",
);
// Lacking this bookkeeping might make us behave funny, although