Add tracing support to node (#1940)

* drop in tracing to replace log

* add structured logging to trace messages

* add structured logging to debug messages

* add structured logging to info messages

* add structured logging to warn messages

* add structured logging to error messages

* normalize spacing and Display vs Debug

* add instrumentation to the various 'fn run'

* use explicit tracing module throughout

* fix availability distribution test

* don't double-print errors

* remove further redundancy from logs

* fix test errors

* fix more test errors

* remove unused kv_log_macro

* fix unused variable

* add tracing spans to collation generation

* add tracing spans to av-store

* add tracing spans to backing

* add tracing spans to bitfield-signing

* add tracing spans to candidate-selection

* add tracing spans to candidate-validation

* add tracing spans to chain-api

* add tracing spans to provisioner

* add tracing spans to runtime-api

* add tracing spans to availability-distribution

* add tracing spans to bitfield-distribution

* add tracing spans to network-bridge

* add tracing spans to collator-protocol

* add tracing spans to pov-distribution

* add tracing spans to statement-distribution

* add tracing spans to overseer

* cleanup
This commit is contained in:
Peter Goodspeed-Niklaus
2020-11-20 12:02:04 +01:00
committed by GitHub
parent 94670d8082
commit e49989971d
53 changed files with 564 additions and 280 deletions
+27 -14
View File
@@ -579,11 +579,11 @@ impl<Spawner: SpawnNamed, Job: 'static + JobTrait> Jobs<Spawner, Job> {
let (future, abort_handle) = future::abortable(async move {
if let Err(e) = Job::run(parent_hash, run_args, metrics, to_job_rx, from_job_tx).await {
log::error!(
"{}({}) finished with an error {:?}",
Job::NAME,
parent_hash,
e,
tracing::error!(
job = Job::NAME,
parent_hash = %parent_hash,
err = ?e,
"job finished with an error",
);
if let Some(mut err_tx) = err_tx {
@@ -591,7 +591,7 @@ impl<Spawner: SpawnNamed, Job: 'static + JobTrait> Jobs<Spawner, Job> {
// there's no point trying to propagate this error onto the channel too
// all we can do is warn that error propagation has failed
if let Err(e) = err_tx.send((Some(parent_hash), JobsError::Job(e))).await {
log::warn!("failed to forward error: {:?}", e);
tracing::warn!(err = ?e, "failed to forward error");
}
}
}
@@ -632,7 +632,7 @@ impl<Spawner: SpawnNamed, Job: 'static + JobTrait> Jobs<Spawner, Job> {
async fn send_msg(&mut self, parent_hash: Hash, msg: Job::ToJob) {
if let Entry::Occupied(mut job) = self.running.entry(parent_hash) {
if job.get_mut().send_msg(msg).await.is_err() {
log::debug!("failed to send message to job ({}), will remove it", Job::NAME);
tracing::debug!(job = Job::NAME, "failed to send message to job, will remove it");
job.remove();
}
}
@@ -767,7 +767,7 @@ where
// if we can't send on the error transmission channel, we can't do anything useful about it
// still, we can at least log the failure
if let Err(e) = err_tx.send((hash, err)).await {
log::warn!("failed to forward error: {:?}", e);
tracing::warn!(err = ?e, "failed to forward error");
}
}
}
@@ -792,7 +792,11 @@ where
for hash in activated {
let metrics = metrics.clone();
if let Err(e) = jobs.spawn_job(hash, run_args.clone(), metrics) {
log::error!("Failed to spawn a job({}): {:?}", Job::NAME, e);
tracing::error!(
job = Job::NAME,
err = ?e,
"failed to spawn a job",
);
Self::fwd_err(Some(hash), JobsError::Utility(e), err_tx).await;
return true;
}
@@ -821,7 +825,11 @@ where
.forward(drain())
.await
{
log::error!("failed to stop all jobs ({}) on conclude signal: {:?}", Job::NAME, e);
tracing::error!(
job = Job::NAME,
err = ?e,
"failed to stop a job on conclude signal",
);
let e = Error::from(e);
Self::fwd_err(None, JobsError::Utility(e), err_tx).await;
}
@@ -832,16 +840,20 @@ where
if let Ok(to_job) = <Job::ToJob>::try_from(msg) {
match to_job.relay_parent() {
Some(hash) => jobs.send_msg(hash, to_job).await,
None => log::debug!(
"Trying to send a message to a job ({}) without specifying a relay parent.",
Job::NAME,
None => tracing::debug!(
job = Job::NAME,
"trying to send a message to a job without specifying a relay parent",
),
}
}
}
Ok(Signal(BlockFinalized(_))) => {}
Err(err) => {
log::error!("error receiving message from subsystem context for job ({}): {:?}", Job::NAME, err);
tracing::error!(
job = Job::NAME,
err = ?err,
"error receiving message from subsystem context for job",
);
Self::fwd_err(None, JobsError::Utility(Error::from(err)), err_tx).await;
return true;
}
@@ -956,6 +968,7 @@ macro_rules! delegated_subsystem {
}
/// Run this subsystem.
///
/// Forwards `ctx`, `run_args`, `metrics` and `spawner` unchanged to `Manager::run`,
/// passing `None` as the final argument, and awaits that call.
///
/// The call is instrumented with a tracing span that carries a `subsystem` field set from
/// the macro's `$subsystem_name`; all four arguments are listed in `skip(...)`, so none of
/// them are recorded as span fields.
#[tracing::instrument(skip(ctx, run_args, metrics, spawner), fields(subsystem = $subsystem_name))]
pub async fn run(ctx: Context, run_args: $run_args, metrics: $metrics, spawner: Spawner) {
<Manager<Spawner, Context>>::run(ctx, run_args, metrics, spawner, None).await
}
@@ -218,7 +218,7 @@ impl ConnectionRequest {
/// This can be done either by calling this function or dropping the request.
pub fn revoke(self) {
if let Err(_) = self.revoke.send(()) {
log::warn!(
tracing::warn!(
"Failed to revoke a validator connection request",
);
}