observability: tracing gum, automatically cross ref traceID (#5079)

* add some gum

* bump expander

* gum

* fix all remaining issues

* last fixup

* Update node/gum/proc-macro/src/lib.rs

Co-authored-by: Bastian Köcher <bkchr@users.noreply.github.com>

* change

* network

* fixins

* chore

* allow optional fmt str + args, prep for expr as kv field

* tracing -> gum rename fallout

* restrict further

* allow multiple levels of field accesses

* another round of docs and a slip of the pen

* update ADR

* fixup lock file

* use target: instead of target=

* minors

* fix

* chore

* Update node/gum/README.md

Co-authored-by: Andrei Sandu <54316454+sandreim@users.noreply.github.com>

Co-authored-by: Bastian Köcher <bkchr@users.noreply.github.com>
Co-authored-by: Andrei Sandu <54316454+sandreim@users.noreply.github.com>
This commit is contained in:
Bernhard Schuster
2022-03-15 12:05:16 +01:00
committed by GitHub
parent fa359fd1f7
commit d631f1dea8
130 changed files with 1708 additions and 808 deletions
+6 -6
View File
@@ -174,7 +174,7 @@ async fn purge_dead(metrics: &Metrics, workers: &mut Workers) {
fn handle_to_queue(queue: &mut Queue, to_queue: ToQueue) {
let ToQueue::Enqueue { artifact, execution_timeout, params, result_tx } = to_queue;
tracing::debug!(
gum::debug!(
target: LOG_TARGET,
validation_code_hash = ?artifact.id.code_hash,
"enqueueing an artifact for execution",
@@ -208,7 +208,7 @@ fn handle_worker_spawned(queue: &mut Queue, idle: IdleWorker, handle: WorkerHand
queue.workers.spawn_inflight -= 1;
let worker = queue.workers.running.insert(WorkerData { idle: Some(idle), handle });
tracing::debug!(target: LOG_TARGET, ?worker, "execute worker spawned");
gum::debug!(target: LOG_TARGET, ?worker, "execute worker spawned");
if let Some(job) = queue.queue.pop_front() {
assign(queue, worker, job);
@@ -244,7 +244,7 @@ fn handle_job_finish(
};
queue.metrics.execute_finished();
tracing::debug!(
gum::debug!(
target: LOG_TARGET,
validation_code_hash = ?artifact_id.code_hash,
worker_rip = idle_worker.is_none(),
@@ -288,7 +288,7 @@ fn handle_job_finish(
fn spawn_extra_worker(queue: &mut Queue) {
queue.metrics.execute_worker().on_begin_spawn();
tracing::debug!(target: LOG_TARGET, "spawning an extra worker");
gum::debug!(target: LOG_TARGET, "spawning an extra worker");
queue
.mux
@@ -303,7 +303,7 @@ async fn spawn_worker_task(program_path: PathBuf, spawn_timeout: Duration) -> Qu
match super::worker::spawn(&program_path, spawn_timeout).await {
Ok((idle, handle)) => break QueueEvent::Spawn(idle, handle),
Err(err) => {
tracing::warn!(target: LOG_TARGET, "failed to spawn an execute worker: {:?}", err);
gum::warn!(target: LOG_TARGET, "failed to spawn an execute worker: {:?}", err);
// Assume that the failure is intermittent and retry after a delay.
Delay::new(Duration::from_secs(3)).await;
@@ -316,7 +316,7 @@ async fn spawn_worker_task(program_path: PathBuf, spawn_timeout: Duration) -> Qu
///
/// The worker must be running and idle.
fn assign(queue: &mut Queue, worker: Worker, job: ExecuteJob) {
tracing::debug!(
gum::debug!(
target: LOG_TARGET,
validation_code_hash = ?job.artifact.id,
?worker,
+5 -5
View File
@@ -72,7 +72,7 @@ pub async fn start_work(
) -> Outcome {
let IdleWorker { mut stream, pid } = worker;
tracing::debug!(
gum::debug!(
target: LOG_TARGET,
worker_pid = %pid,
validation_code_hash = ?artifact.id.code_hash,
@@ -81,7 +81,7 @@ pub async fn start_work(
);
if let Err(error) = send_request(&mut stream, &artifact.path, &validation_params).await {
tracing::warn!(
gum::warn!(
target: LOG_TARGET,
worker_pid = %pid,
validation_code_hash = ?artifact.id.code_hash,
@@ -95,7 +95,7 @@ pub async fn start_work(
response = recv_response(&mut stream).fuse() => {
match response {
Err(error) => {
tracing::warn!(
gum::warn!(
target: LOG_TARGET,
worker_pid = %pid,
validation_code_hash = ?artifact.id.code_hash,
@@ -108,7 +108,7 @@ pub async fn start_work(
}
},
_ = Delay::new(execution_timeout).fuse() => {
tracing::warn!(
gum::warn!(
target: LOG_TARGET,
worker_pid = %pid,
validation_code_hash = ?artifact.id.code_hash,
@@ -189,7 +189,7 @@ pub fn worker_entrypoint(socket_path: &str) {
})?;
loop {
let (artifact_path, params) = recv_request(&mut stream).await?;
tracing::debug!(
gum::debug!(
target: LOG_TARGET,
worker_pid = %std::process::id(),
"worker: validating artifact {}",
+3 -3
View File
@@ -639,13 +639,13 @@ async fn handle_cleanup_pulse(
artifact_ttl: Duration,
) -> Result<(), Fatal> {
let to_remove = artifacts.prune(artifact_ttl);
tracing::debug!(
gum::debug!(
target: LOG_TARGET,
"PVF pruning: {} artifacts reached their end of life",
to_remove.len(),
);
for artifact_id in to_remove {
tracing::debug!(
gum::debug!(
target: LOG_TARGET,
validation_code_hash = ?artifact_id.code_hash,
"pruning artifact",
@@ -664,7 +664,7 @@ async fn sweeper_task(mut sweeper_rx: mpsc::Receiver<PathBuf>) {
None => break,
Some(condemned) => {
let result = async_std::fs::remove_file(&condemned).await;
tracing::trace!(
gum::trace!(
target: LOG_TARGET,
?result,
"Sweeping the artifact file {}",
+3 -3
View File
@@ -201,7 +201,7 @@ fn handle_to_pool(
) {
match to_pool {
ToPool::Spawn => {
tracing::debug!(target: LOG_TARGET, "spawning a new prepare worker");
gum::debug!(target: LOG_TARGET, "spawning a new prepare worker");
metrics.prepare_worker().on_begin_spawn();
mux.push(spawn_worker_task(program_path.to_owned(), spawn_timeout).boxed());
},
@@ -234,7 +234,7 @@ fn handle_to_pool(
}
},
ToPool::Kill(worker) => {
tracing::debug!(target: LOG_TARGET, ?worker, "killing prepare worker");
gum::debug!(target: LOG_TARGET, ?worker, "killing prepare worker");
// It may be absent if it were previously already removed by `purge_dead`.
let _ = attempt_retire(metrics, spawned, worker);
},
@@ -248,7 +248,7 @@ async fn spawn_worker_task(program_path: PathBuf, spawn_timeout: Duration) -> Po
match worker::spawn(&program_path, spawn_timeout).await {
Ok((idle, handle)) => break PoolEvent::Spawn(idle, handle),
Err(err) => {
tracing::warn!(target: LOG_TARGET, "failed to spawn a prepare worker: {:?}", err);
gum::warn!(target: LOG_TARGET, "failed to spawn a prepare worker: {:?}", err);
// Assume that the failure is intermittent and retry after a delay.
Delay::new(Duration::from_secs(3)).await;
+4 -4
View File
@@ -211,7 +211,7 @@ async fn handle_to_queue(queue: &mut Queue, to_queue: ToQueue) -> Result<(), Fat
}
async fn handle_enqueue(queue: &mut Queue, priority: Priority, pvf: Pvf) -> Result<(), Fatal> {
tracing::debug!(
gum::debug!(
target: LOG_TARGET,
validation_code_hash = ?pvf.code_hash,
?priority,
@@ -228,7 +228,7 @@ async fn handle_enqueue(queue: &mut Queue, priority: Priority, pvf: Pvf) -> Resu
// Precondition for `Enqueue` is that it is sent only once for a PVF;
// Thus this should always be `false`;
// qed.
tracing::warn!(
gum::warn!(
target: LOG_TARGET,
"duplicate `enqueue` command received for {:?}",
artifact_id,
@@ -331,7 +331,7 @@ async fn handle_worker_concluded(
queue.artifact_id_to_job.remove(&artifact_id);
tracing::debug!(
gum::debug!(
target: LOG_TARGET,
validation_code_hash = ?artifact_id.code_hash,
?worker,
@@ -370,7 +370,7 @@ async fn handle_worker_concluded(
}
async fn handle_worker_rip(queue: &mut Queue, worker: Worker) -> Result<(), Fatal> {
tracing::debug!(target: LOG_TARGET, ?worker, "prepare worker ripped");
gum::debug!(target: LOG_TARGET, ?worker, "prepare worker ripped");
let worker_data = queue.workers.remove(worker);
if let Some(WorkerData { job: Some(job), .. }) = worker_data {
+10 -10
View File
@@ -72,7 +72,7 @@ pub async fn start_work(
) -> Outcome {
let IdleWorker { mut stream, pid } = worker;
tracing::debug!(
gum::debug!(
target: LOG_TARGET,
worker_pid = %pid,
"starting prepare for {}",
@@ -81,7 +81,7 @@ pub async fn start_work(
with_tmp_file(pid, cache_path, |tmp_file| async move {
if let Err(err) = send_request(&mut stream, code, &tmp_file).await {
tracing::warn!(
gum::warn!(
target: LOG_TARGET,
worker_pid = %pid,
"failed to send a prepare request: {:?}",
@@ -109,7 +109,7 @@ pub async fn start_work(
// By convention we expect encoded `PrepareResult`.
if let Ok(result) = PrepareResult::decode(&mut response_bytes.as_slice()) {
if result.is_ok() {
tracing::debug!(
gum::debug!(
target: LOG_TARGET,
worker_pid = %pid,
"promoting WIP artifact {} to {}",
@@ -121,7 +121,7 @@ pub async fn start_work(
.await
.map(|_| Selected::Done(result))
.unwrap_or_else(|err| {
tracing::warn!(
gum::warn!(
target: LOG_TARGET,
worker_pid = %pid,
"failed to rename the artifact from {} to {}: {:?}",
@@ -137,7 +137,7 @@ pub async fn start_work(
} else {
// We received invalid bytes from the worker.
let bound_bytes = &response_bytes[..response_bytes.len().min(4)];
tracing::warn!(
gum::warn!(
target: LOG_TARGET,
worker_pid = %pid,
"received unexpected response from the prepare worker: {}",
@@ -148,7 +148,7 @@ pub async fn start_work(
},
Ok(Err(err)) => {
// Communication error within the time limit.
tracing::warn!(
gum::warn!(
target: LOG_TARGET,
worker_pid = %pid,
"failed to recv a prepare response: {:?}",
@@ -184,7 +184,7 @@ where
let tmp_file = match tmpfile_in("prepare-artifact-", cache_path).await {
Ok(f) => f,
Err(err) => {
tracing::warn!(
gum::warn!(
target: LOG_TARGET,
worker_pid = %pid,
"failed to create a temp file for the artifact: {:?}",
@@ -205,7 +205,7 @@ where
Ok(()) => (),
Err(err) if err.kind() == std::io::ErrorKind::NotFound => (),
Err(err) => {
tracing::warn!(
gum::warn!(
target: LOG_TARGET,
worker_pid = %pid,
"failed to remove the tmp file: {:?}",
@@ -246,7 +246,7 @@ pub fn worker_entrypoint(socket_path: &str) {
loop {
let (code, dest) = recv_request(&mut stream).await?;
tracing::debug!(
gum::debug!(
target: LOG_TARGET,
worker_pid = %std::process::id(),
"worker: preparing artifact",
@@ -267,7 +267,7 @@ pub fn worker_entrypoint(socket_path: &str) {
let artifact_bytes = compiled_artifact.encode();
tracing::debug!(
gum::debug!(
target: LOG_TARGET,
worker_pid = %std::process::id(),
"worker: writing artifact to {}",
+4 -4
View File
@@ -48,7 +48,7 @@ pub async fn spawn_with_program_path(
let socket_path = socket_path.to_owned();
async move {
let listener = UnixListener::bind(&socket_path).await.map_err(|err| {
tracing::warn!(
gum::warn!(
target: LOG_TARGET,
%debug_id,
"cannot bind unix socket: {:?}",
@@ -59,7 +59,7 @@ pub async fn spawn_with_program_path(
let handle =
WorkerHandle::spawn(program_path, extra_args, socket_path).map_err(|err| {
tracing::warn!(
gum::warn!(
target: LOG_TARGET,
%debug_id,
"cannot spawn a worker: {:?}",
@@ -71,7 +71,7 @@ pub async fn spawn_with_program_path(
futures::select! {
accept_result = listener.accept().fuse() => {
let (stream, _) = accept_result.map_err(|err| {
tracing::warn!(
gum::warn!(
target: LOG_TARGET,
%debug_id,
"cannot accept a worker: {:?}",
@@ -160,7 +160,7 @@ where
})
.unwrap_err(); // it's never `Ok` because it's `Ok(Never)`
tracing::debug!(
gum::debug!(
target: LOG_TARGET,
worker_pid = %std::process::id(),
"pvf worker ({}): {:?}",