mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-13 02:21:14 +00:00
observability: tracing gum, automatically cross ref traceID (#5079)
* add some gum * bump expander * gum * fix all remaining issues * last fixup * Update node/gum/proc-macro/src/lib.rs Co-authored-by: Bastian Köcher <bkchr@users.noreply.github.com> * change * netowrk * fixins * chore * allow optional fmt str + args, prep for expr as kv field * tracing -> gum rename fallout * restrict further * allow multiple levels of field accesses * another round of docs and a slip of the pen * update ADR * fixup lock fiel * use target: instead of target= * minors * fix * chore * Update node/gum/README.md Co-authored-by: Andrei Sandu <54316454+sandreim@users.noreply.github.com> Co-authored-by: Bastian Köcher <bkchr@users.noreply.github.com> Co-authored-by: Andrei Sandu <54316454+sandreim@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
fa359fd1f7
commit
d631f1dea8
@@ -174,7 +174,7 @@ async fn purge_dead(metrics: &Metrics, workers: &mut Workers) {
|
||||
|
||||
fn handle_to_queue(queue: &mut Queue, to_queue: ToQueue) {
|
||||
let ToQueue::Enqueue { artifact, execution_timeout, params, result_tx } = to_queue;
|
||||
tracing::debug!(
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
"enqueueing an artifact for execution",
|
||||
@@ -208,7 +208,7 @@ fn handle_worker_spawned(queue: &mut Queue, idle: IdleWorker, handle: WorkerHand
|
||||
queue.workers.spawn_inflight -= 1;
|
||||
let worker = queue.workers.running.insert(WorkerData { idle: Some(idle), handle });
|
||||
|
||||
tracing::debug!(target: LOG_TARGET, ?worker, "execute worker spawned");
|
||||
gum::debug!(target: LOG_TARGET, ?worker, "execute worker spawned");
|
||||
|
||||
if let Some(job) = queue.queue.pop_front() {
|
||||
assign(queue, worker, job);
|
||||
@@ -244,7 +244,7 @@ fn handle_job_finish(
|
||||
};
|
||||
|
||||
queue.metrics.execute_finished();
|
||||
tracing::debug!(
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
validation_code_hash = ?artifact_id.code_hash,
|
||||
worker_rip = idle_worker.is_none(),
|
||||
@@ -288,7 +288,7 @@ fn handle_job_finish(
|
||||
|
||||
fn spawn_extra_worker(queue: &mut Queue) {
|
||||
queue.metrics.execute_worker().on_begin_spawn();
|
||||
tracing::debug!(target: LOG_TARGET, "spawning an extra worker");
|
||||
gum::debug!(target: LOG_TARGET, "spawning an extra worker");
|
||||
|
||||
queue
|
||||
.mux
|
||||
@@ -303,7 +303,7 @@ async fn spawn_worker_task(program_path: PathBuf, spawn_timeout: Duration) -> Qu
|
||||
match super::worker::spawn(&program_path, spawn_timeout).await {
|
||||
Ok((idle, handle)) => break QueueEvent::Spawn(idle, handle),
|
||||
Err(err) => {
|
||||
tracing::warn!(target: LOG_TARGET, "failed to spawn an execute worker: {:?}", err);
|
||||
gum::warn!(target: LOG_TARGET, "failed to spawn an execute worker: {:?}", err);
|
||||
|
||||
// Assume that the failure intermittent and retry after a delay.
|
||||
Delay::new(Duration::from_secs(3)).await;
|
||||
@@ -316,7 +316,7 @@ async fn spawn_worker_task(program_path: PathBuf, spawn_timeout: Duration) -> Qu
|
||||
///
|
||||
/// The worker must be running and idle.
|
||||
fn assign(queue: &mut Queue, worker: Worker, job: ExecuteJob) {
|
||||
tracing::debug!(
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
validation_code_hash = ?job.artifact.id,
|
||||
?worker,
|
||||
|
||||
@@ -72,7 +72,7 @@ pub async fn start_work(
|
||||
) -> Outcome {
|
||||
let IdleWorker { mut stream, pid } = worker;
|
||||
|
||||
tracing::debug!(
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
@@ -81,7 +81,7 @@ pub async fn start_work(
|
||||
);
|
||||
|
||||
if let Err(error) = send_request(&mut stream, &artifact.path, &validation_params).await {
|
||||
tracing::warn!(
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
@@ -95,7 +95,7 @@ pub async fn start_work(
|
||||
response = recv_response(&mut stream).fuse() => {
|
||||
match response {
|
||||
Err(error) => {
|
||||
tracing::warn!(
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
@@ -108,7 +108,7 @@ pub async fn start_work(
|
||||
}
|
||||
},
|
||||
_ = Delay::new(execution_timeout).fuse() => {
|
||||
tracing::warn!(
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
@@ -189,7 +189,7 @@ pub fn worker_entrypoint(socket_path: &str) {
|
||||
})?;
|
||||
loop {
|
||||
let (artifact_path, params) = recv_request(&mut stream).await?;
|
||||
tracing::debug!(
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %std::process::id(),
|
||||
"worker: validating artifact {}",
|
||||
|
||||
@@ -639,13 +639,13 @@ async fn handle_cleanup_pulse(
|
||||
artifact_ttl: Duration,
|
||||
) -> Result<(), Fatal> {
|
||||
let to_remove = artifacts.prune(artifact_ttl);
|
||||
tracing::debug!(
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
"PVF pruning: {} artifacts reached their end of life",
|
||||
to_remove.len(),
|
||||
);
|
||||
for artifact_id in to_remove {
|
||||
tracing::debug!(
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
validation_code_hash = ?artifact_id.code_hash,
|
||||
"pruning artifact",
|
||||
@@ -664,7 +664,7 @@ async fn sweeper_task(mut sweeper_rx: mpsc::Receiver<PathBuf>) {
|
||||
None => break,
|
||||
Some(condemned) => {
|
||||
let result = async_std::fs::remove_file(&condemned).await;
|
||||
tracing::trace!(
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?result,
|
||||
"Sweeping the artifact file {}",
|
||||
|
||||
@@ -201,7 +201,7 @@ fn handle_to_pool(
|
||||
) {
|
||||
match to_pool {
|
||||
ToPool::Spawn => {
|
||||
tracing::debug!(target: LOG_TARGET, "spawning a new prepare worker");
|
||||
gum::debug!(target: LOG_TARGET, "spawning a new prepare worker");
|
||||
metrics.prepare_worker().on_begin_spawn();
|
||||
mux.push(spawn_worker_task(program_path.to_owned(), spawn_timeout).boxed());
|
||||
},
|
||||
@@ -234,7 +234,7 @@ fn handle_to_pool(
|
||||
}
|
||||
},
|
||||
ToPool::Kill(worker) => {
|
||||
tracing::debug!(target: LOG_TARGET, ?worker, "killing prepare worker");
|
||||
gum::debug!(target: LOG_TARGET, ?worker, "killing prepare worker");
|
||||
// It may be absent if it were previously already removed by `purge_dead`.
|
||||
let _ = attempt_retire(metrics, spawned, worker);
|
||||
},
|
||||
@@ -248,7 +248,7 @@ async fn spawn_worker_task(program_path: PathBuf, spawn_timeout: Duration) -> Po
|
||||
match worker::spawn(&program_path, spawn_timeout).await {
|
||||
Ok((idle, handle)) => break PoolEvent::Spawn(idle, handle),
|
||||
Err(err) => {
|
||||
tracing::warn!(target: LOG_TARGET, "failed to spawn a prepare worker: {:?}", err);
|
||||
gum::warn!(target: LOG_TARGET, "failed to spawn a prepare worker: {:?}", err);
|
||||
|
||||
// Assume that the failure intermittent and retry after a delay.
|
||||
Delay::new(Duration::from_secs(3)).await;
|
||||
|
||||
@@ -211,7 +211,7 @@ async fn handle_to_queue(queue: &mut Queue, to_queue: ToQueue) -> Result<(), Fat
|
||||
}
|
||||
|
||||
async fn handle_enqueue(queue: &mut Queue, priority: Priority, pvf: Pvf) -> Result<(), Fatal> {
|
||||
tracing::debug!(
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
validation_code_hash = ?pvf.code_hash,
|
||||
?priority,
|
||||
@@ -228,7 +228,7 @@ async fn handle_enqueue(queue: &mut Queue, priority: Priority, pvf: Pvf) -> Resu
|
||||
// Precondtion for `Enqueue` is that it is sent only once for a PVF;
|
||||
// Thus this should always be `false`;
|
||||
// qed.
|
||||
tracing::warn!(
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
"duplicate `enqueue` command received for {:?}",
|
||||
artifact_id,
|
||||
@@ -331,7 +331,7 @@ async fn handle_worker_concluded(
|
||||
|
||||
queue.artifact_id_to_job.remove(&artifact_id);
|
||||
|
||||
tracing::debug!(
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
validation_code_hash = ?artifact_id.code_hash,
|
||||
?worker,
|
||||
@@ -370,7 +370,7 @@ async fn handle_worker_concluded(
|
||||
}
|
||||
|
||||
async fn handle_worker_rip(queue: &mut Queue, worker: Worker) -> Result<(), Fatal> {
|
||||
tracing::debug!(target: LOG_TARGET, ?worker, "prepare worker ripped");
|
||||
gum::debug!(target: LOG_TARGET, ?worker, "prepare worker ripped");
|
||||
|
||||
let worker_data = queue.workers.remove(worker);
|
||||
if let Some(WorkerData { job: Some(job), .. }) = worker_data {
|
||||
|
||||
@@ -72,7 +72,7 @@ pub async fn start_work(
|
||||
) -> Outcome {
|
||||
let IdleWorker { mut stream, pid } = worker;
|
||||
|
||||
tracing::debug!(
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"starting prepare for {}",
|
||||
@@ -81,7 +81,7 @@ pub async fn start_work(
|
||||
|
||||
with_tmp_file(pid, cache_path, |tmp_file| async move {
|
||||
if let Err(err) = send_request(&mut stream, code, &tmp_file).await {
|
||||
tracing::warn!(
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to send a prepare request: {:?}",
|
||||
@@ -109,7 +109,7 @@ pub async fn start_work(
|
||||
// By convention we expect encoded `PrepareResult`.
|
||||
if let Ok(result) = PrepareResult::decode(&mut response_bytes.as_slice()) {
|
||||
if result.is_ok() {
|
||||
tracing::debug!(
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"promoting WIP artifact {} to {}",
|
||||
@@ -121,7 +121,7 @@ pub async fn start_work(
|
||||
.await
|
||||
.map(|_| Selected::Done(result))
|
||||
.unwrap_or_else(|err| {
|
||||
tracing::warn!(
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to rename the artifact from {} to {}: {:?}",
|
||||
@@ -137,7 +137,7 @@ pub async fn start_work(
|
||||
} else {
|
||||
// We received invalid bytes from the worker.
|
||||
let bound_bytes = &response_bytes[..response_bytes.len().min(4)];
|
||||
tracing::warn!(
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"received unexpected response from the prepare worker: {}",
|
||||
@@ -148,7 +148,7 @@ pub async fn start_work(
|
||||
},
|
||||
Ok(Err(err)) => {
|
||||
// Communication error within the time limit.
|
||||
tracing::warn!(
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to recv a prepare response: {:?}",
|
||||
@@ -184,7 +184,7 @@ where
|
||||
let tmp_file = match tmpfile_in("prepare-artifact-", cache_path).await {
|
||||
Ok(f) => f,
|
||||
Err(err) => {
|
||||
tracing::warn!(
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to create a temp file for the artifact: {:?}",
|
||||
@@ -205,7 +205,7 @@ where
|
||||
Ok(()) => (),
|
||||
Err(err) if err.kind() == std::io::ErrorKind::NotFound => (),
|
||||
Err(err) => {
|
||||
tracing::warn!(
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to remove the tmp file: {:?}",
|
||||
@@ -246,7 +246,7 @@ pub fn worker_entrypoint(socket_path: &str) {
|
||||
loop {
|
||||
let (code, dest) = recv_request(&mut stream).await?;
|
||||
|
||||
tracing::debug!(
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %std::process::id(),
|
||||
"worker: preparing artifact",
|
||||
@@ -267,7 +267,7 @@ pub fn worker_entrypoint(socket_path: &str) {
|
||||
|
||||
let artifact_bytes = compiled_artifact.encode();
|
||||
|
||||
tracing::debug!(
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %std::process::id(),
|
||||
"worker: writing artifact to {}",
|
||||
|
||||
@@ -48,7 +48,7 @@ pub async fn spawn_with_program_path(
|
||||
let socket_path = socket_path.to_owned();
|
||||
async move {
|
||||
let listener = UnixListener::bind(&socket_path).await.map_err(|err| {
|
||||
tracing::warn!(
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%debug_id,
|
||||
"cannot bind unix socket: {:?}",
|
||||
@@ -59,7 +59,7 @@ pub async fn spawn_with_program_path(
|
||||
|
||||
let handle =
|
||||
WorkerHandle::spawn(program_path, extra_args, socket_path).map_err(|err| {
|
||||
tracing::warn!(
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%debug_id,
|
||||
"cannot spawn a worker: {:?}",
|
||||
@@ -71,7 +71,7 @@ pub async fn spawn_with_program_path(
|
||||
futures::select! {
|
||||
accept_result = listener.accept().fuse() => {
|
||||
let (stream, _) = accept_result.map_err(|err| {
|
||||
tracing::warn!(
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%debug_id,
|
||||
"cannot accept a worker: {:?}",
|
||||
@@ -160,7 +160,7 @@ where
|
||||
})
|
||||
.unwrap_err(); // it's never `Ok` because it's `Ok(Never)`
|
||||
|
||||
tracing::debug!(
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %std::process::id(),
|
||||
"pvf worker ({}): {:?}",
|
||||
|
||||
Reference in New Issue
Block a user