mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-13 00:01:09 +00:00
Add logging to PVF and other related parts (#3596)
Co-authored-by: Bastian Köcher <bkchr@users.noreply.github.com>
This commit is contained in:
@@ -18,6 +18,7 @@
|
||||
|
||||
use super::worker::Outcome;
|
||||
use crate::{
|
||||
artifacts::{ArtifactId, ArtifactPathId},
|
||||
host::ResultSender,
|
||||
worker_common::{IdleWorker, WorkerHandle},
|
||||
InvalidCandidate, ValidationError, LOG_TARGET,
|
||||
@@ -36,11 +37,11 @@ slotmap::new_key_type! { struct Worker; }
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ToQueue {
|
||||
Enqueue { artifact_path: PathBuf, params: Vec<u8>, result_tx: ResultSender },
|
||||
Enqueue { artifact: ArtifactPathId, params: Vec<u8>, result_tx: ResultSender },
|
||||
}
|
||||
|
||||
struct ExecuteJob {
|
||||
artifact_path: PathBuf,
|
||||
artifact: ArtifactPathId,
|
||||
params: Vec<u8>,
|
||||
result_tx: ResultSender,
|
||||
}
|
||||
@@ -87,8 +88,8 @@ impl Workers {
|
||||
}
|
||||
|
||||
enum QueueEvent {
|
||||
Spawn((IdleWorker, WorkerHandle)),
|
||||
StartWork(Worker, Outcome, ResultSender),
|
||||
Spawn(IdleWorker, WorkerHandle),
|
||||
StartWork(Worker, Outcome, ArtifactId, ResultSender),
|
||||
}
|
||||
|
||||
type Mux = FuturesUnordered<BoxFuture<'static, QueueEvent>>;
|
||||
@@ -159,9 +160,13 @@ async fn purge_dead(workers: &mut Workers) {
|
||||
}
|
||||
|
||||
fn handle_to_queue(queue: &mut Queue, to_queue: ToQueue) {
|
||||
let ToQueue::Enqueue { artifact_path, params, result_tx } = to_queue;
|
||||
|
||||
let job = ExecuteJob { artifact_path, params, result_tx };
|
||||
let ToQueue::Enqueue { artifact, params, result_tx } = to_queue;
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
"enqueueing an artifact for execution",
|
||||
);
|
||||
let job = ExecuteJob { artifact, params, result_tx };
|
||||
|
||||
if let Some(available) = queue.workers.find_available() {
|
||||
assign(queue, available, job);
|
||||
@@ -175,24 +180,35 @@ fn handle_to_queue(queue: &mut Queue, to_queue: ToQueue) {
|
||||
|
||||
async fn handle_mux(queue: &mut Queue, event: QueueEvent) {
|
||||
match event {
|
||||
QueueEvent::Spawn((idle, handle)) => {
|
||||
queue.workers.spawn_inflight -= 1;
|
||||
|
||||
let worker = queue.workers.running.insert(WorkerData { idle: Some(idle), handle });
|
||||
|
||||
if let Some(job) = queue.queue.pop_front() {
|
||||
assign(queue, worker, job);
|
||||
}
|
||||
QueueEvent::Spawn(idle, handle) => {
|
||||
handle_worker_spawned(queue, idle, handle);
|
||||
},
|
||||
QueueEvent::StartWork(worker, outcome, result_tx) => {
|
||||
handle_job_finish(queue, worker, outcome, result_tx);
|
||||
QueueEvent::StartWork(worker, outcome, artifact_id, result_tx) => {
|
||||
handle_job_finish(queue, worker, outcome, artifact_id, result_tx);
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_worker_spawned(queue: &mut Queue, idle: IdleWorker, handle: WorkerHandle) {
|
||||
queue.workers.spawn_inflight -= 1;
|
||||
let worker = queue.workers.running.insert(WorkerData { idle: Some(idle), handle });
|
||||
|
||||
tracing::debug!(target: LOG_TARGET, ?worker, "execute worker spawned");
|
||||
|
||||
if let Some(job) = queue.queue.pop_front() {
|
||||
assign(queue, worker, job);
|
||||
}
|
||||
}
|
||||
|
||||
/// If there are pending jobs in the queue, schedules the next of them onto the just freed up
|
||||
/// worker. Otherwise, puts back into the available workers list.
|
||||
fn handle_job_finish(queue: &mut Queue, worker: Worker, outcome: Outcome, result_tx: ResultSender) {
|
||||
fn handle_job_finish(
|
||||
queue: &mut Queue,
|
||||
worker: Worker,
|
||||
outcome: Outcome,
|
||||
artifact_id: ArtifactId,
|
||||
result_tx: ResultSender,
|
||||
) {
|
||||
let (idle_worker, result) = match outcome {
|
||||
Outcome::Ok { result_descriptor, duration_ms, idle_worker } => {
|
||||
// TODO: propagate the soft timeout
|
||||
@@ -212,6 +228,14 @@ fn handle_job_finish(queue: &mut Queue, worker: Worker, outcome: Outcome, result
|
||||
(None, Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbigiousWorkerDeath))),
|
||||
};
|
||||
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
validation_code_hash = ?artifact_id.code_hash,
|
||||
worker_rip = idle_worker.is_none(),
|
||||
?result,
|
||||
"job finished.",
|
||||
);
|
||||
|
||||
// First we send the result. It may fail due the other end of the channel being dropped, that's
|
||||
// legitimate and we don't treat that as an error.
|
||||
let _ = result_tx.send(result);
|
||||
@@ -245,6 +269,8 @@ fn handle_job_finish(queue: &mut Queue, worker: Worker, outcome: Outcome, result
|
||||
}
|
||||
|
||||
fn spawn_extra_worker(queue: &mut Queue) {
|
||||
tracing::debug!(target: LOG_TARGET, "spawning an extra worker");
|
||||
|
||||
queue
|
||||
.mux
|
||||
.push(spawn_worker_task(queue.program_path.clone(), queue.spawn_timeout).boxed());
|
||||
@@ -256,7 +282,7 @@ async fn spawn_worker_task(program_path: PathBuf, spawn_timeout: Duration) -> Qu
|
||||
|
||||
loop {
|
||||
match super::worker::spawn(&program_path, spawn_timeout).await {
|
||||
Ok((idle, handle)) => break QueueEvent::Spawn((idle, handle)),
|
||||
Ok((idle, handle)) => break QueueEvent::Spawn(idle, handle),
|
||||
Err(err) => {
|
||||
tracing::warn!(target: LOG_TARGET, "failed to spawn an execute worker: {:?}", err);
|
||||
|
||||
@@ -271,6 +297,13 @@ async fn spawn_worker_task(program_path: PathBuf, spawn_timeout: Duration) -> Qu
|
||||
///
|
||||
/// The worker must be running and idle.
|
||||
fn assign(queue: &mut Queue, worker: Worker, job: ExecuteJob) {
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
validation_code_hash = ?job.artifact.id,
|
||||
?worker,
|
||||
"assigning the execute worker",
|
||||
);
|
||||
|
||||
let idle = queue.workers.claim_idle(worker).expect(
|
||||
"this caller must supply a worker which is idle and running;
|
||||
thus claim_idle cannot return None;
|
||||
@@ -278,8 +311,8 @@ fn assign(queue: &mut Queue, worker: Worker, job: ExecuteJob) {
|
||||
);
|
||||
queue.mux.push(
|
||||
async move {
|
||||
let outcome = super::worker::start_work(idle, job.artifact_path, job.params).await;
|
||||
QueueEvent::StartWork(worker, outcome, job.result_tx)
|
||||
let outcome = super::worker::start_work(idle, job.artifact.clone(), job.params).await;
|
||||
QueueEvent::StartWork(worker, outcome, job.artifact.id, job.result_tx)
|
||||
}
|
||||
.boxed(),
|
||||
);
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::{
|
||||
artifacts::Artifact,
|
||||
artifacts::{Artifact, ArtifactPathId},
|
||||
executor_intf::TaskExecutor,
|
||||
worker_common::{
|
||||
bytes_to_path, framed_recv, framed_send, path_to_bytes, spawn_with_program_path,
|
||||
@@ -68,7 +68,7 @@ pub enum Outcome {
|
||||
/// returns the outcome.
|
||||
pub async fn start_work(
|
||||
worker: IdleWorker,
|
||||
artifact_path: PathBuf,
|
||||
artifact: ArtifactPathId,
|
||||
validation_params: Vec<u8>,
|
||||
) -> Outcome {
|
||||
let IdleWorker { mut stream, pid } = worker;
|
||||
@@ -76,22 +76,47 @@ pub async fn start_work(
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
"starting execute for {}",
|
||||
artifact_path.display(),
|
||||
artifact.path.display(),
|
||||
);
|
||||
|
||||
if send_request(&mut stream, &artifact_path, &validation_params).await.is_err() {
|
||||
if let Err(error) = send_request(&mut stream, &artifact.path, &validation_params).await {
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
?error,
|
||||
"failed to send an execute request",
|
||||
);
|
||||
return Outcome::IoErr
|
||||
}
|
||||
|
||||
let response = futures::select! {
|
||||
response = recv_response(&mut stream).fuse() => {
|
||||
match response {
|
||||
Err(_err) => return Outcome::IoErr,
|
||||
Err(error) => {
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
?error,
|
||||
"failed to recv an execute response",
|
||||
);
|
||||
return Outcome::IoErr
|
||||
},
|
||||
Ok(response) => response,
|
||||
}
|
||||
},
|
||||
_ = Delay::new(EXECUTION_TIMEOUT).fuse() => return Outcome::HardTimeout,
|
||||
_ = Delay::new(EXECUTION_TIMEOUT).fuse() => {
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
"execution worker exceeded alloted time for execution",
|
||||
);
|
||||
return Outcome::HardTimeout;
|
||||
},
|
||||
};
|
||||
|
||||
match response {
|
||||
|
||||
Reference in New Issue
Block a user