PVF host prechecking support v2 (#4123)

* pvf host: store only compiled artifacts on disk

* Correctly handle failed artifacts

* Serialize result of PVF preparation uniquely

* Set the artifact state depending on the result

* Return the result of PVF preparation directly

* Move PrepareError to the error module

* Update doc comments

* Update misleading comment

* pvf host: turn off parallel compilation

* pvf host: implement precheck requests

* Fix warnings

* Unnecessary clone

* Add a note about timed out outcome

* Revert the pool outcome handling behavior

* Move the prepare result type into error mod

* Test prepare done

* fmt

* Add an explanation to wasmtime config

* Split pvf host test

* Add precheck to dictionary

Co-authored-by: Sergei Shulepov <sergei@parity.io>
This commit is contained in:
Chris Sosnin
2021-11-13 19:25:59 +03:00
committed by GitHub
parent ada3fe1a2b
commit f5fbaa139f
9 changed files with 310 additions and 82 deletions
+16 -2
View File
@@ -16,7 +16,7 @@
use super::worker::{self, Outcome};
use crate::{
error::PrepareError,
error::{PrepareError, PrepareResult},
metrics::Metrics,
worker_common::{IdleWorker, WorkerHandle},
LOG_TARGET,
@@ -87,7 +87,7 @@ pub enum FromPool {
rip: bool,
/// [`Ok`] indicates that compiled artifact is successfully stored on disk.
/// Otherwise, an [error](PrepareError) is supplied.
result: Result<(), PrepareError>,
result: PrepareResult,
},
/// The given worker ceased to exist.
@@ -341,6 +341,20 @@ fn handle_mux(
)?;
}
Ok(())
},
Outcome::TimedOut => {
if attempt_retire(metrics, spawned, worker) {
reply(
from_pool,
FromPool::Concluded {
worker,
rip: true,
result: Err(PrepareError::TimedOut),
},
)?;
}
Ok(())
},
}
+6 -6
View File
@@ -17,9 +17,7 @@
//! A queue that handles requests for PVF preparation.
use super::pool::{self, Worker};
use crate::{
artifacts::ArtifactId, error::PrepareError, metrics::Metrics, Priority, Pvf, LOG_TARGET,
};
use crate::{artifacts::ArtifactId, metrics::Metrics, PrepareResult, Priority, Pvf, LOG_TARGET};
use always_assert::{always, never};
use async_std::path::PathBuf;
use futures::{channel::mpsc, stream::StreamExt as _, Future, SinkExt};
@@ -44,8 +42,9 @@ pub struct FromQueue {
/// Identifier of an artifact.
pub(crate) artifact_id: ArtifactId,
/// Outcome of the PVF processing. [`Ok`] indicates that compiled artifact
/// is successfully stored on disk. Otherwise, an [error](PrepareError) is supplied.
pub(crate) result: Result<(), PrepareError>,
/// is successfully stored on disk. Otherwise, an [error](crate::error::PrepareError)
/// is supplied.
pub(crate) result: PrepareResult,
}
#[derive(Default)]
@@ -327,7 +326,7 @@ async fn handle_worker_concluded(
queue: &mut Queue,
worker: Worker,
rip: bool,
result: Result<(), PrepareError>,
result: PrepareResult,
) -> Result<(), Fatal> {
queue.metrics.prepare_concluded();
@@ -529,6 +528,7 @@ pub fn start(
#[cfg(test)]
mod tests {
use super::*;
use crate::error::PrepareError;
use assert_matches::assert_matches;
use futures::{future::BoxFuture, FutureExt};
use slotmap::SlotMap;
+58 -59
View File
@@ -16,7 +16,7 @@
use crate::{
artifacts::CompiledArtifact,
error::PrepareError,
error::{PrepareError, PrepareResult},
worker_common::{
bytes_to_path, framed_recv, framed_send, path_to_bytes, spawn_with_program_path,
tmpfile_in, worker_event_loop, IdleWorker, SpawnErr, WorkerHandle,
@@ -28,8 +28,6 @@ use async_std::{
os::unix::net::UnixStream,
path::{Path, PathBuf},
};
use futures::FutureExt as _;
use futures_timer::Delay;
use parity_scale_codec::{Decode, Encode};
use sp_core::hexdisplay::HexDisplay;
use std::{sync::Arc, time::Duration};
@@ -51,15 +49,15 @@ pub async fn spawn(
pub enum Outcome {
/// The worker has finished the work assigned to it.
Concluded { worker: IdleWorker, result: Result<(), PrepareError> },
Concluded { worker: IdleWorker, result: PrepareResult },
/// The host tried to reach the worker but failed. This is most likely because the worked was
/// killed by the system.
Unreachable,
/// The execution was interrupted abruptly and the worker is not available anymore. For example,
/// this could've happen because the worker hadn't finished the work until the given deadline.
/// The worker failed to finish the job until the given deadline.
///
/// Note that in this case the artifact file is written (unless there was an error writing the
/// the artifact).
/// The worker is no longer usable and should be killed.
TimedOut,
/// The execution was interrupted abruptly and the worker is not available anymore.
///
/// This doesn't return an idle worker instance, thus this worker is no longer usable.
DidNotMakeIt,
@@ -106,77 +104,78 @@ pub async fn start_work(
#[derive(Debug)]
enum Selected {
Done(Result<(), PrepareError>),
Done(PrepareResult),
IoErr,
Deadline,
}
let selected = futures::select! {
res = framed_recv(&mut stream).fuse() => {
match res {
Ok(response_bytes) => {
// By convention we expect encoded `Result<(), PrepareError>`.
if let Ok(result) =
<Result<(), PrepareError>>::decode(&mut response_bytes.clone().as_slice())
{
if result.is_ok() {
tracing::debug!(
target: LOG_TARGET,
worker_pid = %pid,
"promoting WIP artifact {} to {}",
tmp_file.display(),
artifact_path.display(),
);
async_std::fs::rename(&tmp_file, &artifact_path)
.await
.map(|_| Selected::Done(result))
.unwrap_or_else(|err| {
tracing::warn!(
target: LOG_TARGET,
worker_pid = %pid,
"failed to rename the artifact from {} to {}: {:?}",
tmp_file.display(),
artifact_path.display(),
err,
);
Selected::IoErr
})
} else {
Selected::Done(result)
}
} else {
// We received invalid bytes from the worker.
let bound_bytes = &response_bytes[..response_bytes.len().min(4)];
tracing::warn!(
let selected =
match async_std::future::timeout(COMPILATION_TIMEOUT, framed_recv(&mut stream)).await {
Ok(Ok(response_bytes)) => {
// Received bytes from worker within the time limit.
// By convention we expect encoded `PrepareResult`.
if let Ok(result) = PrepareResult::decode(&mut response_bytes.as_slice()) {
if result.is_ok() {
tracing::debug!(
target: LOG_TARGET,
worker_pid = %pid,
"received unexpected response from the prepare worker: {}",
HexDisplay::from(&bound_bytes),
"promoting WIP artifact {} to {}",
tmp_file.display(),
artifact_path.display(),
);
Selected::IoErr
async_std::fs::rename(&tmp_file, &artifact_path)
.await
.map(|_| Selected::Done(result))
.unwrap_or_else(|err| {
tracing::warn!(
target: LOG_TARGET,
worker_pid = %pid,
"failed to rename the artifact from {} to {}: {:?}",
tmp_file.display(),
artifact_path.display(),
err,
);
Selected::IoErr
})
} else {
Selected::Done(result)
}
},
Err(err) => {
} else {
// We received invalid bytes from the worker.
let bound_bytes = &response_bytes[..response_bytes.len().min(4)];
tracing::warn!(
target: LOG_TARGET,
worker_pid = %pid,
"failed to recv a prepare response: {:?}",
err,
"received unexpected response from the prepare worker: {}",
HexDisplay::from(&bound_bytes),
);
Selected::IoErr
}
}
},
_ = Delay::new(COMPILATION_TIMEOUT).fuse() => Selected::Deadline,
};
},
Ok(Err(err)) => {
// Communication error within the time limit.
tracing::warn!(
target: LOG_TARGET,
worker_pid = %pid,
"failed to recv a prepare response: {:?}",
err,
);
Selected::IoErr
},
Err(_) => {
// Timed out.
Selected::Deadline
},
};
match selected {
Selected::Done(result) => {
renice(pid, NICENESS_FOREGROUND);
Outcome::Concluded { worker: IdleWorker { stream, pid }, result }
},
Selected::IoErr | Selected::Deadline => Outcome::DidNotMakeIt,
Selected::Deadline => Outcome::TimedOut,
Selected::IoErr => Outcome::DidNotMakeIt,
}
})
.await