mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-13 07:01:05 +00:00
New PVF validation host (#2710)
* Implement PVF validation host * WIP: Diener * Increase the alloted compilation time * Add more comments * Minor clean up * Apply suggestions from code review Co-authored-by: Bastian Köcher <bkchr@users.noreply.github.com> * Fix pruning artifact removal * Fix formatting and newlines * Fix the thread pool * Update node/core/pvf/src/executor_intf.rs Co-authored-by: Bastian Köcher <bkchr@users.noreply.github.com> * Remove redundant test declaration * Don't convert the path into an intermediate string * Try to workaround the test failure * Use the puppet_worker trick again * Fix a blip * Move `ensure_wasmtime_version` under the tests mod * Add a macro for puppet_workers * fix build for not real-overseer * Rename the puppet worker for adder collator * play it safe with the name of adder puppet worker * Typo: triggered * Add more comments * Do not kill exec worker on every error * Plumb Duration for timeouts * typo: critical * Add proofs * Clean unused imports * Revert "WIP: Diener" This reverts commit b9f54e513366c7a6dfdd117ac19fbdc46b900b4d. * Sync version of wasmtime * Update cargo.lock * Update Substrate * Merge fixes still * Update wasmtime version in test * bastifmt Co-authored-by: Bastian Köcher <bkchr@users.noreply.github.com> * Squash spaces * Trailing new line for testing.rs * Remove controversial code * comment about biasing * Fix suggestion * Add comments * make it more clear why unwrap_err * tmpfile retry * proper proofs for claim_idle * Remove mutex from ValidationHost * Add some more logging * Extract exec timeout into a constant * Add some clarifying logging * Use blake2_256 * Clean up the merge Specifically the leftovers after removing real-overseer * Update parachain/test-parachains/adder/collator/Cargo.toml Co-authored-by: Andronik Ordian <write@reusable.software> Co-authored-by: Bastian Köcher <bkchr@users.noreply.github.com> Co-authored-by: Andronik Ordian <write@reusable.software>
This commit is contained in:
@@ -0,0 +1,27 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Execution part of the pipeline.
|
||||
//!
|
||||
//! The validation host [runs the queue][`start`] communicating with it by sending [`ToQueue`]
|
||||
//! messages. The queue will spawn workers in new processes. Those processes should jump to
|
||||
//! [`worker_entrypoint`].
|
||||
|
||||
mod queue;
|
||||
mod worker;
|
||||
|
||||
pub use queue::{ToQueue, start};
|
||||
pub use worker::worker_entrypoint;
|
||||
@@ -0,0 +1,344 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! A queue that handles requests for PVF execution.
|
||||
|
||||
use crate::{
|
||||
worker_common::{IdleWorker, WorkerHandle},
|
||||
host::ResultSender,
|
||||
LOG_TARGET, InvalidCandidate, ValidationError,
|
||||
};
|
||||
use super::worker::Outcome;
|
||||
use std::{collections::VecDeque, fmt, time::Duration};
|
||||
use futures::{
|
||||
Future, FutureExt,
|
||||
channel::mpsc,
|
||||
future::BoxFuture,
|
||||
stream::{FuturesUnordered, StreamExt as _},
|
||||
};
|
||||
use async_std::path::PathBuf;
|
||||
use slotmap::HopSlotMap;
|
||||
|
||||
// Key type used to address entries of the `Workers::running` slot map.
slotmap::new_key_type! { struct Worker; }
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ToQueue {
|
||||
Enqueue {
|
||||
artifact_path: PathBuf,
|
||||
params: Vec<u8>,
|
||||
result_tx: ResultSender,
|
||||
},
|
||||
}
|
||||
|
||||
struct ExecuteJob {
|
||||
artifact_path: PathBuf,
|
||||
params: Vec<u8>,
|
||||
result_tx: ResultSender,
|
||||
}
|
||||
|
||||
struct WorkerData {
|
||||
idle: Option<IdleWorker>,
|
||||
handle: WorkerHandle,
|
||||
}
|
||||
|
||||
impl fmt::Debug for WorkerData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "WorkerData(pid={})", self.handle.id())
|
||||
}
|
||||
}
|
||||
|
||||
struct Workers {
|
||||
/// The registry of running workers.
|
||||
running: HopSlotMap<Worker, WorkerData>,
|
||||
|
||||
/// The number of spawning but not yet spawned workers.
|
||||
spawn_inflight: usize,
|
||||
|
||||
/// The maximum number of workers queue can have at once.
|
||||
capacity: usize,
|
||||
}
|
||||
|
||||
impl Workers {
|
||||
fn can_afford_one_more(&self) -> bool {
|
||||
self.spawn_inflight + self.running.len() < self.capacity
|
||||
}
|
||||
|
||||
fn find_available(&self) -> Option<Worker> {
|
||||
self.running
|
||||
.iter()
|
||||
.find_map(|d| if d.1.idle.is_some() { Some(d.0) } else { None })
|
||||
}
|
||||
|
||||
/// Find the associated data by the worker token and extract it's [`IdleWorker`] token.
|
||||
///
|
||||
/// Returns `None` if either worker is not recognized or idle token is absent.
|
||||
fn claim_idle(&mut self, worker: Worker) -> Option<IdleWorker> {
|
||||
self
|
||||
.running
|
||||
.get_mut(worker)?
|
||||
.idle
|
||||
.take()
|
||||
}
|
||||
}
|
||||
|
||||
enum QueueEvent {
|
||||
Spawn((IdleWorker, WorkerHandle)),
|
||||
StartWork(Worker, Outcome, ResultSender),
|
||||
}
|
||||
|
||||
/// The collection of in-flight background futures, each of which resolves to a [`QueueEvent`].
type Mux = FuturesUnordered<BoxFuture<'static, QueueEvent>>;
|
||||
|
||||
struct Queue {
|
||||
/// The receiver that receives messages to the pool.
|
||||
to_queue_rx: mpsc::Receiver<ToQueue>,
|
||||
|
||||
program_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
|
||||
/// The queue of jobs that are waiting for a worker to pick up.
|
||||
queue: VecDeque<ExecuteJob>,
|
||||
workers: Workers,
|
||||
mux: Mux,
|
||||
}
|
||||
|
||||
impl Queue {
|
||||
fn new(
|
||||
program_path: PathBuf,
|
||||
worker_capacity: usize,
|
||||
spawn_timeout: Duration,
|
||||
to_queue_rx: mpsc::Receiver<ToQueue>,
|
||||
) -> Self {
|
||||
Self {
|
||||
program_path,
|
||||
spawn_timeout,
|
||||
to_queue_rx,
|
||||
queue: VecDeque::new(),
|
||||
mux: Mux::new(),
|
||||
workers: Workers {
|
||||
running: HopSlotMap::with_capacity_and_key(10),
|
||||
spawn_inflight: 0,
|
||||
capacity: worker_capacity,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn run(mut self) {
|
||||
loop {
|
||||
futures::select! {
|
||||
to_queue = self.to_queue_rx.next() => {
|
||||
if let Some(to_queue) = to_queue {
|
||||
handle_to_queue(&mut self, to_queue);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
ev = self.mux.select_next_some() => handle_mux(&mut self, ev).await,
|
||||
}
|
||||
|
||||
purge_dead(&mut self.workers).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn purge_dead(workers: &mut Workers) {
|
||||
let mut to_remove = vec![];
|
||||
for (worker, data) in workers.running.iter_mut() {
|
||||
if futures::poll!(&mut data.handle).is_ready() {
|
||||
// a resolved future means that the worker has terminated. Weed it out.
|
||||
to_remove.push(worker);
|
||||
}
|
||||
}
|
||||
for w in to_remove {
|
||||
let _ = workers.running.remove(w);
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_to_queue(queue: &mut Queue, to_queue: ToQueue) {
|
||||
let ToQueue::Enqueue {
|
||||
artifact_path,
|
||||
params,
|
||||
result_tx,
|
||||
} = to_queue;
|
||||
|
||||
let job = ExecuteJob {
|
||||
artifact_path,
|
||||
params,
|
||||
result_tx,
|
||||
};
|
||||
|
||||
if let Some(available) = queue.workers.find_available() {
|
||||
assign(queue, available, job);
|
||||
} else {
|
||||
if queue.workers.can_afford_one_more() {
|
||||
spawn_extra_worker(queue);
|
||||
}
|
||||
queue.queue.push_back(job);
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_mux(queue: &mut Queue, event: QueueEvent) {
|
||||
match event {
|
||||
QueueEvent::Spawn((idle, handle)) => {
|
||||
queue.workers.spawn_inflight -= 1;
|
||||
|
||||
let worker = queue.workers.running.insert(WorkerData {
|
||||
idle: Some(idle),
|
||||
handle,
|
||||
});
|
||||
|
||||
if let Some(job) = queue.queue.pop_front() {
|
||||
assign(queue, worker, job);
|
||||
}
|
||||
}
|
||||
QueueEvent::StartWork(worker, outcome, result_tx) => {
|
||||
handle_job_finish(queue, worker, outcome, result_tx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// If there are pending jobs in the queue, schedules the next of them onto the just freed up
|
||||
/// worker. Otherwise, puts back into the available workers list.
|
||||
fn handle_job_finish(queue: &mut Queue, worker: Worker, outcome: Outcome, result_tx: ResultSender) {
|
||||
let (idle_worker, result) = match outcome {
|
||||
Outcome::Ok {
|
||||
result_descriptor,
|
||||
duration_ms,
|
||||
idle_worker,
|
||||
} => {
|
||||
// TODO: propagate the soft timeout
|
||||
drop(duration_ms);
|
||||
|
||||
(Some(idle_worker), Ok(result_descriptor))
|
||||
}
|
||||
Outcome::InvalidCandidate { err, idle_worker } => (
|
||||
Some(idle_worker),
|
||||
Err(ValidationError::InvalidCandidate(
|
||||
InvalidCandidate::WorkerReportedError(err),
|
||||
)),
|
||||
),
|
||||
Outcome::InternalError { err, idle_worker } => (
|
||||
Some(idle_worker),
|
||||
Err(ValidationError::InternalError(err)),
|
||||
),
|
||||
Outcome::HardTimeout => (
|
||||
None,
|
||||
Err(ValidationError::InvalidCandidate(
|
||||
InvalidCandidate::HardTimeout,
|
||||
)),
|
||||
),
|
||||
Outcome::IoErr => (
|
||||
None,
|
||||
Err(ValidationError::InvalidCandidate(
|
||||
InvalidCandidate::AmbigiousWorkerDeath,
|
||||
)),
|
||||
),
|
||||
};
|
||||
|
||||
// First we send the result. It may fail due the other end of the channel being dropped, that's
|
||||
// legitimate and we don't treat that as an error.
|
||||
let _ = result_tx.send(result);
|
||||
|
||||
// Then, we should deal with the worker:
|
||||
//
|
||||
// - if the `idle_worker` token was returned we should either schedule the next task or just put
|
||||
// it back so that the next incoming job will be able to claim it
|
||||
//
|
||||
// - if the `idle_worker` token was consumed, all the metadata pertaining to that worker should
|
||||
// be removed.
|
||||
if let Some(idle_worker) = idle_worker {
|
||||
if let Some(data) = queue.workers.running.get_mut(worker) {
|
||||
data.idle = Some(idle_worker);
|
||||
|
||||
if let Some(job) = queue.queue.pop_front() {
|
||||
assign(queue, worker, job);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Note it's possible that the worker was purged already by `purge_dead`
|
||||
queue.workers.running.remove(worker);
|
||||
|
||||
if !queue.queue.is_empty() {
|
||||
// The worker has died and we still have work we have to do. Request an extra worker.
|
||||
//
|
||||
// That can potentially overshoot, but that should be OK.
|
||||
spawn_extra_worker(queue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn spawn_extra_worker(queue: &mut Queue) {
|
||||
queue
|
||||
.mux
|
||||
.push(spawn_worker_task(queue.program_path.clone(), queue.spawn_timeout).boxed());
|
||||
queue.workers.spawn_inflight += 1;
|
||||
}
|
||||
|
||||
async fn spawn_worker_task(program_path: PathBuf, spawn_timeout: Duration) -> QueueEvent {
|
||||
use futures_timer::Delay;
|
||||
|
||||
loop {
|
||||
match super::worker::spawn(&program_path, spawn_timeout).await {
|
||||
Ok((idle, handle)) => break QueueEvent::Spawn((idle, handle)),
|
||||
Err(err) => {
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
"failed to spawn an execute worker: {:?}",
|
||||
err,
|
||||
);
|
||||
|
||||
// Assume that the failure intermittent and retry after a delay.
|
||||
Delay::new(Duration::from_secs(3)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Ask the given worker to perform the given job.
|
||||
///
|
||||
/// The worker must be running and idle.
|
||||
fn assign(queue: &mut Queue, worker: Worker, job: ExecuteJob) {
|
||||
let idle = queue
|
||||
.workers
|
||||
.claim_idle(worker)
|
||||
.expect(
|
||||
"this caller must supply a worker which is idle and running;
|
||||
thus claim_idle cannot return None;
|
||||
qed."
|
||||
);
|
||||
queue.mux.push(
|
||||
async move {
|
||||
let outcome = super::worker::start_work(idle, job.artifact_path, job.params).await;
|
||||
QueueEvent::StartWork(worker, outcome, job.result_tx)
|
||||
}
|
||||
.boxed(),
|
||||
);
|
||||
}
|
||||
|
||||
pub fn start(
|
||||
program_path: PathBuf,
|
||||
worker_capacity: usize,
|
||||
spawn_timeout: Duration,
|
||||
) -> (mpsc::Sender<ToQueue>, impl Future<Output = ()>) {
|
||||
let (to_queue_tx, to_queue_rx) = mpsc::channel(20);
|
||||
let run = Queue::new(
|
||||
program_path,
|
||||
worker_capacity,
|
||||
spawn_timeout,
|
||||
to_queue_rx,
|
||||
)
|
||||
.run();
|
||||
(to_queue_tx, run)
|
||||
}
|
||||
@@ -0,0 +1,272 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::{
|
||||
artifacts::Artifact,
|
||||
LOG_TARGET,
|
||||
executor_intf::TaskExecutor,
|
||||
worker_common::{
|
||||
IdleWorker, SpawnErr, WorkerHandle, bytes_to_path, framed_recv, framed_send, path_to_bytes,
|
||||
spawn_with_program_path, worker_event_loop,
|
||||
},
|
||||
};
|
||||
use std::time::{Duration, Instant};
|
||||
use async_std::{
|
||||
io,
|
||||
os::unix::net::UnixStream,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
use futures::FutureExt;
|
||||
use futures_timer::Delay;
|
||||
use polkadot_parachain::primitives::ValidationResult;
|
||||
use parity_scale_codec::{Encode, Decode};
|
||||
|
||||
/// How long `start_work` waits for the worker's response before giving up with
/// `Outcome::HardTimeout`.
const EXECUTION_TIMEOUT: Duration = Duration::from_secs(3);
|
||||
|
||||
/// Spawns a new worker with the given program path that acts as the worker and the spawn timeout.
|
||||
///
|
||||
/// The program should be able to handle `<program-path> execute-worker <socket-path>` invocation.
|
||||
pub async fn spawn(
|
||||
program_path: &Path,
|
||||
spawn_timeout: Duration,
|
||||
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
|
||||
spawn_with_program_path(
|
||||
"execute",
|
||||
program_path,
|
||||
&["execute-worker"],
|
||||
spawn_timeout,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Outcome of PVF execution.
|
||||
pub enum Outcome {
|
||||
/// PVF execution completed successfully and the result is returned. The worker is ready for
|
||||
/// another job.
|
||||
Ok {
|
||||
result_descriptor: ValidationResult,
|
||||
duration_ms: u64,
|
||||
idle_worker: IdleWorker,
|
||||
},
|
||||
/// The candidate validation failed. It may be for example because the preparation process
|
||||
/// produced an error or the wasm execution triggered a trap.
|
||||
InvalidCandidate {
|
||||
err: String,
|
||||
idle_worker: IdleWorker,
|
||||
},
|
||||
/// An internal error happened during the validation. Such an error is most likely related to
|
||||
/// some transient glitch.
|
||||
InternalError {
|
||||
err: String,
|
||||
idle_worker: IdleWorker,
|
||||
},
|
||||
/// The execution time exceeded the hard limit. The worker is terminated.
|
||||
HardTimeout,
|
||||
/// An I/O error happened during communication with the worker. This may mean that the worker
|
||||
/// process already died. The token is not returned in any case.
|
||||
IoErr,
|
||||
}
|
||||
|
||||
/// Given the idle token of a worker and parameters of work, communicates with the worker and
|
||||
/// returns the outcome.
|
||||
pub async fn start_work(
|
||||
worker: IdleWorker,
|
||||
artifact_path: PathBuf,
|
||||
validation_params: Vec<u8>,
|
||||
) -> Outcome {
|
||||
let IdleWorker { mut stream, pid } = worker;
|
||||
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"starting execute for {}",
|
||||
artifact_path.display(),
|
||||
);
|
||||
|
||||
if send_request(&mut stream, &artifact_path, &validation_params).await.is_err() {
|
||||
return Outcome::IoErr;
|
||||
}
|
||||
|
||||
let response = futures::select! {
|
||||
response = recv_response(&mut stream).fuse() => {
|
||||
match response {
|
||||
Err(_err) => return Outcome::IoErr,
|
||||
Ok(response) => response,
|
||||
}
|
||||
},
|
||||
_ = Delay::new(EXECUTION_TIMEOUT).fuse() => return Outcome::HardTimeout,
|
||||
};
|
||||
|
||||
match response {
|
||||
Response::Ok {
|
||||
result_descriptor,
|
||||
duration_ms,
|
||||
} => Outcome::Ok {
|
||||
result_descriptor,
|
||||
duration_ms,
|
||||
idle_worker: IdleWorker { stream, pid },
|
||||
},
|
||||
Response::InvalidCandidate(err) => Outcome::InvalidCandidate {
|
||||
err,
|
||||
idle_worker: IdleWorker { stream, pid },
|
||||
},
|
||||
Response::InternalError(err) => Outcome::InternalError {
|
||||
err,
|
||||
idle_worker: IdleWorker { stream, pid },
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn send_request(
|
||||
stream: &mut UnixStream,
|
||||
artifact_path: &Path,
|
||||
validation_params: &[u8],
|
||||
) -> io::Result<()> {
|
||||
framed_send(stream, path_to_bytes(artifact_path)).await?;
|
||||
framed_send(stream, validation_params).await
|
||||
}
|
||||
|
||||
async fn recv_request(stream: &mut UnixStream) -> io::Result<(PathBuf, Vec<u8>)> {
|
||||
let artifact_path = framed_recv(stream).await?;
|
||||
let artifact_path = bytes_to_path(&artifact_path).ok_or_else(|| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
"execute pvf recv_request: non utf-8 artifact path".to_string(),
|
||||
)
|
||||
})?;
|
||||
let params = framed_recv(stream).await?;
|
||||
Ok((artifact_path, params))
|
||||
}
|
||||
|
||||
async fn send_response(stream: &mut UnixStream, response: Response) -> io::Result<()> {
|
||||
framed_send(stream, &response.encode()).await
|
||||
}
|
||||
|
||||
async fn recv_response(stream: &mut UnixStream) -> io::Result<Response> {
|
||||
let response_bytes = framed_recv(stream).await?;
|
||||
Response::decode(&mut &response_bytes[..]).map_err(|e| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("execute pvf recv_response: decode error: {:?}", e),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Encode, Decode)]
|
||||
enum Response {
|
||||
Ok {
|
||||
result_descriptor: ValidationResult,
|
||||
duration_ms: u64,
|
||||
},
|
||||
InvalidCandidate(String),
|
||||
InternalError(String),
|
||||
}
|
||||
|
||||
impl Response {
|
||||
fn format_invalid(ctx: &'static str, msg: &str) -> Self {
|
||||
if msg.is_empty() {
|
||||
Self::InvalidCandidate(ctx.to_string())
|
||||
} else {
|
||||
Self::InvalidCandidate(format!("{}: {}", ctx, msg))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The entrypoint that the spawned execute worker should start with. The socket_path specifies
|
||||
/// the path to the socket used to communicate with the host.
|
||||
pub fn worker_entrypoint(socket_path: &str) {
|
||||
worker_event_loop("execute", socket_path, |mut stream| async move {
|
||||
let executor = TaskExecutor::new().map_err(|e| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("cannot create task executor: {}", e),
|
||||
)
|
||||
})?;
|
||||
loop {
|
||||
let (artifact_path, params) = recv_request(&mut stream).await?;
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %std::process::id(),
|
||||
"worker: validating artifact {}",
|
||||
artifact_path.display(),
|
||||
);
|
||||
let response = validate_using_artifact(&artifact_path, ¶ms, &executor).await;
|
||||
send_response(&mut stream, response).await?;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
async fn validate_using_artifact(
|
||||
artifact_path: &Path,
|
||||
params: &[u8],
|
||||
spawner: &TaskExecutor,
|
||||
) -> Response {
|
||||
let artifact_bytes = match async_std::fs::read(artifact_path).await {
|
||||
Err(e) => {
|
||||
return Response::InternalError(format!(
|
||||
"failed to read the artifact at {}: {:?}",
|
||||
artifact_path.display(),
|
||||
e,
|
||||
))
|
||||
}
|
||||
Ok(b) => b,
|
||||
};
|
||||
|
||||
let artifact = match Artifact::deserialize(&artifact_bytes) {
|
||||
Err(e) => return Response::InternalError(format!("artifact deserialization: {:?}", e)),
|
||||
Ok(a) => a,
|
||||
};
|
||||
|
||||
let compiled_artifact = match &artifact {
|
||||
Artifact::PrevalidationErr(msg) => {
|
||||
return Response::format_invalid("prevalidation", msg);
|
||||
}
|
||||
Artifact::PreparationErr(msg) => {
|
||||
return Response::format_invalid("preparation", msg);
|
||||
}
|
||||
Artifact::DidntMakeIt => {
|
||||
return Response::format_invalid("preparation timeout", "");
|
||||
}
|
||||
|
||||
Artifact::Compiled { compiled_artifact } => compiled_artifact,
|
||||
};
|
||||
|
||||
let validation_started_at = Instant::now();
|
||||
let descriptor_bytes =
|
||||
match crate::executor_intf::execute(compiled_artifact, params, spawner.clone()) {
|
||||
Err(err) => {
|
||||
return Response::format_invalid("execute", &err.to_string());
|
||||
}
|
||||
Ok(d) => d,
|
||||
};
|
||||
|
||||
let duration_ms = validation_started_at.elapsed().as_millis() as u64;
|
||||
|
||||
let result_descriptor = match ValidationResult::decode(&mut &descriptor_bytes[..]) {
|
||||
Err(err) => {
|
||||
return Response::InvalidCandidate(format!(
|
||||
"validation result decoding failed: {}",
|
||||
err
|
||||
))
|
||||
}
|
||||
Ok(r) => r,
|
||||
};
|
||||
|
||||
Response::Ok {
|
||||
result_descriptor,
|
||||
duration_ms,
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user