mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-12 08:51:09 +00:00
PVF: more filesystem sandboxing (#1373)
This commit is contained in:
@@ -30,6 +30,7 @@ use futures::{
|
||||
stream::{FuturesUnordered, StreamExt as _},
|
||||
Future, FutureExt,
|
||||
};
|
||||
use polkadot_node_core_pvf_common::SecurityStatus;
|
||||
use polkadot_primitives::{ExecutorParams, ExecutorParamsHash};
|
||||
use slotmap::HopSlotMap;
|
||||
use std::{
|
||||
@@ -139,8 +140,10 @@ struct Queue {
|
||||
|
||||
// Some variables related to the current session.
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
|
||||
/// The queue of jobs that are waiting for a worker to pick up.
|
||||
queue: VecDeque<ExecuteJob>,
|
||||
@@ -152,16 +155,20 @@ impl Queue {
|
||||
fn new(
|
||||
metrics: Metrics,
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
worker_capacity: usize,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
to_queue_rx: mpsc::Receiver<ToQueue>,
|
||||
) -> Self {
|
||||
Self {
|
||||
metrics,
|
||||
program_path,
|
||||
cache_path,
|
||||
spawn_timeout,
|
||||
node_version,
|
||||
security_status,
|
||||
to_queue_rx,
|
||||
queue: VecDeque::new(),
|
||||
mux: Mux::new(),
|
||||
@@ -405,9 +412,11 @@ fn spawn_extra_worker(queue: &mut Queue, job: ExecuteJob) {
|
||||
queue.mux.push(
|
||||
spawn_worker_task(
|
||||
queue.program_path.clone(),
|
||||
queue.cache_path.clone(),
|
||||
job,
|
||||
queue.spawn_timeout,
|
||||
queue.node_version.clone(),
|
||||
queue.security_status.clone(),
|
||||
)
|
||||
.boxed(),
|
||||
);
|
||||
@@ -423,18 +432,22 @@ fn spawn_extra_worker(queue: &mut Queue, job: ExecuteJob) {
|
||||
/// execute other jobs with a compatible execution environment.
|
||||
async fn spawn_worker_task(
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
job: ExecuteJob,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
) -> QueueEvent {
|
||||
use futures_timer::Delay;
|
||||
|
||||
loop {
|
||||
match super::worker_intf::spawn(
|
||||
&program_path,
|
||||
&cache_path,
|
||||
job.executor_params.clone(),
|
||||
spawn_timeout,
|
||||
node_version.as_deref(),
|
||||
security_status.clone(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -496,17 +509,21 @@ fn assign(queue: &mut Queue, worker: Worker, job: ExecuteJob) {
|
||||
pub fn start(
|
||||
metrics: Metrics,
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
worker_capacity: usize,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
) -> (mpsc::Sender<ToQueue>, impl Future<Output = ()>) {
|
||||
let (to_queue_tx, to_queue_rx) = mpsc::channel(20);
|
||||
let run = Queue::new(
|
||||
metrics,
|
||||
program_path,
|
||||
cache_path,
|
||||
worker_capacity,
|
||||
spawn_timeout,
|
||||
node_version,
|
||||
security_status,
|
||||
to_queue_rx,
|
||||
)
|
||||
.run();
|
||||
|
||||
@@ -19,8 +19,8 @@
|
||||
use crate::{
|
||||
artifacts::ArtifactPathId,
|
||||
worker_intf::{
|
||||
path_to_bytes, spawn_with_program_path, IdleWorker, SpawnErr, WorkerHandle,
|
||||
JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker,
|
||||
SpawnErr, WorkerDir, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
},
|
||||
LOG_TARGET,
|
||||
};
|
||||
@@ -30,7 +30,7 @@ use parity_scale_codec::{Decode, Encode};
|
||||
use polkadot_node_core_pvf_common::{
|
||||
error::InternalValidationError,
|
||||
execute::{Handshake, Response},
|
||||
framed_recv, framed_send,
|
||||
worker_dir, SecurityStatus,
|
||||
};
|
||||
use polkadot_parachain_primitives::primitives::ValidationResult;
|
||||
use polkadot_primitives::ExecutorParams;
|
||||
@@ -38,21 +38,30 @@ use std::{path::Path, time::Duration};
|
||||
use tokio::{io, net::UnixStream};
|
||||
|
||||
/// Spawns a new worker with the given program path that acts as the worker and the spawn timeout.
|
||||
/// Sends a handshake message to the worker as soon as it is spawned.
|
||||
///
|
||||
/// The program should be able to handle `<program-path> execute-worker <socket-path>` invocation.
|
||||
/// Sends a handshake message to the worker as soon as it is spawned.
|
||||
pub async fn spawn(
|
||||
program_path: &Path,
|
||||
cache_path: &Path,
|
||||
executor_params: ExecutorParams,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<&str>,
|
||||
security_status: SecurityStatus,
|
||||
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
|
||||
let mut extra_args = vec!["execute-worker"];
|
||||
if let Some(node_version) = node_version {
|
||||
extra_args.extend_from_slice(&["--node-impl-version", node_version]);
|
||||
}
|
||||
let (mut idle_worker, worker_handle) =
|
||||
spawn_with_program_path("execute", program_path, &extra_args, spawn_timeout).await?;
|
||||
|
||||
let (mut idle_worker, worker_handle) = spawn_with_program_path(
|
||||
"execute",
|
||||
program_path,
|
||||
cache_path,
|
||||
&extra_args,
|
||||
spawn_timeout,
|
||||
security_status,
|
||||
)
|
||||
.await?;
|
||||
send_handshake(&mut idle_worker.stream, Handshake { executor_params })
|
||||
.await
|
||||
.map_err(|error| {
|
||||
@@ -104,89 +113,151 @@ pub async fn start_work(
|
||||
execution_timeout: Duration,
|
||||
validation_params: Vec<u8>,
|
||||
) -> Outcome {
|
||||
let IdleWorker { mut stream, pid } = worker;
|
||||
let IdleWorker { mut stream, pid, worker_dir } = worker;
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
"starting execute for {}",
|
||||
artifact.path.display(),
|
||||
);
|
||||
|
||||
if let Err(error) =
|
||||
send_request(&mut stream, &artifact.path, &validation_params, execution_timeout).await
|
||||
{
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
?error,
|
||||
"failed to send an execute request",
|
||||
);
|
||||
return Outcome::IoErr
|
||||
}
|
||||
|
||||
// We use a generous timeout here. This is in addition to the one in the child process, in
|
||||
// case the child stalls. We have a wall clock timeout here in the host, but a CPU timeout
|
||||
// in the child. We want to use CPU time because it varies less than wall clock time under
|
||||
// load, but the CPU resources of the child can only be measured from the parent after the
|
||||
// child process terminates.
|
||||
let timeout = execution_timeout * JOB_TIMEOUT_WALL_CLOCK_FACTOR;
|
||||
let response = futures::select! {
|
||||
response = recv_response(&mut stream).fuse() => {
|
||||
match response {
|
||||
Err(error) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
?error,
|
||||
"failed to recv an execute response",
|
||||
);
|
||||
return Outcome::IoErr
|
||||
},
|
||||
Ok(response) => {
|
||||
if let Response::Ok{duration, ..} = response {
|
||||
if duration > execution_timeout {
|
||||
// The job didn't complete within the timeout.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"execute job took {}ms cpu time, exceeded execution timeout {}ms.",
|
||||
duration.as_millis(),
|
||||
execution_timeout.as_millis(),
|
||||
);
|
||||
|
||||
// Return a timeout error.
|
||||
return Outcome::HardTimeout;
|
||||
}
|
||||
}
|
||||
|
||||
response
|
||||
},
|
||||
}
|
||||
},
|
||||
_ = Delay::new(timeout).fuse() => {
|
||||
with_worker_dir_setup(worker_dir, pid, &artifact.path, |worker_dir| async move {
|
||||
if let Err(error) = send_request(&mut stream, &validation_params, execution_timeout).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
"execution worker exceeded lenient timeout for execution, child worker likely stalled",
|
||||
?error,
|
||||
"failed to send an execute request",
|
||||
);
|
||||
Response::TimedOut
|
||||
},
|
||||
};
|
||||
return Outcome::IoErr
|
||||
}
|
||||
|
||||
match response {
|
||||
Response::Ok { result_descriptor, duration } =>
|
||||
Outcome::Ok { result_descriptor, duration, idle_worker: IdleWorker { stream, pid } },
|
||||
Response::InvalidCandidate(err) =>
|
||||
Outcome::InvalidCandidate { err, idle_worker: IdleWorker { stream, pid } },
|
||||
Response::TimedOut => Outcome::HardTimeout,
|
||||
Response::Panic(err) => Outcome::Panic { err },
|
||||
Response::InternalError(err) => Outcome::InternalError { err },
|
||||
// We use a generous timeout here. This is in addition to the one in the child process, in
|
||||
// case the child stalls. We have a wall clock timeout here in the host, but a CPU timeout
|
||||
// in the child. We want to use CPU time because it varies less than wall clock time under
|
||||
// load, but the CPU resources of the child can only be measured from the parent after the
|
||||
// child process terminates.
|
||||
let timeout = execution_timeout * JOB_TIMEOUT_WALL_CLOCK_FACTOR;
|
||||
let response = futures::select! {
|
||||
response = recv_response(&mut stream).fuse() => {
|
||||
match response {
|
||||
Err(error) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
?error,
|
||||
"failed to recv an execute response",
|
||||
);
|
||||
return Outcome::IoErr
|
||||
},
|
||||
Ok(response) => {
|
||||
if let Response::Ok{duration, ..} = response {
|
||||
if duration > execution_timeout {
|
||||
// The job didn't complete within the timeout.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"execute job took {}ms cpu time, exceeded execution timeout {}ms.",
|
||||
duration.as_millis(),
|
||||
execution_timeout.as_millis(),
|
||||
);
|
||||
|
||||
// Return a timeout error.
|
||||
return Outcome::HardTimeout;
|
||||
}
|
||||
}
|
||||
|
||||
response
|
||||
},
|
||||
}
|
||||
},
|
||||
_ = Delay::new(timeout).fuse() => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
"execution worker exceeded lenient timeout for execution, child worker likely stalled",
|
||||
);
|
||||
Response::TimedOut
|
||||
},
|
||||
};
|
||||
|
||||
match response {
|
||||
Response::Ok { result_descriptor, duration } => Outcome::Ok {
|
||||
result_descriptor,
|
||||
duration,
|
||||
idle_worker: IdleWorker { stream, pid, worker_dir },
|
||||
},
|
||||
Response::InvalidCandidate(err) => Outcome::InvalidCandidate {
|
||||
err,
|
||||
idle_worker: IdleWorker { stream, pid, worker_dir },
|
||||
},
|
||||
Response::TimedOut => Outcome::HardTimeout,
|
||||
Response::Panic(err) => Outcome::Panic { err },
|
||||
Response::InternalError(err) => Outcome::InternalError { err },
|
||||
}
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
/// Create a temporary file for an artifact in the worker cache, execute the given future/closure
|
||||
/// passing the file path in, and clean up the worker cache.
|
||||
///
|
||||
/// Failure to clean up the worker cache results in an error - leaving any files here could be a
|
||||
/// security issue, and we should shut down the worker. This should be very rare.
|
||||
async fn with_worker_dir_setup<F, Fut>(
|
||||
worker_dir: WorkerDir,
|
||||
pid: u32,
|
||||
artifact_path: &Path,
|
||||
f: F,
|
||||
) -> Outcome
|
||||
where
|
||||
Fut: futures::Future<Output = Outcome>,
|
||||
F: FnOnce(WorkerDir) -> Fut,
|
||||
{
|
||||
// Cheaply create a hard link to the artifact. The artifact is always at a known location in the
|
||||
// worker cache, and the child can't access any other artifacts or gain any information from the
|
||||
// original filename.
|
||||
let link_path = worker_dir::execute_artifact(&worker_dir.path);
|
||||
if let Err(err) = tokio::fs::hard_link(artifact_path, link_path).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir,
|
||||
"failed to clear worker cache after the job: {:?}",
|
||||
err,
|
||||
);
|
||||
return Outcome::InternalError {
|
||||
err: InternalValidationError::CouldNotCreateLink(format!("{:?}", err)),
|
||||
}
|
||||
}
|
||||
|
||||
let worker_dir_path = worker_dir.path.clone();
|
||||
let outcome = f(worker_dir).await;
|
||||
|
||||
// Try to clear the worker dir.
|
||||
if let Err(err) = clear_worker_dir_path(&worker_dir_path) {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir_path,
|
||||
"failed to clear worker cache after the job: {:?}",
|
||||
err,
|
||||
);
|
||||
return Outcome::InternalError {
|
||||
err: InternalValidationError::CouldNotClearWorkerDir {
|
||||
err: format!("{:?}", err),
|
||||
path: worker_dir_path.to_str().map(String::from),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
outcome
|
||||
}
|
||||
|
||||
async fn send_handshake(stream: &mut UnixStream, handshake: Handshake) -> io::Result<()> {
|
||||
@@ -195,11 +266,9 @@ async fn send_handshake(stream: &mut UnixStream, handshake: Handshake) -> io::Re
|
||||
|
||||
async fn send_request(
|
||||
stream: &mut UnixStream,
|
||||
artifact_path: &Path,
|
||||
validation_params: &[u8],
|
||||
execution_timeout: Duration,
|
||||
) -> io::Result<()> {
|
||||
framed_send(stream, path_to_bytes(artifact_path)).await?;
|
||||
framed_send(stream, validation_params).await?;
|
||||
framed_send(stream, &execution_timeout.encode()).await
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user