mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-13 09:21:05 +00:00
PVF: more filesystem sandboxing (#1373)
This commit is contained in:
@@ -172,9 +172,10 @@ impl Artifacts {
|
||||
///
|
||||
/// The recognized artifacts will be filled in the table and unrecognized will be removed.
|
||||
pub async fn new(cache_path: &Path) -> Self {
|
||||
// Make sure that the cache path directory and all its parents are created.
|
||||
// First delete the entire cache. Nodes are long-running so this should populate shortly.
|
||||
// First delete the entire cache. This includes artifacts and any leftover worker dirs (see
|
||||
// [`WorkerDir`]). Nodes are long-running so this should populate shortly.
|
||||
let _ = tokio::fs::remove_dir_all(cache_path).await;
|
||||
// Make sure that the cache path directory and all its parents are created.
|
||||
let _ = tokio::fs::create_dir_all(cache_path).await;
|
||||
|
||||
Self { artifacts: HashMap::new() }
|
||||
@@ -295,7 +296,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn artifacts_removes_cache_on_startup() {
|
||||
let fake_cache_path = crate::worker_intf::tmpfile("test-cache").await.unwrap();
|
||||
let fake_cache_path = crate::worker_intf::tmppath("test-cache").await.unwrap();
|
||||
let fake_artifact_path = {
|
||||
let mut p = fake_cache_path.clone();
|
||||
p.push("wasmtime_0x1234567890123456789012345678901234567890123456789012345678901234");
|
||||
|
||||
@@ -30,6 +30,7 @@ use futures::{
|
||||
stream::{FuturesUnordered, StreamExt as _},
|
||||
Future, FutureExt,
|
||||
};
|
||||
use polkadot_node_core_pvf_common::SecurityStatus;
|
||||
use polkadot_primitives::{ExecutorParams, ExecutorParamsHash};
|
||||
use slotmap::HopSlotMap;
|
||||
use std::{
|
||||
@@ -139,8 +140,10 @@ struct Queue {
|
||||
|
||||
// Some variables related to the current session.
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
|
||||
/// The queue of jobs that are waiting for a worker to pick up.
|
||||
queue: VecDeque<ExecuteJob>,
|
||||
@@ -152,16 +155,20 @@ impl Queue {
|
||||
fn new(
|
||||
metrics: Metrics,
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
worker_capacity: usize,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
to_queue_rx: mpsc::Receiver<ToQueue>,
|
||||
) -> Self {
|
||||
Self {
|
||||
metrics,
|
||||
program_path,
|
||||
cache_path,
|
||||
spawn_timeout,
|
||||
node_version,
|
||||
security_status,
|
||||
to_queue_rx,
|
||||
queue: VecDeque::new(),
|
||||
mux: Mux::new(),
|
||||
@@ -405,9 +412,11 @@ fn spawn_extra_worker(queue: &mut Queue, job: ExecuteJob) {
|
||||
queue.mux.push(
|
||||
spawn_worker_task(
|
||||
queue.program_path.clone(),
|
||||
queue.cache_path.clone(),
|
||||
job,
|
||||
queue.spawn_timeout,
|
||||
queue.node_version.clone(),
|
||||
queue.security_status.clone(),
|
||||
)
|
||||
.boxed(),
|
||||
);
|
||||
@@ -423,18 +432,22 @@ fn spawn_extra_worker(queue: &mut Queue, job: ExecuteJob) {
|
||||
/// execute other jobs with a compatible execution environment.
|
||||
async fn spawn_worker_task(
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
job: ExecuteJob,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
) -> QueueEvent {
|
||||
use futures_timer::Delay;
|
||||
|
||||
loop {
|
||||
match super::worker_intf::spawn(
|
||||
&program_path,
|
||||
&cache_path,
|
||||
job.executor_params.clone(),
|
||||
spawn_timeout,
|
||||
node_version.as_deref(),
|
||||
security_status.clone(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -496,17 +509,21 @@ fn assign(queue: &mut Queue, worker: Worker, job: ExecuteJob) {
|
||||
pub fn start(
|
||||
metrics: Metrics,
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
worker_capacity: usize,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
) -> (mpsc::Sender<ToQueue>, impl Future<Output = ()>) {
|
||||
let (to_queue_tx, to_queue_rx) = mpsc::channel(20);
|
||||
let run = Queue::new(
|
||||
metrics,
|
||||
program_path,
|
||||
cache_path,
|
||||
worker_capacity,
|
||||
spawn_timeout,
|
||||
node_version,
|
||||
security_status,
|
||||
to_queue_rx,
|
||||
)
|
||||
.run();
|
||||
|
||||
@@ -19,8 +19,8 @@
|
||||
use crate::{
|
||||
artifacts::ArtifactPathId,
|
||||
worker_intf::{
|
||||
path_to_bytes, spawn_with_program_path, IdleWorker, SpawnErr, WorkerHandle,
|
||||
JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker,
|
||||
SpawnErr, WorkerDir, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
},
|
||||
LOG_TARGET,
|
||||
};
|
||||
@@ -30,7 +30,7 @@ use parity_scale_codec::{Decode, Encode};
|
||||
use polkadot_node_core_pvf_common::{
|
||||
error::InternalValidationError,
|
||||
execute::{Handshake, Response},
|
||||
framed_recv, framed_send,
|
||||
worker_dir, SecurityStatus,
|
||||
};
|
||||
use polkadot_parachain_primitives::primitives::ValidationResult;
|
||||
use polkadot_primitives::ExecutorParams;
|
||||
@@ -38,21 +38,30 @@ use std::{path::Path, time::Duration};
|
||||
use tokio::{io, net::UnixStream};
|
||||
|
||||
/// Spawns a new worker with the given program path that acts as the worker and the spawn timeout.
|
||||
/// Sends a handshake message to the worker as soon as it is spawned.
|
||||
///
|
||||
/// The program should be able to handle `<program-path> execute-worker <socket-path>` invocation.
|
||||
/// Sends a handshake message to the worker as soon as it is spawned.
|
||||
pub async fn spawn(
|
||||
program_path: &Path,
|
||||
cache_path: &Path,
|
||||
executor_params: ExecutorParams,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<&str>,
|
||||
security_status: SecurityStatus,
|
||||
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
|
||||
let mut extra_args = vec!["execute-worker"];
|
||||
if let Some(node_version) = node_version {
|
||||
extra_args.extend_from_slice(&["--node-impl-version", node_version]);
|
||||
}
|
||||
let (mut idle_worker, worker_handle) =
|
||||
spawn_with_program_path("execute", program_path, &extra_args, spawn_timeout).await?;
|
||||
|
||||
let (mut idle_worker, worker_handle) = spawn_with_program_path(
|
||||
"execute",
|
||||
program_path,
|
||||
cache_path,
|
||||
&extra_args,
|
||||
spawn_timeout,
|
||||
security_status,
|
||||
)
|
||||
.await?;
|
||||
send_handshake(&mut idle_worker.stream, Handshake { executor_params })
|
||||
.await
|
||||
.map_err(|error| {
|
||||
@@ -104,89 +113,151 @@ pub async fn start_work(
|
||||
execution_timeout: Duration,
|
||||
validation_params: Vec<u8>,
|
||||
) -> Outcome {
|
||||
let IdleWorker { mut stream, pid } = worker;
|
||||
let IdleWorker { mut stream, pid, worker_dir } = worker;
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
"starting execute for {}",
|
||||
artifact.path.display(),
|
||||
);
|
||||
|
||||
if let Err(error) =
|
||||
send_request(&mut stream, &artifact.path, &validation_params, execution_timeout).await
|
||||
{
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
?error,
|
||||
"failed to send an execute request",
|
||||
);
|
||||
return Outcome::IoErr
|
||||
}
|
||||
|
||||
// We use a generous timeout here. This is in addition to the one in the child process, in
|
||||
// case the child stalls. We have a wall clock timeout here in the host, but a CPU timeout
|
||||
// in the child. We want to use CPU time because it varies less than wall clock time under
|
||||
// load, but the CPU resources of the child can only be measured from the parent after the
|
||||
// child process terminates.
|
||||
let timeout = execution_timeout * JOB_TIMEOUT_WALL_CLOCK_FACTOR;
|
||||
let response = futures::select! {
|
||||
response = recv_response(&mut stream).fuse() => {
|
||||
match response {
|
||||
Err(error) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
?error,
|
||||
"failed to recv an execute response",
|
||||
);
|
||||
return Outcome::IoErr
|
||||
},
|
||||
Ok(response) => {
|
||||
if let Response::Ok{duration, ..} = response {
|
||||
if duration > execution_timeout {
|
||||
// The job didn't complete within the timeout.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"execute job took {}ms cpu time, exceeded execution timeout {}ms.",
|
||||
duration.as_millis(),
|
||||
execution_timeout.as_millis(),
|
||||
);
|
||||
|
||||
// Return a timeout error.
|
||||
return Outcome::HardTimeout;
|
||||
}
|
||||
}
|
||||
|
||||
response
|
||||
},
|
||||
}
|
||||
},
|
||||
_ = Delay::new(timeout).fuse() => {
|
||||
with_worker_dir_setup(worker_dir, pid, &artifact.path, |worker_dir| async move {
|
||||
if let Err(error) = send_request(&mut stream, &validation_params, execution_timeout).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
"execution worker exceeded lenient timeout for execution, child worker likely stalled",
|
||||
?error,
|
||||
"failed to send an execute request",
|
||||
);
|
||||
Response::TimedOut
|
||||
},
|
||||
};
|
||||
return Outcome::IoErr
|
||||
}
|
||||
|
||||
match response {
|
||||
Response::Ok { result_descriptor, duration } =>
|
||||
Outcome::Ok { result_descriptor, duration, idle_worker: IdleWorker { stream, pid } },
|
||||
Response::InvalidCandidate(err) =>
|
||||
Outcome::InvalidCandidate { err, idle_worker: IdleWorker { stream, pid } },
|
||||
Response::TimedOut => Outcome::HardTimeout,
|
||||
Response::Panic(err) => Outcome::Panic { err },
|
||||
Response::InternalError(err) => Outcome::InternalError { err },
|
||||
// We use a generous timeout here. This is in addition to the one in the child process, in
|
||||
// case the child stalls. We have a wall clock timeout here in the host, but a CPU timeout
|
||||
// in the child. We want to use CPU time because it varies less than wall clock time under
|
||||
// load, but the CPU resources of the child can only be measured from the parent after the
|
||||
// child process terminates.
|
||||
let timeout = execution_timeout * JOB_TIMEOUT_WALL_CLOCK_FACTOR;
|
||||
let response = futures::select! {
|
||||
response = recv_response(&mut stream).fuse() => {
|
||||
match response {
|
||||
Err(error) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
?error,
|
||||
"failed to recv an execute response",
|
||||
);
|
||||
return Outcome::IoErr
|
||||
},
|
||||
Ok(response) => {
|
||||
if let Response::Ok{duration, ..} = response {
|
||||
if duration > execution_timeout {
|
||||
// The job didn't complete within the timeout.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"execute job took {}ms cpu time, exceeded execution timeout {}ms.",
|
||||
duration.as_millis(),
|
||||
execution_timeout.as_millis(),
|
||||
);
|
||||
|
||||
// Return a timeout error.
|
||||
return Outcome::HardTimeout;
|
||||
}
|
||||
}
|
||||
|
||||
response
|
||||
},
|
||||
}
|
||||
},
|
||||
_ = Delay::new(timeout).fuse() => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
"execution worker exceeded lenient timeout for execution, child worker likely stalled",
|
||||
);
|
||||
Response::TimedOut
|
||||
},
|
||||
};
|
||||
|
||||
match response {
|
||||
Response::Ok { result_descriptor, duration } => Outcome::Ok {
|
||||
result_descriptor,
|
||||
duration,
|
||||
idle_worker: IdleWorker { stream, pid, worker_dir },
|
||||
},
|
||||
Response::InvalidCandidate(err) => Outcome::InvalidCandidate {
|
||||
err,
|
||||
idle_worker: IdleWorker { stream, pid, worker_dir },
|
||||
},
|
||||
Response::TimedOut => Outcome::HardTimeout,
|
||||
Response::Panic(err) => Outcome::Panic { err },
|
||||
Response::InternalError(err) => Outcome::InternalError { err },
|
||||
}
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
/// Create a temporary file for an artifact in the worker cache, execute the given future/closure
|
||||
/// passing the file path in, and clean up the worker cache.
|
||||
///
|
||||
/// Failure to clean up the worker cache results in an error - leaving any files here could be a
|
||||
/// security issue, and we should shut down the worker. This should be very rare.
|
||||
async fn with_worker_dir_setup<F, Fut>(
|
||||
worker_dir: WorkerDir,
|
||||
pid: u32,
|
||||
artifact_path: &Path,
|
||||
f: F,
|
||||
) -> Outcome
|
||||
where
|
||||
Fut: futures::Future<Output = Outcome>,
|
||||
F: FnOnce(WorkerDir) -> Fut,
|
||||
{
|
||||
// Cheaply create a hard link to the artifact. The artifact is always at a known location in the
|
||||
// worker cache, and the child can't access any other artifacts or gain any information from the
|
||||
// original filename.
|
||||
let link_path = worker_dir::execute_artifact(&worker_dir.path);
|
||||
if let Err(err) = tokio::fs::hard_link(artifact_path, link_path).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir,
|
||||
"failed to clear worker cache after the job: {:?}",
|
||||
err,
|
||||
);
|
||||
return Outcome::InternalError {
|
||||
err: InternalValidationError::CouldNotCreateLink(format!("{:?}", err)),
|
||||
}
|
||||
}
|
||||
|
||||
let worker_dir_path = worker_dir.path.clone();
|
||||
let outcome = f(worker_dir).await;
|
||||
|
||||
// Try to clear the worker dir.
|
||||
if let Err(err) = clear_worker_dir_path(&worker_dir_path) {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir_path,
|
||||
"failed to clear worker cache after the job: {:?}",
|
||||
err,
|
||||
);
|
||||
return Outcome::InternalError {
|
||||
err: InternalValidationError::CouldNotClearWorkerDir {
|
||||
err: format!("{:?}", err),
|
||||
path: worker_dir_path.to_str().map(String::from),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
outcome
|
||||
}
|
||||
|
||||
async fn send_handshake(stream: &mut UnixStream, handshake: Handshake) -> io::Result<()> {
|
||||
@@ -195,11 +266,9 @@ async fn send_handshake(stream: &mut UnixStream, handshake: Handshake) -> io::Re
|
||||
|
||||
async fn send_request(
|
||||
stream: &mut UnixStream,
|
||||
artifact_path: &Path,
|
||||
validation_params: &[u8],
|
||||
execution_timeout: Duration,
|
||||
) -> io::Result<()> {
|
||||
framed_send(stream, path_to_bytes(artifact_path)).await?;
|
||||
framed_send(stream, validation_params).await?;
|
||||
framed_send(stream, &execution_timeout.encode()).await
|
||||
}
|
||||
|
||||
@@ -34,6 +34,7 @@ use futures::{
|
||||
use polkadot_node_core_pvf_common::{
|
||||
error::{PrepareError, PrepareResult},
|
||||
pvf::PvfPrepData,
|
||||
SecurityStatus,
|
||||
};
|
||||
use polkadot_parachain_primitives::primitives::ValidationResult;
|
||||
use std::{
|
||||
@@ -202,8 +203,13 @@ impl Config {
|
||||
pub fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Future<Output = ()>) {
|
||||
gum::debug!(target: LOG_TARGET, ?config, "starting PVF validation host");
|
||||
|
||||
// Run checks for supported security features once per host startup.
|
||||
warn_if_no_landlock();
|
||||
// Run checks for supported security features once per host startup. Warn here if not enabled.
|
||||
let security_status = {
|
||||
let can_enable_landlock = check_landlock(&config.prepare_worker_program_path);
|
||||
let can_unshare_user_namespace_and_change_root =
|
||||
check_can_unshare_user_namespace_and_change_root(&config.prepare_worker_program_path);
|
||||
SecurityStatus { can_enable_landlock, can_unshare_user_namespace_and_change_root }
|
||||
};
|
||||
|
||||
let (to_host_tx, to_host_rx) = mpsc::channel(10);
|
||||
|
||||
@@ -215,6 +221,7 @@ pub fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Future<O
|
||||
config.cache_path.clone(),
|
||||
config.prepare_worker_spawn_timeout,
|
||||
config.node_version.clone(),
|
||||
security_status.clone(),
|
||||
);
|
||||
|
||||
let (to_prepare_queue_tx, from_prepare_queue_rx, run_prepare_queue) = prepare::start_queue(
|
||||
@@ -229,9 +236,11 @@ pub fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Future<O
|
||||
let (to_execute_queue_tx, run_execute_queue) = execute::start(
|
||||
metrics,
|
||||
config.execute_worker_program_path.to_owned(),
|
||||
config.cache_path.clone(),
|
||||
config.execute_workers_max_num,
|
||||
config.execute_worker_spawn_timeout,
|
||||
config.node_version,
|
||||
security_status,
|
||||
);
|
||||
|
||||
let (to_sweeper_tx, to_sweeper_rx) = mpsc::channel(100);
|
||||
@@ -873,28 +882,103 @@ fn pulse_every(interval: std::time::Duration) -> impl futures::Stream<Item = ()>
|
||||
.map(|_| ())
|
||||
}
|
||||
|
||||
/// Check if landlock is supported and emit a warning if not.
|
||||
fn warn_if_no_landlock() {
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
use polkadot_node_core_pvf_common::worker::security::landlock;
|
||||
let status = landlock::get_status();
|
||||
if !landlock::status_is_fully_enabled(&status) {
|
||||
let abi = landlock::LANDLOCK_ABI as u8;
|
||||
/// Check if we can sandbox the root and emit a warning if not.
|
||||
///
|
||||
/// We do this check by spawning a new process and trying to sandbox it. To get as close as possible
|
||||
/// to running the check in a worker, we try it... in a worker. The expected return status is 0 on
|
||||
/// success and -1 on failure.
|
||||
fn check_can_unshare_user_namespace_and_change_root(
|
||||
#[cfg_attr(not(target_os = "linux"), allow(unused_variables))]
|
||||
prepare_worker_program_path: &Path,
|
||||
) -> bool {
|
||||
cfg_if::cfg_if! {
|
||||
if #[cfg(target_os = "linux")] {
|
||||
let output = std::process::Command::new(prepare_worker_program_path)
|
||||
.arg("--check-can-unshare-user-namespace-and-change-root")
|
||||
.output();
|
||||
|
||||
match output {
|
||||
Ok(output) if output.status.success() => true,
|
||||
Ok(output) => {
|
||||
let stderr = std::str::from_utf8(&output.stderr)
|
||||
.expect("child process writes a UTF-8 string to stderr; qed")
|
||||
.trim();
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
?prepare_worker_program_path,
|
||||
// Docs say to always print status using `Display` implementation.
|
||||
status = %output.status,
|
||||
%stderr,
|
||||
"Cannot unshare user namespace and change root, which are Linux-specific kernel security features. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider running with support for unsharing user namespaces for maximum security."
|
||||
);
|
||||
false
|
||||
},
|
||||
Err(err) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
?prepare_worker_program_path,
|
||||
"Could not start child process: {}",
|
||||
err
|
||||
);
|
||||
false
|
||||
},
|
||||
}
|
||||
} else {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
?status,
|
||||
%abi,
|
||||
"Cannot fully enable landlock, a Linux kernel security feature. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider upgrading the kernel version for maximum security."
|
||||
"Cannot unshare user namespace and change root, which are Linux-specific kernel security features. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider running on Linux with support for unsharing user namespaces for maximum security."
|
||||
);
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
"Cannot enable landlock, a Linux kernel security feature. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider running on Linux with landlock support for maximum security."
|
||||
);
|
||||
/// Check if landlock is supported and emit a warning if not.
|
||||
///
|
||||
/// We do this check by spawning a new process and trying to sandbox it. To get as close as possible
|
||||
/// to running the check in a worker, we try it... in a worker. The expected return status is 0 on
|
||||
/// success and -1 on failure.
|
||||
fn check_landlock(
|
||||
#[cfg_attr(not(target_os = "linux"), allow(unused_variables))]
|
||||
prepare_worker_program_path: &Path,
|
||||
) -> bool {
|
||||
cfg_if::cfg_if! {
|
||||
if #[cfg(target_os = "linux")] {
|
||||
match std::process::Command::new(prepare_worker_program_path)
|
||||
.arg("--check-can-enable-landlock")
|
||||
.status()
|
||||
{
|
||||
Ok(status) if status.success() => true,
|
||||
Ok(status) => {
|
||||
let abi =
|
||||
polkadot_node_core_pvf_common::worker::security::landlock::LANDLOCK_ABI as u8;
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
?prepare_worker_program_path,
|
||||
?status,
|
||||
%abi,
|
||||
"Cannot fully enable landlock, a Linux-specific kernel security feature. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider upgrading the kernel version for maximum security."
|
||||
);
|
||||
false
|
||||
},
|
||||
Err(err) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
?prepare_worker_program_path,
|
||||
"Could not start child process: {}",
|
||||
err
|
||||
);
|
||||
false
|
||||
},
|
||||
}
|
||||
} else {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
"Cannot enable landlock, a Linux-specific kernel security feature. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider running on Linux with landlock support for maximum security."
|
||||
);
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -111,6 +111,7 @@ pub use polkadot_node_core_pvf_common::{
|
||||
error::{InternalValidationError, PrepareError},
|
||||
prepare::{PrepareJobKind, PrepareStats},
|
||||
pvf::PvfPrepData,
|
||||
SecurityStatus,
|
||||
};
|
||||
|
||||
/// The log target for this crate.
|
||||
|
||||
@@ -27,6 +27,7 @@ use futures::{
|
||||
use polkadot_node_core_pvf_common::{
|
||||
error::{PrepareError, PrepareResult},
|
||||
pvf::PvfPrepData,
|
||||
SecurityStatus,
|
||||
};
|
||||
use slotmap::HopSlotMap;
|
||||
use std::{
|
||||
@@ -110,10 +111,12 @@ enum PoolEvent {
|
||||
type Mux = FuturesUnordered<BoxFuture<'static, PoolEvent>>;
|
||||
|
||||
struct Pool {
|
||||
// Some variables related to the current session.
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
|
||||
to_pool: mpsc::Receiver<ToPool>,
|
||||
from_pool: mpsc::UnboundedSender<FromPool>,
|
||||
@@ -132,6 +135,7 @@ async fn run(
|
||||
cache_path,
|
||||
spawn_timeout,
|
||||
node_version,
|
||||
security_status,
|
||||
to_pool,
|
||||
mut from_pool,
|
||||
mut spawned,
|
||||
@@ -160,6 +164,7 @@ async fn run(
|
||||
&cache_path,
|
||||
spawn_timeout,
|
||||
node_version.clone(),
|
||||
security_status.clone(),
|
||||
&mut spawned,
|
||||
&mut mux,
|
||||
to_pool,
|
||||
@@ -207,6 +212,7 @@ fn handle_to_pool(
|
||||
cache_path: &Path,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
spawned: &mut HopSlotMap<Worker, WorkerData>,
|
||||
mux: &mut Mux,
|
||||
to_pool: ToPool,
|
||||
@@ -216,7 +222,14 @@ fn handle_to_pool(
|
||||
gum::debug!(target: LOG_TARGET, "spawning a new prepare worker");
|
||||
metrics.prepare_worker().on_begin_spawn();
|
||||
mux.push(
|
||||
spawn_worker_task(program_path.to_owned(), spawn_timeout, node_version).boxed(),
|
||||
spawn_worker_task(
|
||||
program_path.to_owned(),
|
||||
cache_path.to_owned(),
|
||||
spawn_timeout,
|
||||
node_version,
|
||||
security_status,
|
||||
)
|
||||
.boxed(),
|
||||
);
|
||||
},
|
||||
ToPool::StartWork { worker, pvf, artifact_path } => {
|
||||
@@ -229,7 +242,6 @@ fn handle_to_pool(
|
||||
worker,
|
||||
idle,
|
||||
pvf,
|
||||
cache_path.to_owned(),
|
||||
artifact_path,
|
||||
preparation_timer,
|
||||
)
|
||||
@@ -258,13 +270,23 @@ fn handle_to_pool(
|
||||
|
||||
async fn spawn_worker_task(
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
) -> PoolEvent {
|
||||
use futures_timer::Delay;
|
||||
|
||||
loop {
|
||||
match worker_intf::spawn(&program_path, spawn_timeout, node_version.as_deref()).await {
|
||||
match worker_intf::spawn(
|
||||
&program_path,
|
||||
&cache_path,
|
||||
spawn_timeout,
|
||||
node_version.as_deref(),
|
||||
security_status.clone(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok((idle, handle)) => break PoolEvent::Spawn(idle, handle),
|
||||
Err(err) => {
|
||||
gum::warn!(target: LOG_TARGET, "failed to spawn a prepare worker: {:?}", err);
|
||||
@@ -281,11 +303,10 @@ async fn start_work_task<Timer>(
|
||||
worker: Worker,
|
||||
idle: IdleWorker,
|
||||
pvf: PvfPrepData,
|
||||
cache_path: PathBuf,
|
||||
artifact_path: PathBuf,
|
||||
_preparation_timer: Option<Timer>,
|
||||
) -> PoolEvent {
|
||||
let outcome = worker_intf::start_work(&metrics, idle, pvf, &cache_path, artifact_path).await;
|
||||
let outcome = worker_intf::start_work(&metrics, idle, pvf, artifact_path).await;
|
||||
PoolEvent::StartWork(worker, outcome)
|
||||
}
|
||||
|
||||
@@ -322,14 +343,29 @@ fn handle_mux(
|
||||
),
|
||||
// Return `Concluded`, but do not kill the worker since the error was on the host
|
||||
// side.
|
||||
Outcome::RenameTmpFileErr { worker: idle, result: _, err } =>
|
||||
Outcome::RenameTmpFileErr { worker: idle, result: _, err, src, dest } =>
|
||||
handle_concluded_no_rip(
|
||||
from_pool,
|
||||
spawned,
|
||||
worker,
|
||||
idle,
|
||||
Err(PrepareError::RenameTmpFileErr(err)),
|
||||
Err(PrepareError::RenameTmpFileErr { err, src, dest }),
|
||||
),
|
||||
// Could not clear worker cache. Kill the worker so other jobs can't see the data.
|
||||
Outcome::ClearWorkerDir { err } => {
|
||||
if attempt_retire(metrics, spawned, worker) {
|
||||
reply(
|
||||
from_pool,
|
||||
FromPool::Concluded {
|
||||
worker,
|
||||
rip: true,
|
||||
result: Err(PrepareError::ClearWorkerDir(err)),
|
||||
},
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
},
|
||||
Outcome::Unreachable => {
|
||||
if attempt_retire(metrics, spawned, worker) {
|
||||
reply(from_pool, FromPool::Rip(worker))?;
|
||||
@@ -434,6 +470,7 @@ pub fn start(
|
||||
cache_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
) -> (mpsc::Sender<ToPool>, mpsc::UnboundedReceiver<FromPool>, impl Future<Output = ()>) {
|
||||
let (to_pool_tx, to_pool_rx) = mpsc::channel(10);
|
||||
let (from_pool_tx, from_pool_rx) = mpsc::unbounded();
|
||||
@@ -444,6 +481,7 @@ pub fn start(
|
||||
cache_path,
|
||||
spawn_timeout,
|
||||
node_version,
|
||||
security_status,
|
||||
to_pool: to_pool_rx,
|
||||
from_pool: from_pool_tx,
|
||||
spawned: HopSlotMap::with_capacity_and_key(20),
|
||||
|
||||
@@ -19,17 +19,17 @@
|
||||
use crate::{
|
||||
metrics::Metrics,
|
||||
worker_intf::{
|
||||
path_to_bytes, spawn_with_program_path, tmpfile_in, IdleWorker, SpawnErr, WorkerHandle,
|
||||
JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker,
|
||||
SpawnErr, WorkerDir, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
},
|
||||
LOG_TARGET,
|
||||
};
|
||||
use parity_scale_codec::{Decode, Encode};
|
||||
use polkadot_node_core_pvf_common::{
|
||||
error::{PrepareError, PrepareResult},
|
||||
framed_recv, framed_send,
|
||||
prepare::PrepareStats,
|
||||
pvf::PvfPrepData,
|
||||
worker_dir, SecurityStatus,
|
||||
};
|
||||
|
||||
use sp_core::hexdisplay::HexDisplay;
|
||||
@@ -41,19 +41,33 @@ use tokio::{io, net::UnixStream};
|
||||
|
||||
/// Spawns a new worker with the given program path that acts as the worker and the spawn timeout.
|
||||
///
|
||||
/// The program should be able to handle `<program-path> prepare-worker <socket-path>` invocation.
|
||||
/// Sends a handshake message to the worker as soon as it is spawned.
|
||||
pub async fn spawn(
|
||||
program_path: &Path,
|
||||
cache_path: &Path,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<&str>,
|
||||
security_status: SecurityStatus,
|
||||
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
|
||||
let mut extra_args = vec!["prepare-worker"];
|
||||
if let Some(node_version) = node_version {
|
||||
extra_args.extend_from_slice(&["--node-impl-version", node_version]);
|
||||
}
|
||||
spawn_with_program_path("prepare", program_path, &extra_args, spawn_timeout).await
|
||||
|
||||
spawn_with_program_path(
|
||||
"prepare",
|
||||
program_path,
|
||||
cache_path,
|
||||
&extra_args,
|
||||
spawn_timeout,
|
||||
security_status,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Outcome of PVF preparation.
|
||||
///
|
||||
/// If the idle worker token is not returned, it means the worker must be terminated.
|
||||
pub enum Outcome {
|
||||
/// The worker has finished the work assigned to it.
|
||||
Concluded { worker: IdleWorker, result: PrepareResult },
|
||||
@@ -62,9 +76,19 @@ pub enum Outcome {
|
||||
Unreachable,
|
||||
/// The temporary file for the artifact could not be created at the given cache path.
|
||||
CreateTmpFileErr { worker: IdleWorker, err: String },
|
||||
/// The response from the worker is received, but the file cannot be renamed (moved) to the
|
||||
/// The response from the worker is received, but the tmp file cannot be renamed (moved) to the
|
||||
/// final destination location.
|
||||
RenameTmpFileErr { worker: IdleWorker, result: PrepareResult, err: String },
|
||||
RenameTmpFileErr {
|
||||
worker: IdleWorker,
|
||||
result: PrepareResult,
|
||||
err: String,
|
||||
// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
|
||||
// conversion to `Option<String>`.
|
||||
src: Option<String>,
|
||||
dest: Option<String>,
|
||||
},
|
||||
/// The worker cache could not be cleared for the given reason.
|
||||
ClearWorkerDir { err: String },
|
||||
/// The worker failed to finish the job until the given deadline.
|
||||
///
|
||||
/// The worker is no longer usable and should be killed.
|
||||
@@ -84,83 +108,88 @@ pub async fn start_work(
|
||||
metrics: &Metrics,
|
||||
worker: IdleWorker,
|
||||
pvf: PvfPrepData,
|
||||
cache_path: &Path,
|
||||
artifact_path: PathBuf,
|
||||
) -> Outcome {
|
||||
let IdleWorker { stream, pid } = worker;
|
||||
let IdleWorker { stream, pid, worker_dir } = worker;
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir,
|
||||
"starting prepare for {}",
|
||||
artifact_path.display(),
|
||||
);
|
||||
|
||||
with_tmp_file(stream, pid, cache_path, |tmp_file, mut stream| async move {
|
||||
let preparation_timeout = pvf.prep_timeout();
|
||||
if let Err(err) = send_request(&mut stream, pvf, &tmp_file).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to send a prepare request: {:?}",
|
||||
err,
|
||||
);
|
||||
return Outcome::Unreachable
|
||||
}
|
||||
|
||||
// Wait for the result from the worker, keeping in mind that there may be a timeout, the
|
||||
// worker may get killed, or something along these lines. In that case we should propagate
|
||||
// the error to the pool.
|
||||
//
|
||||
// We use a generous timeout here. This is in addition to the one in the child process, in
|
||||
// case the child stalls. We have a wall clock timeout here in the host, but a CPU timeout
|
||||
// in the child. We want to use CPU time because it varies less than wall clock time under
|
||||
// load, but the CPU resources of the child can only be measured from the parent after the
|
||||
// child process terminates.
|
||||
let timeout = preparation_timeout * JOB_TIMEOUT_WALL_CLOCK_FACTOR;
|
||||
let result = tokio::time::timeout(timeout, recv_response(&mut stream, pid)).await;
|
||||
|
||||
match result {
|
||||
// Received bytes from worker within the time limit.
|
||||
Ok(Ok(prepare_result)) =>
|
||||
handle_response(
|
||||
metrics,
|
||||
IdleWorker { stream, pid },
|
||||
prepare_result,
|
||||
pid,
|
||||
tmp_file,
|
||||
artifact_path,
|
||||
preparation_timeout,
|
||||
)
|
||||
.await,
|
||||
Ok(Err(err)) => {
|
||||
// Communication error within the time limit.
|
||||
with_worker_dir_setup(
|
||||
worker_dir,
|
||||
stream,
|
||||
pid,
|
||||
|tmp_artifact_file, mut stream, worker_dir| async move {
|
||||
let preparation_timeout = pvf.prep_timeout();
|
||||
if let Err(err) = send_request(&mut stream, pvf).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to recv a prepare response: {:?}",
|
||||
"failed to send a prepare request: {:?}",
|
||||
err,
|
||||
);
|
||||
Outcome::IoErr(err.to_string())
|
||||
},
|
||||
Err(_) => {
|
||||
// Timed out here on the host.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"did not recv a prepare response within the time limit",
|
||||
);
|
||||
Outcome::TimedOut
|
||||
},
|
||||
}
|
||||
})
|
||||
return Outcome::Unreachable
|
||||
}
|
||||
|
||||
// Wait for the result from the worker, keeping in mind that there may be a timeout, the
|
||||
// worker may get killed, or something along these lines. In that case we should
|
||||
// propagate the error to the pool.
|
||||
//
|
||||
// We use a generous timeout here. This is in addition to the one in the child process,
|
||||
// in case the child stalls. We have a wall clock timeout here in the host, but a CPU
|
||||
// timeout in the child. We want to use CPU time because it varies less than wall clock
|
||||
// time under load, but the CPU resources of the child can only be measured from the
|
||||
// parent after the child process terminates.
|
||||
let timeout = preparation_timeout * JOB_TIMEOUT_WALL_CLOCK_FACTOR;
|
||||
let result = tokio::time::timeout(timeout, recv_response(&mut stream, pid)).await;
|
||||
|
||||
match result {
|
||||
// Received bytes from worker within the time limit.
|
||||
Ok(Ok(prepare_result)) =>
|
||||
handle_response(
|
||||
metrics,
|
||||
IdleWorker { stream, pid, worker_dir },
|
||||
prepare_result,
|
||||
pid,
|
||||
tmp_artifact_file,
|
||||
artifact_path,
|
||||
preparation_timeout,
|
||||
)
|
||||
.await,
|
||||
Ok(Err(err)) => {
|
||||
// Communication error within the time limit.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to recv a prepare response: {:?}",
|
||||
err,
|
||||
);
|
||||
Outcome::IoErr(err.to_string())
|
||||
},
|
||||
Err(_) => {
|
||||
// Timed out here on the host.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"did not recv a prepare response within the time limit",
|
||||
);
|
||||
Outcome::TimedOut
|
||||
},
|
||||
}
|
||||
},
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Handles the case where we successfully received response bytes on the host from the child.
|
||||
///
|
||||
/// NOTE: Here we know the artifact exists, but is still located in a temporary file which will be
|
||||
/// cleared by `with_tmp_file`.
|
||||
/// Here we know the artifact exists, but is still located in a temporary file which will be cleared
|
||||
/// by [`with_worker_dir_setup`].
|
||||
async fn handle_response(
|
||||
metrics: &Metrics,
|
||||
worker: IdleWorker,
|
||||
@@ -209,7 +238,13 @@ async fn handle_response(
|
||||
artifact_path.display(),
|
||||
err,
|
||||
);
|
||||
Outcome::RenameTmpFileErr { worker, result, err: format!("{:?}", err) }
|
||||
Outcome::RenameTmpFileErr {
|
||||
worker,
|
||||
result,
|
||||
err: format!("{:?}", err),
|
||||
src: tmp_file.to_str().map(String::from),
|
||||
dest: artifact_path.to_str().map(String::from),
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
@@ -220,61 +255,58 @@ async fn handle_response(
|
||||
outcome
|
||||
}
|
||||
|
||||
/// Create a temporary file for an artifact at the given cache path and execute the given
|
||||
/// future/closure passing the file path in.
|
||||
/// Create a temporary file for an artifact in the worker cache, execute the given future/closure
|
||||
/// passing the file path in, and clean up the worker cache.
|
||||
///
|
||||
/// The function will try best effort to not leave behind the temporary file.
|
||||
async fn with_tmp_file<F, Fut>(stream: UnixStream, pid: u32, cache_path: &Path, f: F) -> Outcome
|
||||
/// Failure to clean up the worker cache results in an error - leaving any files here could be a
|
||||
/// security issue, and we should shut down the worker. This should be very rare.
|
||||
async fn with_worker_dir_setup<F, Fut>(
|
||||
worker_dir: WorkerDir,
|
||||
stream: UnixStream,
|
||||
pid: u32,
|
||||
f: F,
|
||||
) -> Outcome
|
||||
where
|
||||
Fut: futures::Future<Output = Outcome>,
|
||||
F: FnOnce(PathBuf, UnixStream) -> Fut,
|
||||
F: FnOnce(PathBuf, UnixStream, WorkerDir) -> Fut,
|
||||
{
|
||||
let tmp_file = match tmpfile_in("prepare-artifact-", cache_path).await {
|
||||
Ok(f) => f,
|
||||
Err(err) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to create a temp file for the artifact: {:?}",
|
||||
err,
|
||||
);
|
||||
return Outcome::CreateTmpFileErr {
|
||||
worker: IdleWorker { stream, pid },
|
||||
err: format!("{:?}", err),
|
||||
}
|
||||
},
|
||||
// Create the tmp file here so that the child doesn't need any file creation rights. This will
|
||||
// be cleared at the end of this function.
|
||||
let tmp_file = worker_dir::prepare_tmp_artifact(&worker_dir.path);
|
||||
if let Err(err) = tokio::fs::File::create(&tmp_file).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir,
|
||||
"failed to create a temp file for the artifact: {:?}",
|
||||
err,
|
||||
);
|
||||
return Outcome::CreateTmpFileErr {
|
||||
worker: IdleWorker { stream, pid, worker_dir },
|
||||
err: format!("{:?}", err),
|
||||
}
|
||||
};
|
||||
|
||||
let outcome = f(tmp_file.clone(), stream).await;
|
||||
let worker_dir_path = worker_dir.path.clone();
|
||||
let outcome = f(tmp_file, stream, worker_dir).await;
|
||||
|
||||
// The function called above is expected to move `tmp_file` to a new location upon success.
|
||||
// However, the function may as well fail and in that case we should remove the tmp file here.
|
||||
//
|
||||
// In any case, we try to remove the file here so that there are no leftovers. We only report
|
||||
// errors that are different from the `NotFound`.
|
||||
match tokio::fs::remove_file(tmp_file).await {
|
||||
Ok(()) => (),
|
||||
Err(err) if err.kind() == std::io::ErrorKind::NotFound => (),
|
||||
Err(err) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to remove the tmp file: {:?}",
|
||||
err,
|
||||
);
|
||||
},
|
||||
// Try to clear the worker dir.
|
||||
if let Err(err) = clear_worker_dir_path(&worker_dir_path) {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir_path,
|
||||
"failed to clear worker cache after the job: {:?}",
|
||||
err,
|
||||
);
|
||||
return Outcome::ClearWorkerDir { err: format!("{:?}", err) }
|
||||
}
|
||||
|
||||
outcome
|
||||
}
|
||||
|
||||
async fn send_request(
|
||||
stream: &mut UnixStream,
|
||||
pvf: PvfPrepData,
|
||||
tmp_file: &Path,
|
||||
) -> io::Result<()> {
|
||||
async fn send_request(stream: &mut UnixStream, pvf: PvfPrepData) -> io::Result<()> {
|
||||
framed_send(stream, &pvf.encode()).await?;
|
||||
framed_send(stream, path_to_bytes(tmp_file)).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ use crate::LOG_TARGET;
|
||||
use futures::FutureExt as _;
|
||||
use futures_timer::Delay;
|
||||
use pin_project::pin_project;
|
||||
use polkadot_node_core_pvf_common::{worker_dir, SecurityStatus};
|
||||
use rand::Rng;
|
||||
use std::{
|
||||
fmt, mem,
|
||||
@@ -39,99 +40,106 @@ use tokio::{
|
||||
pub const JOB_TIMEOUT_WALL_CLOCK_FACTOR: u32 = 4;
|
||||
|
||||
/// This is publicly exposed only for integration tests.
|
||||
///
|
||||
/// # Parameters
|
||||
///
|
||||
/// - `debug_id`: An identifier for the process (e.g. "execute" or "prepare").
|
||||
///
|
||||
/// - `program_path`: The path to the program.
|
||||
///
|
||||
/// - `cache_path`: The path to the artifact cache.
|
||||
///
|
||||
/// - `extra_args`: Optional extra CLI arguments to the program. NOTE: Should only contain data
|
||||
/// required before the handshake, like node/worker versions for the version check. Other data
|
||||
/// should go through the handshake.
|
||||
///
|
||||
/// - `spawn_timeout`: The amount of time to wait for the child process to spawn.
|
||||
///
|
||||
/// - `security_status`: contains the detected status of security features.
|
||||
#[doc(hidden)]
|
||||
pub async fn spawn_with_program_path(
|
||||
debug_id: &'static str,
|
||||
program_path: impl Into<PathBuf>,
|
||||
cache_path: &Path,
|
||||
extra_args: &[&str],
|
||||
spawn_timeout: Duration,
|
||||
security_status: SecurityStatus,
|
||||
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
|
||||
let program_path = program_path.into();
|
||||
with_transient_socket_path(debug_id, |socket_path| {
|
||||
let socket_path = socket_path.to_owned();
|
||||
let extra_args: Vec<String> = extra_args.iter().map(|arg| arg.to_string()).collect();
|
||||
let worker_dir = WorkerDir::new(debug_id, cache_path).await?;
|
||||
let socket_path = worker_dir::socket(&worker_dir.path);
|
||||
|
||||
async move {
|
||||
let listener = UnixListener::bind(&socket_path).map_err(|err| {
|
||||
let extra_args: Vec<String> = extra_args.iter().map(|arg| arg.to_string()).collect();
|
||||
|
||||
let listener = UnixListener::bind(&socket_path).map_err(|err| {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%debug_id,
|
||||
?program_path,
|
||||
?extra_args,
|
||||
?worker_dir,
|
||||
?socket_path,
|
||||
"cannot bind unix socket: {:?}",
|
||||
err,
|
||||
);
|
||||
SpawnErr::Bind
|
||||
})?;
|
||||
|
||||
let handle = WorkerHandle::spawn(&program_path, &extra_args, &worker_dir.path, security_status)
|
||||
.map_err(|err| {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%debug_id,
|
||||
?program_path,
|
||||
?extra_args,
|
||||
?worker_dir.path,
|
||||
?socket_path,
|
||||
"cannot spawn a worker: {:?}",
|
||||
err,
|
||||
);
|
||||
SpawnErr::ProcessSpawn
|
||||
})?;
|
||||
|
||||
let worker_dir_path = worker_dir.path.clone();
|
||||
futures::select! {
|
||||
accept_result = listener.accept().fuse() => {
|
||||
let (stream, _) = accept_result.map_err(|err| {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%debug_id,
|
||||
?program_path,
|
||||
?extra_args,
|
||||
"cannot bind unix socket: {:?}",
|
||||
?worker_dir_path,
|
||||
?socket_path,
|
||||
"cannot accept a worker: {:?}",
|
||||
err,
|
||||
);
|
||||
SpawnErr::Bind
|
||||
SpawnErr::Accept
|
||||
})?;
|
||||
|
||||
let handle =
|
||||
WorkerHandle::spawn(&program_path, &extra_args, socket_path).map_err(|err| {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%debug_id,
|
||||
?program_path,
|
||||
?extra_args,
|
||||
"cannot spawn a worker: {:?}",
|
||||
err,
|
||||
);
|
||||
SpawnErr::ProcessSpawn
|
||||
})?;
|
||||
|
||||
futures::select! {
|
||||
accept_result = listener.accept().fuse() => {
|
||||
let (stream, _) = accept_result.map_err(|err| {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%debug_id,
|
||||
?program_path,
|
||||
?extra_args,
|
||||
"cannot accept a worker: {:?}",
|
||||
err,
|
||||
);
|
||||
SpawnErr::Accept
|
||||
})?;
|
||||
Ok((IdleWorker { stream, pid: handle.id() }, handle))
|
||||
}
|
||||
_ = Delay::new(spawn_timeout).fuse() => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%debug_id,
|
||||
?program_path,
|
||||
?extra_args,
|
||||
?spawn_timeout,
|
||||
"spawning and connecting to socket timed out",
|
||||
);
|
||||
Err(SpawnErr::AcceptTimeout)
|
||||
}
|
||||
}
|
||||
Ok((IdleWorker { stream, pid: handle.id(), worker_dir }, handle))
|
||||
}
|
||||
})
|
||||
.await
|
||||
_ = Delay::new(spawn_timeout).fuse() => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%debug_id,
|
||||
?program_path,
|
||||
?extra_args,
|
||||
?worker_dir_path,
|
||||
?socket_path,
|
||||
?spawn_timeout,
|
||||
"spawning and connecting to socket timed out",
|
||||
);
|
||||
Err(SpawnErr::AcceptTimeout)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn with_transient_socket_path<T, F, Fut>(debug_id: &'static str, f: F) -> Result<T, SpawnErr>
|
||||
where
|
||||
F: FnOnce(&Path) -> Fut,
|
||||
Fut: futures::Future<Output = Result<T, SpawnErr>> + 'static,
|
||||
{
|
||||
let socket_path = tmpfile(&format!("pvf-host-{}", debug_id))
|
||||
.await
|
||||
.map_err(|_| SpawnErr::TmpFile)?;
|
||||
let result = f(&socket_path).await;
|
||||
|
||||
// Best effort to remove the socket file. Under normal circumstances the socket will be removed
|
||||
// by the worker. We make sure that it is removed here, just in case a failed rendezvous.
|
||||
let _ = tokio::fs::remove_file(socket_path).await;
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Returns a path under the given `dir`. The file name will start with the given prefix.
|
||||
/// Returns a path under the given `dir`. The path name will start with the given prefix.
|
||||
///
|
||||
/// There is only a certain number of retries. If exceeded this function will give up and return an
|
||||
/// error.
|
||||
pub async fn tmpfile_in(prefix: &str, dir: &Path) -> io::Result<PathBuf> {
|
||||
fn tmppath(prefix: &str, dir: &Path) -> PathBuf {
|
||||
pub async fn tmppath_in(prefix: &str, dir: &Path) -> io::Result<PathBuf> {
|
||||
fn make_tmppath(prefix: &str, dir: &Path) -> PathBuf {
|
||||
use rand::distributions::Alphanumeric;
|
||||
|
||||
const DESCRIMINATOR_LEN: usize = 10;
|
||||
@@ -143,27 +151,28 @@ pub async fn tmpfile_in(prefix: &str, dir: &Path) -> io::Result<PathBuf> {
|
||||
let s = std::str::from_utf8(&buf)
|
||||
.expect("the string is collected from a valid utf-8 sequence; qed");
|
||||
|
||||
let mut file = dir.to_owned();
|
||||
file.push(s);
|
||||
file
|
||||
let mut path = dir.to_owned();
|
||||
path.push(s);
|
||||
path
|
||||
}
|
||||
|
||||
const NUM_RETRIES: usize = 50;
|
||||
|
||||
for _ in 0..NUM_RETRIES {
|
||||
let candidate_path = tmppath(prefix, dir);
|
||||
if !candidate_path.exists() {
|
||||
return Ok(candidate_path)
|
||||
let tmp_path = make_tmppath(prefix, dir);
|
||||
if !tmp_path.exists() {
|
||||
return Ok(tmp_path)
|
||||
}
|
||||
}
|
||||
|
||||
Err(io::Error::new(io::ErrorKind::Other, "failed to create a temporary file"))
|
||||
Err(io::Error::new(io::ErrorKind::Other, "failed to create a temporary path"))
|
||||
}
|
||||
|
||||
/// The same as [`tmpfile_in`], but uses [`std::env::temp_dir`] as the directory.
|
||||
pub async fn tmpfile(prefix: &str) -> io::Result<PathBuf> {
|
||||
/// The same as [`tmppath_in`], but uses [`std::env::temp_dir`] as the directory.
|
||||
#[cfg(test)]
|
||||
pub async fn tmppath(prefix: &str) -> io::Result<PathBuf> {
|
||||
let temp_dir = PathBuf::from(std::env::temp_dir());
|
||||
tmpfile_in(prefix, &temp_dir).await
|
||||
tmppath_in(prefix, &temp_dir).await
|
||||
}
|
||||
|
||||
/// A struct that represents an idle worker.
|
||||
@@ -177,13 +186,19 @@ pub struct IdleWorker {
|
||||
|
||||
/// The identifier of this process. Used to reset the niceness.
|
||||
pub pid: u32,
|
||||
|
||||
/// The temporary per-worker path. We clean up the worker dir between jobs and delete it when
|
||||
/// the worker dies.
|
||||
pub worker_dir: WorkerDir,
|
||||
}
|
||||
|
||||
/// An error happened during spawning a worker process.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum SpawnErr {
|
||||
/// Cannot obtain a temporary file location.
|
||||
TmpFile,
|
||||
/// Cannot obtain a temporary path location.
|
||||
TmpPath,
|
||||
/// An FS error occurred.
|
||||
Fs(String),
|
||||
/// Cannot bind the socket to the given path.
|
||||
Bind,
|
||||
/// An error happened during accepting a connection to the socket.
|
||||
@@ -219,12 +234,32 @@ impl WorkerHandle {
|
||||
fn spawn(
|
||||
program: impl AsRef<Path>,
|
||||
extra_args: &[String],
|
||||
socket_path: impl AsRef<Path>,
|
||||
worker_dir_path: impl AsRef<Path>,
|
||||
security_status: SecurityStatus,
|
||||
) -> io::Result<Self> {
|
||||
let mut child = process::Command::new(program.as_ref())
|
||||
let security_args = {
|
||||
let mut args = vec![];
|
||||
if security_status.can_enable_landlock {
|
||||
args.push("--can-enable-landlock".to_string());
|
||||
}
|
||||
if security_status.can_unshare_user_namespace_and_change_root {
|
||||
args.push("--can-unshare-user-namespace-and-change-root".to_string());
|
||||
}
|
||||
args
|
||||
};
|
||||
|
||||
// Clear all env vars from the spawned process.
|
||||
let mut command = process::Command::new(program.as_ref());
|
||||
command.env_clear();
|
||||
// Add back any env vars we want to keep.
|
||||
if let Ok(value) = std::env::var("RUST_LOG") {
|
||||
command.env("RUST_LOG", value);
|
||||
}
|
||||
let mut child = command
|
||||
.args(extra_args)
|
||||
.arg("--socket-path")
|
||||
.arg(socket_path.as_ref().as_os_str())
|
||||
.arg("--worker-dir-path")
|
||||
.arg(worker_dir_path.as_ref().as_os_str())
|
||||
.args(&security_args)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.kill_on_drop(true)
|
||||
.spawn()?;
|
||||
@@ -306,16 +341,6 @@ impl fmt::Debug for WorkerHandle {
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the given path into a byte buffer.
|
||||
pub fn path_to_bytes(path: &Path) -> &[u8] {
|
||||
// Ideally, we take the `OsStr` of the path, send that and reconstruct this on the other side.
|
||||
// However, libstd doesn't provide us with such an option. There are crates out there that
|
||||
// allow for extraction of a path, but TBH it doesn't seem to be a real issue.
|
||||
//
|
||||
// However, should be there reports we can incorporate such a crate here.
|
||||
path.to_str().expect("non-UTF-8 path").as_bytes()
|
||||
}
|
||||
|
||||
/// Write some data prefixed by its length into `w`.
|
||||
pub async fn framed_send(w: &mut (impl AsyncWrite + Unpin), buf: &[u8]) -> io::Result<()> {
|
||||
let len_buf = buf.len().to_le_bytes();
|
||||
@@ -333,3 +358,84 @@ pub async fn framed_recv(r: &mut (impl AsyncRead + Unpin)) -> io::Result<Vec<u8>
|
||||
r.read_exact(&mut buf).await?;
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
/// A temporary worker dir that contains only files needed by the worker. The worker will change its
|
||||
/// root (the `/` directory) to this directory; it should have access to no other paths on its
|
||||
/// filesystem.
|
||||
///
|
||||
/// NOTE: This struct cleans up its associated directory when it is dropped. Therefore it should not
|
||||
/// implement `Clone`.
|
||||
///
|
||||
/// # File structure
|
||||
///
|
||||
/// The overall file structure for the PVF system is as follows. The `worker-dir-X`s are managed by
|
||||
/// this struct.
|
||||
///
|
||||
/// ```nocompile
|
||||
/// + /<cache_path>/
|
||||
/// - artifact-1
|
||||
/// - artifact-2
|
||||
/// - [...]
|
||||
/// - worker-dir-1/ (new `/` for worker-1)
|
||||
/// + socket (created by host)
|
||||
/// + tmp-artifact (created by host) (prepare-only)
|
||||
/// + artifact (link -> artifact-1) (created by host) (execute-only)
|
||||
/// - worker-dir-2/ (new `/` for worker-2)
|
||||
/// + [...]
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
pub struct WorkerDir {
|
||||
pub path: PathBuf,
|
||||
}
|
||||
|
||||
impl WorkerDir {
|
||||
/// Creates a new, empty worker dir with a random name in the given cache dir.
|
||||
pub async fn new(debug_id: &'static str, cache_dir: &Path) -> Result<Self, SpawnErr> {
|
||||
let prefix = format!("worker-dir-{}-", debug_id);
|
||||
let path = tmppath_in(&prefix, cache_dir).await.map_err(|_| SpawnErr::TmpPath)?;
|
||||
tokio::fs::create_dir(&path)
|
||||
.await
|
||||
.map_err(|err| SpawnErr::Fs(err.to_string()))?;
|
||||
Ok(Self { path })
|
||||
}
|
||||
}
|
||||
|
||||
// Try to clean up the temporary worker dir at the end of the worker's lifetime. It should be wiped
|
||||
// on startup, but we make a best effort not to leave it around.
|
||||
impl Drop for WorkerDir {
|
||||
fn drop(&mut self) {
|
||||
let _ = std::fs::remove_dir_all(&self.path);
|
||||
}
|
||||
}
|
||||
|
||||
// Not async since Rust has trouble with async recursion. There should be few files here anyway.
|
||||
//
|
||||
// TODO: A lingering malicious job can still access future files in this dir. See
|
||||
// <https://github.com/paritytech/polkadot-sdk/issues/574> for how to fully secure this.
|
||||
/// Clear the temporary worker dir without deleting it. Not deleting is important because the worker
|
||||
/// has mounted its own separate filesystem here.
|
||||
///
|
||||
/// Should be called right after a job has finished. We don't want jobs to have access to
|
||||
/// artifacts from previous jobs.
|
||||
pub fn clear_worker_dir_path(worker_dir_path: &Path) -> io::Result<()> {
|
||||
fn remove_dir_contents(path: &Path) -> io::Result<()> {
|
||||
for entry in std::fs::read_dir(&path)? {
|
||||
let entry = entry?;
|
||||
let path = entry.path();
|
||||
|
||||
if entry.file_type()?.is_dir() {
|
||||
remove_dir_contents(&path)?;
|
||||
std::fs::remove_dir(path)?;
|
||||
} else {
|
||||
std::fs::remove_file(path)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Note the worker dir may not exist anymore because of the worker dying and being cleaned up.
|
||||
match remove_dir_contents(worker_dir_path) {
|
||||
Err(err) if matches!(err.kind(), io::ErrorKind::NotFound) => Ok(()),
|
||||
result => result,
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user