mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-05-31 09:51:02 +00:00
PVF: more filesystem sandboxing (#1373)
This commit is contained in:
@@ -27,6 +27,7 @@ use futures::{
|
||||
use polkadot_node_core_pvf_common::{
|
||||
error::{PrepareError, PrepareResult},
|
||||
pvf::PvfPrepData,
|
||||
SecurityStatus,
|
||||
};
|
||||
use slotmap::HopSlotMap;
|
||||
use std::{
|
||||
@@ -110,10 +111,12 @@ enum PoolEvent {
|
||||
type Mux = FuturesUnordered<BoxFuture<'static, PoolEvent>>;
|
||||
|
||||
struct Pool {
|
||||
// Some variables related to the current session.
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
|
||||
to_pool: mpsc::Receiver<ToPool>,
|
||||
from_pool: mpsc::UnboundedSender<FromPool>,
|
||||
@@ -132,6 +135,7 @@ async fn run(
|
||||
cache_path,
|
||||
spawn_timeout,
|
||||
node_version,
|
||||
security_status,
|
||||
to_pool,
|
||||
mut from_pool,
|
||||
mut spawned,
|
||||
@@ -160,6 +164,7 @@ async fn run(
|
||||
&cache_path,
|
||||
spawn_timeout,
|
||||
node_version.clone(),
|
||||
security_status.clone(),
|
||||
&mut spawned,
|
||||
&mut mux,
|
||||
to_pool,
|
||||
@@ -207,6 +212,7 @@ fn handle_to_pool(
|
||||
cache_path: &Path,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
spawned: &mut HopSlotMap<Worker, WorkerData>,
|
||||
mux: &mut Mux,
|
||||
to_pool: ToPool,
|
||||
@@ -216,7 +222,14 @@ fn handle_to_pool(
|
||||
gum::debug!(target: LOG_TARGET, "spawning a new prepare worker");
|
||||
metrics.prepare_worker().on_begin_spawn();
|
||||
mux.push(
|
||||
spawn_worker_task(program_path.to_owned(), spawn_timeout, node_version).boxed(),
|
||||
spawn_worker_task(
|
||||
program_path.to_owned(),
|
||||
cache_path.to_owned(),
|
||||
spawn_timeout,
|
||||
node_version,
|
||||
security_status,
|
||||
)
|
||||
.boxed(),
|
||||
);
|
||||
},
|
||||
ToPool::StartWork { worker, pvf, artifact_path } => {
|
||||
@@ -229,7 +242,6 @@ fn handle_to_pool(
|
||||
worker,
|
||||
idle,
|
||||
pvf,
|
||||
cache_path.to_owned(),
|
||||
artifact_path,
|
||||
preparation_timer,
|
||||
)
|
||||
@@ -258,13 +270,23 @@ fn handle_to_pool(
|
||||
|
||||
async fn spawn_worker_task(
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
) -> PoolEvent {
|
||||
use futures_timer::Delay;
|
||||
|
||||
loop {
|
||||
match worker_intf::spawn(&program_path, spawn_timeout, node_version.as_deref()).await {
|
||||
match worker_intf::spawn(
|
||||
&program_path,
|
||||
&cache_path,
|
||||
spawn_timeout,
|
||||
node_version.as_deref(),
|
||||
security_status.clone(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok((idle, handle)) => break PoolEvent::Spawn(idle, handle),
|
||||
Err(err) => {
|
||||
gum::warn!(target: LOG_TARGET, "failed to spawn a prepare worker: {:?}", err);
|
||||
@@ -281,11 +303,10 @@ async fn start_work_task<Timer>(
|
||||
worker: Worker,
|
||||
idle: IdleWorker,
|
||||
pvf: PvfPrepData,
|
||||
cache_path: PathBuf,
|
||||
artifact_path: PathBuf,
|
||||
_preparation_timer: Option<Timer>,
|
||||
) -> PoolEvent {
|
||||
let outcome = worker_intf::start_work(&metrics, idle, pvf, &cache_path, artifact_path).await;
|
||||
let outcome = worker_intf::start_work(&metrics, idle, pvf, artifact_path).await;
|
||||
PoolEvent::StartWork(worker, outcome)
|
||||
}
|
||||
|
||||
@@ -322,14 +343,29 @@ fn handle_mux(
|
||||
),
|
||||
// Return `Concluded`, but do not kill the worker since the error was on the host
|
||||
// side.
|
||||
Outcome::RenameTmpFileErr { worker: idle, result: _, err } =>
|
||||
Outcome::RenameTmpFileErr { worker: idle, result: _, err, src, dest } =>
|
||||
handle_concluded_no_rip(
|
||||
from_pool,
|
||||
spawned,
|
||||
worker,
|
||||
idle,
|
||||
Err(PrepareError::RenameTmpFileErr(err)),
|
||||
Err(PrepareError::RenameTmpFileErr { err, src, dest }),
|
||||
),
|
||||
// Could not clear worker cache. Kill the worker so other jobs can't see the data.
|
||||
Outcome::ClearWorkerDir { err } => {
|
||||
if attempt_retire(metrics, spawned, worker) {
|
||||
reply(
|
||||
from_pool,
|
||||
FromPool::Concluded {
|
||||
worker,
|
||||
rip: true,
|
||||
result: Err(PrepareError::ClearWorkerDir(err)),
|
||||
},
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
},
|
||||
Outcome::Unreachable => {
|
||||
if attempt_retire(metrics, spawned, worker) {
|
||||
reply(from_pool, FromPool::Rip(worker))?;
|
||||
@@ -434,6 +470,7 @@ pub fn start(
|
||||
cache_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
) -> (mpsc::Sender<ToPool>, mpsc::UnboundedReceiver<FromPool>, impl Future<Output = ()>) {
|
||||
let (to_pool_tx, to_pool_rx) = mpsc::channel(10);
|
||||
let (from_pool_tx, from_pool_rx) = mpsc::unbounded();
|
||||
@@ -444,6 +481,7 @@ pub fn start(
|
||||
cache_path,
|
||||
spawn_timeout,
|
||||
node_version,
|
||||
security_status,
|
||||
to_pool: to_pool_rx,
|
||||
from_pool: from_pool_tx,
|
||||
spawned: HopSlotMap::with_capacity_and_key(20),
|
||||
|
||||
@@ -19,17 +19,17 @@
|
||||
use crate::{
|
||||
metrics::Metrics,
|
||||
worker_intf::{
|
||||
path_to_bytes, spawn_with_program_path, tmpfile_in, IdleWorker, SpawnErr, WorkerHandle,
|
||||
JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker,
|
||||
SpawnErr, WorkerDir, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
},
|
||||
LOG_TARGET,
|
||||
};
|
||||
use parity_scale_codec::{Decode, Encode};
|
||||
use polkadot_node_core_pvf_common::{
|
||||
error::{PrepareError, PrepareResult},
|
||||
framed_recv, framed_send,
|
||||
prepare::PrepareStats,
|
||||
pvf::PvfPrepData,
|
||||
worker_dir, SecurityStatus,
|
||||
};
|
||||
|
||||
use sp_core::hexdisplay::HexDisplay;
|
||||
@@ -41,19 +41,33 @@ use tokio::{io, net::UnixStream};
|
||||
|
||||
/// Spawns a new worker with the given program path that acts as the worker and the spawn timeout.
|
||||
///
|
||||
/// The program should be able to handle `<program-path> prepare-worker <socket-path>` invocation.
|
||||
/// Sends a handshake message to the worker as soon as it is spawned.
|
||||
pub async fn spawn(
|
||||
program_path: &Path,
|
||||
cache_path: &Path,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<&str>,
|
||||
security_status: SecurityStatus,
|
||||
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
|
||||
let mut extra_args = vec!["prepare-worker"];
|
||||
if let Some(node_version) = node_version {
|
||||
extra_args.extend_from_slice(&["--node-impl-version", node_version]);
|
||||
}
|
||||
spawn_with_program_path("prepare", program_path, &extra_args, spawn_timeout).await
|
||||
|
||||
spawn_with_program_path(
|
||||
"prepare",
|
||||
program_path,
|
||||
cache_path,
|
||||
&extra_args,
|
||||
spawn_timeout,
|
||||
security_status,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Outcome of PVF preparation.
|
||||
///
|
||||
/// If the idle worker token is not returned, it means the worker must be terminated.
|
||||
pub enum Outcome {
|
||||
/// The worker has finished the work assigned to it.
|
||||
Concluded { worker: IdleWorker, result: PrepareResult },
|
||||
@@ -62,9 +76,19 @@ pub enum Outcome {
|
||||
Unreachable,
|
||||
/// The temporary file for the artifact could not be created at the given cache path.
|
||||
CreateTmpFileErr { worker: IdleWorker, err: String },
|
||||
/// The response from the worker is received, but the file cannot be renamed (moved) to the
|
||||
/// The response from the worker is received, but the tmp file cannot be renamed (moved) to the
|
||||
/// final destination location.
|
||||
RenameTmpFileErr { worker: IdleWorker, result: PrepareResult, err: String },
|
||||
RenameTmpFileErr {
|
||||
worker: IdleWorker,
|
||||
result: PrepareResult,
|
||||
err: String,
|
||||
// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
|
||||
// conversion to `Option<String>`.
|
||||
src: Option<String>,
|
||||
dest: Option<String>,
|
||||
},
|
||||
/// The worker cache could not be cleared for the given reason.
|
||||
ClearWorkerDir { err: String },
|
||||
/// The worker failed to finish the job until the given deadline.
|
||||
///
|
||||
/// The worker is no longer usable and should be killed.
|
||||
@@ -84,83 +108,88 @@ pub async fn start_work(
|
||||
metrics: &Metrics,
|
||||
worker: IdleWorker,
|
||||
pvf: PvfPrepData,
|
||||
cache_path: &Path,
|
||||
artifact_path: PathBuf,
|
||||
) -> Outcome {
|
||||
let IdleWorker { stream, pid } = worker;
|
||||
let IdleWorker { stream, pid, worker_dir } = worker;
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir,
|
||||
"starting prepare for {}",
|
||||
artifact_path.display(),
|
||||
);
|
||||
|
||||
with_tmp_file(stream, pid, cache_path, |tmp_file, mut stream| async move {
|
||||
let preparation_timeout = pvf.prep_timeout();
|
||||
if let Err(err) = send_request(&mut stream, pvf, &tmp_file).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to send a prepare request: {:?}",
|
||||
err,
|
||||
);
|
||||
return Outcome::Unreachable
|
||||
}
|
||||
|
||||
// Wait for the result from the worker, keeping in mind that there may be a timeout, the
|
||||
// worker may get killed, or something along these lines. In that case we should propagate
|
||||
// the error to the pool.
|
||||
//
|
||||
// We use a generous timeout here. This is in addition to the one in the child process, in
|
||||
// case the child stalls. We have a wall clock timeout here in the host, but a CPU timeout
|
||||
// in the child. We want to use CPU time because it varies less than wall clock time under
|
||||
// load, but the CPU resources of the child can only be measured from the parent after the
|
||||
// child process terminates.
|
||||
let timeout = preparation_timeout * JOB_TIMEOUT_WALL_CLOCK_FACTOR;
|
||||
let result = tokio::time::timeout(timeout, recv_response(&mut stream, pid)).await;
|
||||
|
||||
match result {
|
||||
// Received bytes from worker within the time limit.
|
||||
Ok(Ok(prepare_result)) =>
|
||||
handle_response(
|
||||
metrics,
|
||||
IdleWorker { stream, pid },
|
||||
prepare_result,
|
||||
pid,
|
||||
tmp_file,
|
||||
artifact_path,
|
||||
preparation_timeout,
|
||||
)
|
||||
.await,
|
||||
Ok(Err(err)) => {
|
||||
// Communication error within the time limit.
|
||||
with_worker_dir_setup(
|
||||
worker_dir,
|
||||
stream,
|
||||
pid,
|
||||
|tmp_artifact_file, mut stream, worker_dir| async move {
|
||||
let preparation_timeout = pvf.prep_timeout();
|
||||
if let Err(err) = send_request(&mut stream, pvf).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to recv a prepare response: {:?}",
|
||||
"failed to send a prepare request: {:?}",
|
||||
err,
|
||||
);
|
||||
Outcome::IoErr(err.to_string())
|
||||
},
|
||||
Err(_) => {
|
||||
// Timed out here on the host.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"did not recv a prepare response within the time limit",
|
||||
);
|
||||
Outcome::TimedOut
|
||||
},
|
||||
}
|
||||
})
|
||||
return Outcome::Unreachable
|
||||
}
|
||||
|
||||
// Wait for the result from the worker, keeping in mind that there may be a timeout, the
|
||||
// worker may get killed, or something along these lines. In that case we should
|
||||
// propagate the error to the pool.
|
||||
//
|
||||
// We use a generous timeout here. This is in addition to the one in the child process,
|
||||
// in case the child stalls. We have a wall clock timeout here in the host, but a CPU
|
||||
// timeout in the child. We want to use CPU time because it varies less than wall clock
|
||||
// time under load, but the CPU resources of the child can only be measured from the
|
||||
// parent after the child process terminates.
|
||||
let timeout = preparation_timeout * JOB_TIMEOUT_WALL_CLOCK_FACTOR;
|
||||
let result = tokio::time::timeout(timeout, recv_response(&mut stream, pid)).await;
|
||||
|
||||
match result {
|
||||
// Received bytes from worker within the time limit.
|
||||
Ok(Ok(prepare_result)) =>
|
||||
handle_response(
|
||||
metrics,
|
||||
IdleWorker { stream, pid, worker_dir },
|
||||
prepare_result,
|
||||
pid,
|
||||
tmp_artifact_file,
|
||||
artifact_path,
|
||||
preparation_timeout,
|
||||
)
|
||||
.await,
|
||||
Ok(Err(err)) => {
|
||||
// Communication error within the time limit.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to recv a prepare response: {:?}",
|
||||
err,
|
||||
);
|
||||
Outcome::IoErr(err.to_string())
|
||||
},
|
||||
Err(_) => {
|
||||
// Timed out here on the host.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"did not recv a prepare response within the time limit",
|
||||
);
|
||||
Outcome::TimedOut
|
||||
},
|
||||
}
|
||||
},
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Handles the case where we successfully received response bytes on the host from the child.
|
||||
///
|
||||
/// NOTE: Here we know the artifact exists, but is still located in a temporary file which will be
|
||||
/// cleared by `with_tmp_file`.
|
||||
/// Here we know the artifact exists, but is still located in a temporary file which will be cleared
|
||||
/// by [`with_worker_dir_setup`].
|
||||
async fn handle_response(
|
||||
metrics: &Metrics,
|
||||
worker: IdleWorker,
|
||||
@@ -209,7 +238,13 @@ async fn handle_response(
|
||||
artifact_path.display(),
|
||||
err,
|
||||
);
|
||||
Outcome::RenameTmpFileErr { worker, result, err: format!("{:?}", err) }
|
||||
Outcome::RenameTmpFileErr {
|
||||
worker,
|
||||
result,
|
||||
err: format!("{:?}", err),
|
||||
src: tmp_file.to_str().map(String::from),
|
||||
dest: artifact_path.to_str().map(String::from),
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
@@ -220,61 +255,58 @@ async fn handle_response(
|
||||
outcome
|
||||
}
|
||||
|
||||
/// Create a temporary file for an artifact at the given cache path and execute the given
|
||||
/// future/closure passing the file path in.
|
||||
/// Create a temporary file for an artifact in the worker cache, execute the given future/closure
|
||||
/// passing the file path in, and clean up the worker cache.
|
||||
///
|
||||
/// The function will try best effort to not leave behind the temporary file.
|
||||
async fn with_tmp_file<F, Fut>(stream: UnixStream, pid: u32, cache_path: &Path, f: F) -> Outcome
|
||||
/// Failure to clean up the worker cache results in an error - leaving any files here could be a
|
||||
/// security issue, and we should shut down the worker. This should be very rare.
|
||||
async fn with_worker_dir_setup<F, Fut>(
|
||||
worker_dir: WorkerDir,
|
||||
stream: UnixStream,
|
||||
pid: u32,
|
||||
f: F,
|
||||
) -> Outcome
|
||||
where
|
||||
Fut: futures::Future<Output = Outcome>,
|
||||
F: FnOnce(PathBuf, UnixStream) -> Fut,
|
||||
F: FnOnce(PathBuf, UnixStream, WorkerDir) -> Fut,
|
||||
{
|
||||
let tmp_file = match tmpfile_in("prepare-artifact-", cache_path).await {
|
||||
Ok(f) => f,
|
||||
Err(err) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to create a temp file for the artifact: {:?}",
|
||||
err,
|
||||
);
|
||||
return Outcome::CreateTmpFileErr {
|
||||
worker: IdleWorker { stream, pid },
|
||||
err: format!("{:?}", err),
|
||||
}
|
||||
},
|
||||
// Create the tmp file here so that the child doesn't need any file creation rights. This will
|
||||
// be cleared at the end of this function.
|
||||
let tmp_file = worker_dir::prepare_tmp_artifact(&worker_dir.path);
|
||||
if let Err(err) = tokio::fs::File::create(&tmp_file).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir,
|
||||
"failed to create a temp file for the artifact: {:?}",
|
||||
err,
|
||||
);
|
||||
return Outcome::CreateTmpFileErr {
|
||||
worker: IdleWorker { stream, pid, worker_dir },
|
||||
err: format!("{:?}", err),
|
||||
}
|
||||
};
|
||||
|
||||
let outcome = f(tmp_file.clone(), stream).await;
|
||||
let worker_dir_path = worker_dir.path.clone();
|
||||
let outcome = f(tmp_file, stream, worker_dir).await;
|
||||
|
||||
// The function called above is expected to move `tmp_file` to a new location upon success.
|
||||
// However, the function may as well fail and in that case we should remove the tmp file here.
|
||||
//
|
||||
// In any case, we try to remove the file here so that there are no leftovers. We only report
|
||||
// errors that are different from the `NotFound`.
|
||||
match tokio::fs::remove_file(tmp_file).await {
|
||||
Ok(()) => (),
|
||||
Err(err) if err.kind() == std::io::ErrorKind::NotFound => (),
|
||||
Err(err) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to remove the tmp file: {:?}",
|
||||
err,
|
||||
);
|
||||
},
|
||||
// Try to clear the worker dir.
|
||||
if let Err(err) = clear_worker_dir_path(&worker_dir_path) {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir_path,
|
||||
"failed to clear worker cache after the job: {:?}",
|
||||
err,
|
||||
);
|
||||
return Outcome::ClearWorkerDir { err: format!("{:?}", err) }
|
||||
}
|
||||
|
||||
outcome
|
||||
}
|
||||
|
||||
async fn send_request(
|
||||
stream: &mut UnixStream,
|
||||
pvf: PvfPrepData,
|
||||
tmp_file: &Path,
|
||||
) -> io::Result<()> {
|
||||
async fn send_request(stream: &mut UnixStream, pvf: PvfPrepData) -> io::Result<()> {
|
||||
framed_send(stream, &pvf.encode()).await?;
|
||||
framed_send(stream, path_to_bytes(tmp_file)).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user