mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-05-31 11:01:01 +00:00
PVF: more filesystem sandboxing (#1373)
This commit is contained in:
Generated
+3
@@ -12035,6 +12035,7 @@ version = "1.0.0"
|
||||
dependencies = [
|
||||
"always-assert",
|
||||
"assert_matches",
|
||||
"cfg-if",
|
||||
"futures",
|
||||
"futures-timer",
|
||||
"hex-literal",
|
||||
@@ -12091,6 +12092,7 @@ name = "polkadot-node-core-pvf-common"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"assert_matches",
|
||||
"cfg-if",
|
||||
"cpu-time",
|
||||
"futures",
|
||||
"landlock",
|
||||
@@ -12132,6 +12134,7 @@ dependencies = [
|
||||
name = "polkadot-node-core-pvf-prepare-worker"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"futures",
|
||||
"libc",
|
||||
"parity-scale-codec",
|
||||
|
||||
@@ -8,6 +8,7 @@ license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
always-assert = "0.1"
|
||||
cfg-if = "1.0"
|
||||
futures = "0.3.21"
|
||||
futures-timer = "3.0.2"
|
||||
gum = { package = "tracing-gum", path = "../../gum" }
|
||||
|
||||
@@ -7,6 +7,7 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
cfg-if = "1.0"
|
||||
cpu-time = "1.0.0"
|
||||
futures = "0.3.21"
|
||||
gum = { package = "tracing-gum", path = "../../../gum" }
|
||||
|
||||
@@ -44,7 +44,17 @@ pub enum PrepareError {
|
||||
/// The response from the worker is received, but the file cannot be renamed (moved) to the
|
||||
/// final destination location. This state is reported by the validation host (not by the
|
||||
/// worker).
|
||||
RenameTmpFileErr(String),
|
||||
RenameTmpFileErr {
|
||||
err: String,
|
||||
// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
|
||||
// conversion to `Option<String>`.
|
||||
src: Option<String>,
|
||||
dest: Option<String>,
|
||||
},
|
||||
/// The response from the worker is received, but the worker cache could not be cleared. The
|
||||
/// worker has to be killed to avoid jobs having access to data from other jobs. This state is
|
||||
/// reported by the validation host (not by the worker).
|
||||
ClearWorkerDir(String),
|
||||
}
|
||||
|
||||
impl PrepareError {
|
||||
@@ -58,7 +68,11 @@ impl PrepareError {
|
||||
use PrepareError::*;
|
||||
match self {
|
||||
Prevalidation(_) | Preparation(_) | Panic(_) => true,
|
||||
TimedOut | IoErr(_) | CreateTmpFileErr(_) | RenameTmpFileErr(_) => false,
|
||||
TimedOut |
|
||||
IoErr(_) |
|
||||
CreateTmpFileErr(_) |
|
||||
RenameTmpFileErr { .. } |
|
||||
ClearWorkerDir(_) => false,
|
||||
// Can occur due to issues with the PVF, but also due to local errors.
|
||||
RuntimeConstruction(_) => false,
|
||||
}
|
||||
@@ -76,7 +90,9 @@ impl fmt::Display for PrepareError {
|
||||
TimedOut => write!(f, "prepare: timeout"),
|
||||
IoErr(err) => write!(f, "prepare: io error while receiving response: {}", err),
|
||||
CreateTmpFileErr(err) => write!(f, "prepare: error creating tmp file: {}", err),
|
||||
RenameTmpFileErr(err) => write!(f, "prepare: error renaming tmp file: {}", err),
|
||||
RenameTmpFileErr { err, src, dest } =>
|
||||
write!(f, "prepare: error renaming tmp file ({:?} -> {:?}): {}", src, dest, err),
|
||||
ClearWorkerDir(err) => write!(f, "prepare: error clearing worker cache: {}", err),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -89,8 +105,17 @@ impl fmt::Display for PrepareError {
|
||||
pub enum InternalValidationError {
|
||||
/// Some communication error occurred with the host.
|
||||
HostCommunication(String),
|
||||
/// Host could not create a hard link to the artifact path.
|
||||
CouldNotCreateLink(String),
|
||||
/// Could not find or open compiled artifact file.
|
||||
CouldNotOpenFile(String),
|
||||
/// Host could not clear the worker cache after a job.
|
||||
CouldNotClearWorkerDir {
|
||||
err: String,
|
||||
// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
|
||||
// conversion to `Option<String>`.
|
||||
path: Option<String>,
|
||||
},
|
||||
/// An error occurred in the CPU time monitor thread. Should be totally unrelated to
|
||||
/// validation.
|
||||
CpuTimeMonitorThread(String),
|
||||
@@ -104,8 +129,18 @@ impl fmt::Display for InternalValidationError {
|
||||
match self {
|
||||
HostCommunication(err) =>
|
||||
write!(f, "validation: some communication error occurred with the host: {}", err),
|
||||
CouldNotCreateLink(err) => write!(
|
||||
f,
|
||||
"validation: host could not create a hard link to the artifact path: {}",
|
||||
err
|
||||
),
|
||||
CouldNotOpenFile(err) =>
|
||||
write!(f, "validation: could not find or open compiled artifact file: {}", err),
|
||||
CouldNotClearWorkerDir { err, path } => write!(
|
||||
f,
|
||||
"validation: host could not clear the worker cache ({:?}) after a job: {}",
|
||||
path, err
|
||||
),
|
||||
CpuTimeMonitorThread(err) =>
|
||||
write!(f, "validation: an error occurred in the CPU time monitor thread: {}", err),
|
||||
NonDeterministicPrepareError(err) => write!(f, "validation: prepare: {}", err),
|
||||
|
||||
@@ -29,7 +29,7 @@ pub struct Handshake {
|
||||
}
|
||||
|
||||
/// The response from an execution job on the worker.
|
||||
#[derive(Encode, Decode)]
|
||||
#[derive(Debug, Encode, Decode)]
|
||||
pub enum Response {
|
||||
/// The job completed successfully.
|
||||
Ok {
|
||||
|
||||
@@ -22,6 +22,7 @@ pub mod executor_intf;
|
||||
pub mod prepare;
|
||||
pub mod pvf;
|
||||
pub mod worker;
|
||||
pub mod worker_dir;
|
||||
|
||||
pub use cpu_time::ProcessTime;
|
||||
|
||||
@@ -30,8 +31,11 @@ pub use sp_tracing;
|
||||
|
||||
const LOG_TARGET: &str = "parachain::pvf-common";
|
||||
|
||||
use std::mem;
|
||||
use tokio::io::{self, AsyncRead, AsyncReadExt as _, AsyncWrite, AsyncWriteExt as _};
|
||||
use std::{
|
||||
io::{Read, Write},
|
||||
mem,
|
||||
};
|
||||
use tokio::io;
|
||||
|
||||
#[cfg(feature = "test-utils")]
|
||||
pub mod tests {
|
||||
@@ -41,20 +45,31 @@ pub mod tests {
|
||||
pub const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
}
|
||||
|
||||
/// Write some data prefixed by its length into `w`.
|
||||
pub async fn framed_send(w: &mut (impl AsyncWrite + Unpin), buf: &[u8]) -> io::Result<()> {
|
||||
/// Status of security features on the current system.
///
/// Both flags default to `false` (via `Default`), i.e. a feature is treated as unavailable until
/// it has been explicitly set.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct SecurityStatus {
    /// Whether the landlock features we use are fully available on this system.
    pub can_enable_landlock: bool,
    /// Whether we are able to unshare the user namespace and change the filesystem root.
    pub can_unshare_user_namespace_and_change_root: bool,
}
|
||||
|
||||
/// Write some data prefixed by its length into `w`. Sync version of `framed_send` to avoid
|
||||
/// dependency on tokio.
|
||||
pub fn framed_send_blocking(w: &mut (impl Write + Unpin), buf: &[u8]) -> io::Result<()> {
|
||||
let len_buf = buf.len().to_le_bytes();
|
||||
w.write_all(&len_buf).await?;
|
||||
w.write_all(buf).await?;
|
||||
w.write_all(&len_buf)?;
|
||||
w.write_all(buf)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Read some data prefixed by its length from `r`.
|
||||
pub async fn framed_recv(r: &mut (impl AsyncRead + Unpin)) -> io::Result<Vec<u8>> {
|
||||
/// Read some data prefixed by its length from `r`. Sync version of `framed_recv` to avoid
|
||||
/// dependency on tokio.
|
||||
pub fn framed_recv_blocking(r: &mut (impl Read + Unpin)) -> io::Result<Vec<u8>> {
|
||||
let mut len_buf = [0u8; mem::size_of::<usize>()];
|
||||
r.read_exact(&mut len_buf).await?;
|
||||
r.read_exact(&mut len_buf)?;
|
||||
let len = usize::from_le_bytes(len_buf);
|
||||
let mut buf = vec![0; len];
|
||||
r.read_exact(&mut buf).await?;
|
||||
r.read_exact(&mut buf)?;
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
@@ -18,16 +18,18 @@
|
||||
|
||||
pub mod security;
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
use crate::{worker_dir, SecurityStatus, LOG_TARGET};
|
||||
use cpu_time::ProcessTime;
|
||||
use futures::never::Never;
|
||||
use std::{
|
||||
any::Any,
|
||||
fmt,
|
||||
os::unix::net::UnixStream,
|
||||
path::PathBuf,
|
||||
sync::mpsc::{Receiver, RecvTimeoutError},
|
||||
time::Duration,
|
||||
};
|
||||
use tokio::{io, net::UnixStream, runtime::Runtime};
|
||||
use tokio::{io, runtime::Runtime};
|
||||
|
||||
/// Use this macro to declare a `fn main() {}` that will create an executable that can be used for
|
||||
/// spawning the desired worker.
|
||||
@@ -41,10 +43,15 @@ macro_rules! decl_worker_main {
|
||||
}
|
||||
|
||||
fn main() {
|
||||
#[cfg(target_os = "linux")]
|
||||
use $crate::worker::security;
|
||||
|
||||
// TODO: Remove this dependency, and `pub use sp_tracing` in `lib.rs`.
|
||||
// See <https://github.com/paritytech/polkadot/issues/7117>.
|
||||
$crate::sp_tracing::try_init_simple();
|
||||
|
||||
let worker_pid = std::process::id();
|
||||
|
||||
let args = std::env::args().collect::<Vec<_>>();
|
||||
if args.len() == 1 {
|
||||
print_help($expected_command);
|
||||
@@ -60,10 +67,43 @@ macro_rules! decl_worker_main {
|
||||
println!("{}", $worker_version);
|
||||
return
|
||||
},
|
||||
|
||||
"--check-can-enable-landlock" => {
|
||||
#[cfg(target_os = "linux")]
|
||||
let status = if security::landlock::check_is_fully_enabled() { 0 } else { -1 };
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
let status = -1;
|
||||
std::process::exit(status)
|
||||
},
|
||||
"--check-can-unshare-user-namespace-and-change-root" => {
|
||||
#[cfg(target_os = "linux")]
|
||||
let status = if let Err(err) = security::unshare_user_namespace_and_change_root(
|
||||
$crate::worker::WorkerKind::CheckPivotRoot,
|
||||
worker_pid,
|
||||
// We're not accessing any files, so we can try to pivot_root in the temp
|
||||
// dir without conflicts with other processes.
|
||||
&std::env::temp_dir(),
|
||||
) {
|
||||
// Write the error to stderr, log it on the host-side.
|
||||
eprintln!("{}", err);
|
||||
-1
|
||||
} else {
|
||||
0
|
||||
};
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
let status = {
|
||||
// Write the error to stderr, log it on the host-side.
|
||||
eprintln!("not available on macos");
|
||||
-1
|
||||
};
|
||||
std::process::exit(status)
|
||||
},
|
||||
|
||||
"test-sleep" => {
|
||||
std::thread::sleep(std::time::Duration::from_secs(5));
|
||||
return
|
||||
},
|
||||
|
||||
subcommand => {
|
||||
// Must be passed for compatibility with the single-binary test workers.
|
||||
if subcommand != $expected_command {
|
||||
@@ -75,18 +115,39 @@ macro_rules! decl_worker_main {
|
||||
},
|
||||
}
|
||||
|
||||
let mut worker_dir_path = None;
|
||||
let mut node_version = None;
|
||||
let mut socket_path: &str = "";
|
||||
let mut can_enable_landlock = false;
|
||||
let mut can_unshare_user_namespace_and_change_root = false;
|
||||
|
||||
for i in (2..args.len()).step_by(2) {
|
||||
let mut i = 2;
|
||||
while i < args.len() {
|
||||
match args[i].as_ref() {
|
||||
"--socket-path" => socket_path = args[i + 1].as_str(),
|
||||
"--node-impl-version" => node_version = Some(args[i + 1].as_str()),
|
||||
"--worker-dir-path" => {
|
||||
worker_dir_path = Some(args[i + 1].as_str());
|
||||
i += 1
|
||||
},
|
||||
"--node-impl-version" => {
|
||||
node_version = Some(args[i + 1].as_str());
|
||||
i += 1
|
||||
},
|
||||
"--can-enable-landlock" => can_enable_landlock = true,
|
||||
"--can-unshare-user-namespace-and-change-root" =>
|
||||
can_unshare_user_namespace_and_change_root = true,
|
||||
arg => panic!("Unexpected argument found: {}", arg),
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
let worker_dir_path =
|
||||
worker_dir_path.expect("the --worker-dir-path argument is required");
|
||||
|
||||
$entrypoint(&socket_path, node_version, Some($worker_version));
|
||||
let worker_dir_path = std::path::Path::new(worker_dir_path).to_owned();
|
||||
let security_status = $crate::SecurityStatus {
|
||||
can_enable_landlock,
|
||||
can_unshare_user_namespace_and_change_root,
|
||||
};
|
||||
|
||||
$entrypoint(worker_dir_path, node_version, Some($worker_version), security_status);
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -95,61 +156,181 @@ macro_rules! decl_worker_main {
|
||||
/// child process.
|
||||
pub const JOB_TIMEOUT_OVERHEAD: Duration = Duration::from_millis(50);
|
||||
|
||||
/// Interprets the given bytes as a path. Returns `None` if the given bytes do not constitute a
|
||||
/// proper UTF-8 string.
|
||||
pub fn bytes_to_path(bytes: &[u8]) -> Option<PathBuf> {
|
||||
std::str::from_utf8(bytes).ok().map(PathBuf::from)
|
||||
/// The kind of worker process being run.
///
/// The `Display` implementation yields the lowercase label used in worker log messages.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WorkerKind {
    /// A prepare worker.
    Prepare,
    /// An execute worker.
    Execute,
    /// Not a job worker: passed when the binary is only invoked to check whether unsharing the
    /// user namespace and changing root is possible on this system.
    CheckPivotRoot,
}

impl fmt::Display for WorkerKind {
    /// Writes the human-readable label of this worker kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The labels are static, so they are written directly without going through the
        // formatting machinery.
        let label = match self {
            Self::Prepare => "prepare",
            Self::Execute => "execute",
            Self::CheckPivotRoot => "check pivot root",
        };
        f.write_str(label)
    }
}
|
||||
|
||||
// The worker version must be passed in so that we accurately get the version of the worker, and not
|
||||
// the version that this crate was compiled with.
|
||||
pub fn worker_event_loop<F, Fut>(
|
||||
debug_id: &'static str,
|
||||
socket_path: &str,
|
||||
worker_kind: WorkerKind,
|
||||
#[cfg_attr(not(target_os = "linux"), allow(unused_mut))] mut worker_dir_path: PathBuf,
|
||||
node_version: Option<&str>,
|
||||
worker_version: Option<&str>,
|
||||
#[cfg_attr(not(target_os = "linux"), allow(unused_variables))] security_status: &SecurityStatus,
|
||||
mut event_loop: F,
|
||||
) where
|
||||
F: FnMut(UnixStream) -> Fut,
|
||||
F: FnMut(UnixStream, PathBuf) -> Fut,
|
||||
Fut: futures::Future<Output = io::Result<Never>>,
|
||||
{
|
||||
let worker_pid = std::process::id();
|
||||
gum::debug!(target: LOG_TARGET, %worker_pid, "starting pvf worker ({})", debug_id);
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
?worker_dir_path,
|
||||
?security_status,
|
||||
"starting pvf worker ({})",
|
||||
worker_kind
|
||||
);
|
||||
|
||||
// Check for a mismatch between the node and worker versions.
|
||||
if let (Some(node_version), Some(worker_version)) = (node_version, worker_version) {
|
||||
if node_version != worker_version {
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
%worker_kind,
|
||||
%worker_pid,
|
||||
%node_version,
|
||||
%worker_version,
|
||||
"Node and worker version mismatch, node needs restarting, forcing shutdown",
|
||||
);
|
||||
kill_parent_node_in_emergency();
|
||||
let err = io::Error::new(io::ErrorKind::Unsupported, "Version mismatch");
|
||||
worker_shutdown_message(debug_id, worker_pid, err);
|
||||
worker_shutdown_message(worker_kind, worker_pid, "Version mismatch");
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
remove_env_vars(debug_id);
|
||||
// Make sure that we can read the worker dir path, and log its contents.
|
||||
let entries = || -> Result<Vec<_>, io::Error> {
|
||||
std::fs::read_dir(&worker_dir_path)?
|
||||
.map(|res| res.map(|e| e.file_name()))
|
||||
.collect()
|
||||
}();
|
||||
match entries {
|
||||
Ok(entries) =>
|
||||
gum::trace!(target: LOG_TARGET, %worker_pid, ?worker_dir_path, "content of worker dir: {:?}", entries),
|
||||
Err(err) => {
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
%worker_kind,
|
||||
%worker_pid,
|
||||
?worker_dir_path,
|
||||
"Could not read worker dir: {}",
|
||||
err.to_string()
|
||||
);
|
||||
worker_shutdown_message(worker_kind, worker_pid, &err.to_string());
|
||||
return
|
||||
},
|
||||
}
|
||||
|
||||
// Connect to the socket.
|
||||
let socket_path = worker_dir::socket(&worker_dir_path);
|
||||
let stream = || -> std::io::Result<UnixStream> {
|
||||
let stream = UnixStream::connect(&socket_path)?;
|
||||
// Remove the socket here. We don't also need to do this on the host-side; on failed
|
||||
// rendezvous, the host will delete the whole worker dir.
|
||||
std::fs::remove_file(&socket_path)?;
|
||||
Ok(stream)
|
||||
}();
|
||||
let stream = match stream {
|
||||
Ok(s) => s,
|
||||
Err(err) => {
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
%worker_kind,
|
||||
%worker_pid,
|
||||
"{}",
|
||||
err
|
||||
);
|
||||
worker_shutdown_message(worker_kind, worker_pid, &err.to_string());
|
||||
return
|
||||
},
|
||||
};
|
||||
|
||||
// Enable some security features.
|
||||
{
|
||||
// Call based on whether we can change root. Error out if it should work but fails.
|
||||
//
|
||||
// NOTE: This should not be called in a multi-threaded context (i.e. inside the tokio
|
||||
// runtime). `unshare(2)`:
|
||||
//
|
||||
// > CLONE_NEWUSER requires that the calling process is not threaded.
|
||||
#[cfg(target_os = "linux")]
|
||||
if security_status.can_unshare_user_namespace_and_change_root {
|
||||
if let Err(err) = security::unshare_user_namespace_and_change_root(
|
||||
worker_kind,
|
||||
worker_pid,
|
||||
&worker_dir_path,
|
||||
) {
|
||||
// The filesystem may be in an inconsistent state, bail out.
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
%worker_kind,
|
||||
%worker_pid,
|
||||
?worker_dir_path,
|
||||
"Could not change root to be the worker cache path: {}",
|
||||
err
|
||||
);
|
||||
worker_shutdown_message(worker_kind, worker_pid, &err);
|
||||
return
|
||||
}
|
||||
worker_dir_path = std::path::Path::new("/").to_owned();
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
if security_status.can_enable_landlock {
|
||||
let landlock_status =
|
||||
security::landlock::enable_for_worker(worker_kind, worker_pid, &worker_dir_path);
|
||||
if !matches!(landlock_status, Ok(landlock::RulesetStatus::FullyEnforced)) {
|
||||
// We previously were able to enable, so this should never happen.
|
||||
//
|
||||
// TODO: Make this a real error in secure-mode. See:
|
||||
// <https://github.com/paritytech/polkadot-sdk/issues/1444>
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
%worker_kind,
|
||||
%worker_pid,
|
||||
"could not fully enable landlock: {:?}. This should not happen, please report to the Polkadot devs",
|
||||
landlock_status
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if !security::check_env_vars_were_cleared(worker_kind, worker_pid) {
|
||||
let err = "not all env vars were cleared when spawning the process";
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
%worker_kind,
|
||||
%worker_pid,
|
||||
"{}",
|
||||
err
|
||||
);
|
||||
worker_shutdown_message(worker_kind, worker_pid, err);
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Run the main worker loop.
|
||||
let rt = Runtime::new().expect("Creates tokio runtime. If this panics the worker will die and the host will detect that and deal with it.");
|
||||
let err = rt
|
||||
.block_on(async move {
|
||||
let stream = UnixStream::connect(socket_path).await?;
|
||||
let _ = tokio::fs::remove_file(socket_path).await;
|
||||
|
||||
let result = event_loop(stream).await;
|
||||
|
||||
result
|
||||
})
|
||||
.block_on(event_loop(stream, worker_dir_path))
|
||||
// It's never `Ok` because it's `Ok(Never)`.
|
||||
.unwrap_err();
|
||||
|
||||
worker_shutdown_message(debug_id, worker_pid, err);
|
||||
worker_shutdown_message(worker_kind, worker_pid, &err.to_string());
|
||||
|
||||
// We don't want tokio to wait for the tasks to finish. We want to bring down the worker as fast
|
||||
// as possible and not wait for stalled validation to finish. This isn't strictly necessary now,
|
||||
@@ -157,51 +338,9 @@ pub fn worker_event_loop<F, Fut>(
|
||||
rt.shutdown_background();
|
||||
}
|
||||
|
||||
/// Delete all env vars to prevent malicious code from accessing them.
|
||||
fn remove_env_vars(debug_id: &'static str) {
|
||||
for (key, value) in std::env::vars_os() {
|
||||
// TODO: *theoretically* the value (or mere presence) of `RUST_LOG` can be a source of
|
||||
// randomness for malicious code. In the future we can remove it also and log in the host;
|
||||
// see <https://github.com/paritytech/polkadot/issues/7117>.
|
||||
if key == "RUST_LOG" {
|
||||
continue
|
||||
}
|
||||
|
||||
// In case of a key or value that would cause [`env::remove_var` to
|
||||
// panic](https://doc.rust-lang.org/std/env/fn.remove_var.html#panics), we first log a
|
||||
// warning and then proceed to attempt to remove the env var.
|
||||
let mut err_reasons = vec![];
|
||||
let (key_str, value_str) = (key.to_str(), value.to_str());
|
||||
if key.is_empty() {
|
||||
err_reasons.push("key is empty");
|
||||
}
|
||||
if key_str.is_some_and(|s| s.contains('=')) {
|
||||
err_reasons.push("key contains '='");
|
||||
}
|
||||
if key_str.is_some_and(|s| s.contains('\0')) {
|
||||
err_reasons.push("key contains null character");
|
||||
}
|
||||
if value_str.is_some_and(|s| s.contains('\0')) {
|
||||
err_reasons.push("value contains null character");
|
||||
}
|
||||
if !err_reasons.is_empty() {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%debug_id,
|
||||
?key,
|
||||
?value,
|
||||
"Attempting to remove badly-formatted env var, this may cause the PVF worker to crash. Please remove it yourself. Reasons: {:?}",
|
||||
err_reasons
|
||||
);
|
||||
}
|
||||
|
||||
std::env::remove_var(key);
|
||||
}
|
||||
}
|
||||
|
||||
/// Provide a consistent message on worker shutdown.
|
||||
fn worker_shutdown_message(debug_id: &'static str, worker_pid: u32, err: io::Error) {
|
||||
gum::debug!(target: LOG_TARGET, %worker_pid, "quitting pvf worker ({}): {:?}", debug_id, err);
|
||||
fn worker_shutdown_message(worker_kind: WorkerKind, worker_pid: u32, err: &str) {
|
||||
gum::debug!(target: LOG_TARGET, %worker_pid, "quitting pvf worker ({}): {}", worker_kind, err);
|
||||
}
|
||||
|
||||
/// Loop that runs in the CPU time monitor thread on prepare and execute jobs. Continuously wakes up
|
||||
@@ -305,7 +444,7 @@ pub mod thread {
|
||||
Arc::new((Mutex::new(WaitOutcome::Pending), Condvar::new()))
|
||||
}
|
||||
|
||||
/// Runs a worker thread. Will first enable security features, and afterwards notify the threads
|
||||
/// Runs a worker thread. Will run the requested function, and afterwards notify the threads
|
||||
/// waiting on the condvar. Catches panics during execution and resumes the panics after
|
||||
/// triggering the condvar, so that the waiting thread is notified on panics.
|
||||
///
|
||||
|
||||
@@ -17,27 +17,186 @@
|
||||
//! Functionality for securing workers.
|
||||
//!
|
||||
//! This is needed because workers are used to compile and execute untrusted code (PVFs).
|
||||
//!
|
||||
//! We currently employ the following security measures:
|
||||
//!
|
||||
//! - Restrict filesystem
|
||||
//! - Use Landlock to remove all unnecessary FS access rights.
|
||||
//! - Unshare the user and mount namespaces.
|
||||
//! - Change the root directory to a worker-specific temporary directory.
|
||||
//! - Remove env vars
|
||||
|
||||
/// To what degree landlock is enabled. It's a separate struct from `RulesetStatus` because that is
|
||||
/// only available on Linux, plus this has a nicer name.
|
||||
pub enum LandlockStatus {
|
||||
FullyEnforced,
|
||||
PartiallyEnforced,
|
||||
NotEnforced,
|
||||
/// Thread panicked, we don't know what the status is.
|
||||
Unavailable,
|
||||
}
|
||||
use crate::{worker::WorkerKind, LOG_TARGET};
|
||||
|
||||
impl LandlockStatus {
|
||||
/// Unshare the user namespace and change root to be the artifact directory.
|
||||
///
|
||||
/// NOTE: This should not be called in a multi-threaded context. `unshare(2)`:
|
||||
/// "CLONE_NEWUSER requires that the calling process is not threaded."
|
||||
#[cfg(target_os = "linux")]
|
||||
pub fn from_ruleset_status(ruleset_status: ::landlock::RulesetStatus) -> Self {
|
||||
use ::landlock::RulesetStatus::*;
|
||||
match ruleset_status {
|
||||
FullyEnforced => LandlockStatus::FullyEnforced,
|
||||
PartiallyEnforced => LandlockStatus::PartiallyEnforced,
|
||||
NotEnforced => LandlockStatus::NotEnforced,
|
||||
pub fn unshare_user_namespace_and_change_root(
|
||||
worker_kind: WorkerKind,
|
||||
worker_pid: u32,
|
||||
worker_dir_path: &std::path::Path,
|
||||
) -> Result<(), String> {
|
||||
use std::{env, ffi::CString, os::unix::ffi::OsStrExt, path::Path, ptr};
|
||||
|
||||
// The following was copied from the `cstr_core` crate.
|
||||
//
|
||||
// TODO: Remove this once this is stable: https://github.com/rust-lang/rust/issues/105723
|
||||
#[inline]
|
||||
#[doc(hidden)]
|
||||
const fn cstr_is_valid(bytes: &[u8]) -> bool {
|
||||
if bytes.is_empty() || bytes[bytes.len() - 1] != 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
let mut index = 0;
|
||||
while index < bytes.len() - 1 {
|
||||
if bytes[index] == 0 {
|
||||
return false
|
||||
}
|
||||
index += 1;
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
macro_rules! cstr {
|
||||
($e:expr) => {{
|
||||
const STR: &[u8] = concat!($e, "\0").as_bytes();
|
||||
const STR_VALID: bool = cstr_is_valid(STR);
|
||||
let _ = [(); 0 - (!(STR_VALID) as usize)];
|
||||
#[allow(unused_unsafe)]
|
||||
unsafe {
|
||||
core::ffi::CStr::from_bytes_with_nul_unchecked(STR)
|
||||
}
|
||||
}}
|
||||
}
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
%worker_kind,
|
||||
%worker_pid,
|
||||
?worker_dir_path,
|
||||
"unsharing the user namespace and calling pivot_root",
|
||||
);
|
||||
|
||||
let worker_dir_path_c = CString::new(worker_dir_path.as_os_str().as_bytes())
|
||||
.expect("on unix; the path will never contain 0 bytes; qed");
|
||||
|
||||
// Wrapper around all the work to prevent repetitive error handling.
|
||||
//
|
||||
// # Errors
|
||||
//
|
||||
// It's the caller's responsibility to call `Error::last_os_error`. Note that that alone does
|
||||
// not give the context of which call failed, so we return a &str error.
|
||||
|| -> Result<(), &'static str> {
|
||||
// SAFETY: We pass null-terminated C strings and use the APIs as documented. In fact, steps
|
||||
// (2) and (3) are adapted from the example in pivot_root(2), with the additional
|
||||
// change described in the `pivot_root(".", ".")` section.
|
||||
unsafe {
|
||||
// 1. `unshare` the user and the mount namespaces.
|
||||
if libc::unshare(libc::CLONE_NEWUSER | libc::CLONE_NEWNS) < 0 {
|
||||
return Err("unshare user and mount namespaces")
|
||||
}
|
||||
|
||||
// 2. Setup mounts.
|
||||
//
|
||||
// Ensure that new root and its parent mount don't have shared propagation (which would
|
||||
// cause pivot_root() to return an error), and prevent propagation of mount events to
|
||||
// the initial mount namespace.
|
||||
if libc::mount(
|
||||
ptr::null(),
|
||||
cstr!("/").as_ptr(),
|
||||
ptr::null(),
|
||||
libc::MS_REC | libc::MS_PRIVATE,
|
||||
ptr::null(),
|
||||
) < 0
|
||||
{
|
||||
return Err("mount MS_PRIVATE")
|
||||
}
|
||||
// Ensure that the new root is a mount point.
|
||||
let additional_flags =
|
||||
if let WorkerKind::Execute | WorkerKind::CheckPivotRoot = worker_kind {
|
||||
libc::MS_RDONLY
|
||||
} else {
|
||||
0
|
||||
};
|
||||
if libc::mount(
|
||||
worker_dir_path_c.as_ptr(),
|
||||
worker_dir_path_c.as_ptr(),
|
||||
ptr::null(), // ignored when MS_BIND is used
|
||||
libc::MS_BIND |
|
||||
libc::MS_REC | libc::MS_NOEXEC |
|
||||
libc::MS_NODEV | libc::MS_NOSUID |
|
||||
libc::MS_NOATIME | additional_flags,
|
||||
ptr::null(), // ignored when MS_BIND is used
|
||||
) < 0
|
||||
{
|
||||
return Err("mount MS_BIND")
|
||||
}
|
||||
|
||||
// 3. `pivot_root` to the artifact directory.
|
||||
if libc::chdir(worker_dir_path_c.as_ptr()) < 0 {
|
||||
return Err("chdir to worker dir path")
|
||||
}
|
||||
if libc::syscall(libc::SYS_pivot_root, cstr!(".").as_ptr(), cstr!(".").as_ptr()) < 0 {
|
||||
return Err("pivot_root")
|
||||
}
|
||||
if libc::umount2(cstr!(".").as_ptr(), libc::MNT_DETACH) < 0 {
|
||||
return Err("umount the old root mount point")
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}()
|
||||
.map_err(|err_ctx| {
|
||||
let err = std::io::Error::last_os_error();
|
||||
format!("{}: {}", err_ctx, err)
|
||||
})?;
|
||||
|
||||
// Do some assertions.
|
||||
if env::current_dir().map_err(|err| err.to_string())? != Path::new("/") {
|
||||
return Err("expected current dir after pivot_root to be `/`".into())
|
||||
}
|
||||
env::set_current_dir("..").map_err(|err| err.to_string())?;
|
||||
if env::current_dir().map_err(|err| err.to_string())? != Path::new("/") {
|
||||
return Err("expected not to be able to break out of new root by doing `..`".into())
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Require env vars to have been removed when spawning the process, to prevent malicious code from
|
||||
/// accessing them.
|
||||
pub fn check_env_vars_were_cleared(worker_kind: WorkerKind, worker_pid: u32) -> bool {
|
||||
let mut ok = true;
|
||||
|
||||
for (key, value) in std::env::vars_os() {
|
||||
// TODO: *theoretically* the value (or mere presence) of `RUST_LOG` can be a source of
|
||||
// randomness for malicious code. In the future we can remove it also and log in the host;
|
||||
// see <https://github.com/paritytech/polkadot/issues/7117>.
|
||||
if key == "RUST_LOG" {
|
||||
continue
|
||||
}
|
||||
// An exception for MacOS. This is not a secure platform anyway, so we let it slide.
|
||||
#[cfg(target_os = "macos")]
|
||||
if key == "__CF_USER_TEXT_ENCODING" {
|
||||
continue
|
||||
}
|
||||
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
%worker_kind,
|
||||
%worker_pid,
|
||||
?key,
|
||||
?value,
|
||||
"env var was present that should have been removed",
|
||||
);
|
||||
|
||||
ok = false;
|
||||
}
|
||||
|
||||
ok
|
||||
}
|
||||
|
||||
/// The [landlock] docs say it best:
|
||||
@@ -52,14 +211,21 @@ impl LandlockStatus {
|
||||
/// [landlock]: https://docs.rs/landlock/latest/landlock/index.html
|
||||
#[cfg(target_os = "linux")]
|
||||
pub mod landlock {
|
||||
use landlock::{Access, AccessFs, Ruleset, RulesetAttr, RulesetError, RulesetStatus, ABI};
|
||||
pub use landlock::RulesetStatus;
|
||||
|
||||
use crate::{worker::WorkerKind, LOG_TARGET};
|
||||
use landlock::*;
|
||||
use std::{
|
||||
fmt,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
/// Landlock ABI version. We use ABI V1 because:
|
||||
///
|
||||
/// 1. It is supported by our reference kernel version.
|
||||
/// 2. Later versions do not (yet) provide additional security.
|
||||
///
|
||||
/// # Versions (June 2023)
|
||||
/// # Versions (as of June 2023)
|
||||
///
|
||||
/// - Polkadot reference kernel version: 5.16+
|
||||
/// - ABI V1: 5.13 - introduces landlock, including full restrictions on file reads
|
||||
@@ -83,46 +249,103 @@ pub mod landlock {
|
||||
/// supports it or if it introduces some new feature that is beneficial to security.
|
||||
pub const LANDLOCK_ABI: ABI = ABI::V1;
|
||||
|
||||
/// Error raised while trying to apply landlock restrictions.
// NOTE(review): presumably the error type of `try_restrict`, which is not visible here — confirm.
#[derive(Debug)]
|
||||
pub enum TryRestrictError {
|
||||
/// An exception path was invalid; carries the offending path.
InvalidExceptionPath(PathBuf),
|
||||
/// A landlock `RulesetError`, wrapped via the `From<RulesetError>` impl.
RulesetError(RulesetError),
|
||||
}
|
||||
|
||||
impl From<RulesetError> for TryRestrictError {
|
||||
fn from(err: RulesetError) -> Self {
|
||||
Self::RulesetError(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for TryRestrictError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::InvalidExceptionPath(path) => write!(f, "invalid exception path: {:?}", path),
|
||||
Self::RulesetError(err) => write!(f, "ruleset error: {}", err.to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Empty impl: no `source()` is provided. The wrapped ruleset error is already rendered as part
// of the `Display` output.
impl std::error::Error for TryRestrictError {}
|
||||
|
||||
/// Try to enable landlock for the given kind of worker.
|
||||
pub fn enable_for_worker(
|
||||
worker_kind: WorkerKind,
|
||||
worker_pid: u32,
|
||||
worker_dir_path: &Path,
|
||||
) -> Result<RulesetStatus, Box<dyn std::error::Error>> {
|
||||
let exceptions: Vec<(PathBuf, BitFlags<AccessFs>)> = match worker_kind {
|
||||
WorkerKind::Prepare => {
|
||||
vec![(worker_dir_path.to_owned(), AccessFs::WriteFile.into())]
|
||||
},
|
||||
WorkerKind::Execute => {
|
||||
vec![(worker_dir_path.to_owned(), AccessFs::ReadFile.into())]
|
||||
},
|
||||
WorkerKind::CheckPivotRoot =>
|
||||
panic!("this should only be passed for checking pivot_root; qed"),
|
||||
};
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
%worker_kind,
|
||||
%worker_pid,
|
||||
?worker_dir_path,
|
||||
"enabling landlock with exceptions: {:?}",
|
||||
exceptions,
|
||||
);
|
||||
|
||||
Ok(try_restrict(exceptions)?)
|
||||
}
|
||||
|
||||
// TODO: <https://github.com/landlock-lsm/rust-landlock/issues/36>
|
||||
/// Returns to what degree landlock is enabled with the given ABI on the current Linux
|
||||
/// environment.
|
||||
pub fn get_status() -> Result<RulesetStatus, Box<dyn std::error::Error>> {
|
||||
match std::thread::spawn(|| try_restrict_thread()).join() {
|
||||
Ok(Ok(status)) => Ok(status),
|
||||
Ok(Err(ruleset_err)) => Err(ruleset_err.into()),
|
||||
Err(_err) => Err("a panic occurred in try_restrict_thread".into()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Based on the given `status`, returns a single bool indicating whether the given landlock
|
||||
/// ABI is fully enabled on the current Linux environment.
|
||||
pub fn status_is_fully_enabled(
|
||||
status: &Result<RulesetStatus, Box<dyn std::error::Error>>,
|
||||
) -> bool {
|
||||
matches!(status, Ok(RulesetStatus::FullyEnforced))
|
||||
}
|
||||
|
||||
/// Runs a check for landlock and returns a single bool indicating whether the given landlock
|
||||
/// ABI is fully enabled on the current Linux environment.
|
||||
pub fn check_is_fully_enabled() -> bool {
|
||||
status_is_fully_enabled(&get_status())
|
||||
let status_from_thread: Result<RulesetStatus, Box<dyn std::error::Error>> =
|
||||
match std::thread::spawn(|| try_restrict(std::iter::empty::<(PathBuf, AccessFs)>()))
|
||||
.join()
|
||||
{
|
||||
Ok(Ok(status)) => Ok(status),
|
||||
Ok(Err(ruleset_err)) => Err(ruleset_err.into()),
|
||||
Err(_err) => Err("a panic occurred in try_restrict".into()),
|
||||
};
|
||||
|
||||
matches!(status_from_thread, Ok(RulesetStatus::FullyEnforced))
|
||||
}
|
||||
|
||||
/// Tries to restrict the current thread with the following landlock access controls:
|
||||
/// Tries to restrict the current thread (should only be called in a process' main thread) with
|
||||
/// the following landlock access controls:
|
||||
///
|
||||
/// 1. all global filesystem access
|
||||
/// 2. ... more may be supported in the future.
|
||||
/// 1. all global filesystem access restricted, with optional exceptions
|
||||
/// 2. ... more sandbox types (e.g. networking) may be supported in the future.
|
||||
///
|
||||
/// If landlock is not supported in the current environment this is simply a noop.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// The status of the restriction (whether it was fully, partially, or not-at-all enforced).
|
||||
pub fn try_restrict_thread() -> Result<RulesetStatus, RulesetError> {
|
||||
let status = Ruleset::new()
|
||||
.handle_access(AccessFs::from_all(LANDLOCK_ABI))?
|
||||
.create()?
|
||||
.restrict_self()?;
|
||||
fn try_restrict<I, P, A>(fs_exceptions: I) -> Result<RulesetStatus, TryRestrictError>
|
||||
where
|
||||
I: IntoIterator<Item = (P, A)>,
|
||||
P: AsRef<Path>,
|
||||
A: Into<BitFlags<AccessFs>>,
|
||||
{
|
||||
let mut ruleset =
|
||||
Ruleset::new().handle_access(AccessFs::from_all(LANDLOCK_ABI))?.create()?;
|
||||
for (fs_path, access_bits) in fs_exceptions {
|
||||
let paths = &[fs_path.as_ref().to_owned()];
|
||||
let mut rules = path_beneath_rules(paths, access_bits).peekable();
|
||||
if rules.peek().is_none() {
|
||||
// `path_beneath_rules` silently ignores missing paths, so check for it manually.
|
||||
return Err(TryRestrictError::InvalidExceptionPath(fs_path.as_ref().to_owned()))
|
||||
}
|
||||
ruleset = ruleset.add_rules(rules)?;
|
||||
}
|
||||
let status = ruleset.restrict_self()?;
|
||||
Ok(status.ruleset)
|
||||
}
|
||||
|
||||
@@ -132,29 +355,56 @@ pub mod landlock {
|
||||
use std::{fs, io::ErrorKind, thread};
|
||||
|
||||
#[test]
|
||||
fn restricted_thread_cannot_access_fs() {
|
||||
fn restricted_thread_cannot_read_file() {
|
||||
// TODO: This would be nice: <https://github.com/rust-lang/rust/issues/68007>.
|
||||
if !check_is_fully_enabled() {
|
||||
return
|
||||
}
|
||||
|
||||
// Restricted thread cannot read from FS.
|
||||
let handle = thread::spawn(|| {
|
||||
// Write to a tmp file, this should succeed before landlock is applied.
|
||||
let text = "foo";
|
||||
let tmpfile = tempfile::NamedTempFile::new().unwrap();
|
||||
let path = tmpfile.path();
|
||||
fs::write(path, text).unwrap();
|
||||
let s = fs::read_to_string(path).unwrap();
|
||||
assert_eq!(s, text);
|
||||
let handle =
|
||||
thread::spawn(|| {
|
||||
// Create, write, and read two tmp files. This should succeed before any
|
||||
// landlock restrictions are applied.
|
||||
const TEXT: &str = "foo";
|
||||
let tmpfile1 = tempfile::NamedTempFile::new().unwrap();
|
||||
let path1 = tmpfile1.path();
|
||||
let tmpfile2 = tempfile::NamedTempFile::new().unwrap();
|
||||
let path2 = tmpfile2.path();
|
||||
|
||||
let status = try_restrict_thread().unwrap();
|
||||
if !matches!(status, RulesetStatus::FullyEnforced) {
|
||||
panic!("Ruleset should be enforced since we checked if landlock is enabled");
|
||||
fs::write(path1, TEXT).unwrap();
|
||||
let s = fs::read_to_string(path1).unwrap();
|
||||
assert_eq!(s, TEXT);
|
||||
fs::write(path2, TEXT).unwrap();
|
||||
let s = fs::read_to_string(path2).unwrap();
|
||||
assert_eq!(s, TEXT);
|
||||
|
||||
// Apply Landlock with a read exception for only one of the files.
|
||||
let status = try_restrict(vec![(path1, AccessFs::ReadFile)]);
|
||||
if !matches!(status, Ok(RulesetStatus::FullyEnforced)) {
|
||||
panic!("Ruleset should be enforced since we checked if landlock is enabled: {:?}", status);
|
||||
}
|
||||
|
||||
// Try to read from the tmp file after landlock.
|
||||
let result = fs::read_to_string(path);
|
||||
// Try to read from both files, only tmpfile1 should succeed.
|
||||
let result = fs::read_to_string(path1);
|
||||
assert!(matches!(
|
||||
result,
|
||||
Ok(s) if s == TEXT
|
||||
));
|
||||
let result = fs::read_to_string(path2);
|
||||
assert!(matches!(
|
||||
result,
|
||||
Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
|
||||
));
|
||||
|
||||
// Apply Landlock for all files.
|
||||
let status = try_restrict(std::iter::empty::<(PathBuf, AccessFs)>());
|
||||
if !matches!(status, Ok(RulesetStatus::FullyEnforced)) {
|
||||
panic!("Ruleset should be enforced since we checked if landlock is enabled: {:?}", status);
|
||||
}
|
||||
|
||||
// Try to read from tmpfile1 after landlock, it should fail.
|
||||
let result = fs::read_to_string(path1);
|
||||
assert!(matches!(
|
||||
result,
|
||||
Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
|
||||
@@ -162,20 +412,52 @@ pub mod landlock {
|
||||
});
|
||||
|
||||
assert!(handle.join().is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn restricted_thread_cannot_write_file() {
|
||||
// TODO: This would be nice: <https://github.com/rust-lang/rust/issues/68007>.
|
||||
if !check_is_fully_enabled() {
|
||||
return
|
||||
}
|
||||
|
||||
// Restricted thread cannot write to FS.
|
||||
let handle = thread::spawn(|| {
|
||||
let text = "foo";
|
||||
let tmpfile = tempfile::NamedTempFile::new().unwrap();
|
||||
let path = tmpfile.path();
|
||||
let handle =
|
||||
thread::spawn(|| {
|
||||
// Create and write two tmp files. This should succeed before any landlock
|
||||
// restrictions are applied.
|
||||
const TEXT: &str = "foo";
|
||||
let tmpfile1 = tempfile::NamedTempFile::new().unwrap();
|
||||
let path1 = tmpfile1.path();
|
||||
let tmpfile2 = tempfile::NamedTempFile::new().unwrap();
|
||||
let path2 = tmpfile2.path();
|
||||
|
||||
let status = try_restrict_thread().unwrap();
|
||||
if !matches!(status, RulesetStatus::FullyEnforced) {
|
||||
panic!("Ruleset should be enforced since we checked if landlock is enabled");
|
||||
fs::write(path1, TEXT).unwrap();
|
||||
fs::write(path2, TEXT).unwrap();
|
||||
|
||||
// Apply Landlock with a write exception for only one of the files.
|
||||
let status = try_restrict(vec![(path1, AccessFs::WriteFile)]);
|
||||
if !matches!(status, Ok(RulesetStatus::FullyEnforced)) {
|
||||
panic!("Ruleset should be enforced since we checked if landlock is enabled: {:?}", status);
|
||||
}
|
||||
|
||||
// Try to write to the tmp file after landlock.
|
||||
let result = fs::write(path, text);
|
||||
// Try to write to both files, only tmpfile1 should succeed.
|
||||
let result = fs::write(path1, TEXT);
|
||||
assert!(matches!(result, Ok(_)));
|
||||
let result = fs::write(path2, TEXT);
|
||||
assert!(matches!(
|
||||
result,
|
||||
Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
|
||||
));
|
||||
|
||||
// Apply Landlock for all files.
|
||||
let status = try_restrict(std::iter::empty::<(PathBuf, AccessFs)>());
|
||||
if !matches!(status, Ok(RulesetStatus::FullyEnforced)) {
|
||||
panic!("Ruleset should be enforced since we checked if landlock is enabled: {:?}", status);
|
||||
}
|
||||
|
||||
// Try to write to tmpfile1 after landlock, it should fail.
|
||||
let result = fs::write(path1, TEXT);
|
||||
assert!(matches!(
|
||||
result,
|
||||
Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Shared functions for getting the known worker files.
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
const WORKER_EXECUTE_ARTIFACT_NAME: &str = "artifact";
|
||||
const WORKER_PREPARE_TMP_ARTIFACT_NAME: &str = "tmp-artifact";
|
||||
const WORKER_SOCKET_NAME: &str = "socket";
|
||||
|
||||
pub fn execute_artifact(worker_dir_path: &Path) -> PathBuf {
|
||||
worker_dir_path.join(WORKER_EXECUTE_ARTIFACT_NAME)
|
||||
}
|
||||
|
||||
pub fn prepare_tmp_artifact(worker_dir_path: &Path) -> PathBuf {
|
||||
worker_dir_path.join(WORKER_PREPARE_TMP_ARTIFACT_NAME)
|
||||
}
|
||||
|
||||
pub fn socket(worker_dir_path: &Path) -> PathBuf {
|
||||
worker_dir_path.join(WORKER_SOCKET_NAME)
|
||||
}
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
//! Contains the logic for executing PVFs. Used by the polkadot-execute-worker binary.
|
||||
|
||||
pub use polkadot_node_core_pvf_common::executor_intf::Executor;
|
||||
pub use polkadot_node_core_pvf_common::{executor_intf::Executor, worker_dir, SecurityStatus};
|
||||
|
||||
// NOTE: Initializing logging in e.g. tests will not have an effect in the workers, as they are
|
||||
// separate spawned processes. Run with e.g. `RUST_LOG=parachain::pvf-execute-worker=trace`.
|
||||
@@ -28,22 +28,21 @@ use polkadot_node_core_pvf_common::{
|
||||
error::InternalValidationError,
|
||||
execute::{Handshake, Response},
|
||||
executor_intf::NATIVE_STACK_MAX,
|
||||
framed_recv, framed_send,
|
||||
framed_recv_blocking, framed_send_blocking,
|
||||
worker::{
|
||||
bytes_to_path, cpu_time_monitor_loop,
|
||||
security::LandlockStatus,
|
||||
stringify_panic_payload,
|
||||
cpu_time_monitor_loop, stringify_panic_payload,
|
||||
thread::{self, WaitOutcome},
|
||||
worker_event_loop,
|
||||
worker_event_loop, WorkerKind,
|
||||
},
|
||||
};
|
||||
use polkadot_parachain_primitives::primitives::ValidationResult;
|
||||
use std::{
|
||||
os::unix::net::UnixStream,
|
||||
path::PathBuf,
|
||||
sync::{mpsc::channel, Arc},
|
||||
time::Duration,
|
||||
};
|
||||
use tokio::{io, net::UnixStream};
|
||||
use tokio::io;
|
||||
|
||||
// Wasmtime powers the Substrate Executor. It compiles the wasm bytecode into native code.
|
||||
// That native code does not create any stacks and just reuses the stack of the thread that
|
||||
@@ -81,8 +80,8 @@ use tokio::{io, net::UnixStream};
|
||||
/// The stack size for the execute thread.
|
||||
pub const EXECUTE_THREAD_STACK_SIZE: usize = 2 * 1024 * 1024 + NATIVE_STACK_MAX as usize;
|
||||
|
||||
async fn recv_handshake(stream: &mut UnixStream) -> io::Result<Handshake> {
|
||||
let handshake_enc = framed_recv(stream).await?;
|
||||
fn recv_handshake(stream: &mut UnixStream) -> io::Result<Handshake> {
|
||||
let handshake_enc = framed_recv_blocking(stream)?;
|
||||
let handshake = Handshake::decode(&mut &handshake_enc[..]).map_err(|_| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
@@ -92,57 +91,58 @@ async fn recv_handshake(stream: &mut UnixStream) -> io::Result<Handshake> {
|
||||
Ok(handshake)
|
||||
}
|
||||
|
||||
async fn recv_request(stream: &mut UnixStream) -> io::Result<(PathBuf, Vec<u8>, Duration)> {
|
||||
let artifact_path = framed_recv(stream).await?;
|
||||
let artifact_path = bytes_to_path(&artifact_path).ok_or_else(|| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
"execute pvf recv_request: non utf-8 artifact path".to_string(),
|
||||
)
|
||||
})?;
|
||||
let params = framed_recv(stream).await?;
|
||||
let execution_timeout = framed_recv(stream).await?;
|
||||
fn recv_request(stream: &mut UnixStream) -> io::Result<(Vec<u8>, Duration)> {
|
||||
let params = framed_recv_blocking(stream)?;
|
||||
let execution_timeout = framed_recv_blocking(stream)?;
|
||||
let execution_timeout = Duration::decode(&mut &execution_timeout[..]).map_err(|_| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
"execute pvf recv_request: failed to decode duration".to_string(),
|
||||
)
|
||||
})?;
|
||||
Ok((artifact_path, params, execution_timeout))
|
||||
Ok((params, execution_timeout))
|
||||
}
|
||||
|
||||
async fn send_response(stream: &mut UnixStream, response: Response) -> io::Result<()> {
|
||||
framed_send(stream, &response.encode()).await
|
||||
fn send_response(stream: &mut UnixStream, response: Response) -> io::Result<()> {
|
||||
framed_send_blocking(stream, &response.encode())
|
||||
}
|
||||
|
||||
/// The entrypoint that the spawned execute worker should start with.
|
||||
///
|
||||
/// # Parameters
|
||||
///
|
||||
/// The `socket_path` specifies the path to the socket used to communicate with the host. The
|
||||
/// `node_version`, if `Some`, is checked against the worker version. A mismatch results in
|
||||
/// - `worker_dir_path`: specifies the path to the worker-specific temporary directory.
|
||||
///
|
||||
/// - `node_version`: if `Some`, is checked against the `worker_version`. A mismatch results in
|
||||
/// immediate worker termination. `None` is used for tests and in other situations when version
|
||||
/// check is not necessary.
|
||||
///
|
||||
/// - `worker_version`: see above
|
||||
///
|
||||
/// - `security_status`: contains the detected status of security features.
|
||||
pub fn worker_entrypoint(
|
||||
socket_path: &str,
|
||||
worker_dir_path: PathBuf,
|
||||
node_version: Option<&str>,
|
||||
worker_version: Option<&str>,
|
||||
security_status: SecurityStatus,
|
||||
) {
|
||||
worker_event_loop(
|
||||
"execute",
|
||||
socket_path,
|
||||
WorkerKind::Execute,
|
||||
worker_dir_path,
|
||||
node_version,
|
||||
worker_version,
|
||||
|mut stream| async move {
|
||||
&security_status,
|
||||
|mut stream, worker_dir_path| async move {
|
||||
let worker_pid = std::process::id();
|
||||
let artifact_path = worker_dir::execute_artifact(&worker_dir_path);
|
||||
|
||||
let handshake = recv_handshake(&mut stream).await?;
|
||||
let executor = Executor::new(handshake.executor_params).map_err(|e| {
|
||||
let Handshake { executor_params } = recv_handshake(&mut stream)?;
|
||||
let executor = Executor::new(executor_params).map_err(|e| {
|
||||
io::Error::new(io::ErrorKind::Other, format!("cannot create executor: {}", e))
|
||||
})?;
|
||||
|
||||
loop {
|
||||
let (artifact_path, params, execution_timeout) = recv_request(&mut stream).await?;
|
||||
let (params, execution_timeout) = recv_request(&mut stream)?;
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
@@ -151,15 +151,13 @@ pub fn worker_entrypoint(
|
||||
);
|
||||
|
||||
// Get the artifact bytes.
|
||||
//
|
||||
// We do this outside the thread so that we can lock down filesystem access there.
|
||||
let compiled_artifact_blob = match std::fs::read(artifact_path) {
|
||||
let compiled_artifact_blob = match std::fs::read(&artifact_path) {
|
||||
Ok(bytes) => bytes,
|
||||
Err(err) => {
|
||||
let response = Response::InternalError(
|
||||
InternalValidationError::CouldNotOpenFile(err.to_string()),
|
||||
);
|
||||
send_response(&mut stream, response).await?;
|
||||
send_response(&mut stream, response)?;
|
||||
continue
|
||||
},
|
||||
};
|
||||
@@ -187,22 +185,11 @@ pub fn worker_entrypoint(
|
||||
let execute_thread = thread::spawn_worker_thread_with_stack_size(
|
||||
"execute thread",
|
||||
move || {
|
||||
// Try to enable landlock.
|
||||
#[cfg(target_os = "linux")]
|
||||
let landlock_status = polkadot_node_core_pvf_common::worker::security::landlock::try_restrict_thread()
|
||||
.map(LandlockStatus::from_ruleset_status)
|
||||
.map_err(|e| e.to_string());
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
let landlock_status: Result<LandlockStatus, String> = Ok(LandlockStatus::NotEnforced);
|
||||
|
||||
(
|
||||
validate_using_artifact(
|
||||
&compiled_artifact_blob,
|
||||
¶ms,
|
||||
executor_2,
|
||||
cpu_time_start,
|
||||
),
|
||||
landlock_status,
|
||||
)
|
||||
},
|
||||
Arc::clone(&condvar),
|
||||
@@ -215,24 +202,9 @@ pub fn worker_entrypoint(
|
||||
let response = match outcome {
|
||||
WaitOutcome::Finished => {
|
||||
let _ = cpu_time_monitor_tx.send(());
|
||||
let (result, landlock_status) = execute_thread.join().unwrap_or_else(|e| {
|
||||
(
|
||||
Response::Panic(stringify_panic_payload(e)),
|
||||
Ok(LandlockStatus::Unavailable),
|
||||
)
|
||||
});
|
||||
|
||||
// Log if landlock threw an error.
|
||||
if let Err(err) = landlock_status {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
"error enabling landlock: {}",
|
||||
err
|
||||
);
|
||||
}
|
||||
|
||||
result
|
||||
execute_thread
|
||||
.join()
|
||||
.unwrap_or_else(|e| Response::Panic(stringify_panic_payload(e)))
|
||||
},
|
||||
// If the CPU thread is not selected, we signal it to end, the join handle is
|
||||
// dropped and the thread will finish in the background.
|
||||
@@ -267,7 +239,13 @@ pub fn worker_entrypoint(
|
||||
),
|
||||
};
|
||||
|
||||
send_response(&mut stream, response).await?;
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
"worker: sending response to host: {:?}",
|
||||
response
|
||||
);
|
||||
send_response(&mut stream, response)?;
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
@@ -7,6 +7,7 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
cfg-if = "1.0"
|
||||
futures = "0.3.21"
|
||||
gum = { package = "tracing-gum", path = "../../../gum" }
|
||||
libc = "0.2.139"
|
||||
|
||||
@@ -33,25 +33,24 @@ use parity_scale_codec::{Decode, Encode};
|
||||
use polkadot_node_core_pvf_common::{
|
||||
error::{PrepareError, PrepareResult},
|
||||
executor_intf::Executor,
|
||||
framed_recv, framed_send,
|
||||
framed_recv_blocking, framed_send_blocking,
|
||||
prepare::{MemoryStats, PrepareJobKind, PrepareStats},
|
||||
pvf::PvfPrepData,
|
||||
worker::{
|
||||
bytes_to_path, cpu_time_monitor_loop,
|
||||
security::LandlockStatus,
|
||||
stringify_panic_payload,
|
||||
cpu_time_monitor_loop, stringify_panic_payload,
|
||||
thread::{self, WaitOutcome},
|
||||
worker_event_loop,
|
||||
worker_event_loop, WorkerKind,
|
||||
},
|
||||
ProcessTime,
|
||||
worker_dir, ProcessTime, SecurityStatus,
|
||||
};
|
||||
use polkadot_primitives::ExecutorParams;
|
||||
use std::{
|
||||
os::unix::net::UnixStream,
|
||||
path::PathBuf,
|
||||
sync::{mpsc::channel, Arc},
|
||||
time::Duration,
|
||||
};
|
||||
use tokio::{io, net::UnixStream};
|
||||
use tokio::io;
|
||||
|
||||
/// Contains the bytes for a successfully compiled artifact.
|
||||
pub struct CompiledArtifact(Vec<u8>);
|
||||
@@ -69,37 +68,35 @@ impl AsRef<[u8]> for CompiledArtifact {
|
||||
}
|
||||
}
|
||||
|
||||
async fn recv_request(stream: &mut UnixStream) -> io::Result<(PvfPrepData, PathBuf)> {
|
||||
let pvf = framed_recv(stream).await?;
|
||||
fn recv_request(stream: &mut UnixStream) -> io::Result<PvfPrepData> {
|
||||
let pvf = framed_recv_blocking(stream)?;
|
||||
let pvf = PvfPrepData::decode(&mut &pvf[..]).map_err(|e| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("prepare pvf recv_request: failed to decode PvfPrepData: {}", e),
|
||||
)
|
||||
})?;
|
||||
let tmp_file = framed_recv(stream).await?;
|
||||
let tmp_file = bytes_to_path(&tmp_file).ok_or_else(|| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
"prepare pvf recv_request: non utf-8 artifact path".to_string(),
|
||||
)
|
||||
})?;
|
||||
Ok((pvf, tmp_file))
|
||||
Ok(pvf)
|
||||
}
|
||||
|
||||
async fn send_response(stream: &mut UnixStream, result: PrepareResult) -> io::Result<()> {
|
||||
framed_send(stream, &result.encode()).await
|
||||
fn send_response(stream: &mut UnixStream, result: PrepareResult) -> io::Result<()> {
|
||||
framed_send_blocking(stream, &result.encode())
|
||||
}
|
||||
|
||||
/// The entrypoint that the spawned prepare worker should start with.
|
||||
///
|
||||
/// # Parameters
|
||||
///
|
||||
/// The `socket_path` specifies the path to the socket used to communicate with the host. The
|
||||
/// `node_version`, if `Some`, is checked against the worker version. A mismatch results in
|
||||
/// - `worker_dir_path`: specifies the path to the worker-specific temporary directory.
|
||||
///
|
||||
/// - `node_version`: if `Some`, is checked against the `worker_version`. A mismatch results in
|
||||
/// immediate worker termination. `None` is used for tests and in other situations when version
|
||||
/// check is not necessary.
|
||||
///
|
||||
/// - `worker_version`: see above
|
||||
///
|
||||
/// - `security_status`: contains the detected status of security features.
|
||||
///
|
||||
/// # Flow
|
||||
///
|
||||
/// This runs the following in a loop:
|
||||
@@ -119,20 +116,23 @@ async fn send_response(stream: &mut UnixStream, result: PrepareResult) -> io::Re
|
||||
/// 7. Send the result of preparation back to the host. If any error occurred in the above steps, we
|
||||
/// send that in the `PrepareResult`.
|
||||
pub fn worker_entrypoint(
|
||||
socket_path: &str,
|
||||
worker_dir_path: PathBuf,
|
||||
node_version: Option<&str>,
|
||||
worker_version: Option<&str>,
|
||||
security_status: SecurityStatus,
|
||||
) {
|
||||
worker_event_loop(
|
||||
"prepare",
|
||||
socket_path,
|
||||
WorkerKind::Prepare,
|
||||
worker_dir_path,
|
||||
node_version,
|
||||
worker_version,
|
||||
|mut stream| async move {
|
||||
&security_status,
|
||||
|mut stream, worker_dir_path| async move {
|
||||
let worker_pid = std::process::id();
|
||||
let temp_artifact_dest = worker_dir::prepare_tmp_artifact(&worker_dir_path);
|
||||
|
||||
loop {
|
||||
let (pvf, temp_artifact_dest) = recv_request(&mut stream).await?;
|
||||
let pvf = recv_request(&mut stream)?;
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
@@ -172,14 +172,6 @@ pub fn worker_entrypoint(
|
||||
let prepare_thread = thread::spawn_worker_thread(
|
||||
"prepare thread",
|
||||
move || {
|
||||
// Try to enable landlock.
|
||||
#[cfg(target_os = "linux")]
|
||||
let landlock_status = polkadot_node_core_pvf_common::worker::security::landlock::try_restrict_thread()
|
||||
.map(LandlockStatus::from_ruleset_status)
|
||||
.map_err(|e| e.to_string());
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
let landlock_status: Result<LandlockStatus, String> = Ok(LandlockStatus::NotEnforced);
|
||||
|
||||
#[allow(unused_mut)]
|
||||
let mut result = prepare_artifact(pvf, cpu_time_start);
|
||||
|
||||
@@ -200,7 +192,7 @@ pub fn worker_entrypoint(
|
||||
});
|
||||
}
|
||||
|
||||
(result, landlock_status)
|
||||
result
|
||||
},
|
||||
Arc::clone(&condvar),
|
||||
WaitOutcome::Finished,
|
||||
@@ -213,20 +205,20 @@ pub fn worker_entrypoint(
|
||||
let _ = cpu_time_monitor_tx.send(());
|
||||
|
||||
match prepare_thread.join().unwrap_or_else(|err| {
|
||||
(
|
||||
Err(PrepareError::Panic(stringify_panic_payload(err))),
|
||||
Ok(LandlockStatus::Unavailable),
|
||||
)
|
||||
Err(PrepareError::Panic(stringify_panic_payload(err)))
|
||||
}) {
|
||||
(Err(err), _) => {
|
||||
Err(err) => {
|
||||
// Serialized error will be written into the socket.
|
||||
Err(err)
|
||||
},
|
||||
(Ok(ok), landlock_status) => {
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
let (artifact, cpu_time_elapsed) = ok;
|
||||
#[cfg(target_os = "linux")]
|
||||
Ok(ok) => {
|
||||
cfg_if::cfg_if! {
|
||||
if #[cfg(target_os = "linux")] {
|
||||
let (artifact, cpu_time_elapsed, max_rss) = ok;
|
||||
} else {
|
||||
let (artifact, cpu_time_elapsed) = ok;
|
||||
}
|
||||
}
|
||||
|
||||
// Stop the memory stats worker and get its observed memory stats.
|
||||
#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
|
||||
@@ -242,16 +234,6 @@ pub fn worker_entrypoint(
|
||||
max_rss: extract_max_rss_stat(max_rss, worker_pid),
|
||||
};
|
||||
|
||||
// Log if landlock threw an error.
|
||||
if let Err(err) = landlock_status {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
"error enabling landlock: {}",
|
||||
err
|
||||
);
|
||||
}
|
||||
|
||||
// Write the serialized artifact into a temp file.
|
||||
//
|
||||
// PVF host only keeps artifacts statuses in its memory,
|
||||
@@ -300,7 +282,13 @@ pub fn worker_entrypoint(
|
||||
),
|
||||
};
|
||||
|
||||
send_response(&mut stream, result).await?;
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
"worker: sending response to host: {:?}",
|
||||
result
|
||||
);
|
||||
send_response(&mut stream, result)?;
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
@@ -172,9 +172,10 @@ impl Artifacts {
|
||||
///
|
||||
/// The recognized artifacts will be filled in the table and unrecognized will be removed.
|
||||
pub async fn new(cache_path: &Path) -> Self {
|
||||
// Make sure that the cache path directory and all its parents are created.
|
||||
// First delete the entire cache. Nodes are long-running so this should populate shortly.
|
||||
// First delete the entire cache. This includes artifacts and any leftover worker dirs (see
|
||||
// [`WorkerDir`]). Nodes are long-running so this should populate shortly.
|
||||
let _ = tokio::fs::remove_dir_all(cache_path).await;
|
||||
// Make sure that the cache path directory and all its parents are created.
|
||||
let _ = tokio::fs::create_dir_all(cache_path).await;
|
||||
|
||||
Self { artifacts: HashMap::new() }
|
||||
@@ -295,7 +296,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn artifacts_removes_cache_on_startup() {
|
||||
let fake_cache_path = crate::worker_intf::tmpfile("test-cache").await.unwrap();
|
||||
let fake_cache_path = crate::worker_intf::tmppath("test-cache").await.unwrap();
|
||||
let fake_artifact_path = {
|
||||
let mut p = fake_cache_path.clone();
|
||||
p.push("wasmtime_0x1234567890123456789012345678901234567890123456789012345678901234");
|
||||
|
||||
@@ -30,6 +30,7 @@ use futures::{
|
||||
stream::{FuturesUnordered, StreamExt as _},
|
||||
Future, FutureExt,
|
||||
};
|
||||
use polkadot_node_core_pvf_common::SecurityStatus;
|
||||
use polkadot_primitives::{ExecutorParams, ExecutorParamsHash};
|
||||
use slotmap::HopSlotMap;
|
||||
use std::{
|
||||
@@ -139,8 +140,10 @@ struct Queue {
|
||||
|
||||
// Some variables related to the current session.
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
|
||||
/// The queue of jobs that are waiting for a worker to pick up.
|
||||
queue: VecDeque<ExecuteJob>,
|
||||
@@ -152,16 +155,20 @@ impl Queue {
|
||||
fn new(
|
||||
metrics: Metrics,
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
worker_capacity: usize,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
to_queue_rx: mpsc::Receiver<ToQueue>,
|
||||
) -> Self {
|
||||
Self {
|
||||
metrics,
|
||||
program_path,
|
||||
cache_path,
|
||||
spawn_timeout,
|
||||
node_version,
|
||||
security_status,
|
||||
to_queue_rx,
|
||||
queue: VecDeque::new(),
|
||||
mux: Mux::new(),
|
||||
@@ -405,9 +412,11 @@ fn spawn_extra_worker(queue: &mut Queue, job: ExecuteJob) {
|
||||
queue.mux.push(
|
||||
spawn_worker_task(
|
||||
queue.program_path.clone(),
|
||||
queue.cache_path.clone(),
|
||||
job,
|
||||
queue.spawn_timeout,
|
||||
queue.node_version.clone(),
|
||||
queue.security_status.clone(),
|
||||
)
|
||||
.boxed(),
|
||||
);
|
||||
@@ -423,18 +432,22 @@ fn spawn_extra_worker(queue: &mut Queue, job: ExecuteJob) {
|
||||
/// execute other jobs with a compatible execution environment.
|
||||
async fn spawn_worker_task(
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
job: ExecuteJob,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
) -> QueueEvent {
|
||||
use futures_timer::Delay;
|
||||
|
||||
loop {
|
||||
match super::worker_intf::spawn(
|
||||
&program_path,
|
||||
&cache_path,
|
||||
job.executor_params.clone(),
|
||||
spawn_timeout,
|
||||
node_version.as_deref(),
|
||||
security_status.clone(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -496,17 +509,21 @@ fn assign(queue: &mut Queue, worker: Worker, job: ExecuteJob) {
|
||||
pub fn start(
|
||||
metrics: Metrics,
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
worker_capacity: usize,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
) -> (mpsc::Sender<ToQueue>, impl Future<Output = ()>) {
|
||||
let (to_queue_tx, to_queue_rx) = mpsc::channel(20);
|
||||
let run = Queue::new(
|
||||
metrics,
|
||||
program_path,
|
||||
cache_path,
|
||||
worker_capacity,
|
||||
spawn_timeout,
|
||||
node_version,
|
||||
security_status,
|
||||
to_queue_rx,
|
||||
)
|
||||
.run();
|
||||
|
||||
@@ -19,8 +19,8 @@
|
||||
use crate::{
|
||||
artifacts::ArtifactPathId,
|
||||
worker_intf::{
|
||||
path_to_bytes, spawn_with_program_path, IdleWorker, SpawnErr, WorkerHandle,
|
||||
JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker,
|
||||
SpawnErr, WorkerDir, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
},
|
||||
LOG_TARGET,
|
||||
};
|
||||
@@ -30,7 +30,7 @@ use parity_scale_codec::{Decode, Encode};
|
||||
use polkadot_node_core_pvf_common::{
|
||||
error::InternalValidationError,
|
||||
execute::{Handshake, Response},
|
||||
framed_recv, framed_send,
|
||||
worker_dir, SecurityStatus,
|
||||
};
|
||||
use polkadot_parachain_primitives::primitives::ValidationResult;
|
||||
use polkadot_primitives::ExecutorParams;
|
||||
@@ -38,21 +38,30 @@ use std::{path::Path, time::Duration};
|
||||
use tokio::{io, net::UnixStream};
|
||||
|
||||
/// Spawns a new worker with the given program path that acts as the worker and the spawn timeout.
|
||||
/// Sends a handshake message to the worker as soon as it is spawned.
|
||||
///
|
||||
/// The program should be able to handle `<program-path> execute-worker <socket-path>` invocation.
|
||||
/// Sends a handshake message to the worker as soon as it is spawned.
|
||||
pub async fn spawn(
|
||||
program_path: &Path,
|
||||
cache_path: &Path,
|
||||
executor_params: ExecutorParams,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<&str>,
|
||||
security_status: SecurityStatus,
|
||||
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
|
||||
let mut extra_args = vec!["execute-worker"];
|
||||
if let Some(node_version) = node_version {
|
||||
extra_args.extend_from_slice(&["--node-impl-version", node_version]);
|
||||
}
|
||||
let (mut idle_worker, worker_handle) =
|
||||
spawn_with_program_path("execute", program_path, &extra_args, spawn_timeout).await?;
|
||||
|
||||
let (mut idle_worker, worker_handle) = spawn_with_program_path(
|
||||
"execute",
|
||||
program_path,
|
||||
cache_path,
|
||||
&extra_args,
|
||||
spawn_timeout,
|
||||
security_status,
|
||||
)
|
||||
.await?;
|
||||
send_handshake(&mut idle_worker.stream, Handshake { executor_params })
|
||||
.await
|
||||
.map_err(|error| {
|
||||
@@ -104,19 +113,19 @@ pub async fn start_work(
|
||||
execution_timeout: Duration,
|
||||
validation_params: Vec<u8>,
|
||||
) -> Outcome {
|
||||
let IdleWorker { mut stream, pid } = worker;
|
||||
let IdleWorker { mut stream, pid, worker_dir } = worker;
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
"starting execute for {}",
|
||||
artifact.path.display(),
|
||||
);
|
||||
|
||||
if let Err(error) =
|
||||
send_request(&mut stream, &artifact.path, &validation_params, execution_timeout).await
|
||||
{
|
||||
with_worker_dir_setup(worker_dir, pid, &artifact.path, |worker_dir| async move {
|
||||
if let Err(error) = send_request(&mut stream, &validation_params, execution_timeout).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
@@ -179,14 +188,76 @@ pub async fn start_work(
|
||||
};
|
||||
|
||||
match response {
|
||||
Response::Ok { result_descriptor, duration } =>
|
||||
Outcome::Ok { result_descriptor, duration, idle_worker: IdleWorker { stream, pid } },
|
||||
Response::InvalidCandidate(err) =>
|
||||
Outcome::InvalidCandidate { err, idle_worker: IdleWorker { stream, pid } },
|
||||
Response::Ok { result_descriptor, duration } => Outcome::Ok {
|
||||
result_descriptor,
|
||||
duration,
|
||||
idle_worker: IdleWorker { stream, pid, worker_dir },
|
||||
},
|
||||
Response::InvalidCandidate(err) => Outcome::InvalidCandidate {
|
||||
err,
|
||||
idle_worker: IdleWorker { stream, pid, worker_dir },
|
||||
},
|
||||
Response::TimedOut => Outcome::HardTimeout,
|
||||
Response::Panic(err) => Outcome::Panic { err },
|
||||
Response::InternalError(err) => Outcome::InternalError { err },
|
||||
}
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
/// Create a temporary file for an artifact in the worker cache, execute the given future/closure
|
||||
/// passing the file path in, and clean up the worker cache.
|
||||
///
|
||||
/// Failure to clean up the worker cache results in an error - leaving any files here could be a
|
||||
/// security issue, and we should shut down the worker. This should be very rare.
|
||||
async fn with_worker_dir_setup<F, Fut>(
|
||||
worker_dir: WorkerDir,
|
||||
pid: u32,
|
||||
artifact_path: &Path,
|
||||
f: F,
|
||||
) -> Outcome
|
||||
where
|
||||
Fut: futures::Future<Output = Outcome>,
|
||||
F: FnOnce(WorkerDir) -> Fut,
|
||||
{
|
||||
// Cheaply create a hard link to the artifact. The artifact is always at a known location in the
|
||||
// worker cache, and the child can't access any other artifacts or gain any information from the
|
||||
// original filename.
|
||||
let link_path = worker_dir::execute_artifact(&worker_dir.path);
|
||||
if let Err(err) = tokio::fs::hard_link(artifact_path, link_path).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir,
|
||||
"failed to clear worker cache after the job: {:?}",
|
||||
err,
|
||||
);
|
||||
return Outcome::InternalError {
|
||||
err: InternalValidationError::CouldNotCreateLink(format!("{:?}", err)),
|
||||
}
|
||||
}
|
||||
|
||||
let worker_dir_path = worker_dir.path.clone();
|
||||
let outcome = f(worker_dir).await;
|
||||
|
||||
// Try to clear the worker dir.
|
||||
if let Err(err) = clear_worker_dir_path(&worker_dir_path) {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir_path,
|
||||
"failed to clear worker cache after the job: {:?}",
|
||||
err,
|
||||
);
|
||||
return Outcome::InternalError {
|
||||
err: InternalValidationError::CouldNotClearWorkerDir {
|
||||
err: format!("{:?}", err),
|
||||
path: worker_dir_path.to_str().map(String::from),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
outcome
|
||||
}
|
||||
|
||||
async fn send_handshake(stream: &mut UnixStream, handshake: Handshake) -> io::Result<()> {
|
||||
@@ -195,11 +266,9 @@ async fn send_handshake(stream: &mut UnixStream, handshake: Handshake) -> io::Re
|
||||
|
||||
async fn send_request(
|
||||
stream: &mut UnixStream,
|
||||
artifact_path: &Path,
|
||||
validation_params: &[u8],
|
||||
execution_timeout: Duration,
|
||||
) -> io::Result<()> {
|
||||
framed_send(stream, path_to_bytes(artifact_path)).await?;
|
||||
framed_send(stream, validation_params).await?;
|
||||
framed_send(stream, &execution_timeout.encode()).await
|
||||
}
|
||||
|
||||
@@ -34,6 +34,7 @@ use futures::{
|
||||
use polkadot_node_core_pvf_common::{
|
||||
error::{PrepareError, PrepareResult},
|
||||
pvf::PvfPrepData,
|
||||
SecurityStatus,
|
||||
};
|
||||
use polkadot_parachain_primitives::primitives::ValidationResult;
|
||||
use std::{
|
||||
@@ -202,8 +203,13 @@ impl Config {
|
||||
pub fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Future<Output = ()>) {
|
||||
gum::debug!(target: LOG_TARGET, ?config, "starting PVF validation host");
|
||||
|
||||
// Run checks for supported security features once per host startup.
|
||||
warn_if_no_landlock();
|
||||
// Run checks for supported security features once per host startup. Warn here if not enabled.
|
||||
let security_status = {
|
||||
let can_enable_landlock = check_landlock(&config.prepare_worker_program_path);
|
||||
let can_unshare_user_namespace_and_change_root =
|
||||
check_can_unshare_user_namespace_and_change_root(&config.prepare_worker_program_path);
|
||||
SecurityStatus { can_enable_landlock, can_unshare_user_namespace_and_change_root }
|
||||
};
|
||||
|
||||
let (to_host_tx, to_host_rx) = mpsc::channel(10);
|
||||
|
||||
@@ -215,6 +221,7 @@ pub fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Future<O
|
||||
config.cache_path.clone(),
|
||||
config.prepare_worker_spawn_timeout,
|
||||
config.node_version.clone(),
|
||||
security_status.clone(),
|
||||
);
|
||||
|
||||
let (to_prepare_queue_tx, from_prepare_queue_rx, run_prepare_queue) = prepare::start_queue(
|
||||
@@ -229,9 +236,11 @@ pub fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Future<O
|
||||
let (to_execute_queue_tx, run_execute_queue) = execute::start(
|
||||
metrics,
|
||||
config.execute_worker_program_path.to_owned(),
|
||||
config.cache_path.clone(),
|
||||
config.execute_workers_max_num,
|
||||
config.execute_worker_spawn_timeout,
|
||||
config.node_version,
|
||||
security_status,
|
||||
);
|
||||
|
||||
let (to_sweeper_tx, to_sweeper_rx) = mpsc::channel(100);
|
||||
@@ -873,28 +882,103 @@ fn pulse_every(interval: std::time::Duration) -> impl futures::Stream<Item = ()>
|
||||
.map(|_| ())
|
||||
}
|
||||
|
||||
/// Check if landlock is supported and emit a warning if not.
|
||||
fn warn_if_no_landlock() {
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
use polkadot_node_core_pvf_common::worker::security::landlock;
|
||||
let status = landlock::get_status();
|
||||
if !landlock::status_is_fully_enabled(&status) {
|
||||
let abi = landlock::LANDLOCK_ABI as u8;
|
||||
/// Check if we can sandbox the root and emit a warning if not.
|
||||
///
|
||||
/// We do this check by spawning a new process and trying to sandbox it. To get as close as possible
|
||||
/// to running the check in a worker, we try it... in a worker. The expected return status is 0 on
|
||||
/// success and -1 on failure.
|
||||
fn check_can_unshare_user_namespace_and_change_root(
|
||||
#[cfg_attr(not(target_os = "linux"), allow(unused_variables))]
|
||||
prepare_worker_program_path: &Path,
|
||||
) -> bool {
|
||||
cfg_if::cfg_if! {
|
||||
if #[cfg(target_os = "linux")] {
|
||||
let output = std::process::Command::new(prepare_worker_program_path)
|
||||
.arg("--check-can-unshare-user-namespace-and-change-root")
|
||||
.output();
|
||||
|
||||
match output {
|
||||
Ok(output) if output.status.success() => true,
|
||||
Ok(output) => {
|
||||
let stderr = std::str::from_utf8(&output.stderr)
|
||||
.expect("child process writes a UTF-8 string to stderr; qed")
|
||||
.trim();
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
?status,
|
||||
%abi,
|
||||
"Cannot fully enable landlock, a Linux kernel security feature. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider upgrading the kernel version for maximum security."
|
||||
?prepare_worker_program_path,
|
||||
// Docs say to always print status using `Display` implementation.
|
||||
status = %output.status,
|
||||
%stderr,
|
||||
"Cannot unshare user namespace and change root, which are Linux-specific kernel security features. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider running with support for unsharing user namespaces for maximum security."
|
||||
);
|
||||
false
|
||||
},
|
||||
Err(err) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
?prepare_worker_program_path,
|
||||
"Could not start child process: {}",
|
||||
err
|
||||
);
|
||||
false
|
||||
},
|
||||
}
|
||||
} else {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
"Cannot unshare user namespace and change root, which are Linux-specific kernel security features. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider running on Linux with support for unsharing user namespaces for maximum security."
|
||||
);
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
/// Check if landlock is supported and emit a warning if not.
|
||||
///
|
||||
/// We do this check by spawning a new process and trying to sandbox it. To get as close as possible
|
||||
/// to running the check in a worker, we try it... in a worker. The expected return status is 0 on
|
||||
/// success and -1 on failure.
|
||||
fn check_landlock(
|
||||
#[cfg_attr(not(target_os = "linux"), allow(unused_variables))]
|
||||
prepare_worker_program_path: &Path,
|
||||
) -> bool {
|
||||
cfg_if::cfg_if! {
|
||||
if #[cfg(target_os = "linux")] {
|
||||
match std::process::Command::new(prepare_worker_program_path)
|
||||
.arg("--check-can-enable-landlock")
|
||||
.status()
|
||||
{
|
||||
Ok(status) if status.success() => true,
|
||||
Ok(status) => {
|
||||
let abi =
|
||||
polkadot_node_core_pvf_common::worker::security::landlock::LANDLOCK_ABI as u8;
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
"Cannot enable landlock, a Linux kernel security feature. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider running on Linux with landlock support for maximum security."
|
||||
?prepare_worker_program_path,
|
||||
?status,
|
||||
%abi,
|
||||
"Cannot fully enable landlock, a Linux-specific kernel security feature. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider upgrading the kernel version for maximum security."
|
||||
);
|
||||
false
|
||||
},
|
||||
Err(err) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
?prepare_worker_program_path,
|
||||
"Could not start child process: {}",
|
||||
err
|
||||
);
|
||||
false
|
||||
},
|
||||
}
|
||||
} else {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
"Cannot enable landlock, a Linux-specific kernel security feature. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider running on Linux with landlock support for maximum security."
|
||||
);
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -111,6 +111,7 @@ pub use polkadot_node_core_pvf_common::{
|
||||
error::{InternalValidationError, PrepareError},
|
||||
prepare::{PrepareJobKind, PrepareStats},
|
||||
pvf::PvfPrepData,
|
||||
SecurityStatus,
|
||||
};
|
||||
|
||||
/// The log target for this crate.
|
||||
|
||||
@@ -27,6 +27,7 @@ use futures::{
|
||||
use polkadot_node_core_pvf_common::{
|
||||
error::{PrepareError, PrepareResult},
|
||||
pvf::PvfPrepData,
|
||||
SecurityStatus,
|
||||
};
|
||||
use slotmap::HopSlotMap;
|
||||
use std::{
|
||||
@@ -110,10 +111,12 @@ enum PoolEvent {
|
||||
type Mux = FuturesUnordered<BoxFuture<'static, PoolEvent>>;
|
||||
|
||||
struct Pool {
|
||||
// Some variables related to the current session.
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
|
||||
to_pool: mpsc::Receiver<ToPool>,
|
||||
from_pool: mpsc::UnboundedSender<FromPool>,
|
||||
@@ -132,6 +135,7 @@ async fn run(
|
||||
cache_path,
|
||||
spawn_timeout,
|
||||
node_version,
|
||||
security_status,
|
||||
to_pool,
|
||||
mut from_pool,
|
||||
mut spawned,
|
||||
@@ -160,6 +164,7 @@ async fn run(
|
||||
&cache_path,
|
||||
spawn_timeout,
|
||||
node_version.clone(),
|
||||
security_status.clone(),
|
||||
&mut spawned,
|
||||
&mut mux,
|
||||
to_pool,
|
||||
@@ -207,6 +212,7 @@ fn handle_to_pool(
|
||||
cache_path: &Path,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
spawned: &mut HopSlotMap<Worker, WorkerData>,
|
||||
mux: &mut Mux,
|
||||
to_pool: ToPool,
|
||||
@@ -216,7 +222,14 @@ fn handle_to_pool(
|
||||
gum::debug!(target: LOG_TARGET, "spawning a new prepare worker");
|
||||
metrics.prepare_worker().on_begin_spawn();
|
||||
mux.push(
|
||||
spawn_worker_task(program_path.to_owned(), spawn_timeout, node_version).boxed(),
|
||||
spawn_worker_task(
|
||||
program_path.to_owned(),
|
||||
cache_path.to_owned(),
|
||||
spawn_timeout,
|
||||
node_version,
|
||||
security_status,
|
||||
)
|
||||
.boxed(),
|
||||
);
|
||||
},
|
||||
ToPool::StartWork { worker, pvf, artifact_path } => {
|
||||
@@ -229,7 +242,6 @@ fn handle_to_pool(
|
||||
worker,
|
||||
idle,
|
||||
pvf,
|
||||
cache_path.to_owned(),
|
||||
artifact_path,
|
||||
preparation_timer,
|
||||
)
|
||||
@@ -258,13 +270,23 @@ fn handle_to_pool(
|
||||
|
||||
async fn spawn_worker_task(
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
) -> PoolEvent {
|
||||
use futures_timer::Delay;
|
||||
|
||||
loop {
|
||||
match worker_intf::spawn(&program_path, spawn_timeout, node_version.as_deref()).await {
|
||||
match worker_intf::spawn(
|
||||
&program_path,
|
||||
&cache_path,
|
||||
spawn_timeout,
|
||||
node_version.as_deref(),
|
||||
security_status.clone(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok((idle, handle)) => break PoolEvent::Spawn(idle, handle),
|
||||
Err(err) => {
|
||||
gum::warn!(target: LOG_TARGET, "failed to spawn a prepare worker: {:?}", err);
|
||||
@@ -281,11 +303,10 @@ async fn start_work_task<Timer>(
|
||||
worker: Worker,
|
||||
idle: IdleWorker,
|
||||
pvf: PvfPrepData,
|
||||
cache_path: PathBuf,
|
||||
artifact_path: PathBuf,
|
||||
_preparation_timer: Option<Timer>,
|
||||
) -> PoolEvent {
|
||||
let outcome = worker_intf::start_work(&metrics, idle, pvf, &cache_path, artifact_path).await;
|
||||
let outcome = worker_intf::start_work(&metrics, idle, pvf, artifact_path).await;
|
||||
PoolEvent::StartWork(worker, outcome)
|
||||
}
|
||||
|
||||
@@ -322,14 +343,29 @@ fn handle_mux(
|
||||
),
|
||||
// Return `Concluded`, but do not kill the worker since the error was on the host
|
||||
// side.
|
||||
Outcome::RenameTmpFileErr { worker: idle, result: _, err } =>
|
||||
Outcome::RenameTmpFileErr { worker: idle, result: _, err, src, dest } =>
|
||||
handle_concluded_no_rip(
|
||||
from_pool,
|
||||
spawned,
|
||||
worker,
|
||||
idle,
|
||||
Err(PrepareError::RenameTmpFileErr(err)),
|
||||
Err(PrepareError::RenameTmpFileErr { err, src, dest }),
|
||||
),
|
||||
// Could not clear worker cache. Kill the worker so other jobs can't see the data.
|
||||
Outcome::ClearWorkerDir { err } => {
|
||||
if attempt_retire(metrics, spawned, worker) {
|
||||
reply(
|
||||
from_pool,
|
||||
FromPool::Concluded {
|
||||
worker,
|
||||
rip: true,
|
||||
result: Err(PrepareError::ClearWorkerDir(err)),
|
||||
},
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
},
|
||||
Outcome::Unreachable => {
|
||||
if attempt_retire(metrics, spawned, worker) {
|
||||
reply(from_pool, FromPool::Rip(worker))?;
|
||||
@@ -434,6 +470,7 @@ pub fn start(
|
||||
cache_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
) -> (mpsc::Sender<ToPool>, mpsc::UnboundedReceiver<FromPool>, impl Future<Output = ()>) {
|
||||
let (to_pool_tx, to_pool_rx) = mpsc::channel(10);
|
||||
let (from_pool_tx, from_pool_rx) = mpsc::unbounded();
|
||||
@@ -444,6 +481,7 @@ pub fn start(
|
||||
cache_path,
|
||||
spawn_timeout,
|
||||
node_version,
|
||||
security_status,
|
||||
to_pool: to_pool_rx,
|
||||
from_pool: from_pool_tx,
|
||||
spawned: HopSlotMap::with_capacity_and_key(20),
|
||||
|
||||
@@ -19,17 +19,17 @@
|
||||
use crate::{
|
||||
metrics::Metrics,
|
||||
worker_intf::{
|
||||
path_to_bytes, spawn_with_program_path, tmpfile_in, IdleWorker, SpawnErr, WorkerHandle,
|
||||
JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker,
|
||||
SpawnErr, WorkerDir, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
},
|
||||
LOG_TARGET,
|
||||
};
|
||||
use parity_scale_codec::{Decode, Encode};
|
||||
use polkadot_node_core_pvf_common::{
|
||||
error::{PrepareError, PrepareResult},
|
||||
framed_recv, framed_send,
|
||||
prepare::PrepareStats,
|
||||
pvf::PvfPrepData,
|
||||
worker_dir, SecurityStatus,
|
||||
};
|
||||
|
||||
use sp_core::hexdisplay::HexDisplay;
|
||||
@@ -41,19 +41,33 @@ use tokio::{io, net::UnixStream};
|
||||
|
||||
/// Spawns a new worker with the given program path that acts as the worker and the spawn timeout.
|
||||
///
|
||||
/// The program should be able to handle `<program-path> prepare-worker <socket-path>` invocation.
|
||||
/// Sends a handshake message to the worker as soon as it is spawned.
|
||||
pub async fn spawn(
|
||||
program_path: &Path,
|
||||
cache_path: &Path,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<&str>,
|
||||
security_status: SecurityStatus,
|
||||
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
|
||||
let mut extra_args = vec!["prepare-worker"];
|
||||
if let Some(node_version) = node_version {
|
||||
extra_args.extend_from_slice(&["--node-impl-version", node_version]);
|
||||
}
|
||||
spawn_with_program_path("prepare", program_path, &extra_args, spawn_timeout).await
|
||||
|
||||
spawn_with_program_path(
|
||||
"prepare",
|
||||
program_path,
|
||||
cache_path,
|
||||
&extra_args,
|
||||
spawn_timeout,
|
||||
security_status,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Outcome of PVF preparation.
|
||||
///
|
||||
/// If the idle worker token is not returned, it means the worker must be terminated.
|
||||
pub enum Outcome {
|
||||
/// The worker has finished the work assigned to it.
|
||||
Concluded { worker: IdleWorker, result: PrepareResult },
|
||||
@@ -62,9 +76,19 @@ pub enum Outcome {
|
||||
Unreachable,
|
||||
/// The temporary file for the artifact could not be created at the given cache path.
|
||||
CreateTmpFileErr { worker: IdleWorker, err: String },
|
||||
/// The response from the worker is received, but the file cannot be renamed (moved) to the
|
||||
/// The response from the worker is received, but the tmp file cannot be renamed (moved) to the
|
||||
/// final destination location.
|
||||
RenameTmpFileErr { worker: IdleWorker, result: PrepareResult, err: String },
|
||||
RenameTmpFileErr {
|
||||
worker: IdleWorker,
|
||||
result: PrepareResult,
|
||||
err: String,
|
||||
// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
|
||||
// conversion to `Option<String>`.
|
||||
src: Option<String>,
|
||||
dest: Option<String>,
|
||||
},
|
||||
/// The worker cache could not be cleared for the given reason.
|
||||
ClearWorkerDir { err: String },
|
||||
/// The worker failed to finish the job until the given deadline.
|
||||
///
|
||||
/// The worker is no longer usable and should be killed.
|
||||
@@ -84,21 +108,25 @@ pub async fn start_work(
|
||||
metrics: &Metrics,
|
||||
worker: IdleWorker,
|
||||
pvf: PvfPrepData,
|
||||
cache_path: &Path,
|
||||
artifact_path: PathBuf,
|
||||
) -> Outcome {
|
||||
let IdleWorker { stream, pid } = worker;
|
||||
let IdleWorker { stream, pid, worker_dir } = worker;
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir,
|
||||
"starting prepare for {}",
|
||||
artifact_path.display(),
|
||||
);
|
||||
|
||||
with_tmp_file(stream, pid, cache_path, |tmp_file, mut stream| async move {
|
||||
with_worker_dir_setup(
|
||||
worker_dir,
|
||||
stream,
|
||||
pid,
|
||||
|tmp_artifact_file, mut stream, worker_dir| async move {
|
||||
let preparation_timeout = pvf.prep_timeout();
|
||||
if let Err(err) = send_request(&mut stream, pvf, &tmp_file).await {
|
||||
if let Err(err) = send_request(&mut stream, pvf).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
@@ -109,14 +137,14 @@ pub async fn start_work(
|
||||
}
|
||||
|
||||
// Wait for the result from the worker, keeping in mind that there may be a timeout, the
|
||||
// worker may get killed, or something along these lines. In that case we should propagate
|
||||
// the error to the pool.
|
||||
// worker may get killed, or something along these lines. In that case we should
|
||||
// propagate the error to the pool.
|
||||
//
|
||||
// We use a generous timeout here. This is in addition to the one in the child process, in
|
||||
// case the child stalls. We have a wall clock timeout here in the host, but a CPU timeout
|
||||
// in the child. We want to use CPU time because it varies less than wall clock time under
|
||||
// load, but the CPU resources of the child can only be measured from the parent after the
|
||||
// child process terminates.
|
||||
// We use a generous timeout here. This is in addition to the one in the child process,
|
||||
// in case the child stalls. We have a wall clock timeout here in the host, but a CPU
|
||||
// timeout in the child. We want to use CPU time because it varies less than wall clock
|
||||
// time under load, but the CPU resources of the child can only be measured from the
|
||||
// parent after the child process terminates.
|
||||
let timeout = preparation_timeout * JOB_TIMEOUT_WALL_CLOCK_FACTOR;
|
||||
let result = tokio::time::timeout(timeout, recv_response(&mut stream, pid)).await;
|
||||
|
||||
@@ -125,10 +153,10 @@ pub async fn start_work(
|
||||
Ok(Ok(prepare_result)) =>
|
||||
handle_response(
|
||||
metrics,
|
||||
IdleWorker { stream, pid },
|
||||
IdleWorker { stream, pid, worker_dir },
|
||||
prepare_result,
|
||||
pid,
|
||||
tmp_file,
|
||||
tmp_artifact_file,
|
||||
artifact_path,
|
||||
preparation_timeout,
|
||||
)
|
||||
@@ -153,14 +181,15 @@ pub async fn start_work(
|
||||
Outcome::TimedOut
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Handles the case where we successfully received response bytes on the host from the child.
|
||||
///
|
||||
/// NOTE: Here we know the artifact exists, but is still located in a temporary file which will be
|
||||
/// cleared by `with_tmp_file`.
|
||||
/// Here we know the artifact exists, but is still located in a temporary file which will be cleared
|
||||
/// by [`with_worker_dir_setup`].
|
||||
async fn handle_response(
|
||||
metrics: &Metrics,
|
||||
worker: IdleWorker,
|
||||
@@ -209,7 +238,13 @@ async fn handle_response(
|
||||
artifact_path.display(),
|
||||
err,
|
||||
);
|
||||
Outcome::RenameTmpFileErr { worker, result, err: format!("{:?}", err) }
|
||||
Outcome::RenameTmpFileErr {
|
||||
worker,
|
||||
result,
|
||||
err: format!("{:?}", err),
|
||||
src: tmp_file.to_str().map(String::from),
|
||||
dest: artifact_path.to_str().map(String::from),
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
@@ -220,61 +255,58 @@ async fn handle_response(
|
||||
outcome
|
||||
}
|
||||
|
||||
/// Create a temporary file for an artifact at the given cache path and execute the given
|
||||
/// future/closure passing the file path in.
|
||||
/// Create a temporary file for an artifact in the worker cache, execute the given future/closure
|
||||
/// passing the file path in, and clean up the worker cache.
|
||||
///
|
||||
/// The function will try best effort to not leave behind the temporary file.
|
||||
async fn with_tmp_file<F, Fut>(stream: UnixStream, pid: u32, cache_path: &Path, f: F) -> Outcome
|
||||
/// Failure to clean up the worker cache results in an error - leaving any files here could be a
|
||||
/// security issue, and we should shut down the worker. This should be very rare.
|
||||
async fn with_worker_dir_setup<F, Fut>(
|
||||
worker_dir: WorkerDir,
|
||||
stream: UnixStream,
|
||||
pid: u32,
|
||||
f: F,
|
||||
) -> Outcome
|
||||
where
|
||||
Fut: futures::Future<Output = Outcome>,
|
||||
F: FnOnce(PathBuf, UnixStream) -> Fut,
|
||||
F: FnOnce(PathBuf, UnixStream, WorkerDir) -> Fut,
|
||||
{
|
||||
let tmp_file = match tmpfile_in("prepare-artifact-", cache_path).await {
|
||||
Ok(f) => f,
|
||||
Err(err) => {
|
||||
// Create the tmp file here so that the child doesn't need any file creation rights. This will
|
||||
// be cleared at the end of this function.
|
||||
let tmp_file = worker_dir::prepare_tmp_artifact(&worker_dir.path);
|
||||
if let Err(err) = tokio::fs::File::create(&tmp_file).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir,
|
||||
"failed to create a temp file for the artifact: {:?}",
|
||||
err,
|
||||
);
|
||||
return Outcome::CreateTmpFileErr {
|
||||
worker: IdleWorker { stream, pid },
|
||||
worker: IdleWorker { stream, pid, worker_dir },
|
||||
err: format!("{:?}", err),
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
let outcome = f(tmp_file.clone(), stream).await;
|
||||
let worker_dir_path = worker_dir.path.clone();
|
||||
let outcome = f(tmp_file, stream, worker_dir).await;
|
||||
|
||||
// The function called above is expected to move `tmp_file` to a new location upon success.
|
||||
// However, the function may as well fail and in that case we should remove the tmp file here.
|
||||
//
|
||||
// In any case, we try to remove the file here so that there are no leftovers. We only report
|
||||
// errors that are different from the `NotFound`.
|
||||
match tokio::fs::remove_file(tmp_file).await {
|
||||
Ok(()) => (),
|
||||
Err(err) if err.kind() == std::io::ErrorKind::NotFound => (),
|
||||
Err(err) => {
|
||||
// Try to clear the worker dir.
|
||||
if let Err(err) = clear_worker_dir_path(&worker_dir_path) {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to remove the tmp file: {:?}",
|
||||
?worker_dir_path,
|
||||
"failed to clear worker cache after the job: {:?}",
|
||||
err,
|
||||
);
|
||||
},
|
||||
return Outcome::ClearWorkerDir { err: format!("{:?}", err) }
|
||||
}
|
||||
|
||||
outcome
|
||||
}
|
||||
|
||||
async fn send_request(
|
||||
stream: &mut UnixStream,
|
||||
pvf: PvfPrepData,
|
||||
tmp_file: &Path,
|
||||
) -> io::Result<()> {
|
||||
async fn send_request(stream: &mut UnixStream, pvf: PvfPrepData) -> io::Result<()> {
|
||||
framed_send(stream, &pvf.encode()).await?;
|
||||
framed_send(stream, path_to_bytes(tmp_file)).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ use crate::LOG_TARGET;
|
||||
use futures::FutureExt as _;
|
||||
use futures_timer::Delay;
|
||||
use pin_project::pin_project;
|
||||
use polkadot_node_core_pvf_common::{worker_dir, SecurityStatus};
|
||||
use rand::Rng;
|
||||
use std::{
|
||||
fmt, mem,
|
||||
@@ -39,44 +40,67 @@ use tokio::{
|
||||
pub const JOB_TIMEOUT_WALL_CLOCK_FACTOR: u32 = 4;
|
||||
|
||||
/// This is publicly exposed only for integration tests.
|
||||
///
|
||||
/// # Parameters
|
||||
///
|
||||
/// - `debug_id`: An identifier for the process (e.g. "execute" or "prepare").
|
||||
///
|
||||
/// - `program_path`: The path to the program.
|
||||
///
|
||||
/// - `cache_path`: The path to the artifact cache.
|
||||
///
|
||||
/// - `extra_args`: Optional extra CLI arguments to the program. NOTE: Should only contain data
|
||||
/// required before the handshake, like node/worker versions for the version check. Other data
|
||||
/// should go through the handshake.
|
||||
///
|
||||
/// - `spawn_timeout`: The amount of time to wait for the child process to spawn.
|
||||
///
|
||||
/// - `security_status`: contains the detected status of security features.
|
||||
#[doc(hidden)]
|
||||
pub async fn spawn_with_program_path(
|
||||
debug_id: &'static str,
|
||||
program_path: impl Into<PathBuf>,
|
||||
cache_path: &Path,
|
||||
extra_args: &[&str],
|
||||
spawn_timeout: Duration,
|
||||
security_status: SecurityStatus,
|
||||
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
|
||||
let program_path = program_path.into();
|
||||
with_transient_socket_path(debug_id, |socket_path| {
|
||||
let socket_path = socket_path.to_owned();
|
||||
let worker_dir = WorkerDir::new(debug_id, cache_path).await?;
|
||||
let socket_path = worker_dir::socket(&worker_dir.path);
|
||||
|
||||
let extra_args: Vec<String> = extra_args.iter().map(|arg| arg.to_string()).collect();
|
||||
|
||||
async move {
|
||||
let listener = UnixListener::bind(&socket_path).map_err(|err| {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%debug_id,
|
||||
?program_path,
|
||||
?extra_args,
|
||||
?worker_dir,
|
||||
?socket_path,
|
||||
"cannot bind unix socket: {:?}",
|
||||
err,
|
||||
);
|
||||
SpawnErr::Bind
|
||||
})?;
|
||||
|
||||
let handle =
|
||||
WorkerHandle::spawn(&program_path, &extra_args, socket_path).map_err(|err| {
|
||||
let handle = WorkerHandle::spawn(&program_path, &extra_args, &worker_dir.path, security_status)
|
||||
.map_err(|err| {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%debug_id,
|
||||
?program_path,
|
||||
?extra_args,
|
||||
?worker_dir.path,
|
||||
?socket_path,
|
||||
"cannot spawn a worker: {:?}",
|
||||
err,
|
||||
);
|
||||
SpawnErr::ProcessSpawn
|
||||
})?;
|
||||
|
||||
let worker_dir_path = worker_dir.path.clone();
|
||||
futures::select! {
|
||||
accept_result = listener.accept().fuse() => {
|
||||
let (stream, _) = accept_result.map_err(|err| {
|
||||
@@ -85,12 +109,14 @@ pub async fn spawn_with_program_path(
|
||||
%debug_id,
|
||||
?program_path,
|
||||
?extra_args,
|
||||
?worker_dir_path,
|
||||
?socket_path,
|
||||
"cannot accept a worker: {:?}",
|
||||
err,
|
||||
);
|
||||
SpawnErr::Accept
|
||||
})?;
|
||||
Ok((IdleWorker { stream, pid: handle.id() }, handle))
|
||||
Ok((IdleWorker { stream, pid: handle.id(), worker_dir }, handle))
|
||||
}
|
||||
_ = Delay::new(spawn_timeout).fuse() => {
|
||||
gum::warn!(
|
||||
@@ -98,6 +124,8 @@ pub async fn spawn_with_program_path(
|
||||
%debug_id,
|
||||
?program_path,
|
||||
?extra_args,
|
||||
?worker_dir_path,
|
||||
?socket_path,
|
||||
?spawn_timeout,
|
||||
"spawning and connecting to socket timed out",
|
||||
);
|
||||
@@ -105,33 +133,13 @@ pub async fn spawn_with_program_path(
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
async fn with_transient_socket_path<T, F, Fut>(debug_id: &'static str, f: F) -> Result<T, SpawnErr>
|
||||
where
|
||||
F: FnOnce(&Path) -> Fut,
|
||||
Fut: futures::Future<Output = Result<T, SpawnErr>> + 'static,
|
||||
{
|
||||
let socket_path = tmpfile(&format!("pvf-host-{}", debug_id))
|
||||
.await
|
||||
.map_err(|_| SpawnErr::TmpFile)?;
|
||||
let result = f(&socket_path).await;
|
||||
|
||||
// Best effort to remove the socket file. Under normal circumstances the socket will be removed
|
||||
// by the worker. We make sure that it is removed here, just in case a failed rendezvous.
|
||||
let _ = tokio::fs::remove_file(socket_path).await;
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Returns a path under the given `dir`. The file name will start with the given prefix.
|
||||
/// Returns a path under the given `dir`. The path name will start with the given prefix.
|
||||
///
|
||||
/// There is only a certain number of retries. If exceeded this function will give up and return an
|
||||
/// error.
|
||||
pub async fn tmpfile_in(prefix: &str, dir: &Path) -> io::Result<PathBuf> {
|
||||
fn tmppath(prefix: &str, dir: &Path) -> PathBuf {
|
||||
pub async fn tmppath_in(prefix: &str, dir: &Path) -> io::Result<PathBuf> {
|
||||
fn make_tmppath(prefix: &str, dir: &Path) -> PathBuf {
|
||||
use rand::distributions::Alphanumeric;
|
||||
|
||||
const DESCRIMINATOR_LEN: usize = 10;
|
||||
@@ -143,27 +151,28 @@ pub async fn tmpfile_in(prefix: &str, dir: &Path) -> io::Result<PathBuf> {
|
||||
let s = std::str::from_utf8(&buf)
|
||||
.expect("the string is collected from a valid utf-8 sequence; qed");
|
||||
|
||||
let mut file = dir.to_owned();
|
||||
file.push(s);
|
||||
file
|
||||
let mut path = dir.to_owned();
|
||||
path.push(s);
|
||||
path
|
||||
}
|
||||
|
||||
const NUM_RETRIES: usize = 50;
|
||||
|
||||
for _ in 0..NUM_RETRIES {
|
||||
let candidate_path = tmppath(prefix, dir);
|
||||
if !candidate_path.exists() {
|
||||
return Ok(candidate_path)
|
||||
let tmp_path = make_tmppath(prefix, dir);
|
||||
if !tmp_path.exists() {
|
||||
return Ok(tmp_path)
|
||||
}
|
||||
}
|
||||
|
||||
Err(io::Error::new(io::ErrorKind::Other, "failed to create a temporary file"))
|
||||
Err(io::Error::new(io::ErrorKind::Other, "failed to create a temporary path"))
|
||||
}
|
||||
|
||||
/// The same as [`tmpfile_in`], but uses [`std::env::temp_dir`] as the directory.
|
||||
pub async fn tmpfile(prefix: &str) -> io::Result<PathBuf> {
|
||||
/// The same as [`tmppath_in`], but uses [`std::env::temp_dir`] as the directory.
|
||||
#[cfg(test)]
|
||||
pub async fn tmppath(prefix: &str) -> io::Result<PathBuf> {
|
||||
let temp_dir = PathBuf::from(std::env::temp_dir());
|
||||
tmpfile_in(prefix, &temp_dir).await
|
||||
tmppath_in(prefix, &temp_dir).await
|
||||
}
|
||||
|
||||
/// A struct that represents an idle worker.
|
||||
@@ -177,13 +186,19 @@ pub struct IdleWorker {
|
||||
|
||||
/// The identifier of this process. Used to reset the niceness.
|
||||
pub pid: u32,
|
||||
|
||||
/// The temporary per-worker path. We clean up the worker dir between jobs and delete it when
|
||||
/// the worker dies.
|
||||
pub worker_dir: WorkerDir,
|
||||
}
|
||||
|
||||
/// An error happened during spawning a worker process.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum SpawnErr {
|
||||
/// Cannot obtain a temporary file location.
|
||||
TmpFile,
|
||||
/// Cannot obtain a temporary path location.
|
||||
TmpPath,
|
||||
/// An FS error occurred.
|
||||
Fs(String),
|
||||
/// Cannot bind the socket to the given path.
|
||||
Bind,
|
||||
/// An error happened during accepting a connection to the socket.
|
||||
@@ -219,12 +234,32 @@ impl WorkerHandle {
|
||||
fn spawn(
|
||||
program: impl AsRef<Path>,
|
||||
extra_args: &[String],
|
||||
socket_path: impl AsRef<Path>,
|
||||
worker_dir_path: impl AsRef<Path>,
|
||||
security_status: SecurityStatus,
|
||||
) -> io::Result<Self> {
|
||||
let mut child = process::Command::new(program.as_ref())
|
||||
let security_args = {
|
||||
let mut args = vec![];
|
||||
if security_status.can_enable_landlock {
|
||||
args.push("--can-enable-landlock".to_string());
|
||||
}
|
||||
if security_status.can_unshare_user_namespace_and_change_root {
|
||||
args.push("--can-unshare-user-namespace-and-change-root".to_string());
|
||||
}
|
||||
args
|
||||
};
|
||||
|
||||
// Clear all env vars from the spawned process.
|
||||
let mut command = process::Command::new(program.as_ref());
|
||||
command.env_clear();
|
||||
// Add back any env vars we want to keep.
|
||||
if let Ok(value) = std::env::var("RUST_LOG") {
|
||||
command.env("RUST_LOG", value);
|
||||
}
|
||||
let mut child = command
|
||||
.args(extra_args)
|
||||
.arg("--socket-path")
|
||||
.arg(socket_path.as_ref().as_os_str())
|
||||
.arg("--worker-dir-path")
|
||||
.arg(worker_dir_path.as_ref().as_os_str())
|
||||
.args(&security_args)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.kill_on_drop(true)
|
||||
.spawn()?;
|
||||
@@ -306,16 +341,6 @@ impl fmt::Debug for WorkerHandle {
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the given path into a byte buffer.
|
||||
pub fn path_to_bytes(path: &Path) -> &[u8] {
|
||||
// Ideally, we take the `OsStr` of the path, send that and reconstruct this on the other side.
|
||||
// However, libstd doesn't provide us with such an option. There are crates out there that
|
||||
// allow for extraction of a path, but TBH it doesn't seem to be a real issue.
|
||||
//
|
||||
// However, should be there reports we can incorporate such a crate here.
|
||||
path.to_str().expect("non-UTF-8 path").as_bytes()
|
||||
}
|
||||
|
||||
/// Write some data prefixed by its length into `w`.
|
||||
pub async fn framed_send(w: &mut (impl AsyncWrite + Unpin), buf: &[u8]) -> io::Result<()> {
|
||||
let len_buf = buf.len().to_le_bytes();
|
||||
@@ -333,3 +358,84 @@ pub async fn framed_recv(r: &mut (impl AsyncRead + Unpin)) -> io::Result<Vec<u8>
|
||||
r.read_exact(&mut buf).await?;
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
/// A temporary worker dir that contains only files needed by the worker. The worker will change its
|
||||
/// root (the `/` directory) to this directory; it should have access to no other paths on its
|
||||
/// filesystem.
|
||||
///
|
||||
/// NOTE: This struct cleans up its associated directory when it is dropped. Therefore it should not
|
||||
/// implement `Clone`.
|
||||
///
|
||||
/// # File structure
|
||||
///
|
||||
/// The overall file structure for the PVF system is as follows. The `worker-dir-X`s are managed by
|
||||
/// this struct.
|
||||
///
|
||||
/// ```nocompile
|
||||
/// + /<cache_path>/
|
||||
/// - artifact-1
|
||||
/// - artifact-2
|
||||
/// - [...]
|
||||
/// - worker-dir-1/ (new `/` for worker-1)
|
||||
/// + socket (created by host)
|
||||
/// + tmp-artifact (created by host) (prepare-only)
|
||||
/// + artifact (link -> artifact-1) (created by host) (execute-only)
|
||||
/// - worker-dir-2/ (new `/` for worker-2)
|
||||
/// + [...]
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
pub struct WorkerDir {
|
||||
pub path: PathBuf,
|
||||
}
|
||||
|
||||
impl WorkerDir {
|
||||
/// Creates a new, empty worker dir with a random name in the given cache dir.
|
||||
pub async fn new(debug_id: &'static str, cache_dir: &Path) -> Result<Self, SpawnErr> {
|
||||
let prefix = format!("worker-dir-{}-", debug_id);
|
||||
let path = tmppath_in(&prefix, cache_dir).await.map_err(|_| SpawnErr::TmpPath)?;
|
||||
tokio::fs::create_dir(&path)
|
||||
.await
|
||||
.map_err(|err| SpawnErr::Fs(err.to_string()))?;
|
||||
Ok(Self { path })
|
||||
}
|
||||
}
|
||||
|
||||
// Try to clean up the temporary worker dir at the end of the worker's lifetime. It should be wiped
|
||||
// on startup, but we make a best effort not to leave it around.
|
||||
impl Drop for WorkerDir {
|
||||
fn drop(&mut self) {
|
||||
let _ = std::fs::remove_dir_all(&self.path);
|
||||
}
|
||||
}
|
||||
|
||||
// Not async since Rust has trouble with async recursion. There should be few files here anyway.
//
// TODO: A lingering malicious job can still access future files in this dir. See
// <https://github.com/paritytech/polkadot-sdk/issues/574> for how to fully secure this.
/// Clear the temporary worker dir without deleting it. Not deleting is important because the worker
/// has mounted its own separate filesystem here.
///
/// Should be called right after a job has finished. We don't want jobs to have access to
/// artifacts from previous jobs.
///
/// Entries that disappear while the directory is being walked are skipped and the walk continues
/// with the remaining entries. A single vanished entry must not leave the rest of the directory
/// uncleared while success is reported.
///
/// # Errors
///
/// Returns the first I/O error other than `NotFound`. A worker dir that does not exist (anymore)
/// is not an error.
pub fn clear_worker_dir_path(worker_dir_path: &Path) -> io::Result<()> {
	/// Turns a `NotFound` failure into success; whatever was to be removed is already gone.
	fn ignore_not_found(result: io::Result<()>) -> io::Result<()> {
		match result {
			Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(()),
			other => other,
		}
	}

	/// Recursively removes everything inside `path`, leaving `path` itself in place.
	fn remove_dir_contents(path: &Path) -> io::Result<()> {
		for entry in std::fs::read_dir(path)? {
			let entry = entry?;
			let entry_path = entry.path();

			// `file_type` does not follow symlinks, so a link to a directory is unlinked as a
			// file rather than descended into.
			let file_type = match entry.file_type() {
				Ok(file_type) => file_type,
				Err(err) if err.kind() == io::ErrorKind::NotFound => continue,
				Err(err) => return Err(err),
			};

			if file_type.is_dir() {
				ignore_not_found(remove_dir_contents(&entry_path))?;
				ignore_not_found(std::fs::remove_dir(&entry_path))?;
			} else {
				ignore_not_found(std::fs::remove_file(&entry_path))?;
			}
		}
		Ok(())
	}

	// Note the worker dir may not exist anymore because of the worker dying and being cleaned up.
	ignore_not_found(remove_dir_contents(worker_dir_path))
}
|
||||
|
||||
@@ -100,7 +100,7 @@ async fn execute_bad_block_on_parent() {
|
||||
|
||||
let host = TestHost::new();
|
||||
|
||||
let _ret = host
|
||||
let _err = host
|
||||
.validate_candidate(
|
||||
adder::wasm_binary_unwrap(),
|
||||
ValidationParams {
|
||||
@@ -145,3 +145,37 @@ async fn stress_spawn() {
|
||||
|
||||
futures::future::join_all((0..100).map(|_| execute(host.clone()))).await;
|
||||
}
|
||||
|
||||
// With one worker, run multiple execution jobs serially. They should not conflict.
|
||||
#[tokio::test]
|
||||
async fn execute_can_run_serially() {
|
||||
let host = std::sync::Arc::new(TestHost::new_with_config(|cfg| {
|
||||
cfg.execute_workers_max_num = 1;
|
||||
}));
|
||||
|
||||
async fn execute(host: std::sync::Arc<TestHost>) {
|
||||
let parent_head = HeadData { number: 0, parent_hash: [0; 32], post_state: hash_state(0) };
|
||||
let block_data = BlockData { state: 0, add: 512 };
|
||||
let ret = host
|
||||
.validate_candidate(
|
||||
adder::wasm_binary_unwrap(),
|
||||
ValidationParams {
|
||||
parent_head: GenericHeadData(parent_head.encode()),
|
||||
block_data: GenericBlockData(block_data.encode()),
|
||||
relay_parent_number: 1,
|
||||
relay_parent_storage_root: Default::default(),
|
||||
},
|
||||
Default::default(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let new_head = HeadData::decode(&mut &ret.head_data.0[..]).unwrap();
|
||||
|
||||
assert_eq!(new_head.number, 1);
|
||||
assert_eq!(new_head.parent_hash, parent_head.hash());
|
||||
assert_eq!(new_head.post_state, hash_state(512));
|
||||
}
|
||||
|
||||
futures::future::join_all((0..5).map(|_| execute(host.clone()))).await;
|
||||
}
|
||||
|
||||
@@ -18,8 +18,8 @@
|
||||
use assert_matches::assert_matches;
|
||||
use parity_scale_codec::Encode as _;
|
||||
use polkadot_node_core_pvf::{
|
||||
start, Config, InvalidCandidate, Metrics, PrepareJobKind, PvfPrepData, ValidationError,
|
||||
ValidationHost, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
start, Config, InvalidCandidate, Metrics, PrepareError, PrepareJobKind, PrepareStats,
|
||||
PvfPrepData, ValidationError, ValidationHost, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
};
|
||||
use polkadot_parachain_primitives::primitives::{BlockData, ValidationParams, ValidationResult};
|
||||
use polkadot_primitives::ExecutorParams;
|
||||
@@ -70,6 +70,33 @@ impl TestHost {
|
||||
Self { cache_dir, host: Mutex::new(host) }
|
||||
}
|
||||
|
||||
async fn precheck_pvf(
|
||||
&self,
|
||||
code: &[u8],
|
||||
executor_params: ExecutorParams,
|
||||
) -> Result<PrepareStats, PrepareError> {
|
||||
let (result_tx, result_rx) = futures::channel::oneshot::channel();
|
||||
|
||||
let code = sp_maybe_compressed_blob::decompress(code, 16 * 1024 * 1024)
|
||||
.expect("Compression works");
|
||||
|
||||
self.host
|
||||
.lock()
|
||||
.await
|
||||
.precheck_pvf(
|
||||
PvfPrepData::from_code(
|
||||
code.into(),
|
||||
executor_params,
|
||||
TEST_PREPARATION_TIMEOUT,
|
||||
PrepareJobKind::Prechecking,
|
||||
),
|
||||
result_tx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
result_rx.await.unwrap()
|
||||
}
|
||||
|
||||
async fn validate_candidate(
|
||||
&self,
|
||||
code: &[u8],
|
||||
@@ -291,8 +318,12 @@ async fn deleting_prepared_artifact_does_not_dispute() {
|
||||
{
|
||||
// Get the artifact path (asserting it exists).
|
||||
let mut cache_dir: Vec<_> = std::fs::read_dir(cache_dir).unwrap().collect();
|
||||
assert_eq!(cache_dir.len(), 1);
|
||||
let artifact_path = cache_dir.pop().unwrap().unwrap();
|
||||
// Should contain the artifact and the worker dir.
|
||||
assert_eq!(cache_dir.len(), 2);
|
||||
let mut artifact_path = cache_dir.pop().unwrap().unwrap();
|
||||
if artifact_path.path().is_dir() {
|
||||
artifact_path = cache_dir.pop().unwrap().unwrap();
|
||||
}
|
||||
|
||||
// Delete the artifact.
|
||||
std::fs::remove_file(artifact_path.path()).unwrap();
|
||||
@@ -317,3 +348,19 @@ async fn deleting_prepared_artifact_does_not_dispute() {
|
||||
r => panic!("{:?}", r),
|
||||
}
|
||||
}
|
||||
|
||||
// With one worker, run multiple preparation jobs serially. They should not conflict.
|
||||
#[tokio::test]
|
||||
async fn prepare_can_run_serially() {
|
||||
let host = TestHost::new_with_config(|cfg| {
|
||||
cfg.prepare_workers_hard_max_num = 1;
|
||||
});
|
||||
|
||||
let _stats = host
|
||||
.precheck_pvf(::adder::wasm_binary_unwrap(), Default::default())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Prepare a different wasm blob to prevent skipping work.
|
||||
let _stats = host.precheck_pvf(halt::wasm_binary_unwrap(), Default::default()).await.unwrap();
|
||||
}
|
||||
|
||||
@@ -14,8 +14,11 @@
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use polkadot_node_core_pvf::testing::{spawn_with_program_path, SpawnErr};
|
||||
use std::time::Duration;
|
||||
use polkadot_node_core_pvf::{
|
||||
testing::{spawn_with_program_path, SpawnErr},
|
||||
SecurityStatus,
|
||||
};
|
||||
use std::{env, time::Duration};
|
||||
|
||||
fn worker_path(name: &str) -> std::path::PathBuf {
|
||||
let mut worker_path = std::env::current_exe().unwrap();
|
||||
@@ -33,8 +36,10 @@ async fn spawn_immediate_exit() {
|
||||
let result = spawn_with_program_path(
|
||||
"integration-test",
|
||||
worker_path("polkadot-prepare-worker"),
|
||||
&env::temp_dir(),
|
||||
&["exit"],
|
||||
Duration::from_secs(2),
|
||||
SecurityStatus::default(),
|
||||
)
|
||||
.await;
|
||||
assert!(matches!(result, Err(SpawnErr::AcceptTimeout)));
|
||||
@@ -45,8 +50,10 @@ async fn spawn_timeout() {
|
||||
let result = spawn_with_program_path(
|
||||
"integration-test",
|
||||
worker_path("polkadot-execute-worker"),
|
||||
&env::temp_dir(),
|
||||
&["test-sleep"],
|
||||
Duration::from_secs(2),
|
||||
SecurityStatus::default(),
|
||||
)
|
||||
.await;
|
||||
assert!(matches!(result, Err(SpawnErr::AcceptTimeout)));
|
||||
@@ -57,8 +64,10 @@ async fn should_connect() {
|
||||
let _ = spawn_with_program_path(
|
||||
"integration-test",
|
||||
worker_path("polkadot-prepare-worker"),
|
||||
&env::temp_dir(),
|
||||
&["prepare-worker"],
|
||||
Duration::from_secs(2),
|
||||
SecurityStatus::default(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -121,10 +121,10 @@ So what are we actually worried about? Things that come to mind:
|
||||
|
||||
### Restricting file-system access
|
||||
|
||||
A basic security mechanism is to make sure that any thread directly interfacing
|
||||
with untrusted code does not have access to the file-system. This provides some
|
||||
protection against attackers accessing sensitive data or modifying data on the
|
||||
host machine.
|
||||
A basic security mechanism is to make sure that any process directly interfacing
|
||||
with untrusted code does not have unnecessary access to the file-system. This
|
||||
provides some protection against attackers accessing sensitive data or modifying
|
||||
data on the host machine.
|
||||
|
||||
### Clearing env vars
|
||||
|
||||
|
||||
Reference in New Issue
Block a user