mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-12 13:31:10 +00:00
PVF worker: switch on seccomp networking restrictions (#2221)
This commit is contained in:
@@ -77,7 +77,7 @@ pub enum PrepareError {
|
||||
#[codec(index = 9)]
|
||||
ClearWorkerDir(String),
|
||||
/// The preparation job process died, due to OOM, a seccomp violation, or some other factor.
|
||||
JobDied(String),
|
||||
JobDied { err: String, job_pid: i32 },
|
||||
#[codec(index = 10)]
|
||||
/// Some error occurred when interfacing with the kernel.
|
||||
#[codec(index = 11)]
|
||||
@@ -96,7 +96,7 @@ impl PrepareError {
|
||||
match self {
|
||||
Prevalidation(_) | Preparation(_) | JobError(_) | OutOfMemory => true,
|
||||
IoErr(_) |
|
||||
JobDied(_) |
|
||||
JobDied { .. } |
|
||||
CreateTmpFile(_) |
|
||||
RenameTmpFile { .. } |
|
||||
ClearWorkerDir(_) |
|
||||
@@ -119,7 +119,8 @@ impl fmt::Display for PrepareError {
|
||||
JobError(err) => write!(f, "panic: {}", err),
|
||||
TimedOut => write!(f, "prepare: timeout"),
|
||||
IoErr(err) => write!(f, "prepare: io error while receiving response: {}", err),
|
||||
JobDied(err) => write!(f, "prepare: prepare job died: {}", err),
|
||||
JobDied { err, job_pid } =>
|
||||
write!(f, "prepare: prepare job with pid {job_pid} died: {err}"),
|
||||
CreateTmpFile(err) => write!(f, "prepare: error creating tmp file: {}", err),
|
||||
RenameTmpFile { err, src, dest } =>
|
||||
write!(f, "prepare: error renaming tmp file ({:?} -> {:?}): {}", src, dest, err),
|
||||
|
||||
@@ -46,7 +46,7 @@ pub enum WorkerResponse {
|
||||
///
|
||||
/// We cannot treat this as an internal error because malicious code may have killed the job.
|
||||
/// We still retry it, because in the non-malicious case it is likely spurious.
|
||||
JobDied(String),
|
||||
JobDied { err: String, job_pid: i32 },
|
||||
/// An unexpected error occurred in the job process, e.g. failing to spawn a thread, panic,
|
||||
/// etc.
|
||||
///
|
||||
|
||||
@@ -53,7 +53,7 @@ pub struct SecurityStatus {
|
||||
pub can_enable_landlock: bool,
|
||||
/// Whether the seccomp features we use are fully available on this system.
|
||||
pub can_enable_seccomp: bool,
|
||||
// Whether we are able to unshare the user namespace and change the filesystem root.
|
||||
/// Whether we are able to unshare the user namespace and change the filesystem root.
|
||||
pub can_unshare_user_namespace_and_change_root: bool,
|
||||
}
|
||||
|
||||
|
||||
@@ -219,7 +219,7 @@ pub fn run_worker<F>(
|
||||
#[cfg_attr(not(target_os = "linux"), allow(unused_mut))] mut worker_dir_path: PathBuf,
|
||||
node_version: Option<&str>,
|
||||
worker_version: Option<&str>,
|
||||
#[cfg_attr(not(target_os = "linux"), allow(unused_variables))] security_status: &SecurityStatus,
|
||||
security_status: &SecurityStatus,
|
||||
mut event_loop: F,
|
||||
) where
|
||||
F: FnMut(UnixStream, PathBuf) -> io::Result<Never>,
|
||||
|
||||
@@ -67,11 +67,9 @@
|
||||
//!
|
||||
//! # Action on syscall violations
|
||||
//!
|
||||
//! On syscall violations we currently only log, to make sure this works correctly before enforcing.
|
||||
//!
|
||||
//! In the future, when a forbidden syscall is attempted we immediately kill the process in order to
|
||||
//! prevent the attacker from doing anything else. In execution, this will result in voting against
|
||||
//! the candidate.
|
||||
//! When a forbidden syscall is attempted we immediately kill the process in order to prevent the
|
||||
//! attacker from doing anything else. In execution, this will result in voting against the
|
||||
//! candidate.
|
||||
|
||||
use crate::{
|
||||
worker::{stringify_panic_payload, WorkerKind},
|
||||
@@ -82,7 +80,7 @@ use std::{collections::BTreeMap, path::Path};
|
||||
|
||||
/// The action to take on caught syscalls.
|
||||
#[cfg(not(test))]
|
||||
const CAUGHT_ACTION: SeccompAction = SeccompAction::Log;
|
||||
const CAUGHT_ACTION: SeccompAction = SeccompAction::KillProcess;
|
||||
/// Don't kill the process when testing.
|
||||
#[cfg(test)]
|
||||
const CAUGHT_ACTION: SeccompAction = SeccompAction::Errno(libc::EACCES as u32);
|
||||
|
||||
@@ -222,12 +222,6 @@ pub fn worker_entrypoint(
|
||||
},
|
||||
};
|
||||
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
"worker: sending response to host: {:?}",
|
||||
response
|
||||
);
|
||||
send_response(&mut stream, response)?;
|
||||
}
|
||||
},
|
||||
@@ -360,7 +354,7 @@ fn handle_child_process(
|
||||
/// - The response, either `Ok` or some error state.
|
||||
fn handle_parent_process(
|
||||
mut pipe_read: PipeReader,
|
||||
child: Pid,
|
||||
job_pid: Pid,
|
||||
worker_pid: u32,
|
||||
usage_before: Usage,
|
||||
timeout: Duration,
|
||||
@@ -373,10 +367,11 @@ fn handle_parent_process(
|
||||
// Should retry at any rate.
|
||||
.map_err(|err| io::Error::new(io::ErrorKind::Other, err.to_string()))?;
|
||||
|
||||
let status = nix::sys::wait::waitpid(child, None);
|
||||
let status = nix::sys::wait::waitpid(job_pid, None);
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
%job_pid,
|
||||
"execute worker received wait status from job: {:?}",
|
||||
status,
|
||||
);
|
||||
@@ -396,6 +391,7 @@ fn handle_parent_process(
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
%job_pid,
|
||||
"execute job took {}ms cpu time, exceeded execute timeout {}ms",
|
||||
cpu_tv.as_millis(),
|
||||
timeout.as_millis(),
|
||||
@@ -428,6 +424,7 @@ fn handle_parent_process(
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
%job_pid,
|
||||
"execute job error: {}",
|
||||
job_error,
|
||||
);
|
||||
@@ -443,15 +440,18 @@ fn handle_parent_process(
|
||||
//
|
||||
// The job gets SIGSYS on seccomp violations, but this signal may have been sent for some
|
||||
// other reason, so we still need to check for seccomp violations elsewhere.
|
||||
Ok(WaitStatus::Signaled(_pid, signal, _core_dump)) =>
|
||||
Ok(WorkerResponse::JobDied(format!("received signal: {signal:?}"))),
|
||||
Ok(WaitStatus::Signaled(_pid, signal, _core_dump)) => Ok(WorkerResponse::JobDied {
|
||||
err: format!("received signal: {signal:?}"),
|
||||
job_pid: job_pid.as_raw(),
|
||||
}),
|
||||
Err(errno) => Ok(internal_error_from_errno("waitpid", errno)),
|
||||
|
||||
// It is within an attacker's power to send an unexpected exit status. So we cannot treat
|
||||
// this as an internal error (which would make us abstain), but must vote against.
|
||||
Ok(unexpected_wait_status) => Ok(WorkerResponse::JobDied(format!(
|
||||
"unexpected status from wait: {unexpected_wait_status:?}"
|
||||
))),
|
||||
Ok(unexpected_wait_status) => Ok(WorkerResponse::JobDied {
|
||||
err: format!("unexpected status from wait: {unexpected_wait_status:?}"),
|
||||
job_pid: job_pid.as_raw(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -257,9 +257,9 @@ pub fn worker_entrypoint(
|
||||
|
||||
handle_parent_process(
|
||||
pipe_reader,
|
||||
worker_pid,
|
||||
child,
|
||||
temp_artifact_dest.clone(),
|
||||
worker_pid,
|
||||
usage_before,
|
||||
preparation_timeout,
|
||||
)
|
||||
@@ -506,9 +506,9 @@ fn handle_child_process(
|
||||
/// - If the child process timeout, it returns `PrepareError::TimedOut`.
|
||||
fn handle_parent_process(
|
||||
mut pipe_read: PipeReader,
|
||||
child: Pid,
|
||||
temp_artifact_dest: PathBuf,
|
||||
worker_pid: u32,
|
||||
job_pid: Pid,
|
||||
temp_artifact_dest: PathBuf,
|
||||
usage_before: Usage,
|
||||
timeout: Duration,
|
||||
) -> Result<PrepareWorkerSuccess, PrepareError> {
|
||||
@@ -518,10 +518,11 @@ fn handle_parent_process(
|
||||
.read_to_end(&mut received_data)
|
||||
.map_err(|err| PrepareError::IoErr(err.to_string()))?;
|
||||
|
||||
let status = nix::sys::wait::waitpid(child, None);
|
||||
let status = nix::sys::wait::waitpid(job_pid, None);
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
%job_pid,
|
||||
"prepare worker received wait status from job: {:?}",
|
||||
status,
|
||||
);
|
||||
@@ -539,6 +540,7 @@ fn handle_parent_process(
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
%job_pid,
|
||||
"prepare job took {}ms cpu time, exceeded prepare timeout {}ms",
|
||||
cpu_tv.as_millis(),
|
||||
timeout.as_millis(),
|
||||
@@ -573,6 +575,7 @@ fn handle_parent_process(
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
%job_pid,
|
||||
"worker: writing artifact to {}",
|
||||
temp_artifact_dest.display(),
|
||||
);
|
||||
@@ -593,15 +596,18 @@ fn handle_parent_process(
|
||||
//
|
||||
// The job gets SIGSYS on seccomp violations, but this signal may have been sent for some
|
||||
// other reason, so we still need to check for seccomp violations elsewhere.
|
||||
Ok(WaitStatus::Signaled(_pid, signal, _core_dump)) =>
|
||||
Err(PrepareError::JobDied(format!("received signal: {signal:?}"))),
|
||||
Ok(WaitStatus::Signaled(_pid, signal, _core_dump)) => Err(PrepareError::JobDied {
|
||||
err: format!("received signal: {signal:?}"),
|
||||
job_pid: job_pid.as_raw(),
|
||||
}),
|
||||
Err(errno) => Err(error_from_errno("waitpid", errno)),
|
||||
|
||||
// An attacker can make the child process return any exit status it wants. So we can treat
|
||||
// all unexpected cases the same way.
|
||||
Ok(unexpected_wait_status) => Err(PrepareError::JobDied(format!(
|
||||
"unexpected status from wait: {unexpected_wait_status:?}"
|
||||
))),
|
||||
Ok(unexpected_wait_status) => Err(PrepareError::JobDied {
|
||||
err: format!("unexpected status from wait: {unexpected_wait_status:?}"),
|
||||
job_pid: job_pid.as_raw(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ use polkadot_node_core_pvf_common::{
|
||||
execute::{Handshake, WorkerResponse},
|
||||
worker_dir, SecurityStatus,
|
||||
};
|
||||
use polkadot_parachain_primitives::primitives::ValidationResult;
|
||||
use polkadot_parachain_primitives::primitives::{ValidationCodeHash, ValidationResult};
|
||||
use polkadot_primitives::ExecutorParams;
|
||||
use std::{path::Path, time::Duration};
|
||||
use tokio::{io, net::UnixStream};
|
||||
@@ -156,6 +156,16 @@ pub async fn start_work(
|
||||
let response = futures::select! {
|
||||
response = recv_response(&mut stream).fuse() => {
|
||||
match response {
|
||||
Ok(response) =>
|
||||
handle_response(
|
||||
response,
|
||||
pid,
|
||||
&artifact.id.code_hash,
|
||||
&artifact_path,
|
||||
execution_timeout,
|
||||
audit_log_file
|
||||
)
|
||||
.await,
|
||||
Err(error) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
@@ -164,56 +174,9 @@ pub async fn start_work(
|
||||
?error,
|
||||
"failed to recv an execute response",
|
||||
);
|
||||
// The worker died. Check if it was due to a seccomp violation.
|
||||
//
|
||||
// NOTE: Log, but don't change the outcome. Not all validators may have
|
||||
// auditing enabled, so we don't want attackers to abuse a non-deterministic
|
||||
// outcome.
|
||||
for syscall in security::check_seccomp_violations_for_worker(audit_log_file, pid).await {
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
%syscall,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
?artifact_path,
|
||||
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
|
||||
);
|
||||
}
|
||||
|
||||
return Outcome::WorkerIntfErr
|
||||
},
|
||||
Ok(response) => {
|
||||
// Check if any syscall violations occurred during the job. For now this is
|
||||
// only informative, as we are not enforcing the seccomp policy yet.
|
||||
for syscall in security::check_seccomp_violations_for_worker(audit_log_file, pid).await {
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
%syscall,
|
||||
validation_code_hash = ?artifact.id.code_hash,
|
||||
?artifact_path,
|
||||
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
|
||||
);
|
||||
}
|
||||
|
||||
if let WorkerResponse::Ok{duration, ..} = response {
|
||||
if duration > execution_timeout {
|
||||
// The job didn't complete within the timeout.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"execute job took {}ms cpu time, exceeded execution timeout {}ms.",
|
||||
duration.as_millis(),
|
||||
execution_timeout.as_millis(),
|
||||
);
|
||||
|
||||
// Return a timeout error.
|
||||
return Outcome::HardTimeout
|
||||
}
|
||||
}
|
||||
|
||||
response
|
||||
},
|
||||
}
|
||||
},
|
||||
_ = Delay::new(timeout).fuse() => {
|
||||
@@ -238,7 +201,7 @@ pub async fn start_work(
|
||||
idle_worker: IdleWorker { stream, pid, worker_dir },
|
||||
},
|
||||
WorkerResponse::JobTimedOut => Outcome::HardTimeout,
|
||||
WorkerResponse::JobDied(err) => Outcome::JobDied { err },
|
||||
WorkerResponse::JobDied { err, job_pid: _ } => Outcome::JobDied { err },
|
||||
WorkerResponse::JobError(err) => Outcome::JobError { err },
|
||||
|
||||
WorkerResponse::InternalError(err) => Outcome::InternalError { err },
|
||||
@@ -247,6 +210,56 @@ pub async fn start_work(
|
||||
.await
|
||||
}
|
||||
|
||||
/// Handles the case where we successfully received response bytes on the host from the child.
|
||||
///
|
||||
/// Here we know the artifact exists, but is still located in a temporary file which will be cleared
|
||||
/// by [`with_worker_dir_setup`].
|
||||
async fn handle_response(
|
||||
response: WorkerResponse,
|
||||
worker_pid: u32,
|
||||
validation_code_hash: &ValidationCodeHash,
|
||||
artifact_path: &Path,
|
||||
execution_timeout: Duration,
|
||||
audit_log_file: Option<security::AuditLogFile>,
|
||||
) -> WorkerResponse {
|
||||
if let WorkerResponse::Ok { duration, .. } = response {
|
||||
if duration > execution_timeout {
|
||||
// The job didn't complete within the timeout.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid,
|
||||
"execute job took {}ms cpu time, exceeded execution timeout {}ms.",
|
||||
duration.as_millis(),
|
||||
execution_timeout.as_millis(),
|
||||
);
|
||||
|
||||
// Return a timeout error.
|
||||
return WorkerResponse::JobTimedOut
|
||||
}
|
||||
}
|
||||
|
||||
if let WorkerResponse::JobDied { err: _, job_pid } = response {
|
||||
// The job died. Check if it was due to a seccomp violation.
|
||||
//
|
||||
// NOTE: Log, but don't change the outcome. Not all validators may have
|
||||
// auditing enabled, so we don't want attackers to abuse a non-deterministic
|
||||
// outcome.
|
||||
for syscall in security::check_seccomp_violations_for_job(audit_log_file, job_pid).await {
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
%job_pid,
|
||||
%syscall,
|
||||
?validation_code_hash,
|
||||
?artifact_path,
|
||||
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
response
|
||||
}
|
||||
|
||||
/// Create a temporary file for an artifact in the worker cache, execute the given future/closure
|
||||
/// passing the file path in, and clean up the worker cache.
|
||||
///
|
||||
|
||||
@@ -388,14 +388,14 @@ fn handle_mux(
|
||||
Ok(())
|
||||
},
|
||||
// The worker might still be usable, but we kill it just in case.
|
||||
Outcome::JobDied(err) => {
|
||||
Outcome::JobDied { err, job_pid } => {
|
||||
if attempt_retire(metrics, spawned, worker) {
|
||||
reply(
|
||||
from_pool,
|
||||
FromPool::Concluded {
|
||||
worker,
|
||||
rip: true,
|
||||
result: Err(PrepareError::JobDied(err)),
|
||||
result: Err(PrepareError::JobDied { err, job_pid }),
|
||||
},
|
||||
)?;
|
||||
}
|
||||
|
||||
@@ -104,7 +104,7 @@ pub enum Outcome {
|
||||
/// The preparation job process died, due to OOM, a seccomp violation, or some other factor.
|
||||
///
|
||||
/// The worker might still be usable, but we kill it just in case.
|
||||
JobDied(String),
|
||||
JobDied { err: String, job_pid: i32 },
|
||||
}
|
||||
|
||||
/// Given the idle token of a worker and parameters of work, communicates with the worker and
|
||||
@@ -160,19 +160,7 @@ pub async fn start_work(
|
||||
|
||||
match result {
|
||||
// Received bytes from worker within the time limit.
|
||||
Ok(Ok(prepare_worker_result)) => {
|
||||
// Check if any syscall violations occurred during the job. For now this is only
|
||||
// informative, as we are not enforcing the seccomp policy yet.
|
||||
for syscall in security::check_seccomp_violations_for_worker(audit_log_file, pid).await {
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
%syscall,
|
||||
?pvf,
|
||||
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
|
||||
);
|
||||
}
|
||||
|
||||
Ok(Ok(prepare_worker_result)) =>
|
||||
handle_response(
|
||||
metrics,
|
||||
IdleWorker { stream, pid, worker_dir },
|
||||
@@ -182,9 +170,9 @@ pub async fn start_work(
|
||||
&pvf,
|
||||
&cache_path,
|
||||
preparation_timeout,
|
||||
audit_log_file,
|
||||
)
|
||||
.await
|
||||
},
|
||||
.await,
|
||||
Ok(Err(err)) => {
|
||||
// Communication error within the time limit.
|
||||
gum::warn!(
|
||||
@@ -221,15 +209,36 @@ async fn handle_response(
|
||||
worker_pid: u32,
|
||||
tmp_file: PathBuf,
|
||||
pvf: &PvfPrepData,
|
||||
cache_path: &PathBuf,
|
||||
cache_path: &Path,
|
||||
preparation_timeout: Duration,
|
||||
audit_log_file: Option<security::AuditLogFile>,
|
||||
) -> Outcome {
|
||||
let PrepareWorkerSuccess { checksum, stats: PrepareStats { cpu_time_elapsed, memory_stats } } =
|
||||
match result.clone() {
|
||||
Ok(result) => result,
|
||||
// Timed out on the child. This should already be logged by the child.
|
||||
Err(PrepareError::TimedOut) => return Outcome::TimedOut,
|
||||
Err(PrepareError::JobDied(err)) => return Outcome::JobDied(err),
|
||||
Err(PrepareError::JobDied { err, job_pid }) => {
|
||||
// The job died. Check if it was due to a seccomp violation.
|
||||
//
|
||||
// NOTE: Log, but don't change the outcome. Not all validators may have
|
||||
// auditing enabled, so we don't want attackers to abuse a non-deterministic
|
||||
// outcome.
|
||||
for syscall in
|
||||
security::check_seccomp_violations_for_job(audit_log_file, job_pid).await
|
||||
{
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
%job_pid,
|
||||
%syscall,
|
||||
?pvf,
|
||||
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
|
||||
);
|
||||
}
|
||||
|
||||
return Outcome::JobDied { err, job_pid }
|
||||
},
|
||||
Err(PrepareError::OutOfMemory) => return Outcome::OutOfMemory,
|
||||
Err(err) => return Outcome::Concluded { worker, result: Err(err) },
|
||||
};
|
||||
|
||||
@@ -178,9 +178,15 @@ async fn check_can_unshare_user_namespace_and_change_root(
|
||||
let stderr = std::str::from_utf8(&output.stderr)
|
||||
.expect("child process writes a UTF-8 string to stderr; qed")
|
||||
.trim();
|
||||
Err(SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(
|
||||
format!("not available: {}", stderr)
|
||||
))
|
||||
if stderr.is_empty() {
|
||||
Err(SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(
|
||||
"not available".into()
|
||||
))
|
||||
} else {
|
||||
Err(SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(
|
||||
format!("not available: {}", stderr)
|
||||
))
|
||||
}
|
||||
},
|
||||
Err(err) =>
|
||||
Err(SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(
|
||||
@@ -208,16 +214,25 @@ async fn check_landlock(
|
||||
if #[cfg(target_os = "linux")] {
|
||||
match tokio::process::Command::new(prepare_worker_program_path)
|
||||
.arg("--check-can-enable-landlock")
|
||||
.status()
|
||||
.output()
|
||||
.await
|
||||
{
|
||||
Ok(status) if status.success() => Ok(()),
|
||||
Ok(_status) => {
|
||||
Ok(output) if output.status.success() => Ok(()),
|
||||
Ok(output) => {
|
||||
let abi =
|
||||
polkadot_node_core_pvf_common::worker::security::landlock::LANDLOCK_ABI as u8;
|
||||
Err(SecureModeError::CannotEnableLandlock(
|
||||
format!("landlock ABI {} not available", abi)
|
||||
))
|
||||
let stderr = std::str::from_utf8(&output.stderr)
|
||||
.expect("child process writes a UTF-8 string to stderr; qed")
|
||||
.trim();
|
||||
if stderr.is_empty() {
|
||||
Err(SecureModeError::CannotEnableLandlock(
|
||||
format!("landlock ABI {} not available", abi)
|
||||
))
|
||||
} else {
|
||||
Err(SecureModeError::CannotEnableLandlock(
|
||||
format!("not available: {}", stderr)
|
||||
))
|
||||
}
|
||||
},
|
||||
Err(err) =>
|
||||
Err(SecureModeError::CannotEnableLandlock(
|
||||
@@ -238,7 +253,7 @@ async fn check_landlock(
|
||||
/// to running the check in a worker, we try it... in a worker. The expected return status is 0 on
|
||||
/// success and -1 on failure.
|
||||
async fn check_seccomp(
|
||||
#[cfg_attr(not(target_os = "linux"), allow(unused_variables))]
|
||||
#[cfg_attr(not(all(target_os = "linux", target_arch = "x86_64")), allow(unused_variables))]
|
||||
prepare_worker_program_path: &Path,
|
||||
) -> SecureModeResult {
|
||||
cfg_if::cfg_if! {
|
||||
@@ -247,14 +262,24 @@ async fn check_seccomp(
|
||||
if #[cfg(target_arch = "x86_64")] {
|
||||
match tokio::process::Command::new(prepare_worker_program_path)
|
||||
.arg("--check-can-enable-seccomp")
|
||||
.status()
|
||||
.output()
|
||||
.await
|
||||
{
|
||||
Ok(status) if status.success() => Ok(()),
|
||||
Ok(_status) =>
|
||||
Err(SecureModeError::CannotEnableSeccomp(
|
||||
"not available".into()
|
||||
)),
|
||||
Ok(output) if output.status.success() => Ok(()),
|
||||
Ok(output) => {
|
||||
let stderr = std::str::from_utf8(&output.stderr)
|
||||
.expect("child process writes a UTF-8 string to stderr; qed")
|
||||
.trim();
|
||||
if stderr.is_empty() {
|
||||
Err(SecureModeError::CannotEnableSeccomp(
|
||||
"not available".into()
|
||||
))
|
||||
} else {
|
||||
Err(SecureModeError::CannotEnableSeccomp(
|
||||
format!("not available: {}", stderr)
|
||||
))
|
||||
}
|
||||
},
|
||||
Err(err) =>
|
||||
Err(SecureModeError::CannotEnableSeccomp(
|
||||
format!("could not start child process: {}", err)
|
||||
@@ -320,25 +345,25 @@ impl AuditLogFile {
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a seccomp violation occurred for the given worker. As the syslog may be in a different
|
||||
/// location, or seccomp auditing may be disabled, this function provides a best-effort attempt
|
||||
/// only.
|
||||
/// Check if a seccomp violation occurred for the given job process. As the syslog may be in a
|
||||
/// different location, or seccomp auditing may be disabled, this function provides a best-effort
|
||||
/// attempt only.
|
||||
///
|
||||
/// The `audit_log_file` must have been obtained before the job started. It only allows reading
|
||||
/// entries that were written since it was obtained, so that we do not consider events from previous
|
||||
/// processes with the same pid. This can still be racy, but it's unlikely and fine for a
|
||||
/// best-effort attempt.
|
||||
pub async fn check_seccomp_violations_for_worker(
|
||||
pub async fn check_seccomp_violations_for_job(
|
||||
audit_log_file: Option<AuditLogFile>,
|
||||
worker_pid: u32,
|
||||
job_pid: i32,
|
||||
) -> Vec<u32> {
|
||||
let audit_event_pid_field = format!("pid={worker_pid}");
|
||||
let audit_event_pid_field = format!("pid={job_pid}");
|
||||
|
||||
let audit_log_file = match audit_log_file {
|
||||
Some(file) => {
|
||||
gum::debug!(
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
%job_pid,
|
||||
audit_log_path = ?file.path,
|
||||
"checking audit log for seccomp violations",
|
||||
);
|
||||
@@ -347,7 +372,7 @@ pub async fn check_seccomp_violations_for_worker(
|
||||
None => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
%job_pid,
|
||||
"could not open either {AUDIT_LOG_PATH} or {SYSLOG_PATH} for reading audit logs"
|
||||
);
|
||||
return vec![]
|
||||
|
||||
@@ -248,7 +248,7 @@ rusty_fork_test! {
|
||||
// Note that we get a more specific error if the job died than if the whole worker died.
|
||||
assert_matches!(
|
||||
result,
|
||||
Err(PrepareError::JobDied(err)) if err == "received signal: SIGKILL"
|
||||
Err(PrepareError::JobDied{ err, job_pid: _ }) if err == "received signal: SIGKILL"
|
||||
);
|
||||
})
|
||||
}
|
||||
|
||||
@@ -38,16 +38,22 @@ getting backed and honest backers getting slashed.
|
||||
We currently know of the following specific cases that will lead to a retried
|
||||
execution request:
|
||||
|
||||
1. **OOM:** The host might have been temporarily low on memory due to other
|
||||
processes running on the same machine. **NOTE:** This case will lead to
|
||||
voting against the candidate (and possibly a dispute) if the retry is still
|
||||
not successful.
|
||||
2. **Artifact missing:** The prepared artifact might have been deleted due to
|
||||
1. **OOM:** We have memory limits to try to prevent attackers from exhausting
|
||||
host memory. If the memory limit is hit, we kill the job process and retry
|
||||
the job. Alternatively, the host might have been temporarily low on memory
|
||||
due to other processes running on the same machine. **NOTE:** This case will
|
||||
lead to voting against the candidate (and possibly a dispute) if the retry is
|
||||
still not successful.
|
||||
2. **Syscall violations:** If the job attempts a system call that is blocked by
|
||||
the sandbox's security policy, the job process is immediately killed and we
|
||||
retry. **NOTE:** In the future, if we have a proper way to detect that the
|
||||
job died due to a security violation, it might make sense not to retry in
|
||||
this case.
|
||||
3. **Artifact missing:** The prepared artifact might have been deleted due to
|
||||
operator error or some bug in the system.
|
||||
3. **Job errors:** For example, the worker thread panicked for some
|
||||
indeterminate reason, which may or may not be independent of the candidate or
|
||||
PVF.
|
||||
4. **Internal errors:** See "Internal Errors" section. In this case, after the
|
||||
4. **Job errors:** For example, the job process panicked for some indeterminate
|
||||
reason, which may or may not be independent of the candidate or PVF.
|
||||
5. **Internal errors:** See "Internal Errors" section. In this case, after the
|
||||
retry we abstain from voting.
|
||||
|
||||
### Preparation timeouts
|
||||
@@ -159,16 +165,14 @@ data on the host machine.
|
||||
|
||||
*Currently this is only supported on Linux.*
|
||||
|
||||
<!-- TODO: Uncomment when this has been enabled. -->
|
||||
### Restricting networking
|
||||
|
||||
<!-- ### Restricting networking -->
|
||||
We also disable networking on PVF threads by disabling certain syscalls, such as
|
||||
the creation of sockets. This prevents attackers from either downloading
|
||||
payloads or communicating sensitive data from the validator's machine to the
|
||||
outside world.
|
||||
|
||||
<!-- We also disable networking on PVF threads by disabling certain syscalls, such as -->
|
||||
<!-- the creation of sockets. This prevents attackers from either downloading -->
|
||||
<!-- payloads or communicating sensitive data from the validator's machine to the -->
|
||||
<!-- outside world. -->
|
||||
|
||||
<!-- *Currently this is only supported on Linux.* -->
|
||||
*Currently this is only supported on Linux.*
|
||||
|
||||
### Clearing env vars
|
||||
|
||||
|
||||
Reference in New Issue
Block a user