PVF worker: switch on seccomp networking restrictions (#2221)

This commit is contained in:
Marcin S
2023-11-21 12:52:46 +01:00
committed by GitHub
parent 40afc77c4e
commit 552be4800d
13 changed files with 202 additions and 146 deletions
@@ -33,7 +33,7 @@ use polkadot_node_core_pvf_common::{
execute::{Handshake, WorkerResponse},
worker_dir, SecurityStatus,
};
use polkadot_parachain_primitives::primitives::ValidationResult;
use polkadot_parachain_primitives::primitives::{ValidationCodeHash, ValidationResult};
use polkadot_primitives::ExecutorParams;
use std::{path::Path, time::Duration};
use tokio::{io, net::UnixStream};
@@ -156,6 +156,16 @@ pub async fn start_work(
let response = futures::select! {
response = recv_response(&mut stream).fuse() => {
match response {
Ok(response) =>
handle_response(
response,
pid,
&artifact.id.code_hash,
&artifact_path,
execution_timeout,
audit_log_file
)
.await,
Err(error) => {
gum::warn!(
target: LOG_TARGET,
@@ -164,56 +174,9 @@ pub async fn start_work(
?error,
"failed to recv an execute response",
);
// The worker died. Check if it was due to a seccomp violation.
//
// NOTE: Log, but don't change the outcome. Not all validators may have
// auditing enabled, so we don't want attackers to abuse a non-deterministic
// outcome.
for syscall in security::check_seccomp_violations_for_worker(audit_log_file, pid).await {
gum::error!(
target: LOG_TARGET,
worker_pid = %pid,
%syscall,
validation_code_hash = ?artifact.id.code_hash,
?artifact_path,
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
);
}
return Outcome::WorkerIntfErr
},
Ok(response) => {
// Check if any syscall violations occurred during the job. For now this is
// only informative, as we are not enforcing the seccomp policy yet.
for syscall in security::check_seccomp_violations_for_worker(audit_log_file, pid).await {
gum::error!(
target: LOG_TARGET,
worker_pid = %pid,
%syscall,
validation_code_hash = ?artifact.id.code_hash,
?artifact_path,
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
);
}
if let WorkerResponse::Ok{duration, ..} = response {
if duration > execution_timeout {
// The job didn't complete within the timeout.
gum::warn!(
target: LOG_TARGET,
worker_pid = %pid,
"execute job took {}ms cpu time, exceeded execution timeout {}ms.",
duration.as_millis(),
execution_timeout.as_millis(),
);
// Return a timeout error.
return Outcome::HardTimeout
}
}
response
},
}
},
_ = Delay::new(timeout).fuse() => {
@@ -238,7 +201,7 @@ pub async fn start_work(
idle_worker: IdleWorker { stream, pid, worker_dir },
},
WorkerResponse::JobTimedOut => Outcome::HardTimeout,
WorkerResponse::JobDied(err) => Outcome::JobDied { err },
WorkerResponse::JobDied { err, job_pid: _ } => Outcome::JobDied { err },
WorkerResponse::JobError(err) => Outcome::JobError { err },
WorkerResponse::InternalError(err) => Outcome::InternalError { err },
@@ -247,6 +210,56 @@ pub async fn start_work(
.await
}
/// Post-processes a response that the host successfully received from the execute worker.
///
/// Two kinds of response get special treatment; anything else is handed back untouched:
///
/// - An `Ok` response whose reported `duration` is greater than `execution_timeout` is logged
///   and replaced by [`WorkerResponse::JobTimedOut`].
/// - A `JobDied` response triggers a scan of `audit_log_file` for seccomp violations recorded
///   for the job's pid. Every violation found is logged together with `worker_pid`,
///   `validation_code_hash` and `artifact_path`; the response itself is returned as-is.
async fn handle_response(
	response: WorkerResponse,
	worker_pid: u32,
	validation_code_hash: &ValidationCodeHash,
	artifact_path: &Path,
	execution_timeout: Duration,
	audit_log_file: Option<security::AuditLogFile>,
) -> WorkerResponse {
	// Only `Copy` fields are bound by the patterns below, so `response` is never moved by the
	// match itself and can still be returned from the arms.
	match response {
		// The worker reported success, but the job overran its time budget: treat it as a
		// timeout rather than a success.
		WorkerResponse::Ok { duration, .. } if duration > execution_timeout => {
			gum::warn!(
				target: LOG_TARGET,
				worker_pid,
				"execute job took {}ms cpu time, exceeded execution timeout {}ms.",
				duration.as_millis(),
				execution_timeout.as_millis(),
			);
			WorkerResponse::JobTimedOut
		},
		// The job process died; a seccomp violation is one possible cause.
		//
		// NOTE: The findings are logged only and never alter the outcome. Auditing may not be
		// enabled on every validator, and an outcome that depends on it would be
		// non-deterministic and therefore open to abuse by attackers.
		WorkerResponse::JobDied { job_pid, .. } => {
			let violations =
				security::check_seccomp_violations_for_job(audit_log_file, job_pid).await;
			for syscall in violations {
				gum::error!(
					target: LOG_TARGET,
					%worker_pid,
					%job_pid,
					%syscall,
					?validation_code_hash,
					?artifact_path,
					"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
				);
			}
			response
		},
		// Every other response needs no extra handling.
		_ => response,
	}
}
/// Create a temporary file for an artifact in the worker cache, execute the given future/closure
/// passing the file path in, and clean up the worker cache.
///
+2 -2
View File
@@ -388,14 +388,14 @@ fn handle_mux(
Ok(())
},
// The worker might still be usable, but we kill it just in case.
Outcome::JobDied(err) => {
Outcome::JobDied { err, job_pid } => {
if attempt_retire(metrics, spawned, worker) {
reply(
from_pool,
FromPool::Concluded {
worker,
rip: true,
result: Err(PrepareError::JobDied(err)),
result: Err(PrepareError::JobDied { err, job_pid }),
},
)?;
}
@@ -104,7 +104,7 @@ pub enum Outcome {
/// The preparation job process died, due to OOM, a seccomp violation, or some other factor.
///
/// The worker might still be usable, but we kill it just in case.
JobDied(String),
JobDied { err: String, job_pid: i32 },
}
/// Given the idle token of a worker and parameters of work, communicates with the worker and
@@ -160,19 +160,7 @@ pub async fn start_work(
match result {
// Received bytes from worker within the time limit.
Ok(Ok(prepare_worker_result)) => {
// Check if any syscall violations occurred during the job. For now this is only
// informative, as we are not enforcing the seccomp policy yet.
for syscall in security::check_seccomp_violations_for_worker(audit_log_file, pid).await {
gum::error!(
target: LOG_TARGET,
worker_pid = %pid,
%syscall,
?pvf,
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
);
}
Ok(Ok(prepare_worker_result)) =>
handle_response(
metrics,
IdleWorker { stream, pid, worker_dir },
@@ -182,9 +170,9 @@ pub async fn start_work(
&pvf,
&cache_path,
preparation_timeout,
audit_log_file,
)
.await
},
.await,
Ok(Err(err)) => {
// Communication error within the time limit.
gum::warn!(
@@ -221,15 +209,36 @@ async fn handle_response(
worker_pid: u32,
tmp_file: PathBuf,
pvf: &PvfPrepData,
cache_path: &PathBuf,
cache_path: &Path,
preparation_timeout: Duration,
audit_log_file: Option<security::AuditLogFile>,
) -> Outcome {
let PrepareWorkerSuccess { checksum, stats: PrepareStats { cpu_time_elapsed, memory_stats } } =
match result.clone() {
Ok(result) => result,
// Timed out on the child. This should already be logged by the child.
Err(PrepareError::TimedOut) => return Outcome::TimedOut,
Err(PrepareError::JobDied(err)) => return Outcome::JobDied(err),
Err(PrepareError::JobDied { err, job_pid }) => {
// The job died. Check if it was due to a seccomp violation.
//
// NOTE: Log, but don't change the outcome. Not all validators may have
// auditing enabled, so we don't want attackers to abuse a non-deterministic
// outcome.
for syscall in
security::check_seccomp_violations_for_job(audit_log_file, job_pid).await
{
gum::error!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
%syscall,
?pvf,
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
);
}
return Outcome::JobDied { err, job_pid }
},
Err(PrepareError::OutOfMemory) => return Outcome::OutOfMemory,
Err(err) => return Outcome::Concluded { worker, result: Err(err) },
};
+50 -25
View File
@@ -178,9 +178,15 @@ async fn check_can_unshare_user_namespace_and_change_root(
let stderr = std::str::from_utf8(&output.stderr)
.expect("child process writes a UTF-8 string to stderr; qed")
.trim();
Err(SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(
format!("not available: {}", stderr)
))
if stderr.is_empty() {
Err(SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(
"not available".into()
))
} else {
Err(SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(
format!("not available: {}", stderr)
))
}
},
Err(err) =>
Err(SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(
@@ -208,16 +214,25 @@ async fn check_landlock(
if #[cfg(target_os = "linux")] {
match tokio::process::Command::new(prepare_worker_program_path)
.arg("--check-can-enable-landlock")
.status()
.output()
.await
{
Ok(status) if status.success() => Ok(()),
Ok(_status) => {
Ok(output) if output.status.success() => Ok(()),
Ok(output) => {
let abi =
polkadot_node_core_pvf_common::worker::security::landlock::LANDLOCK_ABI as u8;
Err(SecureModeError::CannotEnableLandlock(
format!("landlock ABI {} not available", abi)
))
let stderr = std::str::from_utf8(&output.stderr)
.expect("child process writes a UTF-8 string to stderr; qed")
.trim();
if stderr.is_empty() {
Err(SecureModeError::CannotEnableLandlock(
format!("landlock ABI {} not available", abi)
))
} else {
Err(SecureModeError::CannotEnableLandlock(
format!("not available: {}", stderr)
))
}
},
Err(err) =>
Err(SecureModeError::CannotEnableLandlock(
@@ -238,7 +253,7 @@ async fn check_landlock(
/// to running the check in a worker, we try it... in a worker. The expected return status is 0 on
/// success and -1 on failure.
async fn check_seccomp(
#[cfg_attr(not(target_os = "linux"), allow(unused_variables))]
#[cfg_attr(not(all(target_os = "linux", target_arch = "x86_64")), allow(unused_variables))]
prepare_worker_program_path: &Path,
) -> SecureModeResult {
cfg_if::cfg_if! {
@@ -247,14 +262,24 @@ async fn check_seccomp(
if #[cfg(target_arch = "x86_64")] {
match tokio::process::Command::new(prepare_worker_program_path)
.arg("--check-can-enable-seccomp")
.status()
.output()
.await
{
Ok(status) if status.success() => Ok(()),
Ok(_status) =>
Err(SecureModeError::CannotEnableSeccomp(
"not available".into()
)),
Ok(output) if output.status.success() => Ok(()),
Ok(output) => {
let stderr = std::str::from_utf8(&output.stderr)
.expect("child process writes a UTF-8 string to stderr; qed")
.trim();
if stderr.is_empty() {
Err(SecureModeError::CannotEnableSeccomp(
"not available".into()
))
} else {
Err(SecureModeError::CannotEnableSeccomp(
format!("not available: {}", stderr)
))
}
},
Err(err) =>
Err(SecureModeError::CannotEnableSeccomp(
format!("could not start child process: {}", err)
@@ -320,25 +345,25 @@ impl AuditLogFile {
}
}
/// Check if a seccomp violation occurred for the given worker. As the syslog may be in a different
/// location, or seccomp auditing may be disabled, this function provides a best-effort attempt
/// only.
/// Check if a seccomp violation occurred for the given job process. As the syslog may be in a
/// different location, or seccomp auditing may be disabled, this function provides a best-effort
/// attempt only.
///
/// The `audit_log_file` must have been obtained before the job started. It only allows reading
/// entries that were written since it was obtained, so that we do not consider events from previous
/// processes with the same pid. This can still be racy, but it's unlikely and fine for a
/// best-effort attempt.
pub async fn check_seccomp_violations_for_worker(
pub async fn check_seccomp_violations_for_job(
audit_log_file: Option<AuditLogFile>,
worker_pid: u32,
job_pid: i32,
) -> Vec<u32> {
let audit_event_pid_field = format!("pid={worker_pid}");
let audit_event_pid_field = format!("pid={job_pid}");
let audit_log_file = match audit_log_file {
Some(file) => {
gum::debug!(
gum::trace!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
audit_log_path = ?file.path,
"checking audit log for seccomp violations",
);
@@ -347,7 +372,7 @@ pub async fn check_seccomp_violations_for_worker(
None => {
gum::warn!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
"could not open either {AUDIT_LOG_PATH} or {SYSLOG_PATH} for reading audit logs"
);
return vec![]