PVF: Add Secure Validator Mode (#2486)

Co-authored-by: Javier Viola <javier@parity.io>
This commit is contained in:
Marcin S
2023-12-05 13:32:56 +01:00
committed by GitHub
parent f240e02557
commit c046a9d5ed
31 changed files with 690 additions and 469 deletions
+11 -1
View File
@@ -33,6 +33,7 @@ const LOG_TARGET: &str = "parachain::pvf-common";
pub const RUNTIME_VERSION: &str = env!("SUBSTRATE_WASMTIME_VERSION");
use parity_scale_codec::{Decode, Encode};
use std::{
io::{self, Read, Write},
mem,
@@ -47,8 +48,11 @@ pub mod tests {
}
/// Status of security features on the current system.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[derive(Debug, Clone, Default, PartialEq, Eq, Encode, Decode)]
pub struct SecurityStatus {
/// Whether Secure Validator Mode is enabled. This mode enforces that all required security
/// features are present. All features are enabled on a best-effort basis regardless.
pub secure_validator_mode: bool,
/// Whether the landlock features we use are fully available on this system.
pub can_enable_landlock: bool,
/// Whether the seccomp features we use are fully available on this system.
@@ -57,6 +61,12 @@ pub struct SecurityStatus {
pub can_unshare_user_namespace_and_change_root: bool,
}
/// A handshake with information for the worker.
///
/// The worker receives this over its socket right after connecting and before it enables any
/// security features, and decodes it with `parity_scale_codec`.
#[derive(Debug, Encode, Decode)]
pub struct WorkerHandshake {
/// Status of the security features. The worker consults this to decide which features to
/// enable and whether a failure to enable one must shut the worker down.
pub security_status: SecurityStatus,
}
/// Write some data prefixed by its length into `w`. Sync version of `framed_send` to avoid
/// dependency on tokio.
pub fn framed_send_blocking(w: &mut (impl Write + Unpin), buf: &[u8]) -> io::Result<()> {
+113 -119
View File
@@ -18,9 +18,10 @@
pub mod security;
use crate::{SecurityStatus, LOG_TARGET};
use crate::{framed_recv_blocking, WorkerHandshake, LOG_TARGET};
use cpu_time::ProcessTime;
use futures::never::Never;
use parity_scale_codec::Decode;
use std::{
any::Any,
fmt, io,
@@ -50,8 +51,6 @@ macro_rules! decl_worker_main {
#[cfg(target_os = "linux")]
use $crate::worker::security;
// TODO: Remove this dependency, and `pub use sp_tracing` in `lib.rs`.
// See <https://github.com/paritytech/polkadot/issues/7117>.
$crate::sp_tracing::try_init_simple();
let worker_pid = std::process::id();
@@ -79,14 +78,26 @@ macro_rules! decl_worker_main {
"--check-can-enable-landlock" => {
#[cfg(target_os = "linux")]
let status = if security::landlock::check_is_fully_enabled() { 0 } else { -1 };
let status = if let Err(err) = security::landlock::check_is_fully_enabled() {
// Write the error to stderr, log it on the host-side.
eprintln!("{}", err);
-1
} else {
0
};
#[cfg(not(target_os = "linux"))]
let status = -1;
std::process::exit(status)
},
"--check-can-enable-seccomp" => {
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
let status = if security::seccomp::check_is_fully_enabled() { 0 } else { -1 };
let status = if let Err(err) = security::seccomp::check_is_fully_enabled() {
// Write the error to stderr, log it on the host-side.
eprintln!("{}", err);
-1
} else {
0
};
#[cfg(not(all(target_os = "linux", target_arch = "x86_64")))]
let status = -1;
std::process::exit(status)
@@ -95,11 +106,9 @@ macro_rules! decl_worker_main {
#[cfg(target_os = "linux")]
let cache_path_tempdir = std::path::Path::new(&args[2]);
#[cfg(target_os = "linux")]
let status = if let Err(err) = security::unshare_user_namespace_and_change_root(
$crate::worker::WorkerKind::CheckPivotRoot,
worker_pid,
&cache_path_tempdir,
) {
let status = if let Err(err) =
security::change_root::check_is_fully_enabled(&cache_path_tempdir)
{
// Write the error to stderr, log it on the host-side.
eprintln!("{}", err);
-1
@@ -107,11 +116,7 @@ macro_rules! decl_worker_main {
0
};
#[cfg(not(target_os = "linux"))]
let status = {
// Write the error to stderr, log it on the host-side.
eprintln!("not available on macos");
-1
};
let status = -1;
std::process::exit(status)
},
@@ -134,9 +139,6 @@ macro_rules! decl_worker_main {
let mut socket_path = None;
let mut worker_dir_path = None;
let mut node_version = None;
let mut can_enable_landlock = false;
let mut can_enable_seccomp = false;
let mut can_unshare_user_namespace_and_change_root = false;
let mut i = 2;
while i < args.len() {
@@ -153,10 +155,6 @@ macro_rules! decl_worker_main {
node_version = Some(args[i + 1].as_str());
i += 1
},
"--can-enable-landlock" => can_enable_landlock = true,
"--can-enable-seccomp" => can_enable_seccomp = true,
"--can-unshare-user-namespace-and-change-root" =>
can_unshare_user_namespace_and_change_root = true,
arg => panic!("Unexpected argument found: {}", arg),
}
i += 1;
@@ -167,19 +165,8 @@ macro_rules! decl_worker_main {
let socket_path = std::path::Path::new(socket_path).to_owned();
let worker_dir_path = std::path::Path::new(worker_dir_path).to_owned();
let security_status = $crate::SecurityStatus {
can_enable_landlock,
can_enable_seccomp,
can_unshare_user_namespace_and_change_root,
};
$entrypoint(
socket_path,
worker_dir_path,
node_version,
Some($worker_version),
security_status,
);
$entrypoint(socket_path, worker_dir_path, node_version, Some($worker_version));
}
};
}
@@ -205,73 +192,75 @@ impl fmt::Display for WorkerKind {
}
}
/// Identifying information about the current worker process, bundled so it can be passed to the
/// security helpers and attached to log lines as a single `?worker_info` field.
// Some fields are only used for logging, and dead-code analysis ignores Debug.
#[allow(dead_code)]
#[derive(Debug)]
pub struct WorkerInfo {
/// OS process id of the worker, taken from `std::process::id()`.
pid: u32,
/// Which kind of worker this is.
kind: WorkerKind,
/// Version string of the worker binary, if one was supplied.
version: Option<String>,
/// Directory the worker operates in. Replaced with `/` once the root has been changed.
worker_dir_path: PathBuf,
}
// NOTE: The worker version must be passed in so that we accurately get the version of the worker,
// and not the version that this crate was compiled with.
//
// NOTE: This must not spawn any threads due to safety requirements in `event_loop` and to avoid
// errors in [`security::unshare_user_namespace_and_change_root`].
// errors in [`security::change_root::try_restrict`].
//
/// Initializes the worker process, then runs the given event loop, which spawns a new job process
/// to securely handle each incoming request.
pub fn run_worker<F>(
worker_kind: WorkerKind,
socket_path: PathBuf,
#[cfg_attr(not(target_os = "linux"), allow(unused_mut))] mut worker_dir_path: PathBuf,
worker_dir_path: PathBuf,
node_version: Option<&str>,
worker_version: Option<&str>,
security_status: &SecurityStatus,
mut event_loop: F,
) where
F: FnMut(UnixStream, PathBuf) -> io::Result<Never>,
{
let worker_pid = std::process::id();
#[cfg_attr(not(target_os = "linux"), allow(unused_mut))]
let mut worker_info = WorkerInfo {
pid: std::process::id(),
kind: worker_kind,
version: worker_version.map(|v| v.to_string()),
worker_dir_path,
};
gum::debug!(
target: LOG_TARGET,
%worker_pid,
?worker_info,
?socket_path,
?worker_dir_path,
?security_status,
"starting pvf worker ({})",
worker_kind
worker_info.kind
);
// Check for a mismatch between the node and worker versions.
if let (Some(node_version), Some(worker_version)) = (node_version, worker_version) {
if let (Some(node_version), Some(worker_version)) = (node_version, &worker_info.version) {
if node_version != worker_version {
gum::error!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
?worker_info,
%node_version,
%worker_version,
"Node and worker version mismatch, node needs restarting, forcing shutdown",
);
kill_parent_node_in_emergency();
worker_shutdown_message(worker_kind, worker_pid, "Version mismatch");
return
worker_shutdown(worker_info, "Version mismatch");
}
}
// Make sure that we can read the worker dir path, and log its contents.
let entries = || -> Result<Vec<_>, io::Error> {
std::fs::read_dir(&worker_dir_path)?
std::fs::read_dir(&worker_info.worker_dir_path)?
.map(|res| res.map(|e| e.file_name()))
.collect()
}();
match entries {
Ok(entries) =>
gum::trace!(target: LOG_TARGET, %worker_pid, ?worker_dir_path, "content of worker dir: {:?}", entries),
gum::trace!(target: LOG_TARGET, ?worker_info, "content of worker dir: {:?}", entries),
Err(err) => {
gum::error!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
?worker_dir_path,
"Could not read worker dir: {}",
err.to_string()
);
worker_shutdown_message(worker_kind, worker_pid, &err.to_string());
return
let err = format!("Could not read worker dir: {}", err.to_string());
worker_shutdown_error(worker_info, &err);
},
}
@@ -281,23 +270,20 @@ pub fn run_worker<F>(
let _ = std::fs::remove_file(&socket_path);
Ok(stream)
}();
let stream = match stream {
Ok(s) => s,
Err(err) => {
gum::error!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
"{}",
err
);
worker_shutdown_message(worker_kind, worker_pid, &err.to_string());
return
},
let mut stream = match stream {
Ok(ok) => ok,
Err(err) => worker_shutdown_error(worker_info, &err.to_string()),
};
let WorkerHandshake { security_status } = match recv_worker_handshake(&mut stream) {
Ok(ok) => ok,
Err(err) => worker_shutdown_error(worker_info, &err.to_string()),
};
// Enable some security features.
{
gum::trace!(target: LOG_TARGET, ?security_status, "Enabling security features");
// Call based on whether we can change root. Error out if it should work but fails.
//
// NOTE: This should not be called in a multi-threaded context (i.e. inside the tokio
@@ -306,39 +292,29 @@ pub fn run_worker<F>(
// > CLONE_NEWUSER requires that the calling process is not threaded.
#[cfg(target_os = "linux")]
if security_status.can_unshare_user_namespace_and_change_root {
if let Err(err) = security::unshare_user_namespace_and_change_root(
worker_kind,
worker_pid,
&worker_dir_path,
) {
// The filesystem may be in an inconsistent state, bail out.
gum::error!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
?worker_dir_path,
"Could not change root to be the worker cache path: {}",
err
);
worker_shutdown_message(worker_kind, worker_pid, &err);
return
if let Err(err) = security::change_root::enable_for_worker(&worker_info) {
// The filesystem may be in an inconsistent state, always bail out.
let err = format!("Could not change root to be the worker cache path: {}", err);
worker_shutdown_error(worker_info, &err);
}
worker_dir_path = std::path::Path::new("/").to_owned();
worker_info.worker_dir_path = std::path::Path::new("/").to_owned();
}
#[cfg(target_os = "linux")]
if security_status.can_enable_landlock {
let landlock_status =
security::landlock::enable_for_worker(worker_kind, worker_pid, &worker_dir_path);
if !matches!(landlock_status, Ok(landlock::RulesetStatus::FullyEnforced)) {
// We previously were able to enable, so this should never happen.
if let Err(err) = security::landlock::enable_for_worker(&worker_info) {
// We previously were able to enable, so this should never happen. Shutdown if
// running in secure mode.
let err = format!("could not fully enable landlock: {:?}", err);
gum::error!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
"could not fully enable landlock: {:?}. This should not happen, please report an issue",
landlock_status
?worker_info,
"{}. This should not happen, please report an issue",
err
);
if security_status.secure_validator_mode {
worker_shutdown(worker_info, &err);
}
}
}
@@ -346,48 +322,54 @@ pub fn run_worker<F>(
// job to catch regressions. See <https://github.com/paritytech/ci_cd/issues/609>.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
if security_status.can_enable_seccomp {
let seccomp_status =
security::seccomp::enable_for_worker(worker_kind, worker_pid, &worker_dir_path);
if !matches!(seccomp_status, Ok(())) {
// We previously were able to enable, so this should never happen.
//
// TODO: Make this a real error in secure-mode. See:
// <https://github.com/paritytech/polkadot-sdk/issues/1444>
if let Err(err) = security::seccomp::enable_for_worker(&worker_info) {
// We previously were able to enable, so this should never happen. Shutdown if
// running in secure mode.
let err = format!("could not fully enable seccomp: {:?}", err);
gum::error!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
"could not fully enable seccomp: {:?}. This should not happen, please report an issue",
seccomp_status
?worker_info,
"{}. This should not happen, please report an issue",
err
);
if security_status.secure_validator_mode {
worker_shutdown(worker_info, &err);
}
}
}
if !security::check_env_vars_were_cleared(worker_kind, worker_pid) {
if !security::check_env_vars_were_cleared(&worker_info) {
let err = "not all env vars were cleared when spawning the process";
gum::error!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
?worker_info,
"{}",
err
);
worker_shutdown_message(worker_kind, worker_pid, err);
return
if security_status.secure_validator_mode {
worker_shutdown(worker_info, err);
}
}
}
// Run the main worker loop.
let err = event_loop(stream, worker_dir_path)
let err = event_loop(stream, worker_info.worker_dir_path.clone())
// It's never `Ok` because it's `Ok(Never)`.
.unwrap_err();
worker_shutdown_message(worker_kind, worker_pid, &err.to_string());
worker_shutdown(worker_info, &err.to_string());
}
/// Provide a consistent message on worker shutdown.
fn worker_shutdown_message(worker_kind: WorkerKind, worker_pid: u32, err: &str) {
gum::debug!(target: LOG_TARGET, %worker_pid, "quitting pvf worker ({}): {}", worker_kind, err);
/// Provide a consistent message on unexpected worker shutdown.
///
/// Logs `err` at warn level together with the worker info, then terminates the process with exit
/// status 1. This function never returns.
fn worker_shutdown(worker_info: WorkerInfo, err: &str) -> ! {
gum::warn!(target: LOG_TARGET, ?worker_info, "quitting pvf worker ({}): {}", worker_info.kind, err);
// Non-zero status: every shutdown routed through here is treated as a failure.
std::process::exit(1);
}
/// Provide a consistent error on unexpected worker shutdown.
///
/// Same as `worker_shutdown`, except that the message is logged at error level. Terminates the
/// process with exit status 1 and never returns.
fn worker_shutdown_error(worker_info: WorkerInfo, err: &str) -> ! {
gum::error!(target: LOG_TARGET, ?worker_info, "quitting pvf worker ({}): {}", worker_info.kind, err);
std::process::exit(1);
}
/// Loop that runs in the CPU time monitor thread on prepare and execute jobs. Continuously wakes up
@@ -458,6 +440,18 @@ fn kill_parent_node_in_emergency() {
}
}
/// Reads one framed message from `stream` and decodes it into a [`WorkerHandshake`].
///
/// # Errors
///
/// Propagates any I/O error from receiving the frame. A payload that cannot be decoded is
/// reported as an [`io::Error`] of kind [`io::ErrorKind::Other`] carrying the decoder's message.
fn recv_worker_handshake(stream: &mut UnixStream) -> io::Result<WorkerHandshake> {
    let encoded = framed_recv_blocking(stream)?;
    match WorkerHandshake::decode(&mut &encoded[..]) {
        Ok(handshake) => Ok(handshake),
        Err(e) => Err(io::Error::new(
            io::ErrorKind::Other,
            format!("recv_worker_handshake: failed to decode WorkerHandshake: {}", e),
        )),
    }
}
/// Functionality related to threads spawned by the workers.
///
/// The motivation for this module is to coordinate worker threads without using async Rust.
@@ -0,0 +1,173 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Functionality for securing workers by unsharing some namespaces from other processes and
//! changing the root.
use crate::{
worker::{WorkerInfo, WorkerKind},
LOG_TARGET,
};
use std::{env, ffi::CString, io, os::unix::ffi::OsStrExt, path::Path, ptr};
/// Errors that can occur while unsharing namespaces and changing the root.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// A system call failed. The string holds a short description of the failing step followed by
/// the OS error that was current at that point.
#[error("{0}")]
OsErrWithContext(String),
/// An I/O error from the standard library, e.g. when querying or changing the current dir.
#[error(transparent)]
Io(#[from] io::Error),
/// A sanity check performed after changing the root did not hold.
#[error("assertion failed: {0}")]
AssertionFailed(String),
}
/// Result type of this module, with the error fixed to this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
/// Try to enable unshare-and-change-root for the given worker.
///
/// Emits a trace log and then applies the restriction to the calling process, using the worker
/// dir path and worker kind carried by `worker_info`.
///
/// Gated on Linux like the other functions in this module, since the function it delegates to
/// only exists there.
///
/// NOTE: This should not be called in a multi-threaded context. `unshare(2)`:
/// "CLONE_NEWUSER requires that the calling process is not threaded."
///
/// # Errors
///
/// Returns whatever error the underlying restriction attempt reports.
#[cfg(target_os = "linux")]
pub fn enable_for_worker(worker_info: &WorkerInfo) -> Result<()> {
    gum::trace!(
        target: LOG_TARGET,
        ?worker_info,
        "enabling change-root",
    );
    try_restrict(worker_info)
}
/// Probes whether unshare-and-change-root can be fully enabled on the current Linux environment
/// by actually attempting it with `tempdir` as the new root.
///
/// The attempt is made on the calling process itself, using a throwaway [`WorkerInfo`] of kind
/// [`WorkerKind::CheckPivotRoot`]. `Ok(())` means it succeeded; an error describes what failed.
///
/// NOTE: This should not be called in a multi-threaded context. `unshare(2)`:
/// "CLONE_NEWUSER requires that the calling process is not threaded."
#[cfg(target_os = "linux")]
pub fn check_is_fully_enabled(tempdir: &Path) -> Result<()> {
    let probe_info = WorkerInfo {
        pid: std::process::id(),
        kind: WorkerKind::CheckPivotRoot,
        version: None,
        worker_dir_path: tempdir.to_path_buf(),
    };
    try_restrict(&probe_info)
}
/// Unshare the user namespace and change root to be the worker directory.
///
/// The steps are: unshare the user and mount namespaces, make `/` recursively private, bind-mount
/// the worker dir onto itself, `chdir` into it, `pivot_root(".", ".")`, and lazily unmount the old
/// root. Afterwards it verifies that the current dir is `/` and stays `/` after `cd ..`.
///
/// NOTE: This should not be called in a multi-threaded context. `unshare(2)`:
/// "CLONE_NEWUSER requires that the calling process is not threaded."
///
/// # Errors
///
/// - [`Error::OsErrWithContext`] if one of the system calls fails.
/// - [`Error::Io`] if the current dir cannot be queried or changed during the final checks.
/// - [`Error::AssertionFailed`] if the current dir is not `/` during the final checks.
///
/// # Panics
///
/// Panics if the worker dir path contains an interior 0 byte.
#[cfg(target_os = "linux")]
fn try_restrict(worker_info: &WorkerInfo) -> Result<()> {
// TODO: Remove this once this is stable: https://github.com/rust-lang/rust/issues/105723
// Turns a string literal into a pointer to a null-terminated C string.
macro_rules! cstr_ptr {
($e:expr) => {
concat!($e, "\0").as_ptr().cast::<core::ffi::c_char>()
};
}
gum::trace!(
target: LOG_TARGET,
?worker_info,
"unsharing the user namespace and calling pivot_root",
);
let worker_dir_path_c = CString::new(worker_info.worker_dir_path.as_os_str().as_bytes())
.expect("on unix; the path will never contain 0 bytes; qed");
// Wrapper around all the work to prevent repetitive error handling.
//
// # Errors
//
// It's the caller's responsibility to call `Error::last_os_error`. Note that that alone does
// not give the context of which call failed, so we return a &str error.
|| -> std::result::Result<(), &'static str> {
// SAFETY: We pass null-terminated C strings and use the APIs as documented. In fact, steps
// (2) and (3) are adapted from the example in pivot_root(2), with the additional
// change described in the `pivot_root(".", ".")` section.
unsafe {
// 1. `unshare` the user and the mount namespaces.
if libc::unshare(libc::CLONE_NEWUSER | libc::CLONE_NEWNS) < 0 {
return Err("unshare user and mount namespaces")
}
// 2. Setup mounts.
//
// Ensure that new root and its parent mount don't have shared propagation (which would
// cause pivot_root() to return an error), and prevent propagation of mount events to
// the initial mount namespace.
if libc::mount(
ptr::null(),
cstr_ptr!("/"),
ptr::null(),
libc::MS_REC | libc::MS_PRIVATE,
ptr::null(),
) < 0
{
return Err("mount MS_PRIVATE")
}
// Ensure that the new root is a mount point.
//
// Execute workers and the pivot-root check additionally request a read-only mount; any
// other worker kind adds no extra flag.
//
// NOTE(review): mount(2) states that when creating a bind mount, the bits in mountflags
// other than MS_REC are ignored, and that restricting a bind mount needs a follow-up
// `MS_REMOUNT | MS_BIND` call. Confirm whether MS_RDONLY / MS_NOEXEC / MS_NODEV /
// MS_NOSUID / MS_NOATIME actually take effect here.
let additional_flags =
if let WorkerKind::Execute | WorkerKind::CheckPivotRoot = worker_info.kind {
libc::MS_RDONLY
} else {
0
};
if libc::mount(
worker_dir_path_c.as_ptr(),
worker_dir_path_c.as_ptr(),
ptr::null(), // ignored when MS_BIND is used
libc::MS_BIND |
libc::MS_REC | libc::MS_NOEXEC |
libc::MS_NODEV | libc::MS_NOSUID |
libc::MS_NOATIME | additional_flags,
ptr::null(), // ignored when MS_BIND is used
) < 0
{
return Err("mount MS_BIND")
}
// 3. `pivot_root` to the artifact directory.
if libc::chdir(worker_dir_path_c.as_ptr()) < 0 {
return Err("chdir to worker dir path")
}
if libc::syscall(libc::SYS_pivot_root, cstr_ptr!("."), cstr_ptr!(".")) < 0 {
return Err("pivot_root")
}
if libc::umount2(cstr_ptr!("."), libc::MNT_DETACH) < 0 {
return Err("umount the old root mount point")
}
}
Ok(())
}()
.map_err(|err_ctx| {
// The closure returns right after the failing call, so the last OS error is still the one
// set by that call. Combine it with the step description.
let err = io::Error::last_os_error();
Error::OsErrWithContext(format!("{}: {}", err_ctx, err))
})?;
// Do some assertions.
if env::current_dir()? != Path::new("/") {
return Err(Error::AssertionFailed("expected current dir after pivot_root to be `/`".into()))
}
// Try to step out of the new root; the current dir must still be `/` afterwards.
env::set_current_dir("..")?;
if env::current_dir()? != Path::new("/") {
return Err(Error::AssertionFailed(
"expected not to be able to break out of new root by doing `..`".into(),
))
}
Ok(())
}
@@ -28,7 +28,7 @@
pub use landlock::RulesetStatus;
use crate::{
worker::{stringify_panic_payload, WorkerKind},
worker::{stringify_panic_payload, WorkerInfo, WorkerKind},
LOG_TARGET,
};
use landlock::*;
@@ -74,6 +74,8 @@ pub const LANDLOCK_ABI: ABI = ABI::V1;
#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("Could not fully enable: {0:?}")]
NotFullyEnabled(RulesetStatus),
#[error("Invalid exception path: {0:?}")]
InvalidExceptionPath(PathBuf),
#[error(transparent)]
@@ -85,17 +87,13 @@ pub enum Error {
pub type Result<T> = std::result::Result<T, Error>;
/// Try to enable landlock for the given kind of worker.
pub fn enable_for_worker(
worker_kind: WorkerKind,
worker_pid: u32,
worker_dir_path: &Path,
) -> Result<RulesetStatus> {
let exceptions: Vec<(PathBuf, BitFlags<AccessFs>)> = match worker_kind {
pub fn enable_for_worker(worker_info: &WorkerInfo) -> Result<()> {
let exceptions: Vec<(PathBuf, BitFlags<AccessFs>)> = match worker_info.kind {
WorkerKind::Prepare => {
vec![(worker_dir_path.to_owned(), AccessFs::WriteFile.into())]
vec![(worker_info.worker_dir_path.to_owned(), AccessFs::WriteFile.into())]
},
WorkerKind::Execute => {
vec![(worker_dir_path.to_owned(), AccessFs::ReadFile.into())]
vec![(worker_info.worker_dir_path.to_owned(), AccessFs::ReadFile.into())]
},
WorkerKind::CheckPivotRoot =>
panic!("this should only be passed for checking pivot_root; qed"),
@@ -103,9 +101,7 @@ pub fn enable_for_worker(
gum::trace!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
?worker_dir_path,
?worker_info,
"enabling landlock with exceptions: {:?}",
exceptions,
);
@@ -114,18 +110,14 @@ pub fn enable_for_worker(
}
// TODO: <https://github.com/landlock-lsm/rust-landlock/issues/36>
/// Runs a check for landlock and returns a single bool indicating whether the given landlock
/// ABI is fully enabled on the current Linux environment.
pub fn check_is_fully_enabled() -> bool {
let status_from_thread: Result<RulesetStatus> =
match std::thread::spawn(|| try_restrict(std::iter::empty::<(PathBuf, AccessFs)>())).join()
{
Ok(Ok(status)) => Ok(status),
Ok(Err(ruleset_err)) => Err(ruleset_err.into()),
Err(err) => Err(Error::Panic(stringify_panic_payload(err))),
};
matches!(status_from_thread, Ok(RulesetStatus::FullyEnforced))
/// Runs a check for landlock in its own thread, and returns an error indicating whether the given
/// landlock ABI is fully enabled on the current Linux environment.
pub fn check_is_fully_enabled() -> Result<()> {
match std::thread::spawn(|| try_restrict(std::iter::empty::<(PathBuf, AccessFs)>())).join() {
Ok(Ok(())) => Ok(()),
Ok(Err(err)) => Err(err),
Err(err) => Err(Error::Panic(stringify_panic_payload(err))),
}
}
/// Tries to restrict the current thread (should only be called in a process' main thread) with
@@ -139,7 +131,7 @@ pub fn check_is_fully_enabled() -> bool {
/// # Returns
///
/// The status of the restriction (whether it was fully, partially, or not-at-all enforced).
fn try_restrict<I, P, A>(fs_exceptions: I) -> Result<RulesetStatus>
fn try_restrict<I, P, A>(fs_exceptions: I) -> Result<()>
where
I: IntoIterator<Item = (P, A)>,
P: AsRef<Path>,
@@ -156,8 +148,13 @@ where
}
ruleset = ruleset.add_rules(rules)?;
}
let status = ruleset.restrict_self()?;
Ok(status.ruleset)
if !matches!(status.ruleset, RulesetStatus::FullyEnforced) {
return Err(Error::NotFullyEnabled(status.ruleset))
}
Ok(())
}
#[cfg(test)]
@@ -168,7 +165,7 @@ mod tests {
#[test]
fn restricted_thread_cannot_read_file() {
// TODO: This would be nice: <https://github.com/rust-lang/rust/issues/68007>.
if !check_is_fully_enabled() {
if check_is_fully_enabled().is_err() {
return
}
@@ -191,7 +188,7 @@ mod tests {
// Apply Landlock with a read exception for only one of the files.
let status = try_restrict(vec![(path1, AccessFs::ReadFile)]);
if !matches!(status, Ok(RulesetStatus::FullyEnforced)) {
if !matches!(status, Ok(())) {
panic!(
"Ruleset should be enforced since we checked if landlock is enabled: {:?}",
status
@@ -212,7 +209,7 @@ mod tests {
// Apply Landlock for all files.
let status = try_restrict(std::iter::empty::<(PathBuf, AccessFs)>());
if !matches!(status, Ok(RulesetStatus::FullyEnforced)) {
if !matches!(status, Ok(())) {
panic!(
"Ruleset should be enforced since we checked if landlock is enabled: {:?}",
status
@@ -233,7 +230,7 @@ mod tests {
#[test]
fn restricted_thread_cannot_write_file() {
// TODO: This would be nice: <https://github.com/rust-lang/rust/issues/68007>.
if !check_is_fully_enabled() {
if check_is_fully_enabled().is_err() {
return
}
@@ -252,7 +249,7 @@ mod tests {
// Apply Landlock with a write exception for only one of the files.
let status = try_restrict(vec![(path1, AccessFs::WriteFile)]);
if !matches!(status, Ok(RulesetStatus::FullyEnforced)) {
if !matches!(status, Ok(())) {
panic!(
"Ruleset should be enforced since we checked if landlock is enabled: {:?}",
status
@@ -270,7 +267,7 @@ mod tests {
// Apply Landlock for all files.
let status = try_restrict(std::iter::empty::<(PathBuf, AccessFs)>());
if !matches!(status, Ok(RulesetStatus::FullyEnforced)) {
if !matches!(status, Ok(())) {
panic!(
"Ruleset should be enforced since we checked if landlock is enabled: {:?}",
status
@@ -292,7 +289,7 @@ mod tests {
#[test]
fn restricted_thread_can_truncate_file() {
// TODO: This would be nice: <https://github.com/rust-lang/rust/issues/68007>.
if !check_is_fully_enabled() {
if check_is_fully_enabled().is_err() {
return
}
@@ -308,7 +305,7 @@ mod tests {
// Apply Landlock with all exceptions under the current ABI.
let status = try_restrict(vec![(path, AccessFs::from_all(LANDLOCK_ABI))]);
if !matches!(status, Ok(RulesetStatus::FullyEnforced)) {
if !matches!(status, Ok(())) {
panic!(
"Ruleset should be enforced since we checked if landlock is enabled: {:?}",
status
@@ -27,134 +27,21 @@
//! - Restrict networking by blocking socket creation and io_uring.
//! - Remove env vars
use crate::{worker::WorkerKind, LOG_TARGET};
use crate::{worker::WorkerInfo, LOG_TARGET};
#[cfg(target_os = "linux")]
pub mod change_root;
#[cfg(target_os = "linux")]
pub mod landlock;
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
pub mod seccomp;
/// Unshare the user namespace and change root to be the artifact directory.
///
/// NOTE: This should not be called in a multi-threaded context. `unshare(2)`:
/// "CLONE_NEWUSER requires that the calling process is not threaded."
#[cfg(target_os = "linux")]
pub fn unshare_user_namespace_and_change_root(
worker_kind: WorkerKind,
worker_pid: u32,
worker_dir_path: &std::path::Path,
) -> Result<(), String> {
use std::{env, ffi::CString, os::unix::ffi::OsStrExt, path::Path, ptr};
// TODO: Remove this once this is stable: https://github.com/rust-lang/rust/issues/105723
macro_rules! cstr_ptr {
($e:expr) => {
concat!($e, "\0").as_ptr().cast::<core::ffi::c_char>()
};
}
gum::trace!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
?worker_dir_path,
"unsharing the user namespace and calling pivot_root",
);
let worker_dir_path_c = CString::new(worker_dir_path.as_os_str().as_bytes())
.expect("on unix; the path will never contain 0 bytes; qed");
// Wrapper around all the work to prevent repetitive error handling.
//
// # Errors
//
// It's the caller's responsibility to call `Error::last_os_error`. Note that that alone does
// not give the context of which call failed, so we return a &str error.
|| -> Result<(), &'static str> {
// SAFETY: We pass null-terminated C strings and use the APIs as documented. In fact, steps
// (2) and (3) are adapted from the example in pivot_root(2), with the additional
// change described in the `pivot_root(".", ".")` section.
unsafe {
// 1. `unshare` the user and the mount namespaces.
if libc::unshare(libc::CLONE_NEWUSER | libc::CLONE_NEWNS) < 0 {
return Err("unshare user and mount namespaces")
}
// 2. Setup mounts.
//
// Ensure that new root and its parent mount don't have shared propagation (which would
// cause pivot_root() to return an error), and prevent propagation of mount events to
// the initial mount namespace.
if libc::mount(
ptr::null(),
cstr_ptr!("/"),
ptr::null(),
libc::MS_REC | libc::MS_PRIVATE,
ptr::null(),
) < 0
{
return Err("mount MS_PRIVATE")
}
// Ensure that the new root is a mount point.
let additional_flags =
if let WorkerKind::Execute | WorkerKind::CheckPivotRoot = worker_kind {
libc::MS_RDONLY
} else {
0
};
if libc::mount(
worker_dir_path_c.as_ptr(),
worker_dir_path_c.as_ptr(),
ptr::null(), // ignored when MS_BIND is used
libc::MS_BIND |
libc::MS_REC | libc::MS_NOEXEC |
libc::MS_NODEV | libc::MS_NOSUID |
libc::MS_NOATIME | additional_flags,
ptr::null(), // ignored when MS_BIND is used
) < 0
{
return Err("mount MS_BIND")
}
// 3. `pivot_root` to the artifact directory.
if libc::chdir(worker_dir_path_c.as_ptr()) < 0 {
return Err("chdir to worker dir path")
}
if libc::syscall(libc::SYS_pivot_root, cstr_ptr!("."), cstr_ptr!(".")) < 0 {
return Err("pivot_root")
}
if libc::umount2(cstr_ptr!("."), libc::MNT_DETACH) < 0 {
return Err("umount the old root mount point")
}
}
Ok(())
}()
.map_err(|err_ctx| {
let err = std::io::Error::last_os_error();
format!("{}: {}", err_ctx, err)
})?;
// Do some assertions.
if env::current_dir().map_err(|err| err.to_string())? != Path::new("/") {
return Err("expected current dir after pivot_root to be `/`".into())
}
env::set_current_dir("..").map_err(|err| err.to_string())?;
if env::current_dir().map_err(|err| err.to_string())? != Path::new("/") {
return Err("expected not to be able to break out of new root by doing `..`".into())
}
Ok(())
}
/// Require env vars to have been removed when spawning the process, to prevent malicious code from
/// accessing them.
pub fn check_env_vars_were_cleared(worker_kind: WorkerKind, worker_pid: u32) -> bool {
pub fn check_env_vars_were_cleared(worker_info: &WorkerInfo) -> bool {
gum::trace!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
?worker_info,
"clearing env vars in worker",
);
@@ -162,8 +49,8 @@ pub fn check_env_vars_were_cleared(worker_kind: WorkerKind, worker_pid: u32) ->
for (key, value) in std::env::vars_os() {
// TODO: *theoretically* the value (or mere presence) of `RUST_LOG` can be a source of
// randomness for malicious code. In the future we can remove it also and log in the host;
// see <https://github.com/paritytech/polkadot/issues/7117>.
// randomness for malicious code. It should be removed in the job process, which does no
// logging.
if key == "RUST_LOG" {
continue
}
@@ -175,8 +62,7 @@ pub fn check_env_vars_were_cleared(worker_kind: WorkerKind, worker_pid: u32) ->
gum::error!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
?worker_info,
?key,
?value,
"env var was present that should have been removed",
@@ -72,11 +72,11 @@
//! candidate.
use crate::{
worker::{stringify_panic_payload, WorkerKind},
worker::{stringify_panic_payload, WorkerInfo},
LOG_TARGET,
};
use seccompiler::*;
use std::{collections::BTreeMap, path::Path};
use std::collections::BTreeMap;
/// The action to take on caught syscalls.
#[cfg(not(test))]
@@ -98,36 +98,28 @@ pub enum Error {
pub type Result<T> = std::result::Result<T, Error>;
/// Try to enable seccomp for the given kind of worker.
pub fn enable_for_worker(
worker_kind: WorkerKind,
worker_pid: u32,
worker_dir_path: &Path,
) -> Result<()> {
pub fn enable_for_worker(worker_info: &WorkerInfo) -> Result<()> {
gum::trace!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
?worker_dir_path,
?worker_info,
"enabling seccomp",
);
try_restrict()
}
/// Runs a check for seccomp and returns a single bool indicating whether seccomp with our rules is
/// fully enabled on the current Linux environment.
pub fn check_is_fully_enabled() -> bool {
let status_from_thread: Result<()> = match std::thread::spawn(|| try_restrict()).join() {
/// Runs a check for seccomp in its own thread, and returns an error indicating whether seccomp with
/// our rules is fully enabled on the current Linux environment.
pub fn check_is_fully_enabled() -> Result<()> {
match std::thread::spawn(|| try_restrict()).join() {
Ok(Ok(())) => Ok(()),
Ok(Err(err)) => Err(err.into()),
Ok(Err(err)) => Err(err),
Err(err) => Err(Error::Panic(stringify_panic_payload(err))),
};
matches!(status_from_thread, Ok(()))
}
}
/// Applies a `seccomp` filter to disable networking for the PVF threads.
pub fn try_restrict() -> Result<()> {
fn try_restrict() -> Result<()> {
// Build a `seccomp` filter which by default allows all syscalls except those blocked in the
// blacklist.
let mut blacklisted_rules = BTreeMap::default();
@@ -169,7 +161,7 @@ mod tests {
#[test]
fn sandboxed_thread_cannot_use_sockets() {
// TODO: This would be nice: <https://github.com/rust-lang/rust/issues/68007>.
if !check_is_fully_enabled() {
if check_is_fully_enabled().is_err() {
return
}