PVF: more filesystem sandboxing (#1373)

This commit is contained in:
Marcin S
2023-09-28 18:24:29 +02:00
committed by GitHub
parent de71fecc4e
commit c1eb342b14
24 changed files with 1528 additions and 612 deletions
+38 -3
View File
@@ -44,7 +44,17 @@ pub enum PrepareError {
/// The response from the worker is received, but the file cannot be renamed (moved) to the
/// final destination location. This state is reported by the validation host (not by the
/// worker).
RenameTmpFileErr(String),
RenameTmpFileErr {
err: String,
// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
// conversion to `Option<String>`.
src: Option<String>,
dest: Option<String>,
},
/// The response from the worker is received, but the worker cache could not be cleared. The
/// worker has to be killed to avoid jobs having access to data from other jobs. This state is
/// reported by the validation host (not by the worker).
ClearWorkerDir(String),
}
impl PrepareError {
@@ -58,7 +68,11 @@ impl PrepareError {
use PrepareError::*;
match self {
Prevalidation(_) | Preparation(_) | Panic(_) => true,
TimedOut | IoErr(_) | CreateTmpFileErr(_) | RenameTmpFileErr(_) => false,
TimedOut |
IoErr(_) |
CreateTmpFileErr(_) |
RenameTmpFileErr { .. } |
ClearWorkerDir(_) => false,
// Can occur due to issues with the PVF, but also due to local errors.
RuntimeConstruction(_) => false,
}
@@ -76,7 +90,9 @@ impl fmt::Display for PrepareError {
TimedOut => write!(f, "prepare: timeout"),
IoErr(err) => write!(f, "prepare: io error while receiving response: {}", err),
CreateTmpFileErr(err) => write!(f, "prepare: error creating tmp file: {}", err),
RenameTmpFileErr(err) => write!(f, "prepare: error renaming tmp file: {}", err),
RenameTmpFileErr { err, src, dest } =>
write!(f, "prepare: error renaming tmp file ({:?} -> {:?}): {}", src, dest, err),
ClearWorkerDir(err) => write!(f, "prepare: error clearing worker cache: {}", err),
}
}
}
@@ -89,8 +105,17 @@ impl fmt::Display for PrepareError {
pub enum InternalValidationError {
/// Some communication error occurred with the host.
HostCommunication(String),
/// Host could not create a hard link to the artifact path.
CouldNotCreateLink(String),
/// Could not find or open compiled artifact file.
CouldNotOpenFile(String),
/// Host could not clear the worker cache after a job.
CouldNotClearWorkerDir {
err: String,
// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
// conversion to `Option<String>`.
path: Option<String>,
},
/// An error occurred in the CPU time monitor thread. Should be totally unrelated to
/// validation.
CpuTimeMonitorThread(String),
@@ -104,8 +129,18 @@ impl fmt::Display for InternalValidationError {
match self {
HostCommunication(err) =>
write!(f, "validation: some communication error occurred with the host: {}", err),
CouldNotCreateLink(err) => write!(
f,
"validation: host could not create a hard link to the artifact path: {}",
err
),
CouldNotOpenFile(err) =>
write!(f, "validation: could not find or open compiled artifact file: {}", err),
CouldNotClearWorkerDir { err, path } => write!(
f,
"validation: host could not clear the worker cache ({:?}) after a job: {}",
path, err
),
CpuTimeMonitorThread(err) =>
write!(f, "validation: an error occurred in the CPU time monitor thread: {}", err),
NonDeterministicPrepareError(err) => write!(f, "validation: prepare: {}", err),
+1 -1
View File
@@ -29,7 +29,7 @@ pub struct Handshake {
}
/// The response from an execution job on the worker.
#[derive(Encode, Decode)]
#[derive(Debug, Encode, Decode)]
pub enum Response {
/// The job completed successfully.
Ok {
+25 -10
View File
@@ -22,6 +22,7 @@ pub mod executor_intf;
pub mod prepare;
pub mod pvf;
pub mod worker;
pub mod worker_dir;
pub use cpu_time::ProcessTime;
@@ -30,8 +31,11 @@ pub use sp_tracing;
const LOG_TARGET: &str = "parachain::pvf-common";
use std::mem;
use tokio::io::{self, AsyncRead, AsyncReadExt as _, AsyncWrite, AsyncWriteExt as _};
use std::{
io::{Read, Write},
mem,
};
use tokio::io;
#[cfg(feature = "test-utils")]
pub mod tests {
@@ -41,20 +45,31 @@ pub mod tests {
pub const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(30);
}
/// Write some data prefixed by its length into `w`.
pub async fn framed_send(w: &mut (impl AsyncWrite + Unpin), buf: &[u8]) -> io::Result<()> {
/// Status of security features on the current system.
#[derive(Debug, Clone, Default)]
pub struct SecurityStatus {
    /// Whether the landlock features we use are fully available on this system.
    pub can_enable_landlock: bool,
    /// Whether we are able to unshare the user namespace and change the filesystem root.
    pub can_unshare_user_namespace_and_change_root: bool,
}
/// Write some data prefixed by its length into `w`. Sync version of `framed_send` to avoid
/// dependency on tokio.
pub fn framed_send_blocking(w: &mut (impl Write + Unpin), buf: &[u8]) -> io::Result<()> {
let len_buf = buf.len().to_le_bytes();
w.write_all(&len_buf).await?;
w.write_all(buf).await?;
w.write_all(&len_buf)?;
w.write_all(buf)?;
Ok(())
}
/// Read some data prefixed by its length from `r`.
pub async fn framed_recv(r: &mut (impl AsyncRead + Unpin)) -> io::Result<Vec<u8>> {
/// Read some data prefixed by its length from `r`. Sync version of `framed_recv` to avoid
/// dependency on tokio.
pub fn framed_recv_blocking(r: &mut (impl Read + Unpin)) -> io::Result<Vec<u8>> {
let mut len_buf = [0u8; mem::size_of::<usize>()];
r.read_exact(&mut len_buf).await?;
r.read_exact(&mut len_buf)?;
let len = usize::from_le_bytes(len_buf);
let mut buf = vec![0; len];
r.read_exact(&mut buf).await?;
r.read_exact(&mut buf)?;
Ok(buf)
}
+211 -72
View File
@@ -18,16 +18,18 @@
pub mod security;
use crate::LOG_TARGET;
use crate::{worker_dir, SecurityStatus, LOG_TARGET};
use cpu_time::ProcessTime;
use futures::never::Never;
use std::{
any::Any,
fmt,
os::unix::net::UnixStream,
path::PathBuf,
sync::mpsc::{Receiver, RecvTimeoutError},
time::Duration,
};
use tokio::{io, net::UnixStream, runtime::Runtime};
use tokio::{io, runtime::Runtime};
/// Use this macro to declare a `fn main() {}` that will create an executable that can be used for
/// spawning the desired worker.
@@ -41,10 +43,15 @@ macro_rules! decl_worker_main {
}
fn main() {
#[cfg(target_os = "linux")]
use $crate::worker::security;
// TODO: Remove this dependency, and `pub use sp_tracing` in `lib.rs`.
// See <https://github.com/paritytech/polkadot/issues/7117>.
$crate::sp_tracing::try_init_simple();
let worker_pid = std::process::id();
let args = std::env::args().collect::<Vec<_>>();
if args.len() == 1 {
print_help($expected_command);
@@ -60,10 +67,43 @@ macro_rules! decl_worker_main {
println!("{}", $worker_version);
return
},
"--check-can-enable-landlock" => {
#[cfg(target_os = "linux")]
let status = if security::landlock::check_is_fully_enabled() { 0 } else { -1 };
#[cfg(not(target_os = "linux"))]
let status = -1;
std::process::exit(status)
},
"--check-can-unshare-user-namespace-and-change-root" => {
#[cfg(target_os = "linux")]
let status = if let Err(err) = security::unshare_user_namespace_and_change_root(
$crate::worker::WorkerKind::CheckPivotRoot,
worker_pid,
// We're not accessing any files, so we can try to pivot_root in the temp
// dir without conflicts with other processes.
&std::env::temp_dir(),
) {
// Write the error to stderr, log it on the host-side.
eprintln!("{}", err);
-1
} else {
0
};
#[cfg(not(target_os = "linux"))]
let status = {
// Write the error to stderr, log it on the host-side.
eprintln!("not available on macos");
-1
};
std::process::exit(status)
},
"test-sleep" => {
std::thread::sleep(std::time::Duration::from_secs(5));
return
},
subcommand => {
// Must be passed for compatibility with the single-binary test workers.
if subcommand != $expected_command {
@@ -75,18 +115,39 @@ macro_rules! decl_worker_main {
},
}
let mut worker_dir_path = None;
let mut node_version = None;
let mut socket_path: &str = "";
let mut can_enable_landlock = false;
let mut can_unshare_user_namespace_and_change_root = false;
for i in (2..args.len()).step_by(2) {
let mut i = 2;
while i < args.len() {
match args[i].as_ref() {
"--socket-path" => socket_path = args[i + 1].as_str(),
"--node-impl-version" => node_version = Some(args[i + 1].as_str()),
"--worker-dir-path" => {
worker_dir_path = Some(args[i + 1].as_str());
i += 1
},
"--node-impl-version" => {
node_version = Some(args[i + 1].as_str());
i += 1
},
"--can-enable-landlock" => can_enable_landlock = true,
"--can-unshare-user-namespace-and-change-root" =>
can_unshare_user_namespace_and_change_root = true,
arg => panic!("Unexpected argument found: {}", arg),
}
i += 1;
}
let worker_dir_path =
worker_dir_path.expect("the --worker-dir-path argument is required");
$entrypoint(&socket_path, node_version, Some($worker_version));
let worker_dir_path = std::path::Path::new(worker_dir_path).to_owned();
let security_status = $crate::SecurityStatus {
can_enable_landlock,
can_unshare_user_namespace_and_change_root,
};
$entrypoint(worker_dir_path, node_version, Some($worker_version), security_status);
}
};
}
@@ -95,61 +156,181 @@ macro_rules! decl_worker_main {
/// child process.
pub const JOB_TIMEOUT_OVERHEAD: Duration = Duration::from_millis(50);
/// Interprets the given bytes as a path. Returns `None` if the given bytes do not constitute
/// a proper utf-8 string.
pub fn bytes_to_path(bytes: &[u8]) -> Option<PathBuf> {
std::str::from_utf8(bytes).ok().map(PathBuf::from)
/// The role a worker process was started in.
///
/// The `Display` form is the short lowercase label used in log messages.
#[derive(Debug, Clone, Copy)]
pub enum WorkerKind {
    /// A worker started for preparation jobs.
    Prepare,
    /// A worker started for execution jobs.
    Execute,
    /// Not a real worker: only used when probing whether `pivot_root` is usable.
    CheckPivotRoot,
}

impl fmt::Display for WorkerKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the label first, then emit it verbatim.
        let label = match self {
            Self::Prepare => "prepare",
            Self::Execute => "execute",
            Self::CheckPivotRoot => "check pivot root",
        };
        f.write_str(label)
    }
}
// The worker version must be passed in so that we accurately get the version of the worker, and not
// the version that this crate was compiled with.
pub fn worker_event_loop<F, Fut>(
debug_id: &'static str,
socket_path: &str,
worker_kind: WorkerKind,
#[cfg_attr(not(target_os = "linux"), allow(unused_mut))] mut worker_dir_path: PathBuf,
node_version: Option<&str>,
worker_version: Option<&str>,
#[cfg_attr(not(target_os = "linux"), allow(unused_variables))] security_status: &SecurityStatus,
mut event_loop: F,
) where
F: FnMut(UnixStream) -> Fut,
F: FnMut(UnixStream, PathBuf) -> Fut,
Fut: futures::Future<Output = io::Result<Never>>,
{
let worker_pid = std::process::id();
gum::debug!(target: LOG_TARGET, %worker_pid, "starting pvf worker ({})", debug_id);
gum::debug!(
target: LOG_TARGET,
%worker_pid,
?worker_dir_path,
?security_status,
"starting pvf worker ({})",
worker_kind
);
// Check for a mismatch between the node and worker versions.
if let (Some(node_version), Some(worker_version)) = (node_version, worker_version) {
if node_version != worker_version {
gum::error!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
%node_version,
%worker_version,
"Node and worker version mismatch, node needs restarting, forcing shutdown",
);
kill_parent_node_in_emergency();
let err = io::Error::new(io::ErrorKind::Unsupported, "Version mismatch");
worker_shutdown_message(debug_id, worker_pid, err);
worker_shutdown_message(worker_kind, worker_pid, "Version mismatch");
return
}
}
remove_env_vars(debug_id);
// Make sure that we can read the worker dir path, and log its contents.
let entries = || -> Result<Vec<_>, io::Error> {
std::fs::read_dir(&worker_dir_path)?
.map(|res| res.map(|e| e.file_name()))
.collect()
}();
match entries {
Ok(entries) =>
gum::trace!(target: LOG_TARGET, %worker_pid, ?worker_dir_path, "content of worker dir: {:?}", entries),
Err(err) => {
gum::error!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
?worker_dir_path,
"Could not read worker dir: {}",
err.to_string()
);
worker_shutdown_message(worker_kind, worker_pid, &err.to_string());
return
},
}
// Connect to the socket.
let socket_path = worker_dir::socket(&worker_dir_path);
let stream = || -> std::io::Result<UnixStream> {
let stream = UnixStream::connect(&socket_path)?;
// Remove the socket here. We don't also need to do this on the host-side; on failed
// rendezvous, the host will delete the whole worker dir.
std::fs::remove_file(&socket_path)?;
Ok(stream)
}();
let stream = match stream {
Ok(s) => s,
Err(err) => {
gum::error!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
"{}",
err
);
worker_shutdown_message(worker_kind, worker_pid, &err.to_string());
return
},
};
// Enable some security features.
{
// Call based on whether we can change root. Error out if it should work but fails.
//
// NOTE: This should not be called in a multi-threaded context (i.e. inside the tokio
// runtime). `unshare(2)`:
//
// > CLONE_NEWUSER requires that the calling process is not threaded.
#[cfg(target_os = "linux")]
if security_status.can_unshare_user_namespace_and_change_root {
if let Err(err) = security::unshare_user_namespace_and_change_root(
worker_kind,
worker_pid,
&worker_dir_path,
) {
// The filesystem may be in an inconsistent state, bail out.
gum::error!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
?worker_dir_path,
"Could not change root to be the worker cache path: {}",
err
);
worker_shutdown_message(worker_kind, worker_pid, &err);
return
}
worker_dir_path = std::path::Path::new("/").to_owned();
}
#[cfg(target_os = "linux")]
if security_status.can_enable_landlock {
let landlock_status =
security::landlock::enable_for_worker(worker_kind, worker_pid, &worker_dir_path);
if !matches!(landlock_status, Ok(landlock::RulesetStatus::FullyEnforced)) {
// We previously were able to enable, so this should never happen.
//
// TODO: Make this a real error in secure-mode. See:
// <https://github.com/paritytech/polkadot-sdk/issues/1444>
gum::error!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
"could not fully enable landlock: {:?}. This should not happen, please report to the Polkadot devs",
landlock_status
);
}
}
if !security::check_env_vars_were_cleared(worker_kind, worker_pid) {
let err = "not all env vars were cleared when spawning the process";
gum::error!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
"{}",
err
);
worker_shutdown_message(worker_kind, worker_pid, err);
return
}
}
// Run the main worker loop.
let rt = Runtime::new().expect("Creates tokio runtime. If this panics the worker will die and the host will detect that and deal with it.");
let err = rt
.block_on(async move {
let stream = UnixStream::connect(socket_path).await?;
let _ = tokio::fs::remove_file(socket_path).await;
let result = event_loop(stream).await;
result
})
.block_on(event_loop(stream, worker_dir_path))
// It's never `Ok` because it's `Ok(Never)`.
.unwrap_err();
worker_shutdown_message(debug_id, worker_pid, err);
worker_shutdown_message(worker_kind, worker_pid, &err.to_string());
// We don't want tokio to wait for the tasks to finish. We want to bring down the worker as fast
// as possible and not wait for stalled validation to finish. This isn't strictly necessary now,
@@ -157,51 +338,9 @@ pub fn worker_event_loop<F, Fut>(
rt.shutdown_background();
}
/// Delete all env vars to prevent malicious code from accessing them.
fn remove_env_vars(debug_id: &'static str) {
for (key, value) in std::env::vars_os() {
// TODO: *theoretically* the value (or mere presence) of `RUST_LOG` can be a source of
// randomness for malicious code. In the future we can remove it also and log in the host;
// see <https://github.com/paritytech/polkadot/issues/7117>.
if key == "RUST_LOG" {
continue
}
// In case of a key or value that would cause [`env::remove_var` to
// panic](https://doc.rust-lang.org/std/env/fn.remove_var.html#panics), we first log a
// warning and then proceed to attempt to remove the env var.
let mut err_reasons = vec![];
let (key_str, value_str) = (key.to_str(), value.to_str());
if key.is_empty() {
err_reasons.push("key is empty");
}
if key_str.is_some_and(|s| s.contains('=')) {
err_reasons.push("key contains '='");
}
if key_str.is_some_and(|s| s.contains('\0')) {
err_reasons.push("key contains null character");
}
if value_str.is_some_and(|s| s.contains('\0')) {
err_reasons.push("value contains null character");
}
if !err_reasons.is_empty() {
gum::warn!(
target: LOG_TARGET,
%debug_id,
?key,
?value,
"Attempting to remove badly-formatted env var, this may cause the PVF worker to crash. Please remove it yourself. Reasons: {:?}",
err_reasons
);
}
std::env::remove_var(key);
}
}
/// Provide a consistent message on worker shutdown.
fn worker_shutdown_message(debug_id: &'static str, worker_pid: u32, err: io::Error) {
gum::debug!(target: LOG_TARGET, %worker_pid, "quitting pvf worker ({}): {:?}", debug_id, err);
fn worker_shutdown_message(worker_kind: WorkerKind, worker_pid: u32, err: &str) {
gum::debug!(target: LOG_TARGET, %worker_pid, "quitting pvf worker ({}): {}", worker_kind, err);
}
/// Loop that runs in the CPU time monitor thread on prepare and execute jobs. Continuously wakes up
@@ -305,7 +444,7 @@ pub mod thread {
Arc::new((Mutex::new(WaitOutcome::Pending), Condvar::new()))
}
/// Runs a worker thread. Will first enable security features, and afterwards notify the threads
/// Runs a worker thread. Will run the requested function, and afterwards notify the threads
/// waiting on the condvar. Catches panics during execution and resumes the panics after
/// triggering the condvar, so that the waiting thread is notified on panics.
///
@@ -17,30 +17,189 @@
//! Functionality for securing workers.
//!
//! This is needed because workers are used to compile and execute untrusted code (PVFs).
//!
//! We currently employ the following security measures:
//!
//! - Restrict filesystem
//! - Use Landlock to remove all unnecessary FS access rights.
//! - Unshare the user and mount namespaces.
//! - Change the root directory to a worker-specific temporary directory.
//! - Remove env vars
/// To what degree landlock is enabled. It's a separate struct from `RulesetStatus` because that is
/// only available on Linux, plus this has a nicer name.
pub enum LandlockStatus {
FullyEnforced,
PartiallyEnforced,
NotEnforced,
/// Thread panicked, we don't know what the status is.
Unavailable,
}
use crate::{worker::WorkerKind, LOG_TARGET};
impl LandlockStatus {
#[cfg(target_os = "linux")]
pub fn from_ruleset_status(ruleset_status: ::landlock::RulesetStatus) -> Self {
use ::landlock::RulesetStatus::*;
match ruleset_status {
FullyEnforced => LandlockStatus::FullyEnforced,
PartiallyEnforced => LandlockStatus::PartiallyEnforced,
NotEnforced => LandlockStatus::NotEnforced,
/// Unshare the user and mount namespaces and change the filesystem root to be the worker
/// directory (`worker_dir_path`).
///
/// The steps are: `unshare(2)` into new user and mount namespaces, make all mounts private,
/// bind-mount the worker directory onto itself with restrictive flags, `pivot_root` into it and
/// detach the old root. Afterwards the function checks that the current directory is `/` and
/// that `..` does not lead out of the new root.
///
/// `worker_kind` decides whether the bind mount is read-only; `worker_pid` is only used for
/// logging.
///
/// NOTE: This should not be called in a multi-threaded context. `unshare(2)`:
/// "CLONE_NEWUSER requires that the calling process is not threaded."
///
/// # Errors
///
/// Returns a human-readable message. For a failed system call it has the form
/// `"<step that failed>: <OS error>"`; for a failed post-condition check it describes the
/// violated expectation.
#[cfg(target_os = "linux")]
pub fn unshare_user_namespace_and_change_root(
    worker_kind: WorkerKind,
    worker_pid: u32,
    worker_dir_path: &std::path::Path,
) -> Result<(), String> {
    use std::{env, ffi::CString, os::unix::ffi::OsStrExt, path::Path, ptr};

    // The following was copied from the `cstr_core` crate.
    //
    // TODO: Remove this once this is stable: https://github.com/rust-lang/rust/issues/105723
    //
    // Returns `true` when `bytes` ends with a single nul terminator and contains no interior
    // nul bytes, i.e. when it is a valid C string.
    #[inline]
    #[doc(hidden)]
    const fn cstr_is_valid(bytes: &[u8]) -> bool {
        if bytes.is_empty() || bytes[bytes.len() - 1] != 0 {
            return false
        }

        // Scan everything except the terminator for interior nul bytes.
        let mut index = 0;
        while index < bytes.len() - 1 {
            if bytes[index] == 0 {
                return false
            }
            index += 1;
        }
        true
    }

    // Turns a string literal into a `&'static CStr`, validated at compile time.
    macro_rules! cstr {
        ($e:expr) => {{
            const STR: &[u8] = concat!($e, "\0").as_bytes();
            const STR_VALID: bool = cstr_is_valid(STR);
            // Compile-time assertion: for an invalid literal this evaluates `0 - 1` on a
            // `usize` in a const context, which is rejected by the compiler.
            let _ = [(); 0 - (!(STR_VALID) as usize)];
            #[allow(unused_unsafe)]
            unsafe {
                core::ffi::CStr::from_bytes_with_nul_unchecked(STR)
            }
        }}
    }

    gum::debug!(
        target: LOG_TARGET,
        %worker_kind,
        %worker_pid,
        ?worker_dir_path,
        "unsharing the user namespace and calling pivot_root",
    );

    // The libc calls below need the path as a nul-terminated C string.
    let worker_dir_path_c = CString::new(worker_dir_path.as_os_str().as_bytes())
        .expect("on unix; the path will never contain 0 bytes; qed");

    // Wrapper around all the work to prevent repetitive error handling.
    //
    // # Errors
    //
    // It's the caller's responsibility to call `Error::last_os_error`. Note that that alone does
    // not give the context of which call failed, so we return a &str error.
    || -> Result<(), &'static str> {
        // SAFETY: We pass null-terminated C strings and use the APIs as documented. In fact, steps
        // (2) and (3) are adapted from the example in pivot_root(2), with the additional
        // change described in the `pivot_root(".", ".")` section.
        unsafe {
            // 1. `unshare` the user and the mount namespaces.
            if libc::unshare(libc::CLONE_NEWUSER | libc::CLONE_NEWNS) < 0 {
                return Err("unshare user and mount namespaces")
            }

            // 2. Setup mounts.
            //
            // Ensure that new root and its parent mount don't have shared propagation (which would
            // cause pivot_root() to return an error), and prevent propagation of mount events to
            // the initial mount namespace.
            if libc::mount(
                ptr::null(),
                cstr!("/").as_ptr(),
                ptr::null(),
                libc::MS_REC | libc::MS_PRIVATE,
                ptr::null(),
            ) < 0
            {
                return Err("mount MS_PRIVATE")
            }
            // Ensure that the new root is a mount point.
            //
            // Execute workers and the pivot_root check get a read-only mount; any other kind
            // (i.e. prepare) keeps the mount writable.
            let additional_flags =
                if let WorkerKind::Execute | WorkerKind::CheckPivotRoot = worker_kind {
                    libc::MS_RDONLY
                } else {
                    0
                };
            // Bind-mount the worker directory onto itself. The remaining flags forbid
            // executing files, device nodes and set-uid binaries, and disable access-time
            // updates on this mount.
            if libc::mount(
                worker_dir_path_c.as_ptr(),
                worker_dir_path_c.as_ptr(),
                ptr::null(), // ignored when MS_BIND is used
                libc::MS_BIND |
                    libc::MS_REC | libc::MS_NOEXEC |
                    libc::MS_NODEV | libc::MS_NOSUID |
                    libc::MS_NOATIME | additional_flags,
                ptr::null(), // ignored when MS_BIND is used
            ) < 0
            {
                return Err("mount MS_BIND")
            }

            // 3. `pivot_root` to the worker directory.
            if libc::chdir(worker_dir_path_c.as_ptr()) < 0 {
                return Err("chdir to worker dir path")
            }
            // `pivot_root(".", ".")`: see the section of that name in pivot_root(2).
            if libc::syscall(libc::SYS_pivot_root, cstr!(".").as_ptr(), cstr!(".").as_ptr()) < 0 {
                return Err("pivot_root")
            }
            // Lazily detach what is mounted at `.`; per the error text this is the old root.
            if libc::umount2(cstr!(".").as_ptr(), libc::MNT_DETACH) < 0 {
                return Err("umount the old root mount point")
            }
        }

        Ok(())
    }()
    .map_err(|err_ctx| {
        // The closure returns immediately after a failing call, so the last OS error still
        // belongs to the step named in `err_ctx`.
        let err = std::io::Error::last_os_error();
        format!("{}: {}", err_ctx, err)
    })?;

    // Do some assertions.
    if env::current_dir().map_err(|err| err.to_string())? != Path::new("/") {
        return Err("expected current dir after pivot_root to be `/`".into())
    }
    // Going up from the root must leave us at the root.
    env::set_current_dir("..").map_err(|err| err.to_string())?;
    if env::current_dir().map_err(|err| err.to_string())? != Path::new("/") {
        return Err("expected not to be able to break out of new root by doing `..`".into())
    }

    Ok(())
}
/// The [landlock] docs say it best:
/// Verify that the process was spawned with a cleared environment, so that malicious code
/// cannot read anything from it.
///
/// Every unexpected variable is logged as an error (all of them, not just the first).
/// `worker_kind` and `worker_pid` are only used to tag those log lines.
///
/// Returns `true` when no unexpected variable was found.
pub fn check_env_vars_were_cleared(worker_kind: WorkerKind, worker_pid: u32) -> bool {
    let mut all_cleared = true;

    for (key, value) in std::env::vars_os() {
        // TODO: *theoretically* the value (or mere presence) of `RUST_LOG` can be a source of
        // randomness for malicious code. In the future we can remove it also and log in the host;
        // see <https://github.com/paritytech/polkadot/issues/7117>.
        let tolerated = key == "RUST_LOG";

        // An exception for MacOS. This is not a secure platform anyway, so we let it slide.
        #[cfg(target_os = "macos")]
        let tolerated = tolerated || key == "__CF_USER_TEXT_ENCODING";

        if tolerated {
            continue
        }

        gum::error!(
            target: LOG_TARGET,
            %worker_kind,
            %worker_pid,
            ?key,
            ?value,
            "env var was present that should have been removed",
        );
        all_cleared = false;
    }

    all_cleared
}
/// The [landlock] docs say it best:
///
/// > "Landlock is a security feature available since Linux 5.13. The goal is to enable to restrict
/// ambient rights (e.g., global filesystem access) for a set of processes by creating safe security
@@ -52,14 +211,21 @@ impl LandlockStatus {
/// [landlock]: https://docs.rs/landlock/latest/landlock/index.html
#[cfg(target_os = "linux")]
pub mod landlock {
use landlock::{Access, AccessFs, Ruleset, RulesetAttr, RulesetError, RulesetStatus, ABI};
pub use landlock::RulesetStatus;
use crate::{worker::WorkerKind, LOG_TARGET};
use landlock::*;
use std::{
fmt,
path::{Path, PathBuf},
};
/// Landlock ABI version. We use ABI V1 because:
///
/// 1. It is supported by our reference kernel version.
/// 2. Later versions do not (yet) provide additional security.
///
/// # Versions (June 2023)
/// # Versions (as of June 2023)
///
/// - Polkadot reference kernel version: 5.16+
/// - ABI V1: 5.13 - introduces landlock, including full restrictions on file reads
@@ -83,46 +249,103 @@ pub mod landlock {
/// supports it or if it introduces some new feature that is beneficial to security.
pub const LANDLOCK_ABI: ABI = ABI::V1;
// TODO: <https://github.com/landlock-lsm/rust-landlock/issues/36>
/// Returns to what degree landlock is enabled with the given ABI on the current Linux
/// environment.
pub fn get_status() -> Result<RulesetStatus, Box<dyn std::error::Error>> {
match std::thread::spawn(|| try_restrict_thread()).join() {
Ok(Ok(status)) => Ok(status),
Ok(Err(ruleset_err)) => Err(ruleset_err.into()),
Err(_err) => Err("a panic occurred in try_restrict_thread".into()),
/// Errors that can occur while trying to apply landlock restrictions.
#[derive(Debug)]
pub enum TryRestrictError {
    /// No landlock rule could be created for the given exception path.
    InvalidExceptionPath(PathBuf),
    /// An error reported by the landlock crate while building or enforcing the ruleset.
    RulesetError(RulesetError),
}

// Lets `?` convert landlock's `RulesetError` into `TryRestrictError`.
impl From<RulesetError> for TryRestrictError {
    fn from(err: RulesetError) -> Self {
        Self::RulesetError(err)
    }
}
/// Based on the given `status`, returns a single bool indicating whether the given landlock
/// ABI is fully enabled on the current Linux environment.
pub fn status_is_fully_enabled(
status: &Result<RulesetStatus, Box<dyn std::error::Error>>,
) -> bool {
matches!(status, Ok(RulesetStatus::FullyEnforced))
/// Human-readable rendering: the path variant prints the offending path in its `Debug` form,
/// the ruleset variant forwards the wrapped error's own `Display` output.
impl fmt::Display for TryRestrictError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InvalidExceptionPath(path) => write!(f, "invalid exception path: {:?}", path),
            // Format the inner error directly; going through `to_string()` first would only
            // allocate a temporary `String` to produce the same text.
            Self::RulesetError(err) => write!(f, "ruleset error: {}", err),
        }
    }
}
impl std::error::Error for TryRestrictError {}
/// Try to enable landlock for the given kind of worker.
///
/// The worker directory is the only filesystem exception that is granted: prepare workers may
/// write files beneath it, execute workers may read files beneath it. `worker_pid` is only used
/// for logging.
///
/// # Panics
///
/// Panics when called with `WorkerKind::CheckPivotRoot`, which exists only for the pivot_root
/// check.
pub fn enable_for_worker(
    worker_kind: WorkerKind,
    worker_pid: u32,
    worker_dir_path: &Path,
) -> Result<RulesetStatus, Box<dyn std::error::Error>> {
    // The single access right that stays allowed beneath the worker dir.
    let allowed_access: BitFlags<AccessFs> = match worker_kind {
        WorkerKind::Prepare => AccessFs::WriteFile.into(),
        WorkerKind::Execute => AccessFs::ReadFile.into(),
        WorkerKind::CheckPivotRoot =>
            panic!("this should only be passed for checking pivot_root; qed"),
    };
    let exceptions: Vec<(PathBuf, BitFlags<AccessFs>)> =
        vec![(worker_dir_path.to_owned(), allowed_access)];

    gum::debug!(
        target: LOG_TARGET,
        %worker_kind,
        %worker_pid,
        ?worker_dir_path,
        "enabling landlock with exceptions: {:?}",
        exceptions,
    );

    let ruleset_status = try_restrict(exceptions)?;
    Ok(ruleset_status)
}
// TODO: <https://github.com/landlock-lsm/rust-landlock/issues/36>
/// Runs a check for landlock and returns a single bool indicating whether the given landlock
/// ABI is fully enabled on the current Linux environment.
pub fn check_is_fully_enabled() -> bool {
status_is_fully_enabled(&get_status())
let status_from_thread: Result<RulesetStatus, Box<dyn std::error::Error>> =
match std::thread::spawn(|| try_restrict(std::iter::empty::<(PathBuf, AccessFs)>()))
.join()
{
Ok(Ok(status)) => Ok(status),
Ok(Err(ruleset_err)) => Err(ruleset_err.into()),
Err(_err) => Err("a panic occurred in try_restrict".into()),
};
matches!(status_from_thread, Ok(RulesetStatus::FullyEnforced))
}
/// Tries to restrict the current thread with the following landlock access controls:
/// Tries to restrict the current thread (should only be called in a process' main thread) with
/// the following landlock access controls:
///
/// 1. all global filesystem access
/// 2. ... more may be supported in the future.
/// 1. all global filesystem access restricted, with optional exceptions
/// 2. ... more sandbox types (e.g. networking) may be supported in the future.
///
/// If landlock is not supported in the current environment this is simply a noop.
///
/// # Returns
///
/// The status of the restriction (whether it was fully, partially, or not-at-all enforced).
pub fn try_restrict_thread() -> Result<RulesetStatus, RulesetError> {
let status = Ruleset::new()
.handle_access(AccessFs::from_all(LANDLOCK_ABI))?
.create()?
.restrict_self()?;
fn try_restrict<I, P, A>(fs_exceptions: I) -> Result<RulesetStatus, TryRestrictError>
where
    I: IntoIterator<Item = (P, A)>,
    P: AsRef<Path>,
    A: Into<BitFlags<AccessFs>>,
{
    // Start from a ruleset that handles every filesystem access right of `LANDLOCK_ABI`.
    let mut ruleset =
        Ruleset::new().handle_access(AccessFs::from_all(LANDLOCK_ABI))?.create()?;
    // Add one rule set per exception: `access_bits` stays allowed beneath `fs_path`.
    for (fs_path, access_bits) in fs_exceptions {
        let paths = &[fs_path.as_ref().to_owned()];
        // Peekable so that an empty rule iterator can be detected before it is consumed.
        let mut rules = path_beneath_rules(paths, access_bits).peekable();
        if rules.peek().is_none() {
            // `path_beneath_rules` silently ignores missing paths, so check for it manually.
            return Err(TryRestrictError::InvalidExceptionPath(fs_path.as_ref().to_owned()))
        }
        ruleset = ruleset.add_rules(rules)?;
    }
    // Enforce the ruleset and report only the ruleset part of the resulting status.
    let status = ruleset.restrict_self()?;
    Ok(status.ruleset)
}
@@ -132,55 +355,114 @@ pub mod landlock {
use std::{fs, io::ErrorKind, thread};
#[test]
fn restricted_thread_cannot_access_fs() {
fn restricted_thread_cannot_read_file() {
// TODO: This would be nice: <https://github.com/rust-lang/rust/issues/68007>.
if !check_is_fully_enabled() {
return
}
// Restricted thread cannot read from FS.
let handle = thread::spawn(|| {
// Write to a tmp file, this should succeed before landlock is applied.
let text = "foo";
let tmpfile = tempfile::NamedTempFile::new().unwrap();
let path = tmpfile.path();
fs::write(path, text).unwrap();
let s = fs::read_to_string(path).unwrap();
assert_eq!(s, text);
let handle =
thread::spawn(|| {
// Create, write, and read two tmp files. This should succeed before any
// landlock restrictions are applied.
const TEXT: &str = "foo";
let tmpfile1 = tempfile::NamedTempFile::new().unwrap();
let path1 = tmpfile1.path();
let tmpfile2 = tempfile::NamedTempFile::new().unwrap();
let path2 = tmpfile2.path();
let status = try_restrict_thread().unwrap();
if !matches!(status, RulesetStatus::FullyEnforced) {
panic!("Ruleset should be enforced since we checked if landlock is enabled");
}
fs::write(path1, TEXT).unwrap();
let s = fs::read_to_string(path1).unwrap();
assert_eq!(s, TEXT);
fs::write(path2, TEXT).unwrap();
let s = fs::read_to_string(path2).unwrap();
assert_eq!(s, TEXT);
// Try to read from the tmp file after landlock.
let result = fs::read_to_string(path);
assert!(matches!(
result,
Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
));
});
// Apply Landlock with a read exception for only one of the files.
let status = try_restrict(vec![(path1, AccessFs::ReadFile)]);
if !matches!(status, Ok(RulesetStatus::FullyEnforced)) {
panic!("Ruleset should be enforced since we checked if landlock is enabled: {:?}", status);
}
// Try to read from both files, only tmpfile1 should succeed.
let result = fs::read_to_string(path1);
assert!(matches!(
result,
Ok(s) if s == TEXT
));
let result = fs::read_to_string(path2);
assert!(matches!(
result,
Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
));
// Apply Landlock for all files.
let status = try_restrict(std::iter::empty::<(PathBuf, AccessFs)>());
if !matches!(status, Ok(RulesetStatus::FullyEnforced)) {
panic!("Ruleset should be enforced since we checked if landlock is enabled: {:?}", status);
}
// Try to read from tmpfile1 after landlock, it should fail.
let result = fs::read_to_string(path1);
assert!(matches!(
result,
Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
));
});
assert!(handle.join().is_ok());
}
#[test]
fn restricted_thread_cannot_write_file() {
// TODO: This would be nice: <https://github.com/rust-lang/rust/issues/68007>.
if !check_is_fully_enabled() {
return
}
// Restricted thread cannot write to FS.
let handle = thread::spawn(|| {
let text = "foo";
let tmpfile = tempfile::NamedTempFile::new().unwrap();
let path = tmpfile.path();
let handle =
thread::spawn(|| {
// Create and write two tmp files. This should succeed before any landlock
// restrictions are applied.
const TEXT: &str = "foo";
let tmpfile1 = tempfile::NamedTempFile::new().unwrap();
let path1 = tmpfile1.path();
let tmpfile2 = tempfile::NamedTempFile::new().unwrap();
let path2 = tmpfile2.path();
let status = try_restrict_thread().unwrap();
if !matches!(status, RulesetStatus::FullyEnforced) {
panic!("Ruleset should be enforced since we checked if landlock is enabled");
}
fs::write(path1, TEXT).unwrap();
fs::write(path2, TEXT).unwrap();
// Try to write to the tmp file after landlock.
let result = fs::write(path, text);
assert!(matches!(
result,
Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
));
});
// Apply Landlock with a write exception for only one of the files.
let status = try_restrict(vec![(path1, AccessFs::WriteFile)]);
if !matches!(status, Ok(RulesetStatus::FullyEnforced)) {
panic!("Ruleset should be enforced since we checked if landlock is enabled: {:?}", status);
}
// Try to write to both files, only tmpfile1 should succeed.
let result = fs::write(path1, TEXT);
assert!(matches!(result, Ok(_)));
let result = fs::write(path2, TEXT);
assert!(matches!(
result,
Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
));
// Apply Landlock for all files.
let status = try_restrict(std::iter::empty::<(PathBuf, AccessFs)>());
if !matches!(status, Ok(RulesetStatus::FullyEnforced)) {
panic!("Ruleset should be enforced since we checked if landlock is enabled: {:?}", status);
}
// Try to write to tmpfile1 after landlock, it should fail.
let result = fs::write(path1, TEXT);
assert!(matches!(
result,
Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
));
});
assert!(handle.join().is_ok());
}
@@ -0,0 +1,35 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Shared functions for getting the known worker files.
use std::path::{Path, PathBuf};
const WORKER_EXECUTE_ARTIFACT_NAME: &str = "artifact";
const WORKER_PREPARE_TMP_ARTIFACT_NAME: &str = "tmp-artifact";
const WORKER_SOCKET_NAME: &str = "socket";

/// Builds the path of the entry named `file_name` directly inside `worker_dir_path`.
fn worker_dir_entry(worker_dir_path: &Path, file_name: &str) -> PathBuf {
    let mut entry = worker_dir_path.to_path_buf();
    entry.push(file_name);
    entry
}

/// Returns `<worker_dir_path>/artifact`, the known artifact file name for execute workers.
pub fn execute_artifact(worker_dir_path: &Path) -> PathBuf {
    worker_dir_entry(worker_dir_path, WORKER_EXECUTE_ARTIFACT_NAME)
}

/// Returns `<worker_dir_path>/tmp-artifact`, the known temporary artifact file name for prepare
/// workers.
pub fn prepare_tmp_artifact(worker_dir_path: &Path) -> PathBuf {
    worker_dir_entry(worker_dir_path, WORKER_PREPARE_TMP_ARTIFACT_NAME)
}

/// Returns `<worker_dir_path>/socket`, the known socket file name inside the worker dir.
pub fn socket(worker_dir_path: &Path) -> PathBuf {
    worker_dir_entry(worker_dir_path, WORKER_SOCKET_NAME)
}