mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-05-07 01:28:07 +00:00
PVF: add landlock sandboxing (#7303)
* Begin adding landlock + test * Move PVF implementer's guide section to own page, document security * Implement test * Add some docs * Do some cleanup * Fix typo * Warn on host startup if landlock is not supported * Clarify docs a bit * Minor improvements * Add some docs about determinism * Address review comments (mainly add warning on landlock error) * Update node/core/pvf/src/host.rs Co-authored-by: Andrei Sandu <54316454+sandreim@users.noreply.github.com> * Update node/core/pvf/src/host.rs Co-authored-by: Andrei Sandu <54316454+sandreim@users.noreply.github.com> * Fix unused fn * Update ABI docs to reflect latest discussions * Remove outdated notes * Try to trigger new test-linux-oldkernel-stable job Job introduced in https://github.com/paritytech/polkadot/pull/7371. --------- Co-authored-by: Andrei Sandu <54316454+sandreim@users.noreply.github.com>
This commit is contained in:
Generated
+14
@@ -4029,6 +4029,17 @@ dependencies = [
|
||||
"kvdb",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "landlock"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "520baa32708c4e957d2fc3a186bc5bd8d26637c33137f399ddfc202adb240068"
|
||||
dependencies = [
|
||||
"enumflags2",
|
||||
"libc",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
@@ -7424,8 +7435,10 @@ dependencies = [
|
||||
name = "polkadot-node-core-pvf-common"
|
||||
version = "0.9.43"
|
||||
dependencies = [
|
||||
"assert_matches",
|
||||
"cpu-time",
|
||||
"futures",
|
||||
"landlock",
|
||||
"libc",
|
||||
"parity-scale-codec",
|
||||
"polkadot-parachain",
|
||||
@@ -7438,6 +7451,7 @@ dependencies = [
|
||||
"sp-io",
|
||||
"sp-tracing",
|
||||
"substrate-build-script-utils",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"tracing-gum",
|
||||
]
|
||||
|
||||
@@ -25,5 +25,12 @@ sp-externalities = { git = "https://github.com/paritytech/substrate", branch = "
|
||||
sp-io = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
|
||||
[target.'cfg(target_os = "linux")'.dependencies]
|
||||
landlock = "0.2.0"
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = "1.4.0"
|
||||
tempfile = "3.3.0"
|
||||
|
||||
[build-dependencies]
|
||||
substrate-build-script-utils = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
|
||||
+11
-3
@@ -16,6 +16,8 @@
|
||||
|
||||
//! Functionality common to both prepare and execute workers.
|
||||
|
||||
pub mod security;
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
use cpu_time::ProcessTime;
|
||||
use futures::never::Never;
|
||||
@@ -203,7 +205,7 @@ pub mod thread {
|
||||
};
|
||||
|
||||
/// Contains the outcome of waiting on threads, or `Pending` if none are ready.
|
||||
#[derive(Clone, Copy)]
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum WaitOutcome {
|
||||
Finished,
|
||||
TimedOut,
|
||||
@@ -224,8 +226,14 @@ pub mod thread {
|
||||
Arc::new((Mutex::new(WaitOutcome::Pending), Condvar::new()))
|
||||
}
|
||||
|
||||
/// Runs a thread, afterwards notifying the threads waiting on the condvar. Catches panics and
|
||||
/// resumes them after triggering the condvar, so that the waiting thread is notified on panics.
|
||||
/// Runs a worker thread. Will first enable security features, and afterwards notify the threads waiting on the
|
||||
/// condvar. Catches panics during execution and resumes the panics after triggering the condvar, so that the
|
||||
/// waiting thread is notified on panics.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Returns the thread's join handle. Calling `.join()` on it returns the result of executing
|
||||
/// `f()`, as well as whether we were able to enable sandboxing.
|
||||
pub fn spawn_worker_thread<F, R>(
|
||||
name: &str,
|
||||
f: F,
|
||||
@@ -0,0 +1,188 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Functionality for securing workers.
|
||||
//!
|
||||
//! This is needed because workers are used to compile and execute untrusted code (PVFs).
|
||||
|
||||
/// To what degree landlock is enabled. It's a separate struct from `RulesetStatus` because that is
|
||||
/// only available on Linux, plus this has a nicer name.
|
||||
pub enum LandlockStatus {
|
||||
FullyEnforced,
|
||||
PartiallyEnforced,
|
||||
NotEnforced,
|
||||
/// Thread panicked, we don't know what the status is.
|
||||
Unavailable,
|
||||
}
|
||||
|
||||
impl LandlockStatus {
|
||||
#[cfg(target_os = "linux")]
|
||||
pub fn from_ruleset_status(ruleset_status: ::landlock::RulesetStatus) -> Self {
|
||||
use ::landlock::RulesetStatus::*;
|
||||
match ruleset_status {
|
||||
FullyEnforced => LandlockStatus::FullyEnforced,
|
||||
PartiallyEnforced => LandlockStatus::PartiallyEnforced,
|
||||
NotEnforced => LandlockStatus::NotEnforced,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The [landlock] docs say it best:
|
||||
///
|
||||
/// > "Landlock is a security feature available since Linux 5.13. The goal is to enable to restrict
|
||||
/// ambient rights (e.g., global filesystem access) for a set of processes by creating safe security
|
||||
/// sandboxes as new security layers in addition to the existing system-wide access-controls. This
|
||||
/// kind of sandbox is expected to help mitigate the security impact of bugs, unexpected or
|
||||
/// malicious behaviors in applications. Landlock empowers any process, including unprivileged ones,
|
||||
/// to securely restrict themselves."
|
||||
///
|
||||
/// [landlock]: https://docs.rs/landlock/latest/landlock/index.html
|
||||
#[cfg(target_os = "linux")]
|
||||
pub mod landlock {
|
||||
use landlock::{Access, AccessFs, Ruleset, RulesetAttr, RulesetError, RulesetStatus, ABI};
|
||||
|
||||
/// Landlock ABI version. We use ABI V1 because:
|
||||
///
|
||||
/// 1. It is supported by our reference kernel version.
|
||||
/// 2. Later versions do not (yet) provide additional security.
|
||||
///
|
||||
/// # Versions (June 2023)
|
||||
///
|
||||
/// - Polkadot reference kernel version: 5.16+
|
||||
/// - ABI V1: 5.13 - introduces landlock, including full restrictions on file reads
|
||||
/// - ABI V2: 5.19 - adds ability to configure file renaming (not used by us)
|
||||
///
|
||||
/// # Determinism
|
||||
///
|
||||
/// You may wonder whether we could always use the latest ABI instead of only the ABI supported
|
||||
/// by the reference kernel version. It seems plausible, since landlock provides a best-effort
|
||||
/// approach to enabling sandboxing. For example, if the reference version only supported V1 and
|
||||
/// we were on V2, then landlock would use V2 if it was supported on the current machine, and
|
||||
/// just fall back to V1 if not.
|
||||
///
|
||||
/// The issue with this is indeterminacy. If half of validators were on V2 and half were on V1,
|
||||
/// they may have different semantics on some PVFs. So a malicious PVF now has a new attack
|
||||
/// vector: they can exploit this indeterminism between landlock ABIs!
|
||||
///
|
||||
/// On the other hand we do want validators to be as secure as possible and protect their keys
|
||||
/// from attackers. And, the risk with indeterminacy is low and there are other indeterminacy
|
||||
/// vectors anyway. So we will only upgrade to a new ABI if either the reference kernel version
|
||||
/// supports it or if it introduces some new feature that is beneficial to security.
|
||||
pub const LANDLOCK_ABI: ABI = ABI::V1;
|
||||
|
||||
// TODO: <https://github.com/landlock-lsm/rust-landlock/issues/36>
|
||||
/// Returns to what degree landlock is enabled with the given ABI on the current Linux
|
||||
/// environment.
|
||||
pub fn get_status() -> Result<RulesetStatus, Box<dyn std::error::Error>> {
|
||||
match std::thread::spawn(|| try_restrict_thread()).join() {
|
||||
Ok(Ok(status)) => Ok(status),
|
||||
Ok(Err(ruleset_err)) => Err(ruleset_err.into()),
|
||||
Err(_err) => Err("a panic occurred in try_restrict_thread".into()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Based on the given `status`, returns a single bool indicating whether the given landlock
|
||||
/// ABI is fully enabled on the current Linux environment.
|
||||
pub fn status_is_fully_enabled(
|
||||
status: &Result<RulesetStatus, Box<dyn std::error::Error>>,
|
||||
) -> bool {
|
||||
matches!(status, Ok(RulesetStatus::FullyEnforced))
|
||||
}
|
||||
|
||||
/// Runs a check for landlock and returns a single bool indicating whether the given landlock
|
||||
/// ABI is fully enabled on the current Linux environment.
|
||||
pub fn check_is_fully_enabled() -> bool {
|
||||
status_is_fully_enabled(&get_status())
|
||||
}
|
||||
|
||||
/// Tries to restrict the current thread with the following landlock access controls:
|
||||
///
|
||||
/// 1. all global filesystem access
|
||||
/// 2. ... more may be supported in the future.
|
||||
///
|
||||
/// If landlock is not supported in the current environment this is simply a noop.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// The status of the restriction (whether it was fully, partially, or not-at-all enforced).
|
||||
pub fn try_restrict_thread() -> Result<RulesetStatus, RulesetError> {
|
||||
let status = Ruleset::new()
|
||||
.handle_access(AccessFs::from_all(LANDLOCK_ABI))?
|
||||
.create()?
|
||||
.restrict_self()?;
|
||||
Ok(status.ruleset)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::{fs, io::ErrorKind, thread};
|
||||
|
||||
#[test]
|
||||
fn restricted_thread_cannot_access_fs() {
|
||||
// TODO: This would be nice: <https://github.com/rust-lang/rust/issues/68007>.
|
||||
if !check_is_fully_enabled() {
|
||||
return
|
||||
}
|
||||
|
||||
// Restricted thread cannot read from FS.
|
||||
let handle = thread::spawn(|| {
|
||||
// Write to a tmp file, this should succeed before landlock is applied.
|
||||
let text = "foo";
|
||||
let tmpfile = tempfile::NamedTempFile::new().unwrap();
|
||||
let path = tmpfile.path();
|
||||
fs::write(path, text).unwrap();
|
||||
let s = fs::read_to_string(path).unwrap();
|
||||
assert_eq!(s, text);
|
||||
|
||||
let status = try_restrict_thread().unwrap();
|
||||
if !matches!(status, RulesetStatus::FullyEnforced) {
|
||||
panic!("Ruleset should be enforced since we checked if landlock is enabled");
|
||||
}
|
||||
|
||||
// Try to read from the tmp file after landlock.
|
||||
let result = fs::read_to_string(path);
|
||||
assert!(matches!(
|
||||
result,
|
||||
Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
|
||||
));
|
||||
});
|
||||
|
||||
assert!(handle.join().is_ok());
|
||||
|
||||
// Restricted thread cannot write to FS.
|
||||
let handle = thread::spawn(|| {
|
||||
let text = "foo";
|
||||
let tmpfile = tempfile::NamedTempFile::new().unwrap();
|
||||
let path = tmpfile.path();
|
||||
|
||||
let status = try_restrict_thread().unwrap();
|
||||
if !matches!(status, RulesetStatus::FullyEnforced) {
|
||||
panic!("Ruleset should be enforced since we checked if landlock is enabled");
|
||||
}
|
||||
|
||||
// Try to write to the tmp file after landlock.
|
||||
let result = fs::write(path, text);
|
||||
assert!(matches!(
|
||||
result,
|
||||
Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
|
||||
));
|
||||
});
|
||||
|
||||
assert!(handle.join().is_ok());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -28,7 +28,9 @@ use polkadot_node_core_pvf_common::{
|
||||
executor_intf::NATIVE_STACK_MAX,
|
||||
framed_recv, framed_send,
|
||||
worker::{
|
||||
bytes_to_path, cpu_time_monitor_loop, stringify_panic_payload,
|
||||
bytes_to_path, cpu_time_monitor_loop,
|
||||
security::LandlockStatus,
|
||||
stringify_panic_payload,
|
||||
thread::{self, WaitOutcome},
|
||||
worker_event_loop,
|
||||
},
|
||||
@@ -170,11 +172,22 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) {
|
||||
let execute_thread = thread::spawn_worker_thread_with_stack_size(
|
||||
"execute thread",
|
||||
move || {
|
||||
validate_using_artifact(
|
||||
&compiled_artifact_blob,
|
||||
&params,
|
||||
executor_2,
|
||||
cpu_time_start,
|
||||
// Try to enable landlock.
|
||||
#[cfg(target_os = "linux")]
|
||||
let landlock_status = polkadot_node_core_pvf_common::worker::security::landlock::try_restrict_thread()
|
||||
.map(LandlockStatus::from_ruleset_status)
|
||||
.map_err(|e| e.to_string());
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
let landlock_status: Result<LandlockStatus, String> = Ok(LandlockStatus::NotEnforced);
|
||||
|
||||
(
|
||||
validate_using_artifact(
|
||||
&compiled_artifact_blob,
|
||||
&params,
|
||||
executor_2,
|
||||
cpu_time_start,
|
||||
),
|
||||
landlock_status,
|
||||
)
|
||||
},
|
||||
Arc::clone(&condvar),
|
||||
@@ -187,9 +200,24 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) {
|
||||
let response = match outcome {
|
||||
WaitOutcome::Finished => {
|
||||
let _ = cpu_time_monitor_tx.send(());
|
||||
execute_thread
|
||||
.join()
|
||||
.unwrap_or_else(|e| Response::Panic(stringify_panic_payload(e)))
|
||||
let (result, landlock_status) = execute_thread.join().unwrap_or_else(|e| {
|
||||
(
|
||||
Response::Panic(stringify_panic_payload(e)),
|
||||
Ok(LandlockStatus::Unavailable),
|
||||
)
|
||||
});
|
||||
|
||||
// Log if landlock threw an error.
|
||||
if let Err(err) = landlock_status {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
"error enabling landlock: {}",
|
||||
err
|
||||
);
|
||||
}
|
||||
|
||||
result
|
||||
},
|
||||
// If the CPU thread is not selected, we signal it to end, the join handle is
|
||||
// dropped and the thread will finish in the background.
|
||||
|
||||
@@ -35,7 +35,9 @@ use polkadot_node_core_pvf_common::{
|
||||
prepare::{MemoryStats, PrepareJobKind, PrepareStats},
|
||||
pvf::PvfPrepData,
|
||||
worker::{
|
||||
bytes_to_path, cpu_time_monitor_loop, stringify_panic_payload,
|
||||
bytes_to_path, cpu_time_monitor_loop,
|
||||
security::LandlockStatus,
|
||||
stringify_panic_payload,
|
||||
thread::{self, WaitOutcome},
|
||||
worker_event_loop,
|
||||
},
|
||||
@@ -155,6 +157,14 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) {
|
||||
let prepare_thread = thread::spawn_worker_thread(
|
||||
"prepare thread",
|
||||
move || {
|
||||
// Try to enable landlock.
|
||||
#[cfg(target_os = "linux")]
|
||||
let landlock_status = polkadot_node_core_pvf_common::worker::security::landlock::try_restrict_thread()
|
||||
.map(LandlockStatus::from_ruleset_status)
|
||||
.map_err(|e| e.to_string());
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
let landlock_status: Result<LandlockStatus, String> = Ok(LandlockStatus::NotEnforced);
|
||||
|
||||
#[allow(unused_mut)]
|
||||
let mut result = prepare_artifact(pvf, cpu_time_start);
|
||||
|
||||
@@ -173,7 +183,7 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) {
|
||||
});
|
||||
}
|
||||
|
||||
result
|
||||
(result, landlock_status)
|
||||
},
|
||||
Arc::clone(&condvar),
|
||||
WaitOutcome::Finished,
|
||||
@@ -186,13 +196,16 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) {
|
||||
let _ = cpu_time_monitor_tx.send(());
|
||||
|
||||
match prepare_thread.join().unwrap_or_else(|err| {
|
||||
Err(PrepareError::Panic(stringify_panic_payload(err)))
|
||||
(
|
||||
Err(PrepareError::Panic(stringify_panic_payload(err))),
|
||||
Ok(LandlockStatus::Unavailable),
|
||||
)
|
||||
}) {
|
||||
Err(err) => {
|
||||
(Err(err), _) => {
|
||||
// Serialized error will be written into the socket.
|
||||
Err(err)
|
||||
},
|
||||
Ok(ok) => {
|
||||
(Ok(ok), landlock_status) => {
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
let (artifact, cpu_time_elapsed) = ok;
|
||||
#[cfg(target_os = "linux")]
|
||||
@@ -208,6 +221,16 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) {
|
||||
max_rss: extract_max_rss_stat(max_rss, worker_pid),
|
||||
};
|
||||
|
||||
// Log if landlock threw an error.
|
||||
if let Err(err) = landlock_status {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
"error enabling landlock: {}",
|
||||
err
|
||||
);
|
||||
}
|
||||
|
||||
// Write the serialized artifact into a temp file.
|
||||
//
|
||||
// PVF host only keeps artifacts statuses in its memory, successfully
|
||||
|
||||
@@ -140,6 +140,7 @@ struct ExecutePvfInputs {
|
||||
}
|
||||
|
||||
/// Configuration for the validation host.
|
||||
#[derive(Debug)]
|
||||
pub struct Config {
|
||||
/// The root directory where the prepared artifacts can be stored.
|
||||
pub cache_path: PathBuf,
|
||||
@@ -189,6 +190,11 @@ impl Config {
|
||||
/// In that case all pending requests will be canceled, dropping the result senders and new ones
|
||||
/// will be rejected.
|
||||
pub fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Future<Output = ()>) {
|
||||
gum::debug!(target: LOG_TARGET, ?config, "starting PVF validation host");
|
||||
|
||||
// Run checks for supported security features once per host startup.
|
||||
warn_if_no_landlock();
|
||||
|
||||
let (to_host_tx, to_host_rx) = mpsc::channel(10);
|
||||
|
||||
let validation_host = ValidationHost { to_host_tx };
|
||||
@@ -854,6 +860,30 @@ fn pulse_every(interval: std::time::Duration) -> impl futures::Stream<Item = ()>
|
||||
.map(|_| ())
|
||||
}
|
||||
|
||||
/// Check if landlock is supported and emit a warning if not.
|
||||
fn warn_if_no_landlock() {
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
use polkadot_node_core_pvf_common::worker::security::landlock;
|
||||
let status = landlock::get_status();
|
||||
if !landlock::status_is_fully_enabled(&status) {
|
||||
let abi = landlock::LANDLOCK_ABI as u8;
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
?status,
|
||||
%abi,
|
||||
"Cannot fully enable landlock, a Linux kernel security feature. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider upgrading the kernel version for maximum security."
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
"Cannot enable landlock, a Linux kernel security feature. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider running on Linux with landlock support for maximum security."
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -60,6 +60,7 @@
|
||||
- [Utility Subsystems](node/utility/README.md)
|
||||
- [Availability Store](node/utility/availability-store.md)
|
||||
- [Candidate Validation](node/utility/candidate-validation.md)
|
||||
- [PVF Host and Workers](node/utility/pvf-host-and-workers.md)
|
||||
- [Provisioner](node/utility/provisioner.md)
|
||||
- [Network Bridge](node/utility/network-bridge.md)
|
||||
- [Gossip Support](node/utility/gossip-support.md)
|
||||
|
||||
@@ -44,86 +44,10 @@ Once we have all parameters, we can spin up a background task to perform the val
|
||||
* The collator signature is valid
|
||||
* The PoV provided matches the `pov_hash` field of the descriptor
|
||||
|
||||
For more details please see [PVF Host and Workers](pvf-host-and-workers.md).
|
||||
|
||||
### Checking Validation Outputs
|
||||
|
||||
If we can assume the presence of the relay-chain state (that is, during processing [`CandidateValidationMessage`][CVM]`::ValidateFromChainState`) we can run all the checks that the relay-chain would run at the inclusion time thus confirming that the candidate will be accepted.
|
||||
|
||||
### PVF Host
|
||||
|
||||
The PVF host is responsible for handling requests to prepare and execute PVF
|
||||
code blobs.
|
||||
|
||||
One high-level goal is to make PVF operations as deterministic as possible, to
|
||||
reduce the rate of disputes. Disputes can happen due to e.g. a job timing out on
|
||||
one machine, but not another. While we do not yet have full determinism, there
|
||||
are some dispute reduction mechanisms in place right now.
|
||||
|
||||
#### Retrying execution requests
|
||||
|
||||
If the execution request fails during **preparation**, we will retry if it is
|
||||
possible that the preparation error was transient (e.g. if the error was a panic
|
||||
or time out). We will only retry preparation if another request comes in after
|
||||
15 minutes, to ensure any potential transient conditions had time to be
|
||||
resolved. We will retry up to 5 times.
|
||||
|
||||
If the actual **execution** of the artifact fails, we will retry once if it was
|
||||
a possibly transient error, to allow the conditions that led to the error to
|
||||
hopefully resolve. We use a more brief delay here (1 second as opposed to 15
|
||||
minutes for preparation (see above)), because a successful execution must happen
|
||||
in a short amount of time.
|
||||
|
||||
We currently know of the following specific cases that will lead to a retried
|
||||
execution request:
|
||||
|
||||
1. **OOM:** The host might have been temporarily low on memory due to other
|
||||
processes running on the same machine. **NOTE:** This case will lead to
|
||||
voting against the candidate (and possibly a dispute) if the retry is still
|
||||
not successful.
|
||||
2. **Artifact missing:** The prepared artifact might have been deleted due to
|
||||
operator error or some bug in the system.
|
||||
3. **Panic:** The worker thread panicked for some indeterminate reason, which
|
||||
may or may not be independent of the candidate or PVF.
|
||||
|
||||
#### Preparation timeouts
|
||||
|
||||
We use timeouts for both preparation and execution jobs to limit the amount of
|
||||
time they can take. As the time for a job can vary depending on the machine and
|
||||
load on the machine, this can potentially lead to disputes where some validators
|
||||
successfully execute a PVF and others don't.
|
||||
|
||||
One dispute mitigation we have in place is a more lenient timeout for
|
||||
preparation during execution than during pre-checking. The rationale is that the
|
||||
PVF has already passed pre-checking, so we know it should be valid, and we allow
|
||||
it to take longer than expected, as this is likely due to an issue with the
|
||||
machine and not the PVF.
|
||||
|
||||
#### CPU clock timeouts
|
||||
|
||||
Another timeout-related mitigation we employ is to measure the time taken by
|
||||
jobs using CPU time, rather than wall clock time. This is because the CPU time
|
||||
of a process is less variable under different system conditions. When the
|
||||
overall system is under heavy load, the wall clock time of a job is affected
|
||||
more than the CPU time.
|
||||
|
||||
#### Internal errors
|
||||
|
||||
In general, for errors not raising a dispute we have to be very careful. This is
|
||||
only sound, if we either:
|
||||
|
||||
1. Ruled out that error in pre-checking. If something is not checked in
|
||||
pre-checking, even if independent of the candidate and PVF, we must raise a
|
||||
dispute.
|
||||
2. We are 100% confident that it is a hardware/local issue: Like corrupted file,
|
||||
etc.
|
||||
|
||||
Reasoning: Otherwise it would be possible to register a PVF where candidates can
|
||||
not be checked, but we don't get a dispute - so nobody gets punished. Second, we
|
||||
end up with a finality stall that is not going to resolve!
|
||||
|
||||
There are some error conditions where we can't be sure whether the candidate is
|
||||
really invalid or some internal glitch occurred, e.g. panics. Whenever we are
|
||||
unsure, we can never treat an error as internal as we would abstain from voting.
|
||||
So we will first retry the candidate, and if the issue persists we are forced to
|
||||
vote invalid.
|
||||
|
||||
[CVM]: ../../types/overseer-protocol.md#validationrequesttype
|
||||
|
||||
@@ -0,0 +1,127 @@
|
||||
# PVF Host and Workers
|
||||
|
||||
The PVF host is responsible for handling requests to prepare and execute PVF
|
||||
code blobs, which it sends to PVF workers running in their own child processes.
|
||||
|
||||
This system has two high-level goals that we will touch on here: *determinism*
|
||||
and *security*.
|
||||
|
||||
## Determinism
|
||||
|
||||
One high-level goal is to make PVF operations as deterministic as possible, to
|
||||
reduce the rate of disputes. Disputes can happen due to e.g. a job timing out on
|
||||
one machine, but not another. While we do not have full determinism, there are
|
||||
some dispute reduction mechanisms in place right now.
|
||||
|
||||
### Retrying execution requests
|
||||
|
||||
If the execution request fails during **preparation**, we will retry if it is
|
||||
possible that the preparation error was transient (e.g. if the error was a panic
|
||||
or time out). We will only retry preparation if another request comes in after
|
||||
15 minutes, to ensure any potential transient conditions had time to be
|
||||
resolved. We will retry up to 5 times.
|
||||
|
||||
If the actual **execution** of the artifact fails, we will retry once if it was
|
||||
a possibly transient error, to allow the conditions that led to the error to
|
||||
hopefully resolve. We use a more brief delay here (1 second as opposed to 15
|
||||
minutes for preparation (see above)), because a successful execution must happen
|
||||
in a short amount of time.
|
||||
|
||||
We currently know of the following specific cases that will lead to a retried
|
||||
execution request:
|
||||
|
||||
1. **OOM:** The host might have been temporarily low on memory due to other
|
||||
processes running on the same machine. **NOTE:** This case will lead to
|
||||
voting against the candidate (and possibly a dispute) if the retry is still
|
||||
not successful.
|
||||
2. **Artifact missing:** The prepared artifact might have been deleted due to
|
||||
operator error or some bug in the system.
|
||||
3. **Panic:** The worker thread panicked for some indeterminate reason, which
|
||||
may or may not be independent of the candidate or PVF.
|
||||
|
||||
### Preparation timeouts
|
||||
|
||||
We use timeouts for both preparation and execution jobs to limit the amount of
|
||||
time they can take. As the time for a job can vary depending on the machine and
|
||||
load on the machine, this can potentially lead to disputes where some validators
|
||||
successfully execute a PVF and others don't.
|
||||
|
||||
One dispute mitigation we have in place is a more lenient timeout for
|
||||
preparation during execution than during pre-checking. The rationale is that the
|
||||
PVF has already passed pre-checking, so we know it should be valid, and we allow
|
||||
it to take longer than expected, as this is likely due to an issue with the
|
||||
machine and not the PVF.
|
||||
|
||||
### CPU clock timeouts
|
||||
|
||||
Another timeout-related mitigation we employ is to measure the time taken by
|
||||
jobs using CPU time, rather than wall clock time. This is because the CPU time
|
||||
of a process is less variable under different system conditions. When the
|
||||
overall system is under heavy load, the wall clock time of a job is affected
|
||||
more than the CPU time.
|
||||
|
||||
### Internal errors
|
||||
|
||||
In general, for errors not raising a dispute we have to be very careful. This is
|
||||
only sound, if we either:
|
||||
|
||||
1. Ruled out that error in pre-checking. If something is not checked in
|
||||
pre-checking, even if independent of the candidate and PVF, we must raise a
|
||||
dispute.
|
||||
2. We are 100% confident that it is a hardware/local issue, like a corrupted file,
|
||||
etc.
|
||||
|
||||
Reasoning: Otherwise it would be possible to register a PVF where candidates can
|
||||
not be checked, but we don't get a dispute - so nobody gets punished. Second, we
|
||||
end up with a finality stall that is not going to resolve!
|
||||
|
||||
There are some error conditions where we can't be sure whether the candidate is
|
||||
really invalid or some internal glitch occurred, e.g. panics. Whenever we are
|
||||
unsure, we can never treat an error as internal as we would abstain from voting.
|
||||
So we will first retry the candidate, and if the issue persists we are forced to
|
||||
vote invalid.
|
||||
|
||||
## Security
|
||||
|
||||
With [on-demand parachains](https://github.com/orgs/paritytech/projects/67), it
|
||||
is much easier to submit PVFs to the chain for preparation and execution. This
|
||||
makes it easier for erroneous disputes and slashing to occur, whether
|
||||
intentional (as a result of a malicious attacker) or not (a bug or operator
|
||||
error occurred).
|
||||
|
||||
Therefore, another goal of ours is to harden our security around PVFs, in order
|
||||
to protect the economic interests of validators and increase overall confidence
|
||||
in the system.
|
||||
|
||||
### Possible attacks / threat model
|
||||
|
||||
WebAssembly is already sandboxed, but there have already been multiple reported
|
||||
CVEs enabling remote code execution. See e.g. these two advisories from
|
||||
[Mar 2023](https://github.com/bytecodealliance/wasmtime/security/advisories/GHSA-ff4p-7xrq-q5r8)
|
||||
and [Jul 2022](https://github.com/bytecodealliance/wasmtime/security/advisories/GHSA-7f6x-jwh5-m9r4).
|
||||
|
||||
So what are we actually worried about? Things that come to mind:
|
||||
|
||||
1. **Consensus faults** - If an attacker can get some source of randomness they
|
||||
could vote against with 50% chance and cause unresolvable disputes.
|
||||
2. **Targeted slashes** - An attacker can target certain validators (e.g. some
|
||||
validators running on vulnerable hardware) and make them vote invalid and get
|
||||
them slashed.
|
||||
3. **Mass slashes** - With some source of randomness they can do an untargeted
|
||||
attack. I.e. a baddie can do significant economic damage by voting against
|
||||
with 1/3 chance, without even stealing keys or completely replacing the
|
||||
binary.
|
||||
4. **Stealing keys** - That would be pretty bad. Should not be possible with
|
||||
sandboxing. We should at least not allow filesystem-access or network access.
|
||||
5. **Taking control over the validator.** E.g. replacing the `polkadot` binary
|
||||
with a `polkadot-evil` binary. Should again not be possible with the above
|
||||
sandboxing in place.
|
||||
6. **Intercepting and manipulating packages** - Effect very similar to the
|
||||
above, hard to do without also being able to do 4 or 5.
|
||||
|
||||
### Restricting file-system access
|
||||
|
||||
A basic security mechanism is to make sure that any thread directly interfacing
|
||||
with untrusted code does not have access to the file-system. This provides some
|
||||
protection against attackers accessing sensitive data or modifying data on the
|
||||
host machine.
|
||||
Reference in New Issue
Block a user