From 4018994ed56fac69c5d9ccd508532b14d67775b0 Mon Sep 17 00:00:00 2001 From: Sergei Shulepov Date: Mon, 9 Nov 2020 16:39:37 +0100 Subject: [PATCH] Rename ExecutionMode to IsolationStrategy (#1932) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Rename ExecutionMode to IsolationStrategy Execution mode is too generic name and can imply a lot of different aspects of execution. The notion of isolation better describes the meant aspect. And while I am at it, I also renamed mode -> strategy cause it seems a bit more appropriate, although that is way more subjective. * Fix compilation in wasm_executor tests. * Add a comment to IsolationStrategy * Update comments on IsolationStrategy * Update node/core/candidate-validation/src/lib.rs Co-authored-by: Bastian Köcher * Accomodate the point on interruption * Update parachain/src/wasm_executor/mod.rs Co-authored-by: Andronik Ordian * Naming nits Co-authored-by: Bastian Köcher Co-authored-by: Andronik Ordian --- .../node/core/candidate-validation/src/lib.rs | 37 ++++++++------- polkadot/node/service/src/lib.rs | 12 ++--- polkadot/node/test/service/src/lib.rs | 2 +- polkadot/parachain/src/wasm_executor/mod.rs | 47 +++++++++++++++---- .../test-parachains/adder/collator/src/lib.rs | 4 +- .../test-parachains/tests/adder/mod.rs | 26 +++++----- .../tests/wasm_executor/mod.rs | 19 ++++---- 7 files changed, 91 insertions(+), 56 deletions(-) diff --git a/polkadot/node/core/candidate-validation/src/lib.rs b/polkadot/node/core/candidate-validation/src/lib.rs index dbcd1eab4b..2a03a0facf 100644 --- a/polkadot/node/core/candidate-validation/src/lib.rs +++ b/polkadot/node/core/candidate-validation/src/lib.rs @@ -38,7 +38,9 @@ use polkadot_primitives::v1::{ ValidationCode, PoV, CandidateDescriptor, PersistedValidationData, OccupiedCoreAssumption, Hash, ValidationOutputs, }; -use polkadot_parachain::wasm_executor::{self, ExecutionMode, ValidationError, InvalidCandidate as WasmInvalidCandidate}; +use polkadot_parachain::wasm_executor::{ + self, IsolationStrategy, ValidationError, InvalidCandidate as WasmInvalidCandidate +}; use polkadot_parachain::primitives::{ValidationResult as WasmValidationResult, ValidationParams}; use parity_scale_codec::Encode; @@ -55,13 +57,16 @@ const LOG_TARGET: &'static str = "candidate_validation"; pub struct CandidateValidationSubsystem { spawn: S, metrics: Metrics, - execution_mode: ExecutionMode, + isolation_strategy: IsolationStrategy, } impl CandidateValidationSubsystem { - /// Create a new `CandidateValidationSubsystem` with the given task spawner. - pub fn new(spawn: S, metrics: Metrics, execution_mode: ExecutionMode) -> Self { - CandidateValidationSubsystem { spawn, metrics, execution_mode } + /// Create a new `CandidateValidationSubsystem` with the given task spawner and isolation + /// strategy. + /// + /// Check out [`IsolationStrategy`] to get more details. + pub fn new(spawn: S, metrics: Metrics, isolation_strategy: IsolationStrategy) -> Self { + CandidateValidationSubsystem { spawn, metrics, isolation_strategy } } } @@ -70,7 +75,7 @@ impl Subsystem for CandidateValidationSubsystem where S: SpawnNamed + Clone + 'static, { fn start(self, ctx: C) -> SpawnedSubsystem { - let future = run(ctx, self.spawn, self.metrics, self.execution_mode) + let future = run(ctx, self.spawn, self.metrics, self.isolation_strategy) .map_err(|e| SubsystemError::with_origin("candidate-validation", e)) .boxed(); SpawnedSubsystem { @@ -84,7 +89,7 @@ async fn run( mut ctx: impl SubsystemContext, spawn: impl SpawnNamed + Clone + 'static, metrics: Metrics, - execution_mode: ExecutionMode, + isolation_strategy: IsolationStrategy, ) -> SubsystemResult<()> { loop { match ctx.recv().await? { @@ -99,7 +104,7 @@ async fn run( ) => { let res = spawn_validate_from_chain_state( &mut ctx, - execution_mode.clone(), + isolation_strategy.clone(), descriptor, pov, spawn.clone(), @@ -122,7 +127,7 @@ async fn run( ) => { let res = spawn_validate_exhaustive( &mut ctx, - execution_mode.clone(), + isolation_strategy.clone(), persisted_validation_data, validation_code, descriptor, @@ -254,7 +259,7 @@ async fn find_assumed_validation_data( async fn spawn_validate_from_chain_state( ctx: &mut impl SubsystemContext, - execution_mode: ExecutionMode, + isolation_strategy: IsolationStrategy, descriptor: CandidateDescriptor, pov: Arc, spawn: impl SpawnNamed + 'static, @@ -277,7 +282,7 @@ async fn spawn_validate_from_chain_state( let validation_result = spawn_validate_exhaustive( ctx, - execution_mode, + isolation_strategy, validation_data, validation_code, descriptor.clone(), @@ -313,7 +318,7 @@ async fn spawn_validate_from_chain_state( async fn spawn_validate_exhaustive( ctx: &mut impl SubsystemContext, - execution_mode: ExecutionMode, + isolation_strategy: IsolationStrategy, persisted_validation_data: PersistedValidationData, validation_code: ValidationCode, descriptor: CandidateDescriptor, @@ -323,7 +328,7 @@ async fn spawn_validate_exhaustive( let (tx, rx) = oneshot::channel(); let fut = async move { let res = validate_candidate_exhaustive::( - execution_mode, + isolation_strategy, persisted_validation_data, validation_code, descriptor, @@ -379,10 +384,10 @@ trait ValidationBackend { struct RealValidationBackend; impl ValidationBackend for RealValidationBackend { - type Arg = ExecutionMode; + type Arg = IsolationStrategy; fn validate( - execution_mode: ExecutionMode, + isolation_strategy: IsolationStrategy, validation_code: &ValidationCode, params: ValidationParams, spawn: S, @@ -390,7 +395,7 @@ impl ValidationBackend for RealValidationBackend { wasm_executor::validate_candidate( &validation_code.0, params, - &execution_mode, + &isolation_strategy, spawn, ) } diff --git a/polkadot/node/service/src/lib.rs b/polkadot/node/service/src/lib.rs index 450dcb2acf..0c351d1aab 100644 --- a/polkadot/node/service/src/lib.rs +++ b/polkadot/node/service/src/lib.rs @@ -50,7 +50,7 @@ use service::RpcHandlers; pub use self::client::{AbstractClient, Client, ClientHandle, ExecuteWithClient, RuntimeApiCollection}; pub use chain_spec::{PolkadotChainSpec, KusamaChainSpec, WestendChainSpec, RococoChainSpec}; pub use consensus_common::{Proposal, SelectChain, BlockImport, RecordProof, block_validation::Chain}; -pub use polkadot_parachain::wasm_executor::ExecutionMode; +pub use polkadot_parachain::wasm_executor::IsolationStrategy; pub use polkadot_primitives::v1::{Block, BlockId, CollatorId, Hash, Id as ParaId}; pub use sc_client_api::{Backend, ExecutionStrategy, CallExecutor}; pub use sc_consensus::LongestChain; @@ -296,7 +296,7 @@ fn real_overseer( registry: Option<&Registry>, spawner: Spawner, _: IsCollator, - _: ExecutionMode, + _: IsolationStrategy, ) -> Result<(Overseer, OverseerHandler), Error> where RuntimeClient: 'static + ProvideRuntimeApi + HeaderBackend, @@ -322,7 +322,7 @@ fn real_overseer( registry: Option<&Registry>, spawner: Spawner, is_collator: IsCollator, - execution_mode: ExecutionMode, + isolation_strategy: IsolationStrategy, ) -> Result<(Overseer, OverseerHandler), Error> where RuntimeClient: 'static + ProvideRuntimeApi + HeaderBackend, @@ -377,7 +377,7 @@ where candidate_validation: CandidateValidationSubsystem::new( spawner.clone(), Metrics::register(registry)?, - execution_mode, + isolation_strategy, ), chain_api: ChainApiSubsystem::new( runtime_client.clone(), @@ -479,7 +479,7 @@ pub fn new_full( is_collator: IsCollator, grandpa_pause: Option<(u32, u32)>, authority_discovery_config: Option, - execution_mode: ExecutionMode, + isolation_strategy: IsolationStrategy, ) -> Result>>, Error> where RuntimeApi: ConstructRuntimeApi> + Send + Sync + 'static, @@ -615,7 +615,7 @@ pub fn new_full( prometheus_registry.as_ref(), spawner, is_collator, - execution_mode, + isolation_strategy, )?; let overseer_handler_clone = overseer_handler.clone(); diff --git a/polkadot/node/test/service/src/lib.rs b/polkadot/node/test/service/src/lib.rs index 794df3779e..efd68cd3a8 100644 --- a/polkadot/node/test/service/src/lib.rs +++ b/polkadot/node/test/service/src/lib.rs @@ -87,7 +87,7 @@ pub fn new_full( query_start_delay: Duration::from_secs(0), ..Default::default() }), - polkadot_parachain::wasm_executor::ExecutionMode::InProcess, + polkadot_parachain::wasm_executor::IsolationStrategy::InProcess, ).map_err(Into::into) } diff --git a/polkadot/parachain/src/wasm_executor/mod.rs b/polkadot/parachain/src/wasm_executor/mod.rs index 735ec7f072..677501e6ef 100644 --- a/polkadot/parachain/src/wasm_executor/mod.rs +++ b/polkadot/parachain/src/wasm_executor/mod.rs @@ -37,9 +37,40 @@ const MAX_RUNTIME_MEM: usize = 1024 * 1024 * 1024; // 1 GiB const MAX_CODE_MEM: usize = 16 * 1024 * 1024; // 16 MiB const MAX_VALIDATION_RESULT_HEADER_MEM: usize = MAX_CODE_MEM + 1024; // 16.001 MiB -/// The execution mode for the `ValidationPool`. +/// The strategy we employ for isolating execution of wasm parachain validation function (PVF). +/// +/// For a typical validator an external process is the default way to run PVF. The rationale is based +/// on the following observations: +/// +/// (a) PVF is completely under control of parachain developers who may or may not be malicious. +/// (b) Collators are in charge of providing PoV who also may or may not be malicious. +/// (c) PVF is executed by a wasm engine based on optimizing compiler which is a very complex piece +/// of machinery. +/// +/// (a) and (b) may lead to a situation where due to a combination of PVF and PoV the validation work +/// can stuck in an infinite loop, which can open up resource exhaustion or DoS attack vectors. +/// +/// While some execution engines provide functionality to interrupt execution of wasm module from +/// another thread, there are also some caveats to that: there is no clean way to interrupt execution +/// if the control flow is in the host side and at the moment we haven't rigoriously vetted that all +/// host functions terminate or, at least, return in a short amount of time. Additionally, we want +/// some freedom on choosing wasm execution environment. +/// +/// On top of that, execution in a separate process helps to minimize impact of (c) if exploited. +/// It's not only the risk of miscompilation, but it also includes risk of JIT-bombs, i.e. cases +/// of specially crafted code that take enourmous amounts of time and memory to compile. +/// +/// At the same time, since PVF validates self-contained candidates, validation workers don't require +/// extensive communication with polkadot host, therefore there should be no observable performance penalty +/// coming from inter process communication. +/// +/// All of the above should give a sense why isolation is crucial for a typical use-case. +/// +/// However, in some cases, e.g. when running PVF validation on android (for whatever reason), we +/// cannot afford the luxury of process isolation and thus there is an option to run validation in +/// process. Also, running in process is convenient for testing. #[derive(Clone, Debug)] -pub enum ExecutionMode { +pub enum IsolationStrategy { /// The validation worker is ran in a thread inside the same process. InProcess, /// The validation worker is ran using the process' executable and the subcommand `validation-worker` is passed @@ -60,7 +91,7 @@ pub enum ExecutionMode { }, } -impl Default for ExecutionMode { +impl Default for IsolationStrategy { fn default() -> Self { #[cfg(not(any(target_os = "android", target_os = "unknown")))] { @@ -136,19 +167,19 @@ pub enum InternalError { pub fn validate_candidate( validation_code: &[u8], params: ValidationParams, - execution_mode: &ExecutionMode, + isolation_strategy: &IsolationStrategy, spawner: impl SpawnNamed + 'static, ) -> Result { - match execution_mode { - ExecutionMode::InProcess => { + match isolation_strategy { + IsolationStrategy::InProcess => { validate_candidate_internal(validation_code, ¶ms.encode(), spawner) }, #[cfg(not(any(target_os = "android", target_os = "unknown")))] - ExecutionMode::ExternalProcessSelfHost(pool) => { + IsolationStrategy::ExternalProcessSelfHost(pool) => { pool.validate_candidate(validation_code, params) }, #[cfg(not(any(target_os = "android", target_os = "unknown")))] - ExecutionMode::ExternalProcessCustomHost { pool, binary, args } => { + IsolationStrategy::ExternalProcessCustomHost { pool, binary, args } => { let args: Vec<&str> = args.iter().map(|x| x.as_str()).collect(); pool.validate_candidate_custom(validation_code, params, binary, &args) }, diff --git a/polkadot/parachain/test-parachains/adder/collator/src/lib.rs b/polkadot/parachain/test-parachains/adder/collator/src/lib.rs index 4ed2d5fe52..ca2375b2d8 100644 --- a/polkadot/parachain/test-parachains/adder/collator/src/lib.rs +++ b/polkadot/parachain/test-parachains/adder/collator/src/lib.rs @@ -166,7 +166,7 @@ mod tests { use super::*; use futures::executor::block_on; - use polkadot_parachain::{primitives::ValidationParams, wasm_executor::ExecutionMode}; + use polkadot_parachain::{primitives::ValidationParams, wasm_executor::IsolationStrategy}; use polkadot_primitives::v1::{ValidationData, PersistedValidationData}; use codec::Decode; @@ -201,7 +201,7 @@ mod tests { hrmp_mqc_heads: Vec::new(), dmq_mqc_head: Default::default(), }, - &ExecutionMode::InProcess, + &IsolationStrategy::InProcess, sp_core::testing::TaskExecutor::new(), ).unwrap(); diff --git a/polkadot/parachain/test-parachains/tests/adder/mod.rs b/polkadot/parachain/test-parachains/tests/adder/mod.rs index 8e581d187a..c9d15d53d7 100644 --- a/polkadot/parachain/test-parachains/tests/adder/mod.rs +++ b/polkadot/parachain/test-parachains/tests/adder/mod.rs @@ -25,13 +25,13 @@ use parachain::{ HeadData as GenericHeadData, ValidationParams, }, - wasm_executor::{ValidationPool, ExecutionMode} + wasm_executor::{ValidationPool, IsolationStrategy} }; use codec::{Decode, Encode}; use adder::{HeadData, BlockData, hash_state}; -fn execution_mode() -> ExecutionMode { - ExecutionMode::ExternalProcessCustomHost { +fn isolation_strategy() -> IsolationStrategy { + IsolationStrategy::ExternalProcessCustomHost { pool: ValidationPool::new(), binary: std::env::current_exe().unwrap(), args: WORKER_ARGS_TEST.iter().map(|x| x.to_string()).collect(), @@ -40,17 +40,17 @@ fn execution_mode() -> ExecutionMode { #[test] fn execute_good_on_parent_with_inprocess_validation() { - let execution_mode = ExecutionMode::InProcess; - execute_good_on_parent(execution_mode); + let isolation_strategy = IsolationStrategy::InProcess; + execute_good_on_parent(isolation_strategy); } #[test] pub fn execute_good_on_parent_with_external_process_validation() { - let execution_mode = execution_mode(); - execute_good_on_parent(execution_mode); + let isolation_strategy = isolation_strategy(); + execute_good_on_parent(isolation_strategy); } -fn execute_good_on_parent(execution_mode: ExecutionMode) { +fn execute_good_on_parent(isolation_strategy: IsolationStrategy) { let parent_head = HeadData { number: 0, parent_hash: [0; 32], @@ -71,7 +71,7 @@ fn execute_good_on_parent(execution_mode: ExecutionMode) { hrmp_mqc_heads: Vec::new(), dmq_mqc_head: Default::default(), }, - &execution_mode, + &isolation_strategy, sp_core::testing::TaskExecutor::new(), ).unwrap(); @@ -87,7 +87,7 @@ fn execute_good_chain_on_parent() { let mut number = 0; let mut parent_hash = [0; 32]; let mut last_state = 0; - let execution_mode = execution_mode(); + let isolation_strategy = isolation_strategy(); for add in 0..10 { let parent_head = HeadData { @@ -110,7 +110,7 @@ fn execute_good_chain_on_parent() { hrmp_mqc_heads: Vec::new(), dmq_mqc_head: Default::default(), }, - &execution_mode, + &isolation_strategy, sp_core::testing::TaskExecutor::new(), ).unwrap(); @@ -128,7 +128,7 @@ fn execute_good_chain_on_parent() { #[test] fn execute_bad_on_parent() { - let execution_mode = execution_mode(); + let isolation_strategy = isolation_strategy(); let parent_head = HeadData { number: 0, @@ -150,7 +150,7 @@ fn execute_bad_on_parent() { hrmp_mqc_heads: Vec::new(), dmq_mqc_head: Default::default(), }, - &execution_mode, + &isolation_strategy, sp_core::testing::TaskExecutor::new(), ).unwrap_err(); } diff --git a/polkadot/parachain/test-parachains/tests/wasm_executor/mod.rs b/polkadot/parachain/test-parachains/tests/wasm_executor/mod.rs index ad60e4e04e..e092adc2f4 100644 --- a/polkadot/parachain/test-parachains/tests/wasm_executor/mod.rs +++ b/polkadot/parachain/test-parachains/tests/wasm_executor/mod.rs @@ -21,11 +21,11 @@ const WORKER_ARGS_TEST: &[&'static str] = &["--nocapture", "validation_worker"]; use crate::adder; use parachain::{ primitives::{BlockData, ValidationParams}, - wasm_executor::{ValidationError, InvalidCandidate, EXECUTION_TIMEOUT_SEC, ExecutionMode, ValidationPool}, + wasm_executor::{ValidationError, InvalidCandidate, EXECUTION_TIMEOUT_SEC, IsolationStrategy, ValidationPool}, }; -fn execution_mode() -> ExecutionMode { - ExecutionMode::ExternalProcessCustomHost { +fn isolation_strategy() -> IsolationStrategy { + IsolationStrategy::ExternalProcessCustomHost { pool: ValidationPool::new(), binary: std::env::current_exe().unwrap(), args: WORKER_ARGS_TEST.iter().map(|x| x.to_string()).collect(), @@ -34,7 +34,7 @@ fn execution_mode() -> ExecutionMode { #[test] fn terminates_on_timeout() { - let execution_mode = execution_mode(); + let isolation_strategy = isolation_strategy(); let result = parachain::wasm_executor::validate_candidate( halt::wasm_binary_unwrap(), @@ -45,7 +45,7 @@ fn terminates_on_timeout() { hrmp_mqc_heads: Vec::new(), dmq_mqc_head: Default::default(), }, - &execution_mode, + &isolation_strategy, sp_core::testing::TaskExecutor::new(), ); match result { @@ -59,11 +59,10 @@ fn terminates_on_timeout() { #[test] fn parallel_execution() { - let execution_mode = execution_mode(); + let isolation_strategy = isolation_strategy(); + let isolation_strategy_clone = isolation_strategy.clone(); let start = std::time::Instant::now(); - - let execution_mode2 = execution_mode.clone(); let thread = std::thread::spawn(move || parachain::wasm_executor::validate_candidate( halt::wasm_binary_unwrap(), @@ -74,7 +73,7 @@ fn parallel_execution() { hrmp_mqc_heads: Vec::new(), dmq_mqc_head: Default::default(), }, - &execution_mode, + &isolation_strategy, sp_core::testing::TaskExecutor::new(), ).ok()); let _ = parachain::wasm_executor::validate_candidate( @@ -86,7 +85,7 @@ fn parallel_execution() { hrmp_mqc_heads: Vec::new(), dmq_mqc_head: Default::default(), }, - &execution_mode2, + &isolation_strategy_clone, sp_core::testing::TaskExecutor::new(), ); thread.join().unwrap();