PVF: Instantiate wasm in pre-checking (#7246)

* PVF: Instantiate wasm in pre-checking * Move `runtime_construction_check` to prepare thread, use bytes * [minor] Update comment * Fix compile error * Update Cargo.lock * Update docs * Add some missing docs!
2026-06-14 02:51:08 +00:00 · 2023-06-02 08:04:04 -04:00
parent 2d73e39d2f
commit 5bbb87c46b
16 changed files with 437 additions and 355 deletions
@@ -24,8 +24,8 @@
 #![warn(missing_docs)]

 use polkadot_node_core_pvf::{
-	InternalValidationError, InvalidCandidate as WasmInvalidCandidate, PrepareError, PrepareStats,
-	PvfPrepData, ValidationError, ValidationHost,
+	InternalValidationError, InvalidCandidate as WasmInvalidCandidate, PrepareError,
+	PrepareJobKind, PrepareStats, PvfPrepData, ValidationError, ValidationHost,
 };
 use polkadot_node_primitives::{
 	BlockData, InvalidCandidate, PoV, ValidationResult, POV_BOMB_LIMIT, VALIDATION_CODE_BOMB_LIMIT,
@@ -356,7 +356,12 @@ where
 		&validation_code.0,
 		VALIDATION_CODE_BOMB_LIMIT,
 	) {
-		Ok(code) => PvfPrepData::from_code(code.into_owned(), executor_params, timeout),
+		Ok(code) => PvfPrepData::from_code(
+			code.into_owned(),
+			executor_params,
+			timeout,
+			PrepareJobKind::Prechecking,
+		),
 		Err(e) => {
 			gum::debug!(target: LOG_TARGET, err=?e, "precheck: cannot decompress validation code");
 			return PreCheckOutcome::Invalid
@@ -727,7 +732,12 @@ trait ValidationBackend {
 	) -> Result<WasmValidationResult, ValidationError> {
 		let prep_timeout = pvf_prep_timeout(&executor_params, PvfPrepTimeoutKind::Lenient);
 		// Construct the PVF a single time, since it is an expensive operation. Cloning it is cheap.
-		let pvf = PvfPrepData::from_code(raw_validation_code, executor_params, prep_timeout);
+		let pvf = PvfPrepData::from_code(
+			raw_validation_code,
+			executor_params,
+			prep_timeout,
+			PrepareJobKind::Compilation,
+		);
 		// We keep track of the total time that has passed and stop retrying if we are taking too long.
 		let total_time_start = Instant::now();

@@ -16,10 +16,13 @@ parity-scale-codec = { version = "3.4.0", default-features = false, features = [
 polkadot-parachain = { path = "../../../../parachain" }
 polkadot-primitives = { path = "../../../../primitives" }

+sc-executor = { git = "https://github.com/paritytech/substrate", branch = "master" }
 sc-executor-common = { git = "https://github.com/paritytech/substrate", branch = "master" }
 sc-executor-wasmtime = { git = "https://github.com/paritytech/substrate", branch = "master" }

 sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
+sp-externalities = { git = "https://github.com/paritytech/substrate", branch = "master" }
+sp-io = { git = "https://github.com/paritytech/substrate", branch = "master" }
 sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" }

 [build-dependencies]
@@ -29,6 +29,8 @@ pub enum PrepareError {
 	Prevalidation(String),
 	/// Compilation failed for the given PVF.
 	Preparation(String),
+	/// Instantiation of the WASM module instance failed.
+	RuntimeConstruction(String),
 	/// An unexpected panic has occurred in the preparation worker.
 	Panic(String),
 	/// Failed to prepare the PVF due to the time limit.
@@ -55,6 +57,8 @@ impl PrepareError {
 		match self {
 			Prevalidation(_) | Preparation(_) | Panic(_) => true,
 			TimedOut | IoErr(_) | CreateTmpFileErr(_) | RenameTmpFileErr(_) => false,
+			// Can occur due to issues with the PVF, but also due to local errors.
+			RuntimeConstruction(_) => false,
 		}
 	}
 }
@@ -65,6 +69,7 @@ impl fmt::Display for PrepareError {
 		match self {
 			Prevalidation(err) => write!(f, "prevalidation: {}", err),
 			Preparation(err) => write!(f, "preparation: {}", err),
+			RuntimeConstruction(err) => write!(f, "runtime construction: {}", err),
 			Panic(err) => write!(f, "panic: {}", err),
 			TimedOut => write!(f, "prepare: timeout"),
 			IoErr(err) => write!(f, "prepare: io error while receiving response: {}", err),
@@ -17,8 +17,15 @@
 //! Interface to the Substrate Executor

 use polkadot_primitives::{ExecutorParam, ExecutorParams};
-use sc_executor_common::wasm_runtime::HeapAllocStrategy;
-use sc_executor_wasmtime::{Config, DeterministicStackLimit, Semantics};
+use sc_executor_common::{
+	error::WasmError,
+	runtime_blob::RuntimeBlob,
+	wasm_runtime::{HeapAllocStrategy, InvokeMethod, WasmModule as _},
+};
+use sc_executor_wasmtime::{Config, DeterministicStackLimit, Semantics, WasmtimeRuntime};
+use sp_core::storage::{ChildInfo, TrackedStorageKey};
+use sp_externalities::MultiRemovalResults;
+use std::any::{Any, TypeId};

 // Memory configuration
 //
@@ -112,3 +119,255 @@ pub fn params_to_wasmtime_semantics(par: &ExecutorParams) -> Result<Semantics, S
 	sem.deterministic_stack_limit = Some(stack_limit);
 	Ok(sem)
 }
+
+/// A WASM executor with a given configuration. It is instantiated once per execute worker and is
+/// specific to that worker.
+#[derive(Clone)]
+pub struct Executor {
+	config: Config,
+}
+
+impl Executor {
+	pub fn new(params: ExecutorParams) -> Result<Self, String> {
+		let mut config = DEFAULT_CONFIG.clone();
+		config.semantics = params_to_wasmtime_semantics(&params)?;
+
+		Ok(Self { config })
+	}
+
+	/// Executes the given PVF in the form of a compiled artifact and returns the result of execution
+	/// upon success.
+	///
+	/// # Safety
+	///
+	/// The caller must ensure that the compiled artifact passed here was:
+	///   1) produced by [`prepare`],
+	///   2) was not modified,
+	///
+	/// Failure to adhere to these requirements might lead to crashes and arbitrary code execution.
+	pub unsafe fn execute(
+		&self,
+		compiled_artifact_blob: &[u8],
+		params: &[u8],
+	) -> Result<Vec<u8>, String> {
+		let mut extensions = sp_externalities::Extensions::new();
+
+		extensions.register(sp_core::traits::ReadRuntimeVersionExt::new(ReadRuntimeVersion));
+
+		let mut ext = ValidationExternalities(extensions);
+
+		match sc_executor::with_externalities_safe(&mut ext, || {
+			let runtime = self.create_runtime_from_bytes(compiled_artifact_blob)?;
+			runtime.new_instance()?.call(InvokeMethod::Export("validate_block"), params)
+		}) {
+			Ok(Ok(ok)) => Ok(ok),
+			Ok(Err(err)) | Err(err) => Err(err),
+		}
+		.map_err(|err| format!("execute error: {:?}", err))
+	}
+
+	/// Constructs the runtime for the given PVF, given the artifact bytes.
+	///
+	/// # Safety
+	///
+	/// The caller must ensure that the compiled artifact passed here was:
+	///   1) produced by [`prepare`],
+	///   2) was not modified,
+	///
+	/// Failure to adhere to these requirements might lead to crashes and arbitrary code execution.
+	pub unsafe fn create_runtime_from_bytes(
+		&self,
+		compiled_artifact_blob: &[u8],
+	) -> Result<WasmtimeRuntime, WasmError> {
+		sc_executor_wasmtime::create_runtime_from_artifact_bytes::<HostFunctions>(
+			compiled_artifact_blob,
+			self.config.clone(),
+		)
+	}
+}
+
+/// Available host functions. We leave out:
+///
+/// 1. storage related stuff (PVF doesn't have a notion of a persistent storage/trie)
+/// 2. tracing
+/// 3. off chain workers (PVFs do not have such a notion)
+/// 4. runtime tasks
+/// 5. sandbox
+type HostFunctions = (
+	sp_io::misc::HostFunctions,
+	sp_io::crypto::HostFunctions,
+	sp_io::hashing::HostFunctions,
+	sp_io::allocator::HostFunctions,
+	sp_io::logging::HostFunctions,
+	sp_io::trie::HostFunctions,
+);
+
+/// The validation externalities that will panic on any storage related access. (PVFs should not
+/// have a notion of a persistent storage/trie.)
+struct ValidationExternalities(sp_externalities::Extensions);
+
+impl sp_externalities::Externalities for ValidationExternalities {
+	fn storage(&self, _: &[u8]) -> Option<Vec<u8>> {
+		panic!("storage: unsupported feature for parachain validation")
+	}
+
+	fn storage_hash(&self, _: &[u8]) -> Option<Vec<u8>> {
+		panic!("storage_hash: unsupported feature for parachain validation")
+	}
+
+	fn child_storage_hash(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> {
+		panic!("child_storage_hash: unsupported feature for parachain validation")
+	}
+
+	fn child_storage(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> {
+		panic!("child_storage: unsupported feature for parachain validation")
+	}
+
+	fn kill_child_storage(
+		&mut self,
+		_child_info: &ChildInfo,
+		_maybe_limit: Option<u32>,
+		_maybe_cursor: Option<&[u8]>,
+	) -> MultiRemovalResults {
+		panic!("kill_child_storage: unsupported feature for parachain validation")
+	}
+
+	fn clear_prefix(
+		&mut self,
+		_prefix: &[u8],
+		_maybe_limit: Option<u32>,
+		_maybe_cursor: Option<&[u8]>,
+	) -> MultiRemovalResults {
+		panic!("clear_prefix: unsupported feature for parachain validation")
+	}
+
+	fn clear_child_prefix(
+		&mut self,
+		_child_info: &ChildInfo,
+		_prefix: &[u8],
+		_maybe_limit: Option<u32>,
+		_maybe_cursor: Option<&[u8]>,
+	) -> MultiRemovalResults {
+		panic!("clear_child_prefix: unsupported feature for parachain validation")
+	}
+
+	fn place_storage(&mut self, _: Vec<u8>, _: Option<Vec<u8>>) {
+		panic!("place_storage: unsupported feature for parachain validation")
+	}
+
+	fn place_child_storage(&mut self, _: &ChildInfo, _: Vec<u8>, _: Option<Vec<u8>>) {
+		panic!("place_child_storage: unsupported feature for parachain validation")
+	}
+
+	fn storage_root(&mut self, _: sp_core::storage::StateVersion) -> Vec<u8> {
+		panic!("storage_root: unsupported feature for parachain validation")
+	}
+
+	fn child_storage_root(&mut self, _: &ChildInfo, _: sp_core::storage::StateVersion) -> Vec<u8> {
+		panic!("child_storage_root: unsupported feature for parachain validation")
+	}
+
+	fn next_child_storage_key(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> {
+		panic!("next_child_storage_key: unsupported feature for parachain validation")
+	}
+
+	fn next_storage_key(&self, _: &[u8]) -> Option<Vec<u8>> {
+		panic!("next_storage_key: unsupported feature for parachain validation")
+	}
+
+	fn storage_append(&mut self, _key: Vec<u8>, _value: Vec<u8>) {
+		panic!("storage_append: unsupported feature for parachain validation")
+	}
+
+	fn storage_start_transaction(&mut self) {
+		panic!("storage_start_transaction: unsupported feature for parachain validation")
+	}
+
+	fn storage_rollback_transaction(&mut self) -> Result<(), ()> {
+		panic!("storage_rollback_transaction: unsupported feature for parachain validation")
+	}
+
+	fn storage_commit_transaction(&mut self) -> Result<(), ()> {
+		panic!("storage_commit_transaction: unsupported feature for parachain validation")
+	}
+
+	fn wipe(&mut self) {
+		panic!("wipe: unsupported feature for parachain validation")
+	}
+
+	fn commit(&mut self) {
+		panic!("commit: unsupported feature for parachain validation")
+	}
+
+	fn read_write_count(&self) -> (u32, u32, u32, u32) {
+		panic!("read_write_count: unsupported feature for parachain validation")
+	}
+
+	fn reset_read_write_count(&mut self) {
+		panic!("reset_read_write_count: unsupported feature for parachain validation")
+	}
+
+	fn get_whitelist(&self) -> Vec<TrackedStorageKey> {
+		panic!("get_whitelist: unsupported feature for parachain validation")
+	}
+
+	fn set_whitelist(&mut self, _: Vec<TrackedStorageKey>) {
+		panic!("set_whitelist: unsupported feature for parachain validation")
+	}
+
+	fn set_offchain_storage(&mut self, _: &[u8], _: std::option::Option<&[u8]>) {
+		panic!("set_offchain_storage: unsupported feature for parachain validation")
+	}
+
+	fn get_read_and_written_keys(&self) -> Vec<(Vec<u8>, u32, u32, bool)> {
+		panic!("get_read_and_written_keys: unsupported feature for parachain validation")
+	}
+}
+
+impl sp_externalities::ExtensionStore for ValidationExternalities {
+	fn extension_by_type_id(&mut self, type_id: TypeId) -> Option<&mut dyn Any> {
+		self.0.get_mut(type_id)
+	}
+
+	fn register_extension_with_type_id(
+		&mut self,
+		type_id: TypeId,
+		extension: Box<dyn sp_externalities::Extension>,
+	) -> Result<(), sp_externalities::Error> {
+		self.0.register_with_type_id(type_id, extension)
+	}
+
+	fn deregister_extension_by_type_id(
+		&mut self,
+		type_id: TypeId,
+	) -> Result<(), sp_externalities::Error> {
+		if self.0.deregister(type_id) {
+			Ok(())
+		} else {
+			Err(sp_externalities::Error::ExtensionIsNotRegistered(type_id))
+		}
+	}
+}
+
+struct ReadRuntimeVersion;
+
+impl sp_core::traits::ReadRuntimeVersion for ReadRuntimeVersion {
+	fn read_runtime_version(
+		&self,
+		wasm_code: &[u8],
+		_ext: &mut dyn sp_externalities::Externalities,
+	) -> Result<Vec<u8>, String> {
+		let blob = RuntimeBlob::uncompress_if_needed(wasm_code)
+			.map_err(|e| format!("Failed to read the PVF runtime blob: {:?}", e))?;
+
+		match sc_executor::read_embedded_version(&blob)
+			.map_err(|e| format!("Failed to read the static section from the PVF blob: {:?}", e))?
+		{
+			Some(version) => {
+				use parity_scale_codec::Encode;
+				Ok(version.encode())
+			},
+			None => Err("runtime version section is not found".to_string()),
+		}
+	}
+}
@@ -46,3 +46,12 @@ pub struct MemoryAllocationStats {
 	/// Total allocated memory, in bytes.
 	pub allocated: u64,
 }
+
+/// The kind of prepare job.
+#[derive(Copy, Clone, Debug, Encode, Decode)]
+pub enum PrepareJobKind {
+	/// Compilation triggered by a candidate validation request.
+	Compilation,
+	/// A prechecking job.
+	Prechecking,
+}
@@ -14,6 +14,7 @@
 // You should have received a copy of the GNU General Public License
 // along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.

+use crate::prepare::PrepareJobKind;
 use parity_scale_codec::{Decode, Encode};
 use polkadot_parachain::primitives::ValidationCodeHash;
 use polkadot_primitives::ExecutorParams;
@@ -39,6 +40,8 @@ pub struct PvfPrepData {
 	executor_params: Arc<ExecutorParams>,
 	/// Preparation timeout
 	prep_timeout: Duration,
+	/// The kind of preparation job.
+	prep_kind: PrepareJobKind,
 }

 impl PvfPrepData {
@@ -47,11 +50,12 @@ impl PvfPrepData {
 		code: Vec<u8>,
 		executor_params: ExecutorParams,
 		prep_timeout: Duration,
+		prep_kind: PrepareJobKind,
 	) -> Self {
 		let code = Arc::new(code);
 		let code_hash = blake2_256(&code).into();
 		let executor_params = Arc::new(executor_params);
-		Self { code, code_hash, executor_params, prep_timeout }
+		Self { code, code_hash, executor_params, prep_timeout, prep_kind }
 	}

 	/// Returns validation code hash for the PVF
@@ -74,11 +78,21 @@ impl PvfPrepData {
 		self.prep_timeout
 	}

+	/// Returns preparation kind.
+	pub fn prep_kind(&self) -> PrepareJobKind {
+		self.prep_kind
+	}
+
 	/// Creates a structure for tests.
 	#[doc(hidden)]
 	pub fn from_discriminator_and_timeout(num: u32, timeout: Duration) -> Self {
 		let descriminator_buf = num.to_le_bytes().to_vec();
-		Self::from_code(descriminator_buf, ExecutorParams::default(), timeout)
+		Self::from_code(
+			descriminator_buf,
+			ExecutorParams::default(),
+			timeout,
+			PrepareJobKind::Compilation,
+		)
 	}

 	/// Creates a structure for tests.
@@ -86,6 +100,15 @@ impl PvfPrepData {
 	pub fn from_discriminator(num: u32) -> Self {
 		Self::from_discriminator_and_timeout(num, crate::tests::TEST_PREPARATION_TIMEOUT)
 	}
+
+	/// Creates a structure for tests.
+	#[doc(hidden)]
+	pub fn from_discriminator_precheck(num: u32) -> Self {
+		let mut pvf =
+			Self::from_discriminator_and_timeout(num, crate::tests::TEST_PREPARATION_TIMEOUT);
+		pvf.prep_kind = PrepareJobKind::Prechecking;
+		pvf
+	}
 }

 impl fmt::Debug for PvfPrepData {
@@ -18,12 +18,7 @@ polkadot-node-core-pvf-common = { path = "../common" }
 polkadot-parachain = { path = "../../../../parachain" }
 polkadot-primitives = { path = "../../../../primitives" }

-sc-executor = { git = "https://github.com/paritytech/substrate", branch = "master" }
-sc-executor-common = { git = "https://github.com/paritytech/substrate", branch = "master" }
-sc-executor-wasmtime = { git = "https://github.com/paritytech/substrate", branch = "master" }
 sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
-sp-externalities = { git = "https://github.com/paritytech/substrate", branch = "master" }
-sp-io = { git = "https://github.com/paritytech/substrate", branch = "master" }
 sp-maybe-compressed-blob = { git = "https://github.com/paritytech/substrate", branch = "master" }
 sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" }

@@ -1,312 +0,0 @@
-// Copyright (C) Parity Technologies (UK) Ltd.
-// This file is part of Polkadot.
-
-// Polkadot is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-
-// Polkadot is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-
-// You should have received a copy of the GNU General Public License
-// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
-
-//! Interface to the Substrate Executor
-
-use polkadot_node_core_pvf_common::executor_intf::{
-	params_to_wasmtime_semantics, DEFAULT_CONFIG, NATIVE_STACK_MAX,
-};
-use polkadot_primitives::ExecutorParams;
-use sc_executor_common::{
-	error::WasmError,
-	runtime_blob::RuntimeBlob,
-	wasm_runtime::{InvokeMethod, WasmModule as _},
-};
-use sc_executor_wasmtime::{Config, WasmtimeRuntime};
-use sp_core::storage::{ChildInfo, TrackedStorageKey};
-use sp_externalities::MultiRemovalResults;
-use std::any::{Any, TypeId};
-
-// Wasmtime powers the Substrate Executor. It compiles the wasm bytecode into native code.
-// That native code does not create any stacks and just reuses the stack of the thread that
-// wasmtime was invoked from.
-//
-// Also, we configure the executor to provide the deterministic stack and that requires
-// supplying the amount of the native stack space that wasm is allowed to use. This is
-// realized by supplying the limit into `wasmtime::Config::max_wasm_stack`.
-//
-// There are quirks to that configuration knob:
-//
-// 1. It only limits the amount of stack space consumed by wasm but does not ensure nor check
-//    that the stack space is actually available.
-//
-//    That means, if the calling thread has 1 MiB of stack space left and the wasm code consumes
-//    more, then the wasmtime limit will **not** trigger. Instead, the wasm code will hit the
-//    guard page and the Rust stack overflow handler will be triggered. That leads to an
-//    **abort**.
-//
-// 2. It cannot and does not limit the stack space consumed by Rust code.
-//
-//    Meaning that if the wasm code leaves no stack space for Rust code, then the Rust code
-//    will abort and that will abort the process as well.
-//
-// Typically on Linux the main thread gets the stack size specified by the `ulimit` and
-// typically it's configured to 8 MiB. Rust's spawned threads are 2 MiB. OTOH, the
-// NATIVE_STACK_MAX is set to 256 MiB. Not nearly enough.
-//
-// Hence we need to increase it. The simplest way to fix that is to spawn a thread with the desired
-// stack limit.
-//
-// The reasoning why we pick this particular size is:
-//
-// The default Rust thread stack limit 2 MiB + 256 MiB wasm stack.
-/// The stack size for the execute thread.
-pub const EXECUTE_THREAD_STACK_SIZE: usize = 2 * 1024 * 1024 + NATIVE_STACK_MAX as usize;
-
-#[derive(Clone)]
-pub struct Executor {
-	config: Config,
-}
-
-impl Executor {
-	pub fn new(params: ExecutorParams) -> Result<Self, String> {
-		let mut config = DEFAULT_CONFIG.clone();
-		config.semantics = params_to_wasmtime_semantics(&params)?;
-
-		Ok(Self { config })
-	}
-
-	/// Executes the given PVF in the form of a compiled artifact and returns the result of execution
-	/// upon success.
-	///
-	/// # Safety
-	///
-	/// The caller must ensure that the compiled artifact passed here was:
-	///   1) produced by [`prepare`],
-	///   2) written to the disk as a file,
-	///   3) was not modified,
-	///   4) will not be modified while any runtime using this artifact is alive, or is being
-	///      instantiated.
-	///
-	/// Failure to adhere to these requirements might lead to crashes and arbitrary code execution.
-	pub unsafe fn execute(
-		&self,
-		compiled_artifact_blob: &[u8],
-		params: &[u8],
-	) -> Result<Vec<u8>, String> {
-		let mut extensions = sp_externalities::Extensions::new();
-
-		extensions.register(sp_core::traits::ReadRuntimeVersionExt::new(ReadRuntimeVersion));
-
-		let mut ext = ValidationExternalities(extensions);
-
-		match sc_executor::with_externalities_safe(&mut ext, || {
-			let runtime = self.create_runtime_from_bytes(compiled_artifact_blob)?;
-			runtime.new_instance()?.call(InvokeMethod::Export("validate_block"), params)
-		}) {
-			Ok(Ok(ok)) => Ok(ok),
-			Ok(Err(err)) | Err(err) => Err(err),
-		}
-		.map_err(|err| format!("execute error: {:?}", err))
-	}
-
-	/// Constructs the runtime for the given PVF, given the artifact bytes.
-	///
-	/// # Safety
-	///
-	/// The caller must ensure that the compiled artifact passed here was:
-	///   1) produced by [`prepare`],
-	///   2) was not modified,
-	///
-	/// Failure to adhere to these requirements might lead to crashes and arbitrary code execution.
-	pub unsafe fn create_runtime_from_bytes(
-		&self,
-		compiled_artifact_blob: &[u8],
-	) -> Result<WasmtimeRuntime, WasmError> {
-		sc_executor_wasmtime::create_runtime_from_artifact_bytes::<HostFunctions>(
-			compiled_artifact_blob,
-			self.config.clone(),
-		)
-	}
-}
-
-type HostFunctions = (
-	sp_io::misc::HostFunctions,
-	sp_io::crypto::HostFunctions,
-	sp_io::hashing::HostFunctions,
-	sp_io::allocator::HostFunctions,
-	sp_io::logging::HostFunctions,
-	sp_io::trie::HostFunctions,
-);
-
-/// The validation externalities that will panic on any storage related access.
-struct ValidationExternalities(sp_externalities::Extensions);
-
-impl sp_externalities::Externalities for ValidationExternalities {
-	fn storage(&self, _: &[u8]) -> Option<Vec<u8>> {
-		panic!("storage: unsupported feature for parachain validation")
-	}
-
-	fn storage_hash(&self, _: &[u8]) -> Option<Vec<u8>> {
-		panic!("storage_hash: unsupported feature for parachain validation")
-	}
-
-	fn child_storage_hash(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> {
-		panic!("child_storage_hash: unsupported feature for parachain validation")
-	}
-
-	fn child_storage(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> {
-		panic!("child_storage: unsupported feature for parachain validation")
-	}
-
-	fn kill_child_storage(
-		&mut self,
-		_child_info: &ChildInfo,
-		_maybe_limit: Option<u32>,
-		_maybe_cursor: Option<&[u8]>,
-	) -> MultiRemovalResults {
-		panic!("kill_child_storage: unsupported feature for parachain validation")
-	}
-
-	fn clear_prefix(
-		&mut self,
-		_prefix: &[u8],
-		_maybe_limit: Option<u32>,
-		_maybe_cursor: Option<&[u8]>,
-	) -> MultiRemovalResults {
-		panic!("clear_prefix: unsupported feature for parachain validation")
-	}
-
-	fn clear_child_prefix(
-		&mut self,
-		_child_info: &ChildInfo,
-		_prefix: &[u8],
-		_maybe_limit: Option<u32>,
-		_maybe_cursor: Option<&[u8]>,
-	) -> MultiRemovalResults {
-		panic!("clear_child_prefix: unsupported feature for parachain validation")
-	}
-
-	fn place_storage(&mut self, _: Vec<u8>, _: Option<Vec<u8>>) {
-		panic!("place_storage: unsupported feature for parachain validation")
-	}
-
-	fn place_child_storage(&mut self, _: &ChildInfo, _: Vec<u8>, _: Option<Vec<u8>>) {
-		panic!("place_child_storage: unsupported feature for parachain validation")
-	}
-
-	fn storage_root(&mut self, _: sp_core::storage::StateVersion) -> Vec<u8> {
-		panic!("storage_root: unsupported feature for parachain validation")
-	}
-
-	fn child_storage_root(&mut self, _: &ChildInfo, _: sp_core::storage::StateVersion) -> Vec<u8> {
-		panic!("child_storage_root: unsupported feature for parachain validation")
-	}
-
-	fn next_child_storage_key(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> {
-		panic!("next_child_storage_key: unsupported feature for parachain validation")
-	}
-
-	fn next_storage_key(&self, _: &[u8]) -> Option<Vec<u8>> {
-		panic!("next_storage_key: unsupported feature for parachain validation")
-	}
-
-	fn storage_append(&mut self, _key: Vec<u8>, _value: Vec<u8>) {
-		panic!("storage_append: unsupported feature for parachain validation")
-	}
-
-	fn storage_start_transaction(&mut self) {
-		panic!("storage_start_transaction: unsupported feature for parachain validation")
-	}
-
-	fn storage_rollback_transaction(&mut self) -> Result<(), ()> {
-		panic!("storage_rollback_transaction: unsupported feature for parachain validation")
-	}
-
-	fn storage_commit_transaction(&mut self) -> Result<(), ()> {
-		panic!("storage_commit_transaction: unsupported feature for parachain validation")
-	}
-
-	fn wipe(&mut self) {
-		panic!("wipe: unsupported feature for parachain validation")
-	}
-
-	fn commit(&mut self) {
-		panic!("commit: unsupported feature for parachain validation")
-	}
-
-	fn read_write_count(&self) -> (u32, u32, u32, u32) {
-		panic!("read_write_count: unsupported feature for parachain validation")
-	}
-
-	fn reset_read_write_count(&mut self) {
-		panic!("reset_read_write_count: unsupported feature for parachain validation")
-	}
-
-	fn get_whitelist(&self) -> Vec<TrackedStorageKey> {
-		panic!("get_whitelist: unsupported feature for parachain validation")
-	}
-
-	fn set_whitelist(&mut self, _: Vec<TrackedStorageKey>) {
-		panic!("set_whitelist: unsupported feature for parachain validation")
-	}
-
-	fn set_offchain_storage(&mut self, _: &[u8], _: std::option::Option<&[u8]>) {
-		panic!("set_offchain_storage: unsupported feature for parachain validation")
-	}
-
-	fn get_read_and_written_keys(&self) -> Vec<(Vec<u8>, u32, u32, bool)> {
-		panic!("get_read_and_written_keys: unsupported feature for parachain validation")
-	}
-}
-
-impl sp_externalities::ExtensionStore for ValidationExternalities {
-	fn extension_by_type_id(&mut self, type_id: TypeId) -> Option<&mut dyn Any> {
-		self.0.get_mut(type_id)
-	}
-
-	fn register_extension_with_type_id(
-		&mut self,
-		type_id: TypeId,
-		extension: Box<dyn sp_externalities::Extension>,
-	) -> Result<(), sp_externalities::Error> {
-		self.0.register_with_type_id(type_id, extension)
-	}
-
-	fn deregister_extension_by_type_id(
-		&mut self,
-		type_id: TypeId,
-	) -> Result<(), sp_externalities::Error> {
-		if self.0.deregister(type_id) {
-			Ok(())
-		} else {
-			Err(sp_externalities::Error::ExtensionIsNotRegistered(type_id))
-		}
-	}
-}
-
-struct ReadRuntimeVersion;
-
-impl sp_core::traits::ReadRuntimeVersion for ReadRuntimeVersion {
-	fn read_runtime_version(
-		&self,
-		wasm_code: &[u8],
-		_ext: &mut dyn sp_externalities::Externalities,
-	) -> Result<Vec<u8>, String> {
-		let blob = RuntimeBlob::uncompress_if_needed(wasm_code)
-			.map_err(|e| format!("Failed to read the PVF runtime blob: {:?}", e))?;
-
-		match sc_executor::read_embedded_version(&blob)
-			.map_err(|e| format!("Failed to read the static section from the PVF blob: {:?}", e))?
-		{
-			Some(version) => {
-				use parity_scale_codec::Encode;
-				Ok(version.encode())
-			},
-			None => Err("runtime version section is not found".to_string()),
-		}
-	}
-}
@@ -14,20 +14,18 @@
 // You should have received a copy of the GNU General Public License
 // along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.

-mod executor_intf;
-
-pub use executor_intf::Executor;
+pub use polkadot_node_core_pvf_common::executor_intf::Executor;

 // NOTE: Initializing logging in e.g. tests will not have an effect in the workers, as they are
 //       separate spawned processes. Run with e.g. `RUST_LOG=parachain::pvf-execute-worker=trace`.
 const LOG_TARGET: &str = "parachain::pvf-execute-worker";

-use crate::executor_intf::EXECUTE_THREAD_STACK_SIZE;
 use cpu_time::ProcessTime;
 use parity_scale_codec::{Decode, Encode};
 use polkadot_node_core_pvf_common::{
 	error::InternalValidationError,
 	execute::{Handshake, Response},
+	executor_intf::NATIVE_STACK_MAX,
 	framed_recv, framed_send,
 	worker::{
 		bytes_to_path, cpu_time_monitor_loop, stringify_panic_payload,
@@ -43,6 +41,42 @@ use std::{
 };
 use tokio::{io, net::UnixStream};

+// Wasmtime powers the Substrate Executor. It compiles the wasm bytecode into native code.
+// That native code does not create any stacks and just reuses the stack of the thread that
+// wasmtime was invoked from.
+//
+// Also, we configure the executor to provide the deterministic stack and that requires
+// supplying the amount of the native stack space that wasm is allowed to use. This is
+// realized by supplying the limit into `wasmtime::Config::max_wasm_stack`.
+//
+// There are quirks to that configuration knob:
+//
+// 1. It only limits the amount of stack space consumed by wasm but does not ensure nor check
+//    that the stack space is actually available.
+//
+//    That means, if the calling thread has 1 MiB of stack space left and the wasm code consumes
+//    more, then the wasmtime limit will **not** trigger. Instead, the wasm code will hit the
+//    guard page and the Rust stack overflow handler will be triggered. That leads to an
+//    **abort**.
+//
+// 2. It cannot and does not limit the stack space consumed by Rust code.
+//
+//    Meaning that if the wasm code leaves no stack space for Rust code, then the Rust code
+//    will abort and that will abort the process as well.
+//
+// Typically on Linux the main thread gets the stack size specified by the `ulimit` and
+// typically it's configured to 8 MiB. Rust's spawned threads are 2 MiB. OTOH, the
+// NATIVE_STACK_MAX is set to 256 MiB. Not nearly enough.
+//
+// Hence we need to increase it. The simplest way to fix that is to spawn a thread with the desired
+// stack limit.
+//
+// The reasoning why we pick this particular size is:
+//
+// The default Rust thread stack limit 2 MiB + 256 MiB wasm stack.
+/// The stack size for the execute thread.
+pub const EXECUTE_THREAD_STACK_SIZE: usize = 2 * 1024 * 1024 + NATIVE_STACK_MAX as usize;
+
 async fn recv_handshake(stream: &mut UnixStream) -> io::Result<Handshake> {
 	let handshake_enc = framed_recv(stream).await?;
 	let handshake = Handshake::decode(&mut &handshake_enc[..]).map_err(|_| {
@@ -30,8 +30,9 @@ use crate::memory_stats::memory_tracker::{get_memory_tracker_loop_stats, memory_
 use parity_scale_codec::{Decode, Encode};
 use polkadot_node_core_pvf_common::{
 	error::{PrepareError, PrepareResult},
+	executor_intf::Executor,
 	framed_recv, framed_send,
-	prepare::{MemoryStats, PrepareStats},
+	prepare::{MemoryStats, PrepareJobKind, PrepareStats},
 	pvf::PvfPrepData,
 	worker::{
 		bytes_to_path, cpu_time_monitor_loop, stringify_panic_payload,
@@ -40,6 +41,7 @@ use polkadot_node_core_pvf_common::{
 	},
 	ProcessTime,
 };
+use polkadot_primitives::ExecutorParams;
 use std::{
 	path::PathBuf,
 	sync::{mpsc::channel, Arc},
@@ -117,7 +119,7 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) {
 		let worker_pid = std::process::id();

 		loop {
-			let (pvf, dest) = recv_request(&mut stream).await?;
+			let (pvf, temp_artifact_dest) = recv_request(&mut stream).await?;
 			gum::debug!(
 				target: LOG_TARGET,
 				%worker_pid,
@@ -125,6 +127,8 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) {
 			);

 			let preparation_timeout = pvf.prep_timeout();
+			let prepare_job_kind = pvf.prep_kind();
+			let executor_params = (*pvf.executor_params()).clone();

 			// Conditional variable to notify us when a thread is done.
 			let condvar = thread::get_condvar();
@@ -151,11 +155,23 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) {
 			let prepare_thread = thread::spawn_worker_thread(
 				"prepare thread",
 				move || {
-					let result = prepare_artifact(pvf, cpu_time_start);
+					#[allow(unused_mut)]
+					let mut result = prepare_artifact(pvf, cpu_time_start);

 					// Get the `ru_maxrss` stat. If supported, call getrusage for the thread.
 					#[cfg(target_os = "linux")]
-					let result = result.map(|(artifact, elapsed)| (artifact, elapsed, get_max_rss_thread()));
+					let mut result = result.map(|(artifact, elapsed)| (artifact, elapsed, get_max_rss_thread()));
+
+					// If we are pre-checking, check for runtime construction errors.
+					//
+					// As pre-checking is more strict than just preparation in terms of memory and
+					// time, it is okay to do extra checks here. This takes negligible time anyway.
+					if let PrepareJobKind::Prechecking = prepare_job_kind {
+						result = result.and_then(|output| {
+							runtime_construction_check(output.0.as_ref(), executor_params)?;
+							Ok(output)
+						});
+					}

 					result
 				},
@@ -203,9 +219,9 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) {
 								target: LOG_TARGET,
 								%worker_pid,
 								"worker: writing artifact to {}",
-								dest.display(),
+								temp_artifact_dest.display(),
 							);
-							tokio::fs::write(&dest, &artifact).await?;
+							tokio::fs::write(&temp_artifact_dest, &artifact).await?;

 							Ok(PrepareStats { cpu_time_elapsed, memory_stats })
 						},
@@ -257,3 +273,18 @@ fn prepare_artifact(
 	}
 	.map(|artifact| (artifact, cpu_time_start.elapsed()))
 }
+
+/// Try constructing the runtime to catch any instantiation errors during pre-checking.
+fn runtime_construction_check(
+	artifact_bytes: &[u8],
+	executor_params: ExecutorParams,
+) -> Result<(), PrepareError> {
+	let executor = Executor::new(executor_params)
+		.map_err(|e| PrepareError::RuntimeConstruction(format!("cannot create executor: {}", e)))?;
+
+	// SAFETY: We just compiled this artifact.
+	let result = unsafe { executor.create_runtime_from_bytes(&artifact_bytes) };
+	result
+		.map(|_runtime| ())
+		.map_err(|err| PrepareError::RuntimeConstruction(format!("{:?}", err)))
+}
@@ -1215,7 +1215,9 @@ pub(crate) mod tests {

 		// First, test a simple precheck request.
 		let (result_tx, result_rx) = oneshot::channel();
-		host.precheck_pvf(PvfPrepData::from_discriminator(1), result_tx).await.unwrap();
+		host.precheck_pvf(PvfPrepData::from_discriminator_precheck(1), result_tx)
+			.await
+			.unwrap();

 		// The queue received the prepare request.
 		assert_matches!(
@@ -1239,7 +1241,9 @@ pub(crate) mod tests {
 		let mut precheck_receivers = Vec::new();
 		for _ in 0..3 {
 			let (result_tx, result_rx) = oneshot::channel();
-			host.precheck_pvf(PvfPrepData::from_discriminator(2), result_tx).await.unwrap();
+			host.precheck_pvf(PvfPrepData::from_discriminator_precheck(2), result_tx)
+				.await
+				.unwrap();
 			precheck_receivers.push(result_rx);
 		}
 		// Received prepare request.
@@ -1289,7 +1293,9 @@ pub(crate) mod tests {
 		);

 		let (result_tx, result_rx) = oneshot::channel();
-		host.precheck_pvf(PvfPrepData::from_discriminator(1), result_tx).await.unwrap();
+		host.precheck_pvf(PvfPrepData::from_discriminator_precheck(1), result_tx)
+			.await
+			.unwrap();

 		// Suppose the preparation failed, the execution queue is empty and both
 		// "clients" receive their results.
@@ -1311,7 +1317,9 @@ pub(crate) mod tests {
 		let mut precheck_receivers = Vec::new();
 		for _ in 0..3 {
 			let (result_tx, result_rx) = oneshot::channel();
-			host.precheck_pvf(PvfPrepData::from_discriminator(2), result_tx).await.unwrap();
+			host.precheck_pvf(PvfPrepData::from_discriminator_precheck(2), result_tx)
+				.await
+				.unwrap();
 			precheck_receivers.push(result_rx);
 		}

@@ -1357,7 +1365,9 @@ pub(crate) mod tests {

 		// Submit a precheck request that fails.
 		let (result_tx, result_rx) = oneshot::channel();
-		host.precheck_pvf(PvfPrepData::from_discriminator(1), result_tx).await.unwrap();
+		host.precheck_pvf(PvfPrepData::from_discriminator_precheck(1), result_tx)
+			.await
+			.unwrap();

 		// The queue received the prepare request.
 		assert_matches!(
@@ -1379,7 +1389,7 @@ pub(crate) mod tests {

 		// Submit another precheck request.
 		let (result_tx_2, result_rx_2) = oneshot::channel();
-		host.precheck_pvf(PvfPrepData::from_discriminator(1), result_tx_2)
+		host.precheck_pvf(PvfPrepData::from_discriminator_precheck(1), result_tx_2)
 			.await
 			.unwrap();

@@ -1395,7 +1405,7 @@ pub(crate) mod tests {

 		// Submit another precheck request.
 		let (result_tx_3, result_rx_3) = oneshot::channel();
-		host.precheck_pvf(PvfPrepData::from_discriminator(1), result_tx_3)
+		host.precheck_pvf(PvfPrepData::from_discriminator_precheck(1), result_tx_3)
 			.await
 			.unwrap();

@@ -113,7 +113,7 @@ pub use worker_intf::{framed_recv, framed_send, JOB_TIMEOUT_WALL_CLOCK_FACTOR};
 // Re-export some common types.
 pub use polkadot_node_core_pvf_common::{
 	error::{InternalValidationError, PrepareError},
-	prepare::PrepareStats,
+	prepare::{PrepareJobKind, PrepareStats},
 	pvf::PvfPrepData,
 };

@@ -17,8 +17,8 @@
 use assert_matches::assert_matches;
 use parity_scale_codec::Encode as _;
 use polkadot_node_core_pvf::{
-	start, Config, InvalidCandidate, Metrics, PvfPrepData, ValidationError, ValidationHost,
-	JOB_TIMEOUT_WALL_CLOCK_FACTOR,
+	start, Config, InvalidCandidate, Metrics, PrepareJobKind, PvfPrepData, ValidationError,
+	ValidationHost, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
 };
 use polkadot_parachain::primitives::{BlockData, ValidationParams, ValidationResult};
 use polkadot_primitives::{ExecutorParam, ExecutorParams};
@@ -70,7 +70,12 @@ impl TestHost {
 			.lock()
 			.await
 			.execute_pvf(
-				PvfPrepData::from_code(code.into(), executor_params, TEST_PREPARATION_TIMEOUT),
+				PvfPrepData::from_code(
+					code.into(),
+					executor_params,
+					TEST_PREPARATION_TIMEOUT,
+					PrepareJobKind::Compilation,
+				),
 				TEST_EXECUTION_TIMEOUT,
 				params.encode(),
 				polkadot_node_core_pvf::Priority::Normal,