feat: initialize Kurdistan SDK - independent fork of Polkadot SDK

2025-12-13 15:44:15 +03:00
commit 286de54384
6841 changed files with 1848356 additions and 0 deletions
@@ -0,0 +1,103 @@
+[package]
+name = "pezkuwi-node-core-pvf"
+description = "Pezkuwi crate that implements the PVF validation host. Responsible for coordinating preparation and execution of PVFs."
+version = "7.0.0"
+authors.workspace = true
+edition.workspace = true
+license.workspace = true
+homepage.workspace = true
+repository.workspace = true
+
+[lints]
+workspace = true
+
+[[bench]]
+name = "host_prepare_pezkuwichain_runtime"
+harness = false
+
+[dependencies]
+always-assert = { workspace = true }
+array-bytes = { workspace = true, default-features = true }
+futures = { workspace = true }
+futures-timer = { workspace = true }
+gum = { workspace = true, default-features = true }
+is_executable = { optional = true, workspace = true }
+pin-project = { workspace = true }
+rand = { workspace = true, default-features = true }
+slotmap = { workspace = true }
+strum = { features = ["derive"], workspace = true, default-features = true }
+tempfile = { workspace = true }
+thiserror = { workspace = true }
+tokio = { features = [
+	"fs",
+	"process",
+], workspace = true, default-features = true }
+
+codec = { features = ["derive"], workspace = true }
+
+pezkuwi-node-core-pvf-common = { workspace = true, default-features = true }
+pezkuwi-node-metrics = { workspace = true, default-features = true }
+pezkuwi-node-primitives = { workspace = true, default-features = true }
+pezkuwi-node-subsystem = { workspace = true, default-features = true }
+pezkuwi-primitives = { workspace = true, default-features = true }
+pezkuwi-teyrchain-primitives = { workspace = true, default-features = true }
+
+pezkuwi-node-core-pvf-execute-worker = { optional = true, workspace = true, default-features = true }
+pezkuwi-node-core-pvf-prepare-worker = { optional = true, workspace = true, default-features = true }
+sc-tracing = { workspace = true }
+sp-core = { workspace = true, default-features = true }
+sp-maybe-compressed-blob = { optional = true, workspace = true, default-features = true }
+
+[dev-dependencies]
+assert_matches = { workspace = true }
+criterion = { features = [
+	"async_tokio",
+	"cargo_bench_support",
+], workspace = true }
+
+pezkuwi-node-core-pvf-common = { features = [
+	"test-utils",
+], workspace = true, default-features = true }
+pezkuwi-node-subsystem-test-helpers = { workspace = true }
+# For benches and integration tests, depend on ourselves with the test-utils feature.
+pezkuwi-node-core-pvf = { features = [
+	"test-utils",
+], workspace = true, default-features = true }
+pezkuwichain-runtime = { workspace = true }
+
+test-teyrchain-adder = { workspace = true }
+test-teyrchain-halt = { workspace = true }
+
+[target.'cfg(target_os = "linux")'.dev-dependencies]
+libc = { workspace = true }
+procfs = { workspace = true }
+rusty-fork = { workspace = true, default-features = true }
+sc-sysinfo = { workspace = true, default-features = true }
+
+[features]
+ci-only-tests = []
+jemalloc-allocator = ["pezkuwi-node-core-pvf-common/jemalloc-allocator"]
+# This feature is used to export test code to other crates without putting it in the production build.
+test-utils = [
+	"dep:is_executable",
+	"dep:pezkuwi-node-core-pvf-execute-worker",
+	"dep:pezkuwi-node-core-pvf-prepare-worker",
+	"dep:sp-maybe-compressed-blob",
+]
+runtime-benchmarks = [
+	"gum/runtime-benchmarks",
+	"pezkuwi-node-core-pvf-common/runtime-benchmarks",
+	"pezkuwi-node-core-pvf-execute-worker?/runtime-benchmarks",
+	"pezkuwi-node-core-pvf-prepare-worker?/runtime-benchmarks",
+	"pezkuwi-node-core-pvf/runtime-benchmarks",
+	"pezkuwi-node-metrics/runtime-benchmarks",
+	"pezkuwi-node-primitives/runtime-benchmarks",
+	"pezkuwi-node-subsystem-test-helpers/runtime-benchmarks",
+	"pezkuwi-node-subsystem/runtime-benchmarks",
+	"pezkuwi-primitives/runtime-benchmarks",
+	"pezkuwi-teyrchain-primitives/runtime-benchmarks",
+	"pezkuwichain-runtime/runtime-benchmarks",
+	"sc-sysinfo/runtime-benchmarks",
+	"sc-tracing/runtime-benchmarks",
+	"test-teyrchain-adder/runtime-benchmarks",
+]
@@ -0,0 +1,47 @@
+# PVF Host
+
+This is the PVF host, responsible for responding to requests from Candidate
+Validation and spawning worker tasks to fulfill those requests.
+
+See also:
+
+- for more information: [the Implementer's Guide][impl-guide]
+- for an explanation of terminology: [the Glossary][glossary]
+
+## Running basic tests
+
+Running `cargo test` in the `pvf/` directory will run unit and integration
+tests.
+
+**Note:** some tests run only under Linux, x86-64, and/or with the
+`ci-only-tests` feature enabled.
+
+See the general [Testing][testing] instructions for more information on
+**running tests** and **observing logs**.
+
+## Running a test-network with zombienet
+
+Since this crate is consensus-critical, for major changes it is highly
+recommended to run a test-network. See the "Behavior tests" section of the
+[Testing][testing] docs for full instructions.
+
+To run the PVF-specific zombienet test:
+
+```sh
+RUST_LOG=teyrchain::pvf=trace zombienet --provider=native spawn zombienet_tests/functional/0001-teyrchains-pvf.toml
+```
+
+## Testing on Linux
+
+Some of the PVF functionality, especially related to security, is Linux-only,
+and some is x86-64-only. If you touch anything security-related, make sure to
+test on Linux x86-64! If you're on a Mac, you can either run a VM or rent a VPS
+and use the open-source tool [EternalTerminal][et] to connect to it.[^et]
+
+[^et]: Unlike ssh, ET preserves your session across disconnects, and unlike
+another popular persistent shell, mosh, it allows scrollback.
+
+[impl-guide]: https://docs.pezkuwichain.io/sdk/book/pvf-prechecking.html#summary
+[glossary]: https://docs.pezkuwichain.io/sdk/book/glossary.html
+[testing]: https://github.com/paritytech/polkadot-sdk/blob/master/polkadot/doc/testing.md
+[et]: https://github.com/MisterTea/EternalTerminal
@@ -0,0 +1,138 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Benchmarks for preparation through the host. We use a real PVF to get realistic results.
+
+use criterion::{criterion_group, criterion_main, BatchSize, Criterion, SamplingMode};
+use pezkuwi_node_core_pvf::{
+	start, testing, Config, Metrics, PrepareError, PrepareJobKind, PvfPrepData, ValidationHost,
+};
+use pezkuwi_primitives::ExecutorParams;
+use pezkuwichain_runtime::WASM_BINARY;
+use std::time::Duration;
+use tokio::{runtime::Handle, sync::Mutex};
+
+/// Preparation timeout that `TestHost::precheck_pvf` passes to `PvfPrepData::from_code`.
+const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(30);
+
+/// A started validation host bundled with the temporary cache directory it was configured with.
+struct TestHost {
+	// Keep a reference to the tempdir otherwise it gets deleted on drop.
+	#[allow(dead_code)]
+	cache_dir: tempfile::TempDir,
+	// Wrapped in tokio's async `Mutex` so that `precheck_pvf` can reach the host through `&self`.
+	host: Mutex<ValidationHost>,
+}
+
+impl TestHost {
+	/// Starts a validation host backed by a fresh temporary cache directory and spawns the
+	/// host's background task on `handle`.
+	///
+	/// The worker binary paths come from `testing::build_workers_and_get_paths`. `f` may adjust
+	/// the test [`Config`] before the host is started.
+	///
+	/// Panics if the temporary directory cannot be created or if starting the host fails.
+	async fn new_with_config<F>(handle: &Handle, f: F) -> Self
+	where
+		F: FnOnce(&mut Config),
+	{
+		let (prepare_bin, execute_bin) = testing::build_workers_and_get_paths();
+		let cache_dir = tempfile::tempdir().unwrap();
+
+		let mut cfg = Config::new(
+			cache_dir.path().to_owned(),
+			None,
+			false,
+			prepare_bin,
+			execute_bin,
+			2,
+			1,
+			2,
+		);
+		// Let the caller override the defaults chosen above.
+		f(&mut cfg);
+
+		let (host, host_task) = start(cfg, Metrics::default()).await.unwrap();
+		// The join handle is dropped on purpose; the spawned task keeps running detached.
+		drop(handle.spawn(host_task));
+
+		Self { cache_dir, host: Mutex::new(host) }
+	}
+
+	/// Submits `code` (possibly compressed) to the host for prechecking and waits for the
+	/// outcome.
+	///
+	/// Panics if the blob cannot be decompressed, if the request cannot be handed to the host,
+	/// or if the result channel is closed before an outcome arrives.
+	async fn precheck_pvf(
+		&self,
+		code: &[u8],
+		executor_params: ExecutorParams,
+	) -> Result<(), PrepareError> {
+		let raw_code = sp_maybe_compressed_blob::decompress(code, 16 * 1024 * 1024)
+			.expect("Compression works");
+		let prep_data = PvfPrepData::from_code(
+			raw_code.into(),
+			executor_params,
+			TEST_PREPARATION_TIMEOUT,
+			PrepareJobKind::Prechecking,
+			16 * 1024 * 1024,
+		);
+
+		let (outcome_tx, outcome_rx) = futures::channel::oneshot::channel();
+		// The lock guard is a temporary of this statement, so the host is unlocked again
+		// before we start waiting for the outcome.
+		self.host.lock().await.precheck_pvf(prep_data, outcome_tx).await.unwrap();
+
+		outcome_rx.await.unwrap()
+	}
+}
+
+/// Criterion benchmark that feeds the pezkuwichain runtime blob to a freshly started
+/// `TestHost` through `TestHost::precheck_pvf`.
+///
+/// Panics if the runtime WASM binary is not available or cannot be decompressed.
+fn host_prepare_pezkuwichain_runtime(c: &mut Criterion) {
+	pezkuwi_node_core_pvf_common::sp_tracing::try_init_simple();
+
+	let rt = tokio::runtime::Runtime::new().unwrap();
+
+	let blob = WASM_BINARY.expect("You need to build the WASM binaries to run the tests!");
+	// Decompress once up front; only a clone of the resulting `PvfPrepData` is used per batch.
+	// NOTE(review): only the code bytes of `pvf` reach `TestHost::precheck_pvf` (via
+	// `maybe_compressed_code()`), which builds its own `PvfPrepData`; the `Compilation` kind and
+	// the 360 s timeout set here do not appear to be forwarded -- confirm this is intended.
+	let pvf = match sp_maybe_compressed_blob::decompress(&blob, 64 * 1024 * 1024) {
+		Ok(code) => PvfPrepData::from_code(
+			code.into_owned(),
+			ExecutorParams::default(),
+			Duration::from_secs(360),
+			PrepareJobKind::Compilation,
+			64 * 1024 * 1024,
+		),
+		Err(e) => {
+			panic!("Cannot decompress blob: {:?}", e);
+		},
+	};
+
+	let mut group = c.benchmark_group("prepare pezkuwichain");
+	group.sampling_mode(SamplingMode::Flat);
+	group.sample_size(20);
+	group.measurement_time(Duration::from_secs(240));
+	group.bench_function("host: prepare Pezkuwichain runtime", |b| {
+		b.to_async(&rt).iter_batched(
+			// NOTE(review): the setup closure returns a *future*; it is only awaited inside the
+			// routine below (`result.await`), so starting the host looks like it is part of the
+			// measured time -- confirm whether that is intended.
+			|| async {
+				(
+					TestHost::new_with_config(rt.handle(), |cfg| {
+						cfg.prepare_workers_hard_max_num = 1;
+					})
+					.await,
+					pvf.clone().maybe_compressed_code(),
+				)
+			},
+			|result| async move {
+				let (host, pvf_code) = result.await;
+
+				// `PvfPrepData` is designed to be cheap to clone, so the `pvf.clone()` performed
+				// when the setup future runs shouldn't affect the benchmark accuracy.
+				//
+				// `precheck_pvf` returns `Result<(), PrepareError>`, so `_stats` is just `()`; the
+				// `unwrap` turns a failed preparation into a panic.
+				let _stats = host.precheck_pvf(&pvf_code, Default::default()).await.unwrap();
+			},
+			BatchSize::SmallInput,
+		)
+	});
+	group.finish();
+}
+
+// Collect the benchmark function into the `prepare` group and hand that group to criterion's
+// generated entry point.
+criterion_group!(prepare, host_prepare_pezkuwichain_runtime);
+criterion_main!(prepare);
@@ -0,0 +1,21 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+/// Build script entry point: forwards the `PROFILE` environment variable to the crate as the
+/// value of the `build_profile` cfg.
+fn main() {
+	// Emit nothing when `PROFILE` is unset or is not valid unicode.
+	let Ok(profile) = std::env::var("PROFILE") else { return };
+	println!(r#"cargo:rustc-cfg=build_profile="{}""#, profile);
+}
@@ -0,0 +1,63 @@
+[package]
+name = "pezkuwi-node-core-pvf-common"
+description = "Pezkuwi crate that contains functionality related to PVFs that is shared by the PVF host and the PVF workers."
+version = "7.0.0"
+authors.workspace = true
+edition.workspace = true
+license.workspace = true
+homepage.workspace = true
+repository.workspace = true
+
+[lints]
+workspace = true
+
+[dependencies]
+cpu-time = { workspace = true }
+futures = { workspace = true }
+gum = { workspace = true, default-features = true }
+libc = { workspace = true }
+nix = { features = ["resource", "sched"], workspace = true }
+thiserror = { workspace = true }
+
+codec = { features = ["derive"], workspace = true }
+
+pezkuwi-node-primitives = { workspace = true, default-features = true }
+pezkuwi-primitives = { workspace = true, default-features = true }
+pezkuwi-teyrchain-primitives = { workspace = true, default-features = true }
+
+sc-executor = { workspace = true, default-features = true }
+sc-executor-common = { workspace = true, default-features = true }
+sc-executor-wasmtime = { workspace = true, default-features = true }
+
+sp-core = { workspace = true, default-features = true }
+sp-crypto-hashing = { workspace = true, default-features = true }
+sp-externalities = { workspace = true, default-features = true }
+sp-io = { workspace = true, default-features = true }
+sp-tracing = { workspace = true, default-features = true }
+
+[target.'cfg(target_os = "linux")'.dependencies]
+landlock = { workspace = true }
+
+[target.'cfg(all(target_os = "linux", target_arch = "x86_64"))'.dependencies]
+seccompiler = { workspace = true }
+
+[dev-dependencies]
+assert_matches = { workspace = true }
+wat = { workspace = true }
+
+[target.'cfg(target_os = "linux")'.dev-dependencies]
+tempfile = { workspace = true }
+
+[features]
+# This feature is used to export test code to other crates without putting it in the production build.
+test-utils = []
+jemalloc-allocator = []
+runtime-benchmarks = [
+	"gum/runtime-benchmarks",
+	"pezkuwi-node-primitives/runtime-benchmarks",
+	"pezkuwi-primitives/runtime-benchmarks",
+	"pezkuwi-teyrchain-primitives/runtime-benchmarks",
+	"sc-executor-wasmtime/runtime-benchmarks",
+	"sc-executor/runtime-benchmarks",
+	"sp-io/runtime-benchmarks",
+]
@@ -0,0 +1,164 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+use crate::prepare::{PrepareSuccess, PrepareWorkerSuccess};
+use codec::{Decode, Encode};
+pub use sc_executor_common::error::Error as ExecuteError;
+
+/// Result of PVF preparation from a worker, with checksum of the compiled PVF and stats of the
+/// preparation if successful.
+pub type PrepareWorkerResult = Result<PrepareWorkerSuccess, PrepareError>;
+
+/// Result of PVF preparation propagated all the way back to the host, with path to the concluded
+/// artifact and stats of the preparation if successful.
+pub type PrepareResult = Result<PrepareSuccess, PrepareError>;
+
+/// Result of prechecking PVF performed by the validation host. Carries no payload on success (the
+/// `Ok` type is `()`); on failure it holds the [`PrepareError`].
+pub type PrecheckResult = Result<(), PrepareError>;
+
+/// An error that occurred during the prepare part of the PVF pipeline.
+// Codec indexes are intended to stabilize pre-encoded payloads (see `OOM_PAYLOAD`)
+// Because of that, keep the existing `#[codec(index = ..)]` values untouched when adding or
+// reordering variants.
+#[derive(thiserror::Error, Debug, Clone, Encode, Decode)]
+pub enum PrepareError {
+	/// During the prevalidation stage of preparation an issue was found with the PVF.
+	#[codec(index = 0)]
+	#[error("prepare: prevalidation error: {0}")]
+	Prevalidation(String),
+	/// Compilation failed for the given PVF.
+	#[codec(index = 1)]
+	#[error("prepare: preparation error: {0}")]
+	Preparation(String),
+	/// Instantiation of the WASM module instance failed.
+	#[codec(index = 2)]
+	#[error("prepare: runtime construction: {0}")]
+	RuntimeConstruction(String),
+	/// An unexpected error has occurred in the preparation job.
+	#[codec(index = 3)]
+	#[error("prepare: job error: {0}")]
+	JobError(String),
+	/// Failed to prepare the PVF due to the time limit.
+	#[codec(index = 4)]
+	#[error("prepare: timeout")]
+	TimedOut,
+	/// An IO error occurred. This state is reported by either the validation host or by the
+	/// worker.
+	#[codec(index = 5)]
+	#[error("prepare: io error while receiving response: {0}")]
+	IoErr(String),
+	/// The temporary file for the artifact could not be created at the given cache path. This
+	/// state is reported by the validation host (not by the worker).
+	#[codec(index = 6)]
+	#[error("prepare: error creating tmp file: {0}")]
+	CreateTmpFile(String),
+	/// The response from the worker is received, but the file cannot be renamed (moved) to the
+	/// final destination location. This state is reported by the validation host (not by the
+	/// worker).
+	#[codec(index = 7)]
+	#[error("prepare: error renaming tmp file ({src:?} -> {dest:?}): {err}")]
+	RenameTmpFile {
+		/// Description of the rename failure.
+		err: String,
+		// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
+		// conversion to `Option<String>`.
+		/// Path the temporary file was to be moved from, if it could be converted to a string.
+		src: Option<String>,
+		/// Path the file was to be moved to, if it could be converted to a string.
+		dest: Option<String>,
+	},
+	/// Memory limit reached
+	#[codec(index = 8)]
+	#[error("prepare: out of memory")]
+	OutOfMemory,
+	/// The response from the worker is received, but the worker cache could not be cleared. The
+	/// worker has to be killed to avoid jobs having access to data from other jobs. This state is
+	/// reported by the validation host (not by the worker).
+	#[codec(index = 9)]
+	#[error("prepare: error clearing worker cache: {0}")]
+	ClearWorkerDir(String),
+	/// The preparation job process died, due to OOM, a seccomp violation, or some other factor.
+	///
+	/// `job_pid` is the pid of the job process that died and `err` describes the failure.
+	#[codec(index = 10)]
+	#[error("prepare: prepare job with pid {job_pid} died: {err}")]
+	JobDied { err: String, job_pid: i32 },
+	/// Some error occurred when interfacing with the kernel.
+	#[codec(index = 11)]
+	#[error("prepare: error interfacing with the kernel: {0}")]
+	Kernel(String),
+	/// Code blob failed to decompress
+	#[codec(index = 12)]
+	#[error("prepare: could not decompress code blob: {0}")]
+	CouldNotDecompressCodeBlob(String),
+}
+
+impl PrepareError {
+	/// Tells whether this error is deterministic, i.e. whether it should trigger reliably.
+	/// Deterministic errors depend on the PVF itself and the sc-executor/wasmtime logic.
+	///
+	/// Non-deterministic errors can happen spuriously. Typically, they occur due to resource
+	/// starvation, e.g. under heavy load or memory pressure. Those errors are typically transient
+	/// but may persist, e.g. if the node is run on an overwhelmingly underpowered machine.
+	pub fn is_deterministic(&self) -> bool {
+		// Every variant is listed explicitly (no `_` arm) so that adding a new variant forces a
+		// decision here.
+		match self {
+			Self::Prevalidation(_) |
+			Self::Preparation(_) |
+			Self::JobError(_) |
+			Self::OutOfMemory |
+			Self::CouldNotDecompressCodeBlob(_) => true,
+
+			// Can occur due to issues with the PVF, but also due to factors like local load.
+			Self::TimedOut => false,
+			// Can occur due to issues with the PVF, but also due to local errors.
+			Self::RuntimeConstruction(_) => false,
+
+			Self::IoErr(_) |
+			Self::JobDied { .. } |
+			Self::CreateTmpFile(_) |
+			Self::RenameTmpFile { .. } |
+			Self::ClearWorkerDir(_) |
+			Self::Kernel(_) => false,
+		}
+	}
+}
+
+/// Some internal error occurred.
+///
+/// Should only ever be used for validation errors independent of the candidate and PVF, or for
+/// errors we ruled out during pre-checking (so preparation errors are fine).
+// NOTE(review): unlike `PrepareError`, the variants carry no explicit `#[codec(index = ..)]`, so
+// the encoding presumably follows declaration order -- confirm before reordering variants.
+#[derive(thiserror::Error, Debug, Clone, Encode, Decode)]
+pub enum InternalValidationError {
+	/// Some communication error occurred with the host.
+	#[error("validation: some communication error occurred with the host: {0}")]
+	HostCommunication(String),
+	/// Host could not create a hard link to the artifact path.
+	#[error("validation: host could not create a hard link to the artifact path: {0}")]
+	CouldNotCreateLink(String),
+	/// Could not find or open compiled artifact file.
+	#[error("validation: could not find or open compiled artifact file: {0}")]
+	CouldNotOpenFile(String),
+	/// Could not create a pipe between the worker and a child process.
+	#[error("validation: could not create pipe: {0}")]
+	CouldNotCreatePipe(String),
+	/// Host could not clear the worker cache after a job.
+	#[error("validation: host could not clear the worker cache ({path:?}) after a job: {err}")]
+	CouldNotClearWorkerDir {
+		/// Description of the failure.
+		err: String,
+		// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
+		// conversion to `Option<String>`.
+		/// Path of the worker cache, if it could be converted to a string.
+		path: Option<String>,
+	},
+	/// Some error occurred when interfacing with the kernel.
+	#[error("validation: error interfacing with the kernel: {0}")]
+	Kernel(String),
+	/// Some non-deterministic preparation error occurred.
+	#[error("validation: prepare: {0}")]
+	NonDeterministicPrepareError(PrepareError),
+}
@@ -0,0 +1,141 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+use crate::{error::InternalValidationError, ArtifactChecksum};
+use codec::{Decode, Encode};
+use pezkuwi_node_primitives::PoV;
+use pezkuwi_primitives::{ExecutorParams, PersistedValidationData};
+use pezkuwi_teyrchain_primitives::primitives::ValidationResult;
+use std::time::Duration;
+
+/// The payload of the one-time handshake that is done when a worker process is created. Carries
+/// data from the host to the worker.
+///
+/// At present the only data carried is the set of executor parameters.
+#[derive(Encode, Decode)]
+pub struct Handshake {
+	/// The executor parameters.
+	pub executor_params: ExecutorParams,
+}
+
+/// A request to execute a PVF.
+///
+/// Bundles the validation inputs (`pvd`, `pov`) with the execution timeout and the checksum of
+/// the artifact to run.
+#[derive(Encode, Decode)]
+pub struct ExecuteRequest {
+	/// Persisted validation data.
+	pub pvd: PersistedValidationData,
+	/// Proof-of-validity.
+	pub pov: PoV,
+	/// Execution timeout.
+	pub execution_timeout: Duration,
+	/// Checksum of the artifact to execute.
+	pub artifact_checksum: ArtifactChecksum,
+}
+
+/// The response from the execution worker.
+///
+/// Wraps the job's own [`JobResponse`] together with measurements about the job.
+#[derive(Debug, Encode, Decode)]
+pub struct WorkerResponse {
+	/// The response from the execute job process.
+	pub job_response: JobResponse,
+	/// The amount of CPU time taken by the job.
+	pub duration: Duration,
+	/// The uncompressed PoV size.
+	pub pov_size: u32,
+}
+
+/// An error occurred in the worker process.
+///
+/// [`JobError`] and [`InternalValidationError`] convert into this type through the `From` impls
+/// generated by `#[from]`.
+#[derive(thiserror::Error, Debug, Clone, Encode, Decode)]
+pub enum WorkerError {
+	/// The job timed out.
+	#[error("The job timed out")]
+	JobTimedOut,
+	/// The job process has died. We must kill the worker just in case.
+	///
+	/// We cannot treat this as an internal error because malicious code may have killed the job.
+	/// We still retry it, because in the non-malicious case it is likely spurious.
+	///
+	/// `job_pid` is the pid of the job process that died and `err` describes the failure.
+	#[error("The job process (pid {job_pid}) has died: {err}")]
+	JobDied { err: String, job_pid: i32 },
+	/// An unexpected error occurred in the job process, e.g. failing to spawn a thread, panic,
+	/// etc.
+	///
+	/// Because malicious code can cause a job error, we must not treat it as an internal error. We
+	/// still retry it, because in the non-malicious case it is likely spurious.
+	#[error("An unexpected error occurred in the job process: {0}")]
+	JobError(#[from] JobError),
+
+	/// Some internal error occurred.
+	#[error("An internal error occurred: {0}")]
+	InternalError(#[from] InternalValidationError),
+}
+
+/// The result of a job on the execution worker.
+///
+/// Note that `Ok` does not by itself mean the candidate is valid: [`JobResponse`] also has
+/// variants such as `InvalidCandidate`.
+pub type JobResult = Result<JobResponse, JobError>;
+
+/// The successful response from a job on the execution worker.
+///
+/// "Successful" refers to the job having produced a response at all; the variants below also
+/// cover invalid candidates and artifact problems.
+#[derive(Debug, Encode, Decode)]
+pub enum JobResponse {
+	/// The job produced a validation result.
+	Ok {
+		/// The result of teyrchain validation.
+		result_descriptor: ValidationResult,
+	},
+	/// A possibly transient runtime instantiation error happened during the execution; may be
+	/// retried with re-preparation
+	RuntimeConstruction(String),
+	/// The candidate is invalid.
+	InvalidCandidate(String),
+	/// PoV decompression failed
+	PoVDecompressionFailure,
+	/// The artifact is corrupted, re-prepare the artifact and try again.
+	CorruptedArtifact,
+}
+
+impl JobResponse {
+	/// Builds an [`InvalidCandidate`](Self::InvalidCandidate) response describing `ctx`.
+	///
+	/// The description is just `ctx` when `msg` is empty, and `"<ctx>: <msg>"` otherwise.
+	pub fn format_invalid(ctx: &'static str, msg: &str) -> Self {
+		let description = match msg {
+			"" => ctx.to_string(),
+			detail => format!("{}: {}", ctx, detail),
+		};
+		Self::InvalidCandidate(description)
+	}
+
+	/// Builds a [`RuntimeConstruction`](Self::RuntimeConstruction) (may-retry) response
+	/// describing `ctx`.
+	///
+	/// The description is just `ctx` when `msg` is empty, and `"<ctx>: <msg>"` otherwise.
+	pub fn runtime_construction(ctx: &'static str, msg: &str) -> Self {
+		let description = match msg {
+			"" => ctx.to_string(),
+			detail => format!("{}: {}", ctx, detail),
+		};
+		Self::RuntimeConstruction(description)
+	}
+}
+
+/// An unexpected error occurred in the execution job process. Because this comes from the job,
+/// which executes untrusted code, this error must likewise be treated as untrusted. That is, we
+/// cannot raise an internal error based on this.
+#[derive(thiserror::Error, Clone, Debug, Encode, Decode)]
+pub enum JobError {
+	/// The job timed out.
+	#[error("The job timed out")]
+	TimedOut,
+	/// An unexpected panic occurred in the execution job; carries a description of it.
+	#[error("An unexpected panic has occurred in the execution job: {0}")]
+	Panic(String),
+	/// Some error occurred when interfacing with the kernel.
+	#[error("Error interfacing with the kernel: {0}")]
+	Kernel(String),
+	/// A requested thread could not be spawned; carries a description of the failure.
+	#[error("Could not spawn the requested thread: {0}")]
+	CouldNotSpawnThread(String),
+	/// An error occurred in the CPU time monitor thread; carries a description of it.
+	#[error("An error occurred in the CPU time monitor thread: {0}")]
+	CpuTimeMonitorThread(String),
+	/// Since the job can return any exit status it wants, we have to treat this as untrusted.
+	#[error("Unexpected exit status: {0}")]
+	UnexpectedExitStatus(i32),
+}
@@ -0,0 +1,495 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Interface to the Substrate Executor
+
+use crate::error::ExecuteError;
+use pezkuwi_primitives::{
+	executor_params::{DEFAULT_LOGICAL_STACK_MAX, DEFAULT_NATIVE_STACK_MAX},
+	ExecutorParam, ExecutorParams,
+};
+use sc_executor_common::{
+	error::WasmError,
+	runtime_blob::RuntimeBlob,
+	wasm_runtime::{HeapAllocStrategy, WasmModule as _},
+};
+use sc_executor_wasmtime::{Config, DeterministicStackLimit, Semantics, WasmtimeRuntime};
+use sp_core::storage::{ChildInfo, TrackedStorageKey};
+use sp_externalities::MultiRemovalResults;
+use std::any::{Any, TypeId};
+
+// Memory configuration
+//
+// When a Substrate Runtime is instantiated, a number of WASM pages are allocated for the
+// Runtime instance's linear memory. The exact number of pages is the sum of whatever the WASM
+// blob itself requests (by default at least enough to hold the data section as well as have some
+// space left for the stack; this is, of course, overridable at link time when compiling the
+// runtime) plus the number of pages specified in the `extra_heap_pages` passed to the executor.
+//
+// By default, rustc (or `lld` specifically) should allocate 1 MiB for the shadow stack, or 16
+// pages. The data sections of runtimes are typically rather small and can fit in a single-digit
+// number of WASM pages, so let's say an extra 16 pages. Thus let's assume that 32 pages or 2 MiB
+// are used for these needs by default.
+//
+// `DEFAULT_HEAP_PAGES_ESTIMATE` is that 32-page estimate; `EXTRA_HEAP_PAGES` is the number of
+// pages added on top of it for the default maximum in `DEFAULT_CONFIG` (2048 pages, i.e. 128 MiB
+// at the 16-pages-per-MiB ratio stated above).
+const DEFAULT_HEAP_PAGES_ESTIMATE: u32 = 32;
+const EXTRA_HEAP_PAGES: u32 = 2048;
+
+// THE VALUES OF THE DEFAULT CONFIGURATION MUST NEVER BE CHANGED.
+// They serve as the base values for the execution environment parametrization.
+// To override them, add new ones to `EXECUTOR_PARAMS` in the `session_info` pallet and perform
+// a runtime upgrade to make them active.
+pub const DEFAULT_CONFIG: Config = Config {
+	allow_missing_func_imports: true,
+	cache_path: None,
+	semantics: Semantics {
+		// Default memory cap: the estimated base usage plus the extra heap pages.
+		heap_alloc_strategy: HeapAllocStrategy::Dynamic {
+			maximum_pages: Some(DEFAULT_HEAP_PAGES_ESTIMATE + EXTRA_HEAP_PAGES),
+		},
+
+		instantiation_strategy:
+			sc_executor_wasmtime::InstantiationStrategy::RecreateInstanceCopyOnWrite,
+
+		// The deterministic stack limit pins down the exact number of items the wasmtime stack
+		// can hold before it traps with a stack overflow.
+		//
+		// How the values were chosen:
+		//
+		// At the moment of writing, the default native stack size limit is 1 MiB. Assuming a
+		// logical item (see the docs about the field and the instrumentation algorithm) is 8
+		// bytes, 1 MiB can fit 2x 65536 logical items.
+		//
+		// Reaching the native stack limit is undesirable, so the logical item limit is halved
+		// and the native limit is increased 256x. This hopefully should preclude wasm code from
+		// reaching the stack limit set by the wasmtime.
+		deterministic_stack_limit: Some(DeterministicStackLimit {
+			logical_max: DEFAULT_LOGICAL_STACK_MAX,
+			native_stack_max: DEFAULT_NATIVE_STACK_MAX,
+		}),
+		canonicalize_nans: true,
+		// Multi-threaded compilation is turned off to make the preparation time easily
+		// reproducible and as deterministic as possible.
+		//
+		// Currently the prepare queue doesn't distinguish between precheck and prepare requests.
+		// That simplifies the code but slows down compile times for execute requests. This
+		// behavior may change in future.
+		parallel_compilation: false,
+
+		// WASM extensions. Only those that are meaningful to us may be controlled here. By
+		// default we're using WASM MVP, which means all the extensions are disabled.
+		// Nevertheless, some extensions (e.g., sign extension ops) are enabled by Wasmtime and
+		// cannot be disabled.
+		wasm_reference_types: false,
+		wasm_simd: false,
+		wasm_bulk_memory: false,
+		wasm_multi_value: false,
+	},
+};
+
+/// Runs `validate_block` of a compiled PVF artifact with the given `params` and returns the
+/// raw output bytes on success.
+///
+/// The call is made with [`ValidationExternalities`], with only a
+/// `ReadRuntimeVersionExt` extension registered.
+///
+/// # Errors
+///
+/// Both an error reported by `with_externalities_safe` itself and an error produced inside the
+/// call (runtime construction, instantiation or the `validate_block` call) are returned as
+/// `Err`.
+///
+/// # Safety
+///
+/// The caller must ensure that the compiled artifact passed here was:
+///   1) produced by `prepare`,
+///   2) was not modified,
+///
+/// Failure to adhere to these requirements might lead to crashes and arbitrary code execution.
+pub unsafe fn execute_artifact(
+	compiled_artifact_blob: &[u8],
+	executor_params: &ExecutorParams,
+	params: &[u8],
+) -> Result<Vec<u8>, ExecuteError> {
+	let mut extensions = sp_externalities::Extensions::new();
+	extensions.register(sp_core::traits::ReadRuntimeVersionExt::new(ReadRuntimeVersion));
+	let mut ext = ValidationExternalities(extensions);
+
+	let outcome = sc_executor::with_externalities_safe(&mut ext, || {
+		let runtime = create_runtime_from_artifact_bytes(compiled_artifact_blob, executor_params)?;
+		let mut instance = runtime.new_instance()?;
+		instance.call("validate_block", params)
+	});
+
+	// Flatten the nested result: the outer layer comes from the wrapper, the inner one from
+	// the closure above.
+	match outcome {
+		Ok(call_result) => call_result,
+		Err(err) => Err(err),
+	}
+}
+
+/// Builds a [`WasmtimeRuntime`] from the bytes of a compiled PVF artifact.
+///
+/// The configuration is [`DEFAULT_CONFIG`] with its semantics replaced by the ones derived
+/// from `executor_params` (see [`params_to_wasmtime_semantics`]).
+///
+/// # Safety
+///
+/// The caller must ensure that the compiled artifact passed here was:
+///   1) produced by `prepare`,
+///   2) was not modified,
+///
+/// Failure to adhere to these requirements might lead to crashes and arbitrary code execution.
+pub unsafe fn create_runtime_from_artifact_bytes(
+	compiled_artifact_blob: &[u8],
+	executor_params: &ExecutorParams,
+) -> Result<WasmtimeRuntime, WasmError> {
+	let (semantics, _stack_limit) = params_to_wasmtime_semantics(executor_params);
+	let config = Config { semantics, ..DEFAULT_CONFIG };
+
+	sc_executor_wasmtime::create_runtime_from_artifact_bytes::<HostFunctions>(
+		compiled_artifact_blob,
+		config,
+	)
+}
+
+/// Takes the default config and overwrites any settings with existing executor parameters.
+///
+/// Starting from `DEFAULT_CONFIG.semantics`, every parameter in `par` is applied in iteration
+/// order:
+/// - `MaxMemoryPages` sets a dynamic heap strategy whose maximum is the given page count plus
+///   `DEFAULT_HEAP_PAGES_ESTIMATE` (saturating),
+/// - `StackLogicalMax` / `StackNativeMax` override the corresponding stack limit field,
+/// - `WasmExtBulkMemory` enables the bulk memory extension,
+/// - the remaining parameters do not influence the semantics and are ignored here.
+///
+/// Returns the semantics as well as the stack limit (since we are guaranteed to have it). The
+/// returned stack limit is the same one stored in the returned semantics.
+///
+/// # Panics
+///
+/// Panics if `DEFAULT_CONFIG` carries no deterministic stack limit.
+pub fn params_to_wasmtime_semantics(par: &ExecutorParams) -> (Semantics, DeterministicStackLimit) {
+	let mut sem = DEFAULT_CONFIG.semantics.clone();
+	// `expect` already yields an owned value, so no additional clone is needed. The field is
+	// written back once all parameters have been applied.
+	let mut stack_limit = sem
+		.deterministic_stack_limit
+		.expect("There is a comment to not change the default stack limit; it should always be available; qed");
+
+	for p in par.iter() {
+		match p {
+			ExecutorParam::MaxMemoryPages(max_pages) =>
+				sem.heap_alloc_strategy = HeapAllocStrategy::Dynamic {
+					maximum_pages: Some((*max_pages).saturating_add(DEFAULT_HEAP_PAGES_ESTIMATE)),
+				},
+			ExecutorParam::StackLogicalMax(slm) => stack_limit.logical_max = *slm,
+			ExecutorParam::StackNativeMax(snm) => stack_limit.native_stack_max = *snm,
+			ExecutorParam::WasmExtBulkMemory => sem.wasm_bulk_memory = true,
+			ExecutorParam::PrecheckingMaxMemory(_) |
+			ExecutorParam::PvfPrepTimeout(_, _) |
+			ExecutorParam::PvfExecTimeout(_, _) => (), /* Not used here */
+		}
+	}
+	sem.deterministic_stack_limit = Some(stack_limit.clone());
+	(sem, stack_limit)
+}
+
+/// Prevalidates the given code and returns it as a [`RuntimeBlob`] on success.
+///
+/// At present this only constructs the runtime blob, which performs some basic consistency
+/// checks. Any further prevalidation logic should be added here in the future.
+pub fn prevalidate(code: &[u8]) -> Result<RuntimeBlob, sc_executor_common::error::WasmError> {
+	RuntimeBlob::new(code)
+}
+
+/// Compiles the given runtime blob into a serialized artifact.
+///
+/// The compilation uses the semantics derived from `executor_params` (see
+/// [`params_to_wasmtime_semantics`]). On success the returned bytes are the compiled artifact,
+/// the kind of input [`execute_artifact`] expects.
+pub fn prepare(
+	blob: RuntimeBlob,
+	executor_params: &ExecutorParams,
+) -> Result<Vec<u8>, sc_executor_common::error::WasmError> {
+	let semantics = params_to_wasmtime_semantics(executor_params).0;
+	sc_executor_wasmtime::prepare_runtime_artifact(blob, &semantics)
+}
+
+/// Available host functions. We leave out:
+///
+/// 1. storage related stuff (PVF doesn't have a notion of a persistent storage/trie)
+/// 2. tracing
+/// 3. off chain workers (PVFs do not have such a notion)
+/// 4. runtime tasks
+/// 5. sandbox
+///
+/// What remains are the `misc`, `crypto`, `hashing`, `allocator`, `logging` and `trie` host
+/// function sets of `sp_io`.
+type HostFunctions = (
+	sp_io::misc::HostFunctions,
+	sp_io::crypto::HostFunctions,
+	sp_io::hashing::HostFunctions,
+	sp_io::allocator::HostFunctions,
+	sp_io::logging::HostFunctions,
+	sp_io::trie::HostFunctions,
+);
+
+/// The validation externalities that will panic on any storage related access. (PVFs should not
+/// have a notion of a persistent storage/trie.)
+///
+/// The wrapped [`sp_externalities::Extensions`] holds the registered extensions and backs the
+/// `ExtensionStore` implementation.
+struct ValidationExternalities(sp_externalities::Extensions);
+
+// Every method of this implementation panics unconditionally with a message of the form
+// "<method>: unsupported feature for teyrchain validation". None of them reads or writes any
+// state.
+impl sp_externalities::Externalities for ValidationExternalities {
+	fn storage(&mut self, _: &[u8]) -> Option<Vec<u8>> {
+		panic!("storage: unsupported feature for teyrchain validation")
+	}
+
+	fn storage_hash(&mut self, _: &[u8]) -> Option<Vec<u8>> {
+		panic!("storage_hash: unsupported feature for teyrchain validation")
+	}
+
+	fn child_storage_hash(&mut self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> {
+		panic!("child_storage_hash: unsupported feature for teyrchain validation")
+	}
+
+	fn child_storage(&mut self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> {
+		panic!("child_storage: unsupported feature for teyrchain validation")
+	}
+
+	fn kill_child_storage(
+		&mut self,
+		_child_info: &ChildInfo,
+		_maybe_limit: Option<u32>,
+		_maybe_cursor: Option<&[u8]>,
+	) -> MultiRemovalResults {
+		panic!("kill_child_storage: unsupported feature for teyrchain validation")
+	}
+
+	fn clear_prefix(
+		&mut self,
+		_prefix: &[u8],
+		_maybe_limit: Option<u32>,
+		_maybe_cursor: Option<&[u8]>,
+	) -> MultiRemovalResults {
+		panic!("clear_prefix: unsupported feature for teyrchain validation")
+	}
+
+	fn clear_child_prefix(
+		&mut self,
+		_child_info: &ChildInfo,
+		_prefix: &[u8],
+		_maybe_limit: Option<u32>,
+		_maybe_cursor: Option<&[u8]>,
+	) -> MultiRemovalResults {
+		panic!("clear_child_prefix: unsupported feature for teyrchain validation")
+	}
+
+	fn place_storage(&mut self, _: Vec<u8>, _: Option<Vec<u8>>) {
+		panic!("place_storage: unsupported feature for teyrchain validation")
+	}
+
+	fn place_child_storage(&mut self, _: &ChildInfo, _: Vec<u8>, _: Option<Vec<u8>>) {
+		panic!("place_child_storage: unsupported feature for teyrchain validation")
+	}
+
+	fn storage_root(&mut self, _: sp_core::storage::StateVersion) -> Vec<u8> {
+		panic!("storage_root: unsupported feature for teyrchain validation")
+	}
+
+	fn child_storage_root(&mut self, _: &ChildInfo, _: sp_core::storage::StateVersion) -> Vec<u8> {
+		panic!("child_storage_root: unsupported feature for teyrchain validation")
+	}
+
+	fn next_child_storage_key(&mut self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> {
+		panic!("next_child_storage_key: unsupported feature for teyrchain validation")
+	}
+
+	fn next_storage_key(&mut self, _: &[u8]) -> Option<Vec<u8>> {
+		panic!("next_storage_key: unsupported feature for teyrchain validation")
+	}
+
+	fn storage_append(&mut self, _key: Vec<u8>, _value: Vec<u8>) {
+		panic!("storage_append: unsupported feature for teyrchain validation")
+	}
+
+	fn storage_start_transaction(&mut self) {
+		panic!("storage_start_transaction: unsupported feature for teyrchain validation")
+	}
+
+	fn storage_rollback_transaction(&mut self) -> Result<(), ()> {
+		panic!("storage_rollback_transaction: unsupported feature for teyrchain validation")
+	}
+
+	fn storage_commit_transaction(&mut self) -> Result<(), ()> {
+		panic!("storage_commit_transaction: unsupported feature for teyrchain validation")
+	}
+
+	fn wipe(&mut self) {
+		panic!("wipe: unsupported feature for teyrchain validation")
+	}
+
+	fn commit(&mut self) {
+		panic!("commit: unsupported feature for teyrchain validation")
+	}
+
+	fn read_write_count(&self) -> (u32, u32, u32, u32) {
+		panic!("read_write_count: unsupported feature for teyrchain validation")
+	}
+
+	fn reset_read_write_count(&mut self) {
+		panic!("reset_read_write_count: unsupported feature for teyrchain validation")
+	}
+
+	fn get_whitelist(&self) -> Vec<TrackedStorageKey> {
+		panic!("get_whitelist: unsupported feature for teyrchain validation")
+	}
+
+	fn set_whitelist(&mut self, _: Vec<TrackedStorageKey>) {
+		panic!("set_whitelist: unsupported feature for teyrchain validation")
+	}
+
+	fn set_offchain_storage(&mut self, _: &[u8], _: std::option::Option<&[u8]>) {
+		panic!("set_offchain_storage: unsupported feature for teyrchain validation")
+	}
+
+	fn get_read_and_written_keys(&self) -> Vec<(Vec<u8>, u32, u32, bool)> {
+		panic!("get_read_and_written_keys: unsupported feature for teyrchain validation")
+	}
+}
+
+// Extension handling is delegated to the wrapped `Extensions` value.
+impl sp_externalities::ExtensionStore for ValidationExternalities {
+	fn extension_by_type_id(&mut self, type_id: TypeId) -> Option<&mut dyn Any> {
+		let Self(extensions) = self;
+		extensions.get_mut(type_id)
+	}
+
+	fn register_extension_with_type_id(
+		&mut self,
+		type_id: TypeId,
+		extension: Box<dyn sp_externalities::Extension>,
+	) -> Result<(), sp_externalities::Error> {
+		let Self(extensions) = self;
+		extensions.register_with_type_id(type_id, extension)
+	}
+
+	fn deregister_extension_by_type_id(
+		&mut self,
+		type_id: TypeId,
+	) -> Result<(), sp_externalities::Error> {
+		// `deregister` reports whether an extension was actually removed.
+		if !self.0.deregister(type_id) {
+			return Err(sp_externalities::Error::ExtensionIsNotRegistered(type_id));
+		}
+		Ok(())
+	}
+}
+
+/// Reads the runtime version embedded in a (possibly compressed) PVF blob.
+struct ReadRuntimeVersion;
+
+impl sp_core::traits::ReadRuntimeVersion for ReadRuntimeVersion {
+	/// Returns the encoded embedded runtime version of `wasm_code`.
+	///
+	/// Fails with a descriptive message if the blob cannot be read, if the version section
+	/// cannot be read, or if the blob has no version section. The externalities are not used.
+	fn read_runtime_version(
+		&self,
+		wasm_code: &[u8],
+		_ext: &mut dyn sp_externalities::Externalities,
+	) -> Result<Vec<u8>, String> {
+		use codec::Encode;
+
+		let blob = RuntimeBlob::uncompress_if_needed(wasm_code)
+			.map_err(|e| format!("Failed to read the PVF runtime blob: {:?}", e))?;
+		let embedded = sc_executor::read_embedded_version(&blob)
+			.map_err(|e| format!("Failed to read the static section from the PVF blob: {:?}", e))?;
+
+		embedded
+			.map(|version| version.encode())
+			.ok_or_else(|| "runtime version section is not found".to_string())
+	}
+}
+
+#[cfg(test)]
+mod tests {
+	use super::*;
+
+	/// For every executor parameter, toggling it must change the prepared artifact exactly
+	/// when it changes the preparation hash of the parameter set.
+	#[test]
+	fn prep_hash_matches_artifact_effect_of_executor_params() {
+		use pezkuwi_primitives::{PvfExecKind, PvfPrepKind};
+		use ExecutorParam::*;
+
+		// If you're adding a new ExecutorParam, please add it to the `cases` below.
+		// The match is exhaustive on purpose, so a new variant breaks compilation here.
+		let _coverage_check = |param: &ExecutorParam| match param {
+			MaxMemoryPages(_) |
+			StackLogicalMax(_) |
+			StackNativeMax(_) |
+			PrecheckingMaxMemory(_) |
+			PvfPrepTimeout(_, _) |
+			PvfExecTimeout(_, _) |
+			WasmExtBulkMemory => true,
+		};
+
+		// A minimal module with memory and an exported `validate_block` function.
+		let wat = r#"(module
+			(memory 1)
+			(func (export "validate_block") (param i32 i32))
+		)"#;
+		let wasm = wat::parse_str(wat).expect("wat parsing failed");
+		let blob = prevalidate(&wasm).expect("valid runtime blob");
+
+		let compile = |params: &ExecutorParams| -> Vec<u8> {
+			prepare(blob.clone(), params).expect("prepare should succeed")
+		};
+		// Builds a parameter set containing exactly one parameter.
+		let single = |param: ExecutorParam| ExecutorParams::from(&[param][..]);
+
+		// Each case is compared against the default (empty) parameter set.
+		let defaults = ExecutorParams::default();
+		let cases: Vec<(&str, ExecutorParams)> = vec![
+			("MaxMemoryPages", single(MaxMemoryPages(128))),
+			("StackLogicalMax", single(StackLogicalMax(DEFAULT_LOGICAL_STACK_MAX + 1))),
+			("StackNativeMax", single(StackNativeMax(DEFAULT_NATIVE_STACK_MAX + 1024))),
+			("PrecheckingMaxMemory", single(PrecheckingMaxMemory(300 * 1024 * 1024))),
+			("PvfPrepTimeout(Precheck)", single(PvfPrepTimeout(PvfPrepKind::Precheck, 1))),
+			("PvfPrepTimeout(Prepare)", single(PvfPrepTimeout(PvfPrepKind::Prepare, 2))),
+			("PvfExecTimeout(Backing)", single(PvfExecTimeout(PvfExecKind::Backing, 1))),
+			("PvfExecTimeout(Approval)", single(PvfExecTimeout(PvfExecKind::Approval, 2))),
+			("WasmExtBulkMemory", single(WasmExtBulkMemory)),
+		];
+
+		for (name, toggled) in cases {
+			let baseline_artifact = compile(&defaults);
+			let toggled_artifact = compile(&toggled);
+			let artifact_changed = baseline_artifact != toggled_artifact;
+			let prep_hash_changed = defaults.prep_hash() != toggled.prep_hash();
+			assert_eq!(
+				artifact_changed,
+				prep_hash_changed,
+				"ExecutorParam classification mismatch for {}: artifact_changed={}, prep_hash_changed={}",
+				name,
+				artifact_changed,
+				prep_hash_changed,
+			);
+		}
+	}
+}
@@ -0,0 +1,129 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Contains functionality related to PVFs that is shared by the PVF host and the PVF workers.
+#![deny(unused_crate_dependencies)]
+
+pub mod error;
+pub mod execute;
+pub mod executor_interface;
+pub mod prepare;
+pub mod pvf;
+pub mod worker;
+pub mod worker_dir;
+
+pub use cpu_time::ProcessTime;
+
+// Used by `decl_worker_main!`.
+pub use sp_tracing;
+
+const LOG_TARGET: &str = "teyrchain::pvf-common";
+
+use codec::{Decode, Encode};
+use sp_core::H256;
+use std::{
+	io::{self, Read, Write},
+	mem,
+};
+
+/// Timeout constants for tests; only compiled with the `test-utils` feature.
+#[cfg(feature = "test-utils")]
+pub mod tests {
+	use std::time::Duration;
+
+	/// Execution timeout for tests: 3 seconds.
+	pub const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(3);
+	/// Preparation timeout for tests: 30 seconds.
+	pub const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(30);
+}
+
+/// Status of security features on the current system.
+///
+/// The derived `Default` yields a status with every flag set to `false`.
+#[derive(Debug, Clone, Default, PartialEq, Eq, Encode, Decode)]
+pub struct SecurityStatus {
+	/// Whether Secure Validator Mode is enabled. This mode enforces that all required security
+	/// features are present. All features are enabled on a best-effort basis regardless.
+	pub secure_validator_mode: bool,
+	/// Whether the landlock features we use are fully available on this system.
+	pub can_enable_landlock: bool,
+	/// Whether the seccomp features we use are fully available on this system.
+	pub can_enable_seccomp: bool,
+	/// Whether we are able to unshare the user namespace and change the filesystem root.
+	pub can_unshare_user_namespace_and_change_root: bool,
+	/// Whether we are able to call `clone` with all sandboxing flags.
+	pub can_do_secure_clone: bool,
+}
+
+/// A handshake with information for the worker.
+#[derive(Debug, Encode, Decode)]
+pub struct WorkerHandshake {
+	/// Status of the security features, see [`SecurityStatus`].
+	pub security_status: SecurityStatus,
+}
+
+/// Writes `buf` into `w` as a single frame. Sync version of `framed_send` to avoid dependency
+/// on tokio.
+///
+/// A frame is the length of `buf` as a little-endian `usize`, followed by the bytes of `buf`.
+/// The first write error, if any, is returned.
+pub fn framed_send_blocking(w: &mut (impl Write + Unpin), buf: &[u8]) -> io::Result<()> {
+	let header = buf.len().to_le_bytes();
+	w.write_all(&header)?;
+	w.write_all(buf)
+}
+
+/// Reads a single frame from `r`. Sync version of `framed_recv` to avoid dependency on tokio.
+///
+/// A frame is a little-endian `usize` length followed by that many payload bytes; the payload
+/// is returned. An error is returned if either the length prefix or the payload cannot be read
+/// in full.
+pub fn framed_recv_blocking(r: &mut (impl Read + Unpin)) -> io::Result<Vec<u8>> {
+	let mut header = [0u8; mem::size_of::<usize>()];
+	r.read_exact(&mut header)?;
+
+	// The buffer is sized by the length announced in the header.
+	let mut payload = vec![0; usize::from_le_bytes(header)];
+	r.read_exact(&mut payload)?;
+	Ok(payload)
+}
+
+/// Checksum of an artifact: a transparent newtype around [`H256`], produced by
+/// [`compute_checksum`].
+#[derive(Debug, Default, Clone, Copy, Encode, Decode, PartialEq, Eq)]
+#[repr(transparent)]
+pub struct ArtifactChecksum(H256);
+
+/// Computes the checksum of the given artifact bytes as their `twox_256` hash.
+pub fn compute_checksum(data: &[u8]) -> ArtifactChecksum {
+	let digest = sp_crypto_hashing::twox_256(data);
+	ArtifactChecksum(H256::from_slice(&digest))
+}
+
+#[cfg(all(test, not(feature = "test-utils")))]
+mod tests {
+	use super::*;
+
+	/// The default security status must have every flag switched off.
+	#[test]
+	fn default_secure_status() {
+		// Exhaustive destructuring: a newly added field has to be handled here as well.
+		let SecurityStatus {
+			secure_validator_mode,
+			can_enable_landlock,
+			can_enable_seccomp,
+			can_unshare_user_namespace_and_change_root,
+			can_do_secure_clone,
+		} = SecurityStatus::default();
+
+		assert!(
+			!secure_validator_mode,
+			"secure_validator_mode is false for default security status"
+		);
+		assert!(!can_enable_landlock, "can_enable_landlock is false for default security status");
+		assert!(!can_enable_seccomp, "can_enable_seccomp is false for default security status");
+		assert!(
+			!can_unshare_user_namespace_and_change_root,
+			"can_unshare_user_namespace_and_change_root is false for default security status"
+		);
+		assert!(!can_do_secure_clone, "can_do_secure_clone is false for default security status");
+	}
+}
@@ -0,0 +1,85 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+use crate::ArtifactChecksum;
+use codec::{Decode, Encode};
+use std::path::PathBuf;
+
+/// Result from prepare worker if successful.
+///
+/// Unlike [`PrepareSuccess`], this type derives `Encode`/`Decode` and carries neither the
+/// artifact path nor its size.
+#[derive(Debug, Clone, Default, Encode, Decode)]
+pub struct PrepareWorkerSuccess {
+	/// Checksum of the compiled PVF.
+	pub checksum: ArtifactChecksum,
+	/// Stats of the current preparation run.
+	pub stats: PrepareStats,
+}
+
+/// Result of PVF preparation if successful.
+///
+/// This type does not derive `Encode`/`Decode`.
+#[derive(Debug, Clone, Default)]
+pub struct PrepareSuccess {
+	/// Checksum of the compiled PVF.
+	pub checksum: ArtifactChecksum,
+	/// Canonical path to the compiled artifact.
+	pub path: PathBuf,
+	/// Size in bytes (presumably of the compiled artifact at `path`; confirm against the
+	/// code that fills it in).
+	pub size: u64,
+	/// Stats of the current preparation run.
+	pub stats: PrepareStats,
+}
+
+/// Preparation statistics, including the CPU time and memory taken.
+///
+/// Embedded in both [`PrepareWorkerSuccess`] and [`PrepareSuccess`].
+#[derive(Debug, Clone, Default, Encode, Decode)]
+pub struct PrepareStats {
+	/// The CPU time that elapsed for the preparation job.
+	pub cpu_time_elapsed: std::time::Duration,
+	/// The observed memory statistics for the preparation job.
+	pub memory_stats: MemoryStats,
+	/// The decompressed Wasm code length observed during the preparation.
+	pub observed_wasm_code_len: u32,
+}
+
+/// Helper struct to contain all the memory stats, including `MemoryAllocationStats` and, if
+/// supported by the OS, `ru_maxrss`.
+///
+/// Two of the fields are conditionally compiled, so the set of fields depends on the target
+/// OS and the enabled features.
+#[derive(Clone, Debug, Default, Encode, Decode)]
+pub struct MemoryStats {
+	/// Memory stats from `tikv_jemalloc_ctl`, polling-based and not very precise.
+	/// Present only on Linux or with the `jemalloc-allocator` feature.
+	#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+	pub memory_tracker_stats: Option<MemoryAllocationStats>,
+	/// `ru_maxrss` from `getrusage`. `None` if an error occurred.
+	/// Present only on Linux.
+	#[cfg(target_os = "linux")]
+	pub max_rss: Option<i64>,
+	/// Peak allocation in bytes measured by tracking allocator
+	pub peak_tracked_alloc: u64,
+}
+
+/// Statistics of collected memory metrics.
+///
+/// Only compiled on Linux or when the `jemalloc-allocator` feature is enabled.
+#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+#[derive(Clone, Debug, Default, Encode, Decode)]
+pub struct MemoryAllocationStats {
+	/// Total resident memory, in bytes.
+	pub resident: u64,
+	/// Total allocated memory, in bytes.
+	pub allocated: u64,
+}
+
+/// The kind of prepare job.
+///
+/// A small `Copy` enum with two variants.
+#[derive(Copy, Clone, Debug, Encode, Decode)]
+pub enum PrepareJobKind {
+	/// Compilation triggered by a candidate validation request.
+	Compilation,
+	/// A prechecking job.
+	Prechecking,
+}
@@ -0,0 +1,141 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+use crate::prepare::PrepareJobKind;
+use codec::{Decode, Encode};
+use pezkuwi_primitives::ExecutorParams;
+use pezkuwi_teyrchain_primitives::primitives::ValidationCodeHash;
+use std::{fmt, sync::Arc, time::Duration};
+
+/// A struct that carries the exhaustive set of data to prepare an artifact out of plain
+/// Wasm binary
+///
+/// Should be cheap to clone: the code and the executor params are held behind `Arc`.
+///
+/// All fields are private; instances are created with [`PvfPrepData::from_code`] and read
+/// through the accessor methods.
+#[derive(Clone, Encode, Decode)]
+pub struct PvfPrepData {
+	/// Wasm code (maybe compressed)
+	maybe_compressed_code: Arc<Vec<u8>>,
+	/// Maximum uncompressed code size.
+	validation_code_bomb_limit: u32,
+	/// Wasm code hash.
+	code_hash: ValidationCodeHash,
+	/// Executor environment parameters for the session for which artifact is prepared
+	executor_params: Arc<ExecutorParams>,
+	/// Preparation timeout
+	prep_timeout: Duration,
+	/// The kind of preparation job.
+	prep_kind: PrepareJobKind,
+}
+
+impl PvfPrepData {
+	/// Creates the preparation data from the given (maybe compressed) PVF code and the
+	/// accompanying settings.
+	///
+	/// The code hash is the `blake2_256` hash of `code` exactly as passed in.
+	pub fn from_code(
+		code: Vec<u8>,
+		executor_params: ExecutorParams,
+		prep_timeout: Duration,
+		prep_kind: PrepareJobKind,
+		validation_code_bomb_limit: u32,
+	) -> Self {
+		// Hash first, then move the code into its `Arc`.
+		let code_hash = sp_crypto_hashing::blake2_256(&code).into();
+		Self {
+			maybe_compressed_code: Arc::new(code),
+			validation_code_bomb_limit,
+			code_hash,
+			executor_params: Arc::new(executor_params),
+			prep_timeout,
+			prep_kind,
+		}
+	}
+
+	/// The validation code hash.
+	pub fn code_hash(&self) -> ValidationCodeHash {
+		self.code_hash
+	}
+
+	/// A shared handle to the PVF code blob.
+	pub fn maybe_compressed_code(&self) -> Arc<Vec<u8>> {
+		Arc::clone(&self.maybe_compressed_code)
+	}
+
+	/// A shared handle to the executor params.
+	pub fn executor_params(&self) -> Arc<ExecutorParams> {
+		Arc::clone(&self.executor_params)
+	}
+
+	/// The preparation timeout.
+	pub fn prep_timeout(&self) -> Duration {
+		self.prep_timeout
+	}
+
+	/// The preparation kind.
+	pub fn prep_kind(&self) -> PrepareJobKind {
+		self.prep_kind
+	}
+
+	/// The validation code bomb limit.
+	pub fn validation_code_bomb_limit(&self) -> u32 {
+		self.validation_code_bomb_limit
+	}
+
+	/// Test helper: uses the little-endian bytes of `num` as the code, with default executor
+	/// params, the given timeout, the `Compilation` kind and a 30 MiB code bomb limit.
+	#[cfg(feature = "test-utils")]
+	pub fn from_discriminator_and_timeout(num: u32, timeout: Duration) -> Self {
+		let code = num.to_le_bytes().to_vec();
+		let bomb_limit = 30 * 1024 * 1024;
+		Self::from_code(
+			code,
+			ExecutorParams::default(),
+			timeout,
+			PrepareJobKind::Compilation,
+			bomb_limit,
+		)
+	}
+
+	/// Test helper: like [`Self::from_discriminator_and_timeout`] with the test preparation
+	/// timeout.
+	#[cfg(feature = "test-utils")]
+	pub fn from_discriminator(num: u32) -> Self {
+		let timeout = crate::tests::TEST_PREPARATION_TIMEOUT;
+		Self::from_discriminator_and_timeout(num, timeout)
+	}
+
+	/// Test helper: like [`Self::from_discriminator`] but of the `Prechecking` kind.
+	#[cfg(feature = "test-utils")]
+	pub fn from_discriminator_precheck(num: u32) -> Self {
+		Self {
+			prep_kind: PrepareJobKind::Prechecking,
+			..Self::from_discriminator_and_timeout(num, crate::tests::TEST_PREPARATION_TIMEOUT)
+		}
+	}
+}
+
+// The code itself is elided from the debug output; only the hash, the executor params and the
+// preparation timeout are printed.
+impl fmt::Debug for PvfPrepData {
+	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+		let Self { code_hash, executor_params, prep_timeout, .. } = self;
+		write!(
+			f,
+			"Pvf {{ code: [...], code_hash: {:?}, executor_params: {:?}, prep_timeout: {:?} }}",
+			code_hash, executor_params, prep_timeout
+		)
+	}
+}
+
+// Equality considers only the code hash and the hash of the executor params; the timeout, the
+// job kind and the code bomb limit do not take part in the comparison.
+impl PartialEq for PvfPrepData {
+	fn eq(&self, other: &Self) -> bool {
+		if self.code_hash != other.code_hash {
+			return false;
+		}
+		self.executor_params.hash() == other.executor_params.hash()
+	}
+}
+
+impl Eq for PvfPrepData {}
@@ -0,0 +1,839 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Functionality common to both prepare and execute workers.
+
+pub mod security;
+
+use crate::{
+	framed_recv_blocking, framed_send_blocking, SecurityStatus, WorkerHandshake, LOG_TARGET,
+};
+use codec::{Decode, Encode};
+use cpu_time::ProcessTime;
+use futures::never::Never;
+use nix::{errno::Errno, sys::resource::Usage};
+use std::{
+	any::Any,
+	fmt::{self},
+	fs::File,
+	io::{self, Read, Write},
+	os::{
+		fd::{AsRawFd, FromRawFd, RawFd},
+		unix::net::UnixStream,
+	},
+	path::PathBuf,
+	sync::mpsc::{Receiver, RecvTimeoutError},
+	time::Duration,
+};
+
+/// Use this macro to declare a `fn main() {}` that will create an executable that can be used for
+/// spawning the desired worker.
+///
+/// # Arguments
+///
+/// - `$expected_command`: the subcommand this binary must be invoked with; any other subcommand
+///   makes the generated `main` panic.
+/// - `$entrypoint`: invoked as
+///   `$entrypoint(socket_path, worker_dir_path, node_version, Some($worker_version))` once the
+///   command line has been parsed.
+/// - `$worker_version`, `$worker_version_hash`: printed by `--version`, `--full-version` and the
+///   help text.
+///
+/// # Panics
+///
+/// The generated `main` panics on an unexpected subcommand or argument, when a flag is given
+/// without its value, and when `--socket-path` or `--worker-dir-path` is absent.
+#[macro_export]
+macro_rules! decl_worker_main {
+	($expected_command:expr, $entrypoint:expr, $worker_version:expr, $worker_version_hash:expr $(,)*) => {
+		fn get_full_version() -> String {
+			format!("{}-{}", $worker_version, $worker_version_hash)
+		}
+
+		fn print_help(expected_command: &str) {
+			println!("{} {}", expected_command, $worker_version);
+			println!("commit: {}", $worker_version_hash);
+			println!();
+			println!("PVF worker that is called by pezkuwi.");
+		}
+
+		fn main() {
+			#[cfg(target_os = "linux")]
+			use $crate::worker::security;
+
+			$crate::sp_tracing::try_init_simple();
+
+			let args = std::env::args().collect::<Vec<_>>();
+			if args.len() == 1 {
+				print_help($expected_command);
+				return;
+			}
+
+			match args[1].as_ref() {
+				"--help" | "-h" => {
+					print_help($expected_command);
+					return;
+				},
+				"--version" | "-v" => {
+					println!("{}", $worker_version);
+					return;
+				},
+				// Useful for debugging. --version is used for version checks.
+				"--full-version" => {
+					println!("{}", get_full_version());
+					return;
+				},
+
+				"--check-can-enable-landlock" => {
+					#[cfg(target_os = "linux")]
+					let status = if let Err(err) = security::landlock::check_can_fully_enable() {
+						// Write the error to stderr, log it on the host-side.
+						eprintln!("{}", err);
+						-1
+					} else {
+						0
+					};
+					#[cfg(not(target_os = "linux"))]
+					let status = -1;
+					std::process::exit(status)
+				},
+				"--check-can-enable-seccomp" => {
+					#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
+					let status = if let Err(err) = security::seccomp::check_can_fully_enable() {
+						// Write the error to stderr, log it on the host-side.
+						eprintln!("{}", err);
+						-1
+					} else {
+						0
+					};
+					#[cfg(not(all(target_os = "linux", target_arch = "x86_64")))]
+					let status = -1;
+					std::process::exit(status)
+				},
+				"--check-can-unshare-user-namespace-and-change-root" => {
+					// The cache path is looked up with `get` so that a missing argument is
+					// reported like any other failed check instead of an index panic.
+					#[cfg(target_os = "linux")]
+					let status = match args.get(2) {
+						Some(cache_path) => {
+							let cache_path_tempdir = std::path::Path::new(cache_path);
+							if let Err(err) =
+								security::change_root::check_can_fully_enable(cache_path_tempdir)
+							{
+								// Write the error to stderr, log it on the host-side.
+								eprintln!("{}", err);
+								-1
+							} else {
+								0
+							}
+						},
+						None => {
+							eprintln!("missing cache path argument for the change-root check");
+							-1
+						},
+					};
+					#[cfg(not(target_os = "linux"))]
+					let status = -1;
+					std::process::exit(status)
+				},
+				"--check-can-do-secure-clone" => {
+					#[cfg(target_os = "linux")]
+					// SAFETY: new process is spawned within a single threaded process. This
+					// invariant is enforced by tests.
+					let status = if let Err(err) = unsafe { security::clone::check_can_fully_clone() } {
+						// Write the error to stderr, log it on the host-side.
+						eprintln!("{}", err);
+						-1
+					} else {
+						0
+					};
+					#[cfg(not(target_os = "linux"))]
+					let status = -1;
+					std::process::exit(status)
+				},
+
+				"test-sleep" => {
+					std::thread::sleep(std::time::Duration::from_secs(5));
+					return;
+				},
+
+				subcommand => {
+					// Must be passed for compatibility with the single-binary test workers.
+					if subcommand != $expected_command {
+						panic!(
+							"trying to run {} binary with the {} subcommand",
+							$expected_command, subcommand
+						)
+					}
+				},
+			}
+
+			let mut socket_path = None;
+			let mut worker_dir_path = None;
+			let mut node_version = None;
+
+			// Each flag consumes the following argument as its value, hence the extra `i += 1`
+			// inside every arm. A flag in last position has no value; say so explicitly rather
+			// than failing with an out-of-bounds index.
+			let mut i = 2;
+			while i < args.len() {
+				match args[i].as_ref() {
+					"--socket-path" => {
+						socket_path = Some(
+							args.get(i + 1)
+								.expect("the --socket-path argument requires a value")
+								.as_str(),
+						);
+						i += 1
+					},
+					"--worker-dir-path" => {
+						worker_dir_path = Some(
+							args.get(i + 1)
+								.expect("the --worker-dir-path argument requires a value")
+								.as_str(),
+						);
+						i += 1
+					},
+					"--node-impl-version" => {
+						node_version = Some(
+							args.get(i + 1)
+								.expect("the --node-impl-version argument requires a value")
+								.as_str(),
+						);
+						i += 1
+					},
+					arg => panic!("Unexpected argument found: {}", arg),
+				}
+				i += 1;
+			}
+			let socket_path = socket_path.expect("the --socket-path argument is required");
+			let worker_dir_path =
+				worker_dir_path.expect("the --worker-dir-path argument is required");
+
+			let socket_path = std::path::Path::new(socket_path).to_owned();
+			let worker_dir_path = std::path::Path::new(worker_dir_path).to_owned();
+
+			$entrypoint(socket_path, worker_dir_path, node_version, Some($worker_version));
+		}
+	};
+}
+
+/// Creates a pipe with the close-on-exec flag set on both ends and returns the two raw file
+/// descriptors in the order the OS reported them.
+///
+/// Taken from the `os_pipe` crate. Copied here to reduce one dependency and because its
+/// type-safe abstractions do not play well with nix's clone.
+#[cfg(not(target_os = "macos"))]
+pub fn pipe2_cloexec() -> io::Result<(libc::c_int, libc::c_int)> {
+	let mut pipe_fds: [libc::c_int; 2] = [0, 0];
+	// SAFETY: `pipe_fds` is a valid, writable array of two `c_int`s, which is what `pipe2`
+	// writes its result into.
+	match unsafe { libc::pipe2(pipe_fds.as_mut_ptr(), libc::O_CLOEXEC) } {
+		0 => Ok((pipe_fds[0], pipe_fds[1])),
+		_ => Err(io::Error::last_os_error()),
+	}
+}
+
+/// macOS variant of `pipe2_cloexec`: creates the pipe with `pipe` and then sets `FD_CLOEXEC` on
+/// each end with `fcntl`.
+///
+/// # Errors
+///
+/// Returns the OS error of the first failing call. If setting the flag fails, both
+/// descriptors are closed before returning so that they do not leak.
+#[cfg(target_os = "macos")]
+pub fn pipe2_cloexec() -> io::Result<(libc::c_int, libc::c_int)> {
+	let mut fds: [libc::c_int; 2] = [0; 2];
+	// SAFETY: `fds` is a valid, writable array of two `c_int`s, which is what `pipe` writes its
+	// result into.
+	let res = unsafe { libc::pipe(fds.as_mut_ptr()) };
+	if res != 0 {
+		return Err(io::Error::last_os_error());
+	}
+	for fd in fds {
+		// SAFETY: `fd` was returned by the successful `pipe` call above and is still open.
+		let res = unsafe { libc::fcntl(fd, libc::F_SETFD, libc::FD_CLOEXEC) };
+		if res != 0 {
+			// Capture the error first: `close` below may overwrite `errno`.
+			let err = io::Error::last_os_error();
+			// SAFETY: both descriptors are open and have not been handed out to anyone yet.
+			unsafe {
+				libc::close(fds[0]);
+				libc::close(fds[1]);
+			}
+			return Err(err);
+		}
+	}
+	Ok((fds[0], fds[1]))
+}
+
+/// A wrapper around a file descriptor used to encapsulate and restrict
+/// functionality for pipe operations.
+///
+/// Only the trait implementations below (`AsRawFd`, `FromRawFd`, `Read`, `Write`) are exposed;
+/// the inner `File` stays private.
+pub struct PipeFd {
+	// The descriptor, held as a `File` so that all I/O can be forwarded to it.
+	file: File,
+}
+
+impl AsRawFd for PipeFd {
+	/// Returns the raw file descriptor associated with this `PipeFd`, by forwarding to the
+	/// wrapped `File`.
+	fn as_raw_fd(&self) -> RawFd {
+		self.file.as_raw_fd()
+	}
+}
+
+impl FromRawFd for PipeFd {
+	/// Creates a new `PipeFd` instance from a raw file descriptor.
+	///
+	/// # Safety
+	///
+	/// The fd passed in must be an owned file descriptor; in particular, it must be open.
+	/// The requirement is passed straight on to `File::from_raw_fd`.
+	unsafe fn from_raw_fd(fd: RawFd) -> Self {
+		PipeFd { file: File::from_raw_fd(fd) }
+	}
+}
+
+/// Reading is forwarded to the wrapped `File`.
+impl Read for PipeFd {
+	fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+		self.file.read(buf)
+	}
+
+	// Forwarded explicitly so that the wrapped `File`'s `read_to_end` is used rather than the
+	// trait's provided method.
+	fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
+		self.file.read_to_end(buf)
+	}
+}
+
+/// Writing is forwarded to the wrapped `File`.
+impl Write for PipeFd {
+	fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+		self.file.write(buf)
+	}
+
+	fn flush(&mut self) -> io::Result<()> {
+		self.file.flush()
+	}
+
+	// Forwarded explicitly so that the wrapped `File`'s `write_all` is used rather than the
+	// trait's provided method.
+	fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
+		self.file.write_all(buf)
+	}
+}
+
+/// Some allowed overhead that we account for in the "CPU time monitor" thread's sleeps, on the
+/// child process.
+///
+/// `cpu_time_monitor_loop` adds this to the remaining CPU time when computing how long to
+/// wait for the finish signal.
+pub const JOB_TIMEOUT_OVERHEAD: Duration = Duration::from_millis(50);
+
+/// The kind of worker process, as used for logging and for choosing mount flags when changing
+/// root.
+#[derive(Debug, Clone, Copy)]
+pub enum WorkerKind {
+	/// A preparation worker.
+	Prepare,
+	/// An execution worker.
+	Execute,
+	/// The pseudo-worker used when only checking whether the root can be changed.
+	CheckPivotRoot,
+}
+
+impl fmt::Display for WorkerKind {
+	/// Writes the lowercase, human-readable name of the worker kind.
+	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+		let label = match self {
+			Self::Prepare => "prepare",
+			Self::Execute => "execute",
+			Self::CheckPivotRoot => "check pivot root",
+		};
+		f.write_str(label)
+	}
+}
+
+/// Information about a worker process, attached to its log messages.
+#[derive(Debug)]
+pub struct WorkerInfo {
+	/// Process id; `run_worker` fills this from `std::process::id()`.
+	pub pid: u32,
+	/// Which kind of worker this is.
+	pub kind: WorkerKind,
+	/// The worker's version, if one was passed in.
+	pub version: Option<String>,
+	/// The worker directory. `run_worker` replaces it with `/` after a successful change of
+	/// root.
+	pub worker_dir_path: PathBuf,
+}
+
+// NOTE: The worker version must be passed in so that we accurately get the version of the worker,
+// and not the version that this crate was compiled with.
+//
+// NOTE: This must not spawn any threads due to safety requirements in `event_loop` and to avoid
+// errors in [`security::change_root::try_restrict`].
+//
+/// Initializes the worker process, then runs the given event loop, which spawns a new job process
+/// to securely handle each incoming request.
+///
+/// Steps, in order:
+///
+/// 1. If both a node version and a worker version are given and they differ, signal the parent
+///    node and shut down.
+/// 2. Check that the worker dir can be read.
+/// 3. Connect to the socket at `socket_path` and receive the handshake from it.
+/// 4. Enable the security features named in the handshake's security status.
+/// 5. Run `event_loop`.
+///
+/// This function does not return normally: `event_loop` can only return an error, and every
+/// failure path ends in a process exit.
+pub fn run_worker<F>(
+	worker_kind: WorkerKind,
+	socket_path: PathBuf,
+	worker_dir_path: PathBuf,
+	node_version: Option<&str>,
+	worker_version: Option<&str>,
+	mut event_loop: F,
+) where
+	F: FnMut(UnixStream, &WorkerInfo, SecurityStatus) -> io::Result<Never>,
+{
+	// `worker_info` is only mutated on Linux (after changing root), hence the conditional allow.
+	#[cfg_attr(not(target_os = "linux"), allow(unused_mut))]
+	let mut worker_info = WorkerInfo {
+		pid: std::process::id(),
+		kind: worker_kind,
+		version: worker_version.map(|v| v.to_string()),
+		worker_dir_path,
+	};
+	gum::debug!(
+		target: LOG_TARGET,
+		?worker_info,
+		?socket_path,
+		"starting pvf worker ({})",
+		worker_info.kind
+	);
+
+	// Check for a mismatch between the node and worker versions.
+	if let (Some(node_version), Some(worker_version)) = (node_version, &worker_info.version) {
+		if node_version != worker_version {
+			gum::error!(
+				target: LOG_TARGET,
+				?worker_info,
+				%node_version,
+				"Node and worker version mismatch, node needs restarting, forcing shutdown",
+			);
+			kill_parent_node_in_emergency();
+			worker_shutdown(worker_info, "Version mismatch");
+		}
+	}
+
+	// Make sure that we can read the worker dir path, and log its contents.
+	let entries: io::Result<Vec<_>> = std::fs::read_dir(&worker_info.worker_dir_path)
+		.and_then(|d| d.map(|res| res.map(|e| e.file_name())).collect());
+	match entries {
+		Ok(entries) => {
+			gum::trace!(target: LOG_TARGET, ?worker_info, "content of worker dir: {:?}", entries)
+		},
+		Err(err) => {
+			let err = format!("Could not read worker dir: {}", err.to_string());
+			worker_shutdown_error(worker_info, &err);
+		},
+	}
+
+	// Connect to the socket.
+	let stream = || -> io::Result<UnixStream> {
+		let stream = UnixStream::connect(&socket_path)?;
+		// The socket file is removed once connected; a failure to remove it is ignored.
+		let _ = std::fs::remove_file(&socket_path);
+		Ok(stream)
+	}();
+	let mut stream = match stream {
+		Ok(ok) => ok,
+		Err(err) => worker_shutdown_error(worker_info, &err.to_string()),
+	};
+
+	let WorkerHandshake { security_status } = match recv_worker_handshake(&mut stream) {
+		Ok(ok) => ok,
+		Err(err) => worker_shutdown_error(worker_info, &err.to_string()),
+	};
+
+	// Enable some security features.
+	{
+		gum::trace!(target: LOG_TARGET, ?security_status, "Enabling security features");
+
+		// First, make sure env vars were cleared, to match the environment we perform the checks
+		// within. (In theory, running checks with different env vars could result in different
+		// outcomes of the checks.)
+		if !security::check_env_vars_were_cleared(&worker_info) {
+			let err = "not all env vars were cleared when spawning the process";
+			gum::error!(
+				target: LOG_TARGET,
+				?worker_info,
+				"{}",
+				err
+			);
+			if security_status.secure_validator_mode {
+				worker_shutdown(worker_info, err);
+			}
+		}
+
+		// Call based on whether we can change root. Error out if it should work but fails.
+		//
+		// NOTE: This should not be called in a multi-threaded context (i.e. inside the tokio
+		// runtime). `unshare(2)`:
+		//
+		//       > CLONE_NEWUSER requires that the calling process is not threaded.
+		#[cfg(target_os = "linux")]
+		if security_status.can_unshare_user_namespace_and_change_root {
+			if let Err(err) = security::change_root::enable_for_worker(&worker_info) {
+				// The filesystem may be in an inconsistent state, always bail out.
+				let err = format!("Could not change root to be the worker cache path: {}", err);
+				worker_shutdown_error(worker_info, &err);
+			}
+			// After the change of root, the worker dir is the new root.
+			worker_info.worker_dir_path = std::path::Path::new("/").to_owned();
+		}
+
+		#[cfg(target_os = "linux")]
+		if security_status.can_enable_landlock {
+			if let Err(err) = security::landlock::enable_for_worker(&worker_info) {
+				// We previously were able to enable, so this should never happen. Shutdown if
+				// running in secure mode.
+				let err = format!("could not fully enable landlock: {:?}", err);
+				gum::error!(
+					target: LOG_TARGET,
+					?worker_info,
+					"{}. This should not happen, please report an issue",
+					err
+				);
+				if security_status.secure_validator_mode {
+					worker_shutdown(worker_info, &err);
+				}
+			}
+		}
+
+		// TODO: We can enable the seccomp networking blacklist on aarch64 as well, but we need a CI
+		//       job to catch regressions. See issue ci_cd/issues/609.
+		#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
+		if security_status.can_enable_seccomp {
+			if let Err(err) = security::seccomp::enable_for_worker(&worker_info) {
+				// We previously were able to enable, so this should never happen. Shutdown if
+				// running in secure mode.
+				let err = format!("could not fully enable seccomp: {:?}", err);
+				gum::error!(
+					target: LOG_TARGET,
+					?worker_info,
+					"{}. This should not happen, please report an issue",
+					err
+				);
+				if security_status.secure_validator_mode {
+					worker_shutdown(worker_info, &err);
+				}
+			}
+		}
+	}
+
+	// Run the main worker loop.
+	let err = event_loop(stream, &worker_info, security_status)
+		// It's never `Ok` because it's `Ok(Never)`.
+		.unwrap_err();
+
+	worker_shutdown(worker_info, &err.to_string());
+}
+
+/// Provide a consistent message on unexpected worker shutdown.
+///
+/// Logs at `warn` level and then exits the process with status 1; it never returns.
+fn worker_shutdown(worker_info: WorkerInfo, err: &str) -> ! {
+	gum::warn!(target: LOG_TARGET, ?worker_info, "quitting pvf worker ({}): {}", worker_info.kind, err);
+	std::process::exit(1);
+}
+
+/// Provide a consistent error on unexpected worker shutdown.
+///
+/// Same as `worker_shutdown`, except that the message is logged at `error` level. Exits the
+/// process with status 1; it never returns.
+fn worker_shutdown_error(worker_info: WorkerInfo, err: &str) -> ! {
+	gum::error!(target: LOG_TARGET, ?worker_info, "quitting pvf worker ({}): {}", worker_info.kind, err);
+	std::process::exit(1);
+}
+
+/// Body of the CPU time monitor thread used by prepare and execute jobs.
+///
+/// On every iteration the consumed CPU time is compared against `timeout`. While the budget is
+/// not exceeded, the thread blocks on `finished_rx` for the remaining time (plus
+/// [`JOB_TIMEOUT_OVERHEAD`]) and then checks again.
+///
+/// Returns `Some(elapsed_cpu_time)` once the CPU time exceeds `timeout`, meaning a `TimedOut`
+/// error should be sent to the host. Returns `None` when the other thread finishes first, or
+/// when the sending side of `finished_rx` is dropped.
+///
+/// NOTE: Sending a `TimedOut` error to the host will cause the worker, whether preparation or
+/// execution, to be killed by the host. We do not kill the process here because it would interfere
+/// with the proper handling of this error.
+pub fn cpu_time_monitor_loop(
+	cpu_time_start: ProcessTime,
+	timeout: Duration,
+	finished_rx: Receiver<()>,
+) -> Option<Duration> {
+	loop {
+		let elapsed = cpu_time_start.elapsed();
+
+		// The timeout is measured in CPU time, which is less subject to variance due to load.
+		if elapsed > timeout {
+			return Some(elapsed);
+		}
+
+		// Wait for the remaining CPU time, plus a bit to account for overhead. The wait is in
+		// wall clock time, and the CPU clock may be slower than the wall clock, so we may wake
+		// up before the budget is spent. That is fine: we only need to halt a stalled worker,
+		// and waking up rarely is preferable.
+		let wait_for = timeout.saturating_sub(elapsed) + JOB_TIMEOUT_OVERHEAD;
+		match finished_rx.recv_timeout(wait_for) {
+			// Nothing received yet: go around and re-check the CPU time.
+			Err(RecvTimeoutError::Timeout) => {},
+			// Finish signal received, or the sender is gone.
+			Ok(()) | Err(RecvTimeoutError::Disconnected) => return None,
+		}
+	}
+}
+
+/// Best-effort conversion of an opaque panic payload into a string.
+///
+/// Payloads of type `&'static str` and `String` are returned as text; anything else yields the
+/// fixed string `"unknown panic payload"`.
+pub fn stringify_panic_payload(payload: Box<dyn Any + Send + 'static>) -> String {
+	// Try the string literal case first, keeping the payload around if that fails.
+	let payload = match payload.downcast::<&'static str>() {
+		Ok(literal) => return (*literal).to_string(),
+		Err(other) => other,
+	};
+
+	payload
+		.downcast::<String>()
+		.map(|owned| *owned)
+		// At least we tried...
+		.unwrap_or_else(|_| "unknown panic payload".to_string())
+}
+
+/// In case of node and worker version mismatch (as a result of in-place upgrade), send `SIGTERM`
+/// to the node to tear it down and prevent it from raising disputes on valid candidates. Node
+/// restart should be handled by the node owner. As node exits, Unix sockets opened to workers
+/// get closed by the OS and other workers receive error on socket read and also exit. Preparation
+/// jobs are written to the temporary files that are renamed to real artifacts on the node side, so
+/// no leftover artifacts are possible.
+fn kill_parent_node_in_emergency() {
+	unsafe {
+		// SAFETY: `getppid()` never fails but may return "no-parent" (0) or "parent-init" (1) in
+		// some corner cases, which is checked. The return value of `kill()` is not checked, so
+		// a failure to deliver the signal goes unnoticed here.
+		let ppid = libc::getppid();
+		if ppid > 1 {
+			libc::kill(ppid, libc::SIGTERM);
+		}
+	}
+}
+
+/// Reads one frame from `stream` and decodes it as a [`WorkerHandshake`].
+///
+/// # Errors
+///
+/// Propagates the error of the framed read. A frame that cannot be decoded is reported as an
+/// `io::Error` of kind `Other`.
+fn recv_worker_handshake(stream: &mut UnixStream) -> io::Result<WorkerHandshake> {
+	let frame = framed_recv_blocking(stream)?;
+	match WorkerHandshake::decode(&mut &frame[..]) {
+		Ok(handshake) => Ok(handshake),
+		Err(e) => Err(io::Error::new(
+			io::ErrorKind::Other,
+			format!("recv_worker_handshake: failed to decode WorkerHandshake: {}", e),
+		)),
+	}
+}
+
+/// Computes the total CPU time, user plus system, from a `rusage` structure as returned by
+/// [`nix::sys::resource::getrusage`].
+///
+/// # Arguments
+///
+/// - `rusage`: Contains resource usage information.
+///
+/// # Returns
+///
+/// Returns a `Duration` representing the total CPU time.
+pub fn get_total_cpu_usage(rusage: Usage) -> Duration {
+	let user = rusage.user_time();
+	let system = rusage.system_time();
+
+	let whole_secs = user.tv_sec() + system.tv_sec();
+	let sub_sec_micros = (system.tv_usec() + user.tv_usec()) as i64;
+
+	Duration::from_micros(((whole_secs * 1_000_000) + sub_sec_micros) as u64)
+}
+
+/// Reads one frame from `received_data` and decodes it as a job response of type `T`.
+///
+/// `context` is prepended to the error message when decoding fails.
+///
+/// # Errors
+///
+/// Propagates the error of the framed read. A frame that cannot be decoded is reported as an
+/// `io::Error` of kind `Other`.
+pub fn recv_child_response<T>(
+	received_data: &mut io::BufReader<&[u8]>,
+	context: &'static str,
+) -> io::Result<T>
+where
+	T: Decode,
+{
+	let frame = framed_recv_blocking(received_data)?;
+	let mut remaining = frame.as_slice();
+	match T::decode(&mut remaining) {
+		Ok(response) => Ok(response),
+		Err(e) => Err(io::Error::new(
+			io::ErrorKind::Other,
+			format!("{} pvf recv_child_response: decode error: {}", context, e),
+		)),
+	}
+}
+
+/// Encodes `result` and sends it to the host over `stream` as one frame.
+///
+/// An `Err` result is logged at `warn` level before sending, and every result is logged at
+/// `trace` level.
+///
+/// # Errors
+///
+/// Returns the error of the framed send, after logging it at `warn` level.
+pub fn send_result<T, E>(
+	stream: &mut UnixStream,
+	result: Result<T, E>,
+	worker_info: &WorkerInfo,
+) -> io::Result<()>
+where
+	T: std::fmt::Debug,
+	E: std::fmt::Debug + std::fmt::Display,
+	Result<T, E>: Encode,
+{
+	if let Err(err) = &result {
+		gum::warn!(
+			target: LOG_TARGET,
+			?worker_info,
+			"worker: error occurred: {}",
+			err
+		);
+	}
+	gum::trace!(
+		target: LOG_TARGET,
+		?worker_info,
+		"worker: sending result to host: {:?}",
+		result
+	);
+
+	let encoded = result.encode();
+	match framed_send_blocking(stream, &encoded) {
+		Ok(()) => Ok(()),
+		Err(err) => {
+			gum::warn!(
+				target: LOG_TARGET,
+				?worker_info,
+				"worker: error occurred sending result to host: {}",
+				err
+			);
+			Err(err)
+		},
+	}
+}
+
+/// Formats an errno as `"<context>: <errno>: <last OS error>"`.
+///
+/// NOTE(review): the last part comes from `io::Error::last_os_error()`, i.e. the thread's current
+/// `errno`, not from the `errno` argument. The two only agree if nothing has changed `errno`
+/// since the failing call; confirm that callers invoke this right after the failure.
+pub fn stringify_errno(context: &'static str, errno: Errno) -> String {
+	format!("{}: {}: {}", context, errno, io::Error::last_os_error())
+}
+
+/// Functionality related to threads spawned by the workers.
+///
+/// The motivation for this module is to coordinate worker threads without using async Rust.
+pub mod thread {
+	use std::{
+		io, panic,
+		sync::{Arc, Condvar, Mutex},
+		thread,
+		time::Duration,
+	};
+
+	/// Contains the outcome of waiting on threads, or `Pending` if none are ready.
+	#[derive(Debug, Clone, Copy)]
+	pub enum WaitOutcome {
+		/// Outcome value a thread may be spawned with to signal that the work finished.
+		Finished,
+		/// Outcome value a thread may be spawned with to signal a timeout.
+		TimedOut,
+		/// No thread has reported an outcome yet; this is the initial state.
+		Pending,
+	}
+
+	impl WaitOutcome {
+		/// Returns `true` if no outcome has been reported yet.
+		pub fn is_pending(&self) -> bool {
+			matches!(self, Self::Pending)
+		}
+	}
+
+	/// Helper type: a shared outcome flag together with the condvar used to wait on it.
+	pub type Cond = Arc<(Mutex<WaitOutcome>, Condvar)>;
+
+	/// Gets a condvar initialized to `Pending`.
+	///
+	/// The returned value is freshly allocated; clone the `Arc` to share it between threads.
+	pub fn get_condvar() -> Cond {
+		Arc::new((Mutex::new(WaitOutcome::Pending), Condvar::new()))
+	}
+
+	/// Spawns a named worker thread that runs `f` and then stores `outcome` in `cond`, waking
+	/// every thread waiting on it. A panic inside `f` is caught, the condvar is triggered, and
+	/// only then is the panic resumed, so waiters are notified on panics too.
+	///
+	/// # Returns
+	///
+	/// Returns the thread's join handle. Calling `.join()` on it returns the result of executing
+	/// `f()`.
+	///
+	/// # Errors
+	///
+	/// Returns the I/O error reported by the thread builder if the thread cannot be spawned.
+	pub fn spawn_worker_thread<F, R>(
+		name: &str,
+		f: F,
+		cond: Cond,
+		outcome: WaitOutcome,
+	) -> io::Result<thread::JoinHandle<R>>
+	where
+		F: FnOnce() -> R,
+		F: Send + 'static + panic::UnwindSafe,
+		R: Send + 'static,
+	{
+		let builder = thread::Builder::new().name(name.to_owned());
+		builder.spawn(move || cond_notify_on_done(f, cond, outcome))
+	}
+
+	/// Same as [`spawn_worker_thread`], but the thread is created with a stack of `stack_size`
+	/// bytes.
+	pub fn spawn_worker_thread_with_stack_size<F, R>(
+		name: &str,
+		f: F,
+		cond: Cond,
+		outcome: WaitOutcome,
+		stack_size: usize,
+	) -> io::Result<thread::JoinHandle<R>>
+	where
+		F: FnOnce() -> R,
+		F: Send + 'static + panic::UnwindSafe,
+		R: Send + 'static,
+	{
+		let builder = thread::Builder::new().name(name.to_owned()).stack_size(stack_size);
+		builder.spawn(move || cond_notify_on_done(f, cond, outcome))
+	}
+
+	/// Runs `f`, then reports `outcome` through `cond`, and finally hands back what `f` returned.
+	///
+	/// A panic in `f` is caught so that the waiting thread is still notified; the panic is
+	/// resumed right after the notification.
+	fn cond_notify_on_done<F, R>(f: F, cond: Cond, outcome: WaitOutcome) -> R
+	where
+		F: FnOnce() -> R,
+		F: panic::UnwindSafe,
+	{
+		let caught = panic::catch_unwind(f);
+		// Notify before propagating, otherwise a panic would leave the waiters blocked.
+		cond_notify_all(cond, outcome);
+		caught.unwrap_or_else(|payload| panic::resume_unwind(payload))
+	}
+
+	/// Stores `outcome` in the shared flag and wakes all threads waiting on the condvar.
+	///
+	/// Only the first report counts: if the flag is no longer `Pending`, it is left untouched
+	/// and nobody is notified.
+	fn cond_notify_all(cond: Cond, outcome: WaitOutcome) {
+		let (lock, cvar) = &*cond;
+		let mut flag = lock.lock().unwrap();
+		if flag.is_pending() {
+			*flag = outcome;
+			cvar.notify_all();
+		}
+		// Otherwise someone else already triggered the condvar.
+	}
+
+	/// Blocks the calling thread until the shared flag is no longer `Pending`, then returns the
+	/// reported outcome.
+	pub fn wait_for_threads(cond: Cond) -> WaitOutcome {
+		let (lock, cvar) = &*cond;
+		let locked = lock.lock().unwrap();
+		let settled = cvar.wait_while(locked, |flag| flag.is_pending()).unwrap();
+		*settled
+	}
+
+	/// Blocks the calling thread until the shared flag is no longer `Pending` or until `dur` has
+	/// passed, whichever comes first.
+	///
+	/// Returns `None` if the timeout is hit, otherwise `Some` with the reported outcome.
+	#[cfg_attr(not(any(target_os = "linux", feature = "jemalloc-allocator")), allow(dead_code))]
+	pub fn wait_for_threads_with_timeout(cond: &Cond, dur: Duration) -> Option<WaitOutcome> {
+		let (lock, cvar) = &**cond;
+		let locked = lock.lock().unwrap();
+		let (guard, wait_result) =
+			cvar.wait_timeout_while(locked, dur, |flag| flag.is_pending()).unwrap();
+		if wait_result.timed_out() {
+			return None;
+		}
+		Some(*guard)
+	}
+
+	// Unit tests for the condvar helpers of this module.
+	#[cfg(test)]
+	mod tests {
+		use super::*;
+		use assert_matches::assert_matches;
+
+		// A fresh condvar starts out as `Pending`.
+		#[test]
+		fn get_condvar_should_be_pending() {
+			let condvar = get_condvar();
+			let outcome = *condvar.0.lock().unwrap();
+			assert!(outcome.is_pending());
+		}
+
+		// Nobody notifies, so the wait runs into the timeout.
+		#[test]
+		fn wait_for_threads_with_timeout_return_none_on_time_out() {
+			let condvar = Arc::new((Mutex::new(WaitOutcome::Pending), Condvar::new()));
+			let outcome = wait_for_threads_with_timeout(&condvar, Duration::from_millis(100));
+			assert!(outcome.is_none());
+		}
+
+		// The outcome is already set before waiting, so the wait returns it.
+		#[test]
+		fn wait_for_threads_with_timeout_returns_outcome() {
+			let condvar = Arc::new((Mutex::new(WaitOutcome::Pending), Condvar::new()));
+			let condvar2 = condvar.clone();
+			cond_notify_all(condvar2, WaitOutcome::Finished);
+			let outcome = wait_for_threads_with_timeout(&condvar, Duration::from_secs(2));
+			assert_matches!(outcome.unwrap(), WaitOutcome::Finished);
+		}
+
+		// The spawned thread returns the closure's value and stores the outcome it was given.
+		#[test]
+		fn spawn_worker_thread_should_notify_on_done() {
+			let condvar = Arc::new((Mutex::new(WaitOutcome::Pending), Condvar::new()));
+			let response =
+				spawn_worker_thread("thread", || 2, condvar.clone(), WaitOutcome::TimedOut);
+			let (lock, _) = &*condvar;
+			let r = response.unwrap().join().unwrap();
+			assert_eq!(r, 2);
+			assert_matches!(*lock.lock().unwrap(), WaitOutcome::TimedOut);
+		}
+
+		// An outcome that is already set must not be overwritten by a later thread.
+		#[test]
+		fn spawn_worker_should_not_change_finished_outcome() {
+			let condvar = Arc::new((Mutex::new(WaitOutcome::Finished), Condvar::new()));
+			let response =
+				spawn_worker_thread("thread", move || 2, condvar.clone(), WaitOutcome::TimedOut);
+
+			let r = response.unwrap().join().unwrap();
+			assert_eq!(r, 2);
+			assert_matches!(*condvar.0.lock().unwrap(), WaitOutcome::Finished);
+		}
+
+		// A panicking closure still updates the outcome, and the panic is propagated.
+		#[test]
+		fn cond_notify_on_done_should_update_wait_outcome_when_panic() {
+			let condvar = Arc::new((Mutex::new(WaitOutcome::Pending), Condvar::new()));
+			let err = panic::catch_unwind(panic::AssertUnwindSafe(|| {
+				cond_notify_on_done(|| panic!("test"), condvar.clone(), WaitOutcome::Finished)
+			}));
+
+			assert_matches!(*condvar.0.lock().unwrap(), WaitOutcome::Finished);
+			assert!(err.is_err());
+		}
+	}
+}
+
+// Unit tests for `cpu_time_monitor_loop`.
+#[cfg(test)]
+mod tests {
+	use super::*;
+	use std::sync::mpsc::channel;
+
+	// With a zero timeout and no finish signal, the loop must report a timeout.
+	#[test]
+	fn cpu_time_monitor_loop_should_return_time_elapsed() {
+		let cpu_time_start = ProcessTime::now();
+		let timeout = Duration::from_secs(0);
+		// The sender is kept alive so that the channel does not read as disconnected.
+		let (_tx, rx) = channel();
+		let result = cpu_time_monitor_loop(cpu_time_start, timeout, rx);
+		assert_ne!(result, None);
+	}
+
+	// A finish signal that is already queued makes the loop return `None`.
+	#[test]
+	fn cpu_time_monitor_loop_should_return_none() {
+		let cpu_time_start = ProcessTime::now();
+		let timeout = Duration::from_secs(10);
+		let (tx, rx) = channel();
+		tx.send(()).unwrap();
+		let result = cpu_time_monitor_loop(cpu_time_start, timeout, rx);
+		assert_eq!(result, None);
+	}
+}
@@ -0,0 +1,168 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Functionality for securing workers by unsharing some namespaces from other processes and
+//! changing the root.
+
+use crate::{
+	worker::{WorkerInfo, WorkerKind},
+	LOG_TARGET,
+};
+use std::{env, ffi::CString, io, os::unix::ffi::OsStrExt, path::Path, ptr};
+
+/// Errors that can occur while unsharing namespaces and changing root.
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+	/// A system call failed. The string names the failing step, followed by the OS error.
+	#[error("{0}")]
+	OsErrWithContext(String),
+	/// An I/O error, converted via `From<io::Error>`.
+	#[error(transparent)]
+	Io(#[from] io::Error),
+	/// One of the sanity checks performed after changing root did not hold.
+	#[error("assertion failed: {0}")]
+	AssertionFailed(String),
+}
+
+/// Result type whose error is this module's [`Error`].
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// Try to enable for the given kind of worker.
+///
+/// Logs at `trace` level and then delegates to `try_restrict`, whose result is returned as is.
+///
+/// NOTE: This should not be called in a multi-threaded context. `unshare(2)`:
+///       "CLONE_NEWUSER requires that the calling process is not threaded."
+pub fn enable_for_worker(worker_info: &WorkerInfo) -> Result<()> {
+	gum::trace!(
+		target: LOG_TARGET,
+		?worker_info,
+		"enabling change-root",
+	);
+
+	try_restrict(worker_info)
+}
+
+/// Checks whether unshare-and-change-root can be fully enabled on the current Linux
+/// environment, by actually performing it with `tempdir` as the new root.
+///
+/// Returns `Ok(())` if it can, and the error describing the failure otherwise.
+///
+/// NOTE: This should not be called in a multi-threaded context. `unshare(2)`:
+///       "CLONE_NEWUSER requires that the calling process is not threaded."
+pub fn check_can_fully_enable(tempdir: &Path) -> Result<()> {
+	// A stand-in worker description, marked as the check-only kind.
+	let probe_info = WorkerInfo {
+		pid: std::process::id(),
+		kind: WorkerKind::CheckPivotRoot,
+		version: None,
+		worker_dir_path: tempdir.to_path_buf(),
+	};
+	try_restrict(&probe_info)
+}
+
+/// Unshare the user namespace and change root to be the worker directory.
+///
+/// # Errors
+///
+/// - `Error::OsErrWithContext` if one of the system calls fails; the message names the failing
+///   step and carries the OS error.
+/// - `Error::Io` if the current directory cannot be read or changed during the final checks.
+/// - `Error::AssertionFailed` if, after changing root, the current directory is not `/` or
+///   `..` leads somewhere other than `/`.
+///
+/// NOTE: This should not be called in a multi-threaded context. `unshare(2)`:
+///       "CLONE_NEWUSER requires that the calling process is not threaded."
+fn try_restrict(worker_info: &WorkerInfo) -> Result<()> {
+	// Turns a string literal into a pointer to a NUL-terminated C string.
+	// TODO: Remove this once this is stable: https://github.com/rust-lang/rust/issues/105723
+	macro_rules! cstr_ptr {
+		($e:expr) => {
+			concat!($e, "\0").as_ptr().cast::<core::ffi::c_char>()
+		};
+	}
+
+	let worker_dir_path_c = CString::new(worker_info.worker_dir_path.as_os_str().as_bytes())
+		.expect("on unix; the path will never contain 0 bytes; qed");
+
+	// Wrapper around all the work to prevent repetitive error handling.
+	//
+	// # Errors
+	//
+	// It's the caller's responsibility to call `Error::last_os_error`. Note that that alone does
+	// not give the context of which call failed, so we return a &str error.
+	|| -> std::result::Result<(), &'static str> {
+		// SAFETY: We pass null-terminated C strings and use the APIs as documented. In fact, steps
+		//         (2) and (3) are adapted from the example in pivot_root(2), with the additional
+		//         change described in the `pivot_root(".", ".")` section.
+		unsafe {
+			// 1. `unshare` the user and the mount namespaces.
+			if libc::unshare(libc::CLONE_NEWUSER | libc::CLONE_NEWNS) < 0 {
+				return Err("unshare user and mount namespaces");
+			}
+
+			// 2. Setup mounts.
+			//
+			// Ensure that new root and its parent mount don't have shared propagation (which would
+			// cause pivot_root() to return an error), and prevent propagation of mount events to
+			// the initial mount namespace.
+			if libc::mount(
+				ptr::null(),
+				cstr_ptr!("/"),
+				ptr::null(),
+				libc::MS_REC | libc::MS_PRIVATE,
+				ptr::null(),
+			) < 0
+			{
+				return Err("mount MS_PRIVATE");
+			}
+			// Ensure that the new root is a mount point.
+			//
+			// Execute workers and the check-only run additionally request a read-only mount.
+			//
+			// NOTE(review): mount(2) states that, when creating a bind mount, mount flags other
+			// than `MS_REC` are ignored. Confirm whether a follow-up `MS_REMOUNT | MS_BIND` is
+			// needed for the restrictions below to take effect.
+			let additional_flags =
+				if let WorkerKind::Execute | WorkerKind::CheckPivotRoot = worker_info.kind {
+					libc::MS_RDONLY
+				} else {
+					0
+				};
+			if libc::mount(
+				worker_dir_path_c.as_ptr(),
+				worker_dir_path_c.as_ptr(),
+				ptr::null(), // ignored when MS_BIND is used
+				libc::MS_BIND |
+					libc::MS_REC | libc::MS_NOEXEC |
+					libc::MS_NODEV | libc::MS_NOSUID |
+					libc::MS_NOATIME |
+					additional_flags,
+				ptr::null(), // ignored when MS_BIND is used
+			) < 0
+			{
+				return Err("mount MS_BIND");
+			}
+
+			// 3. `pivot_root` to the artifact directory.
+			if libc::chdir(worker_dir_path_c.as_ptr()) < 0 {
+				return Err("chdir to worker dir path");
+			}
+			if libc::syscall(libc::SYS_pivot_root, cstr_ptr!("."), cstr_ptr!(".")) < 0 {
+				return Err("pivot_root");
+			}
+			if libc::umount2(cstr_ptr!("."), libc::MNT_DETACH) < 0 {
+				return Err("umount the old root mount point");
+			}
+		}
+
+		Ok(())
+	}()
+	.map_err(|err_ctx| {
+		// Read the OS error of the call that failed and attach the step name to it.
+		let err = io::Error::last_os_error();
+		Error::OsErrWithContext(format!("{}: {}", err_ctx, err))
+	})?;
+
+	// Do some assertions.
+	if env::current_dir()? != Path::new("/") {
+		return Err(Error::AssertionFailed(
+			"expected current dir after pivot_root to be `/`".into(),
+		));
+	}
+	// Going up from the new root must leave us at the root.
+	env::set_current_dir("..")?;
+	if env::current_dir()? != Path::new("/") {
+		return Err(Error::AssertionFailed(
+			"expected not to be able to break out of new root by doing `..`".into(),
+		));
+	}
+
+	Ok(())
+}
@@ -0,0 +1,93 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Functionality for securing the job processes spawned by the workers using `clone`. If
+//! unsupported, falls back to `fork`.
+
+use crate::{worker::WorkerInfo, LOG_TARGET};
+use nix::{
+	errno::Errno,
+	sched::{CloneCb, CloneFlags},
+	unistd::Pid,
+};
+
+/// Errors returned by the `clone`-based sandboxing helpers in this module.
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+	/// The `clone` call itself failed; carries the errno it reported.
+	#[error("could not clone, errno: {0}")]
+	Clone(Errno),
+}
+
+/// Result alias whose error type is this module's [`Error`].
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// Spawns a job process from the current worker via clone(2).
+///
+/// The clone flags are derived from `have_unshare_newuser` (see `clone_flags`) and are logged at
+/// trace level together with `worker_info` before the call. On success the PID of the new child
+/// is returned.
+///
+/// SAFETY: new process should be either spawned within a single threaded process, or use only
+/// async-signal-safe functions.
+pub unsafe fn clone_on_worker(
+	worker_info: &WorkerInfo,
+	have_unshare_newuser: bool,
+	cb: CloneCb,
+) -> Result<Pid> {
+	let sandbox_flags = clone_flags(have_unshare_newuser);
+	gum::trace!(
+		target: LOG_TARGET,
+		?worker_info,
+		"calling clone with flags: {:?}",
+		sandbox_flags
+	);
+
+	let spawned = try_clone(cb, sandbox_flags);
+	spawned
+}
+
+/// Probes whether clone(2) with the complete set of sandboxing flags works on the current Linux
+/// environment.
+///
+/// The probe clones a child whose callback immediately returns `0`, using the flags for the
+/// "user namespace not yet unshared" case. The child's PID is discarded; this function does not
+/// wait on it. An `Err` means the clone call failed with the full flag set.
+///
+/// SAFETY: new process should be either spawned within a single threaded process, or use only
+/// async-signal-safe functions.
+pub unsafe fn check_can_fully_clone() -> Result<()> {
+	let noop_child: CloneCb = Box::new(|| 0);
+	let all_flags = clone_flags(false);
+
+	match try_clone(noop_child, all_flags) {
+		Ok(_pid) => Ok(()),
+		Err(err) => Err(err),
+	}
+}
+
+/// Runs clone(2) with the given `flags`, executing `cb` in the child on a freshly allocated
+/// 2 MiB stack.
+///
+/// Returns the PID of the child, or [`Error::Clone`] carrying the errno if the call failed.
+///
+/// SAFETY: new process should be either spawned within a single threaded process, or use only
+/// async-signal-safe functions.
+unsafe fn try_clone(cb: CloneCb, flags: CloneFlags) -> Result<Pid> {
+	// Size of the stack handed to the child.
+	const CHILD_STACK_SIZE: usize = 2 * 1024 * 1024;
+
+	// Allocate the child's stack on the heap rather than as a local array: a 2 MiB array in this
+	// frame can by itself exhaust the stack of a thread running with the default 2 MiB stack
+	// size. The flags used in this module never include `CLONE_VM`, so the child works on its own
+	// copy of this buffer and it is fine for the parent to free it on return.
+	let mut stack = vec![0u8; CHILD_STACK_SIZE];
+
+	nix::sched::clone(cb, stack.as_mut_slice(), flags, None).map_err(Error::Clone)
+}
+
+/// Builds the flag set passed to `clone(2)`: all sandbox-related namespace flags plus `SIGCHLD`.
+///
+/// `have_unshare_newuser` tells whether `unshare` was already called with the new-user flag; if
+/// so, `CLONE_NEWUSER` is left out.
+fn clone_flags(have_unshare_newuser: bool) -> CloneFlags {
+	// Namespaces that are always isolated.
+	let mut flags = CloneFlags::CLONE_NEWCGROUP |
+		CloneFlags::CLONE_NEWIPC |
+		CloneFlags::CLONE_NEWNET |
+		CloneFlags::CLONE_NEWNS |
+		CloneFlags::CLONE_NEWPID |
+		CloneFlags::CLONE_NEWUTS;
+
+	// NOTE: CLONE_NEWUSER does not work in `clone` if we previously called `unshare` with this
+	// flag. On the other hand, if we did not call `unshare` we need this flag for the
+	// CAP_SYS_ADMIN capability.
+	if !have_unshare_newuser {
+		flags |= CloneFlags::CLONE_NEWUSER;
+	}
+
+	// The SIGCHLD flag informs clone that the parent process expects a child termination
+	// signal; without it the `waitpid` function returns an `ECHILD` error.
+	flags |= CloneFlags::from_bits_retain(libc::SIGCHLD);
+
+	flags
+}
@@ -0,0 +1,323 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! The [landlock] docs say it best:
+//!
+//! > "Landlock is a security feature available since Linux 5.13. The goal is to enable to restrict
+//! ambient rights (e.g., global filesystem access) for a set of processes by creating safe security
+//! sandboxes as new security layers in addition to the existing system-wide access-controls. This
+//! kind of sandbox is expected to help mitigate the security impact of bugs, unexpected or
+//! malicious behaviors in applications. Landlock empowers any process, including unprivileged ones,
+//! to securely restrict themselves."
+//!
+//! [landlock]: https://docs.rs/landlock/latest/landlock/index.html
+
+pub use landlock::RulesetStatus;
+
+use crate::{
+	worker::{stringify_panic_payload, WorkerInfo, WorkerKind},
+	LOG_TARGET,
+};
+use landlock::*;
+use std::path::{Path, PathBuf};
+
+/// Landlock ABI version. We use ABI V1 because:
+///
+/// 1. It is supported by our reference kernel version.
+/// 2. Later versions do not (yet) provide additional security that would benefit us.
+///
+/// # Versions (as of October 2023)
+///
+/// - Pezkuwi reference kernel version: 5.16+
+///
+/// - ABI V1: kernel 5.13 - Introduces landlock, including full restrictions on file reads.
+///
+/// - ABI V2: kernel 5.19 - Adds ability to prevent file renaming. Does not help us. During
+///   execution an attacker can only affect the name of a symlinked artifact and not the original
+///   one.
+///
+/// - ABI V3: kernel 6.2 - Adds ability to prevent file truncation. During execution, can
+///   prevent attackers from affecting a symlinked artifact. We don't strictly need this as we
+///   plan to check for file integrity anyway; see
+///   <https://github.com/pezkuwichain/pezkuwi-sdk/issues/107>.
+///
+/// # Determinism
+///
+/// You may wonder whether we could always use the latest ABI instead of only the ABI supported
+/// by the reference kernel version. It seems plausible, since landlock provides a best-effort
+/// approach to enabling sandboxing. For example, if the reference version only supported V1 and
+/// we were on V2, then landlock would use V2 if it was supported on the current machine, and
+/// just fall back to V1 if not.
+///
+/// The issue with this is indeterminacy. If half of validators were on V2 and half were on V1,
+/// they may have different semantics on some PVFs. So a malicious PVF now has a new attack
+/// vector: they can exploit this indeterminism between landlock ABIs!
+///
+/// On the other hand we do want validators to be as secure as possible and protect their keys
+/// from attackers. And, the risk with indeterminacy is low and there are other indeterminacy
+/// vectors anyway. So we will only upgrade to a new ABI if either the reference kernel version
+/// supports it or if it introduces some new feature that is beneficial to security.
+pub const LANDLOCK_ABI: ABI = ABI::V1;
+
+/// Errors that can occur while enabling or probing landlock.
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+	/// The ruleset was applied, but its reported status was not `FullyEnforced`; carries the
+	/// status that was reported instead.
+	#[error("Could not fully enable: {0:?}")]
+	NotFullyEnabled(RulesetStatus),
+	/// No landlock rule could be built for the given exception path (e.g. the path is missing).
+	#[error("Invalid exception path: {0:?}")]
+	InvalidExceptionPath(PathBuf),
+	/// An error reported by the landlock crate while building or applying the ruleset.
+	#[error(transparent)]
+	RulesetError(#[from] RulesetError),
+	/// The thread running the landlock check panicked; carries the stringified panic payload.
+	#[error("A panic occurred in try_restrict: {0}")]
+	Panic(String),
+}
+
+/// Result alias whose error type is this module's [`Error`].
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// Enables landlock for the worker described by `worker_info`.
+///
+/// The only filesystem exception granted is on the worker directory: write access for prepare
+/// workers, read access for execute workers.
+///
+/// # Panics
+///
+/// Panics if called with `WorkerKind::CheckPivotRoot`, which is not a real worker kind.
+pub fn enable_for_worker(worker_info: &WorkerInfo) -> Result<()> {
+	// Pick the single access right this kind of worker keeps on its own directory.
+	let worker_dir_access: BitFlags<AccessFs> = match worker_info.kind {
+		WorkerKind::Prepare => AccessFs::WriteFile.into(),
+		WorkerKind::Execute => AccessFs::ReadFile.into(),
+		WorkerKind::CheckPivotRoot => {
+			panic!("this should only be passed for checking pivot_root; qed")
+		},
+	};
+	let exceptions: Vec<(PathBuf, BitFlags<AccessFs>)> =
+		vec![(worker_info.worker_dir_path.to_owned(), worker_dir_access)];
+
+	gum::trace!(
+		target: LOG_TARGET,
+		?worker_info,
+		"enabling landlock with exceptions: {:?}",
+		exceptions,
+	);
+
+	try_restrict(exceptions)
+}
+
+// TODO: <https://github.com/landlock-lsm/rust-landlock/issues/36>
+/// Probes, in a dedicated thread, whether the configured landlock ABI can be fully enabled on
+/// the current Linux environment.
+///
+/// The probe applies the restriction with no filesystem exceptions. Any error from the
+/// restriction is passed through; a panic in the probe thread is reported as [`Error::Panic`].
+pub fn check_can_fully_enable() -> Result<()> {
+	let probe = std::thread::spawn(|| try_restrict(std::iter::empty::<(PathBuf, AccessFs)>()));
+
+	probe
+		.join()
+		.unwrap_or_else(|payload| Err(Error::Panic(stringify_panic_payload(payload))))
+}
+
+/// Tries to restrict the current thread (should only be called in a process' main thread) with
+/// the following landlock access controls:
+///
+/// 1. all filesystem access rights of [`LANDLOCK_ABI`] are handled (and thereby denied), except
+///    for the rights granted beneath the paths listed in `fs_exceptions`
+/// 2. ... more sandbox types (e.g. networking) may be supported in the future.
+///
+/// # Errors
+///
+/// - [`Error::InvalidExceptionPath`] if no rule could be built for one of the exception paths.
+/// - [`Error::NotFullyEnabled`] if the status reported after restricting is anything other than
+///   [`RulesetStatus::FullyEnforced`].
+/// - [`Error::RulesetError`] for any error reported by the landlock crate.
+fn try_restrict<I, P, A>(fs_exceptions: I) -> Result<()>
+where
+	I: IntoIterator<Item = (P, A)>,
+	P: AsRef<Path>,
+	A: Into<BitFlags<AccessFs>>,
+{
+	let handled_access = AccessFs::from_all(LANDLOCK_ABI);
+	let mut ruleset = Ruleset::default().handle_access(handled_access)?.create()?;
+
+	for (exception_path, granted_access) in fs_exceptions {
+		let exception_path = exception_path.as_ref();
+		let paths = &[exception_path.to_owned()];
+		let mut rules = path_beneath_rules(paths, granted_access).peekable();
+		// `path_beneath_rules` silently ignores missing paths, so check for it manually.
+		if rules.peek().is_none() {
+			return Err(Error::InvalidExceptionPath(exception_path.to_owned()));
+		}
+		ruleset = ruleset.add_rules(rules)?;
+	}
+
+	// Only a fully enforced ruleset counts as success.
+	match ruleset.restrict_self()?.ruleset {
+		RulesetStatus::FullyEnforced => Ok(()),
+		not_fully_enforced => Err(Error::NotFullyEnabled(not_fully_enforced)),
+	}
+}
+
+#[cfg(test)]
+mod tests {
+	use super::*;
+	use std::{fs, io::ErrorKind, thread};
+
+	/// After landlock is applied with a read exception for one file, only that file stays
+	/// readable; after a second restriction with no exceptions, neither file is readable.
+	#[test]
+	fn restricted_thread_cannot_read_file() {
+		// TODO: This would be nice: <https://github.com/rust-lang/rust/issues/68007>.
+		// Silently pass when landlock cannot be fully enabled on this machine.
+		if check_can_fully_enable().is_err() {
+			return;
+		}
+
+		// Restricted thread cannot read from FS.
+		let handle = thread::spawn(|| {
+			// Create, write, and read two tmp files. This should succeed before any
+			// landlock restrictions are applied.
+			const TEXT: &str = "foo";
+			let tmpfile1 = tempfile::NamedTempFile::new().unwrap();
+			let path1 = tmpfile1.path();
+			let tmpfile2 = tempfile::NamedTempFile::new().unwrap();
+			let path2 = tmpfile2.path();
+
+			fs::write(path1, TEXT).unwrap();
+			let s = fs::read_to_string(path1).unwrap();
+			assert_eq!(s, TEXT);
+			fs::write(path2, TEXT).unwrap();
+			let s = fs::read_to_string(path2).unwrap();
+			assert_eq!(s, TEXT);
+
+			// Apply Landlock with a read exception for only one of the files.
+			let status = try_restrict(vec![(path1, AccessFs::ReadFile)]);
+			if !matches!(status, Ok(())) {
+				panic!(
+					"Ruleset should be enforced since we checked if landlock is enabled: {:?}",
+					status
+				);
+			}
+
+			// Try to read from both files, only tmpfile1 should succeed.
+			let result = fs::read_to_string(path1);
+			assert!(matches!(
+				result,
+				Ok(s) if s == TEXT
+			));
+			let result = fs::read_to_string(path2);
+			assert!(matches!(
+				result,
+				Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
+			));
+
+			// Apply Landlock for all files.
+			let status = try_restrict(std::iter::empty::<(PathBuf, AccessFs)>());
+			if !matches!(status, Ok(())) {
+				panic!(
+					"Ruleset should be enforced since we checked if landlock is enabled: {:?}",
+					status
+				);
+			}
+
+			// Try to read from tmpfile1 after landlock, it should fail.
+			let result = fs::read_to_string(path1);
+			assert!(matches!(
+				result,
+				Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
+			));
+		});
+
+		// A failed assertion inside the thread surfaces here as a join error.
+		assert!(handle.join().is_ok());
+	}
+
+	/// After landlock is applied with a write exception for one file, only that file stays
+	/// writable; after a second restriction with no exceptions, neither file is writable.
+	#[test]
+	fn restricted_thread_cannot_write_file() {
+		// TODO: This would be nice: <https://github.com/rust-lang/rust/issues/68007>.
+		// Silently pass when landlock cannot be fully enabled on this machine.
+		if check_can_fully_enable().is_err() {
+			return;
+		}
+
+		// Restricted thread cannot write to FS.
+		let handle = thread::spawn(|| {
+			// Create and write two tmp files. This should succeed before any landlock
+			// restrictions are applied.
+			const TEXT: &str = "foo";
+			let tmpfile1 = tempfile::NamedTempFile::new().unwrap();
+			let path1 = tmpfile1.path();
+			let tmpfile2 = tempfile::NamedTempFile::new().unwrap();
+			let path2 = tmpfile2.path();
+
+			fs::write(path1, TEXT).unwrap();
+			fs::write(path2, TEXT).unwrap();
+
+			// Apply Landlock with a write exception for only one of the files.
+			let status = try_restrict(vec![(path1, AccessFs::WriteFile)]);
+			if !matches!(status, Ok(())) {
+				panic!(
+					"Ruleset should be enforced since we checked if landlock is enabled: {:?}",
+					status
+				);
+			}
+
+			// Try to write to both files, only tmpfile1 should succeed.
+			let result = fs::write(path1, TEXT);
+			assert!(matches!(result, Ok(_)));
+			let result = fs::write(path2, TEXT);
+			assert!(matches!(
+				result,
+				Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
+			));
+
+			// Apply Landlock for all files.
+			let status = try_restrict(std::iter::empty::<(PathBuf, AccessFs)>());
+			if !matches!(status, Ok(())) {
+				panic!(
+					"Ruleset should be enforced since we checked if landlock is enabled: {:?}",
+					status
+				);
+			}
+
+			// Try to write to tmpfile1 after landlock, it should fail.
+			let result = fs::write(path1, TEXT);
+			assert!(matches!(
+				result,
+				Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
+			));
+		});
+
+		// A failed assertion inside the thread surfaces here as a join error.
+		assert!(handle.join().is_ok());
+	}
+
+	// Test that checks whether a thread restricted by landlock under our ABI version is still
+	// able to truncate a file it was granted access to.
+	#[test]
+	fn restricted_thread_can_truncate_file() {
+		// TODO: This would be nice: <https://github.com/rust-lang/rust/issues/68007>.
+		// Silently pass when landlock cannot be fully enabled on this machine.
+		if check_can_fully_enable().is_err() {
+			return;
+		}
+
+		// Restricted thread can truncate file.
+		let handle = thread::spawn(|| {
+			// Create and write a file. This should succeed before any landlock
+			// restrictions are applied.
+			const TEXT: &str = "foo";
+			let tmpfile = tempfile::NamedTempFile::new().unwrap();
+			let path = tmpfile.path();
+
+			fs::write(path, TEXT).unwrap();
+
+			// Apply Landlock with all exceptions under the current ABI.
+			let status = try_restrict(vec![(path, AccessFs::from_all(LANDLOCK_ABI))]);
+			if !matches!(status, Ok(())) {
+				panic!(
+					"Ruleset should be enforced since we checked if landlock is enabled: {:?}",
+					status
+				);
+			}
+
+			// Try to truncate the file.
+			// The truncation goes through the handle that was opened before the restriction.
+			let result = tmpfile.as_file().set_len(0);
+			assert!(result.is_ok());
+		});
+
+		// A failed assertion inside the thread surfaces here as a join error.
+		assert!(handle.join().is_ok());
+	}
+}
@@ -0,0 +1,77 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Functionality for securing workers.
+//!
+//! This is needed because workers are used to compile and execute untrusted code (PVFs).
+//!
+//! We currently employ the following security measures:
+//!
+//! - Restrict filesystem
+//!   - Use Landlock to remove all unnecessary FS access rights.
+//!   - Unshare the user and mount namespaces.
+//!   - Change the root directory to a worker-specific temporary directory.
+//! - Restrict networking by blocking socket creation and io_uring.
+//! - Remove env vars
+
+#[cfg(target_os = "linux")]
+pub mod change_root;
+#[cfg(target_os = "linux")]
+pub mod clone;
+#[cfg(target_os = "linux")]
+pub mod landlock;
+#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
+pub mod seccomp;
+
+use crate::{worker::WorkerInfo, LOG_TARGET};
+
+/// Require env vars to have been removed when spawning the process, to prevent malicious code from
+/// accessing them.
+///
+/// This function does not remove anything itself: it inspects the environment of the current
+/// process and logs an error for every variable that is still present and not explicitly
+/// tolerated (`RUST_LOG` everywhere, `__CF_USER_TEXT_ENCODING` on macOS).
+///
+/// Returns `true` if no unexpected variable was found, `false` otherwise.
+pub fn check_env_vars_were_cleared(worker_info: &WorkerInfo) -> bool {
+	// The message describes what actually happens here: a check, not a removal.
+	gum::trace!(
+		target: LOG_TARGET,
+		?worker_info,
+		"checking that env vars were cleared in worker",
+	);
+
+	let mut ok = true;
+
+	for (key, value) in std::env::vars_os() {
+		// TODO: *theoretically* the value (or mere presence) of `RUST_LOG` can be a source of
+		// randomness for malicious code. It should be removed in the job process, which does no
+		// logging.
+		if key == "RUST_LOG" {
+			continue;
+		}
+		// An exception for MacOS. This is not a secure platform anyway, so we let it slide.
+		#[cfg(target_os = "macos")]
+		if key == "__CF_USER_TEXT_ENCODING" {
+			continue;
+		}
+
+		gum::error!(
+			target: LOG_TARGET,
+			?worker_info,
+			?key,
+			?value,
+			"env var was present that should have been removed",
+		);
+
+		// Keep iterating so that every offending variable gets logged, not just the first.
+		ok = false;
+	}
+
+	ok
+}
@@ -0,0 +1,191 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Functionality for sandboxing workers by restricting their capabilities by blocking certain
+//! syscalls with seccomp.
+//!
+//! For security we block the following:
+//!
+//! - creation of new sockets - these are unneeded in PVF jobs, and we can safely block them without
+//!   affecting consensus.
+//!
+//! - `io_uring` - allows for networking and needs to be blocked. See below for a discussion on the
+//!   safety of doing this.
+//!
+//! # Safety of blocking io_uring
+//!
+//! `io_uring` is just a way of issuing system calls in an async manner, and there is nothing
+//! stopping wasmtime from legitimately using it. Fortunately, at the moment it does not. Generally,
+//! not many applications use `io_uring` in production yet, because of the numerous kernel CVEs
+//! discovered. It's still under a lot of development. Android outright banned `io_uring` for these
+//! reasons.
+//!
+//! Considering `io_uring`'s status discussed above, and that it very likely would get detected
+//! either by our [static analysis](https://github.com/paritytech/polkadot-sdk/pull/1663) or by
+//! testing, we think it is safe to block it.
+//!
+//! ## Consensus analysis
+//!
+//! If execution hits an edge case code path unique to a given machine, it's already taken a
+//! non-deterministic branch anyway. After all, we just care that the majority of validators reach
+//! the same result and preserve consensus. So worst-case scenario, there's a dispute, and we can
+//! always admit fault and refund the wrong validator. On the other hand, if all validators take the
+//! code path that results in a seccomp violation, then they would all vote against the current
+//! candidate, which is also fine. The violation would get logged (in big scary letters) and
+//! hopefully some validator reports it to us.
+//!
+//! Actually, the true worst-case scenario is that 50% of validators vote against, so that there
+//! is no consensus. But so many things would have to go wrong for that to happen:
+//!
+//! 1. An update to `wasmtime` is introduced that uses io_uring (unlikely as io_uring is mainly for
+//!    IO-heavy applications)
+//!
+//! 2. The new syscall is not detected by our static analysis
+//!
+//! 3. It is never triggered in any of our tests
+//!
+//! 4. It then gets triggered on some super edge case in production on 50% of validators causing a
+//!    stall (bad but very unlikely)
+//!
+//! 5. Or, it triggers on only a few validators causing a dispute (more likely but not as bad)
+//!
+//! Considering how many things would have to go wrong here, we believe it's safe to block
+//! `io_uring`.
+//!
+//! # Action on syscall violations
+//!
+//! When a forbidden syscall is attempted we immediately kill the process in order to prevent the
+//! attacker from doing anything else. In execution, this will result in voting against the
+//! candidate.
+
+use crate::{
+	worker::{stringify_panic_payload, WorkerInfo},
+	LOG_TARGET,
+};
+use seccompiler::*;
+use std::collections::BTreeMap;
+
+/// The action to take on caught syscalls.
+#[cfg(not(test))]
+const CAUGHT_ACTION: SeccompAction = SeccompAction::KillProcess;
+/// Don't kill the process when testing.
+#[cfg(test)]
+const CAUGHT_ACTION: SeccompAction = SeccompAction::Errno(libc::EACCES as u32);
+
+/// Errors that can occur while enabling or probing seccomp.
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+	/// A `seccompiler::Error`, converted automatically when propagated with `?`.
+	#[error(transparent)]
+	Seccomp(#[from] seccompiler::Error),
+	/// A `seccompiler::BackendError`, converted automatically when propagated with `?`.
+	#[error(transparent)]
+	Backend(#[from] seccompiler::BackendError),
+	/// The thread running the seccomp check panicked; carries the stringified panic payload.
+	#[error("A panic occurred in try_restrict: {0}")]
+	Panic(String),
+}
+
+/// Result alias whose error type is this module's [`Error`].
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// Try to enable seccomp for the given kind of worker.
+///
+/// `worker_info` is only used for the trace log; the same filter is applied regardless of the
+/// worker kind. Returns whatever error `try_restrict` reports.
+pub fn enable_for_worker(worker_info: &WorkerInfo) -> Result<()> {
+	gum::trace!(
+		target: LOG_TARGET,
+		?worker_info,
+		"enabling seccomp",
+	);
+
+	try_restrict()
+}
+
+/// Probes, in a dedicated thread, whether seccomp with our rules can be fully enabled on the
+/// current Linux environment.
+///
+/// Any error from applying the filter is passed through; a panic in the probe thread is
+/// reported as [`Error::Panic`].
+pub fn check_can_fully_enable() -> Result<()> {
+	let probe = std::thread::spawn(try_restrict);
+
+	probe
+		.join()
+		.unwrap_or_else(|payload| Err(Error::Panic(stringify_panic_payload(payload))))
+}
+
+/// Applies a `seccomp` filter to disable networking for the PVF threads.
+///
+/// The filter allows every syscall by default and triggers `CAUGHT_ACTION` for the blacklisted
+/// ones. Errors from building, compiling or applying the filter are propagated.
+fn try_restrict() -> Result<()> {
+	// The blacklist: every syscall listed here is mapped to an empty rule list.
+	let blocked_syscalls = [
+		// Restrict the creation of sockets.
+		libc::SYS_socketpair,
+		libc::SYS_socket,
+		// Prevent connecting to sockets for extra safety.
+		libc::SYS_connect,
+		// Restrict io_uring.
+		libc::SYS_io_uring_setup,
+		libc::SYS_io_uring_enter,
+		libc::SYS_io_uring_register,
+	];
+	let blacklisted_rules: BTreeMap<_, _> =
+		blocked_syscalls.iter().map(|&syscall| (syscall, vec![])).collect();
+
+	// Build a `seccomp` filter which by default allows all syscalls except those blocked in the
+	// blacklist.
+	let seccomp_filter = SeccompFilter::new(
+		blacklisted_rules,
+		// Mismatch action: what to do if not in rule list.
+		SeccompAction::Allow,
+		// Match action: what to do if in rule list.
+		CAUGHT_ACTION,
+		TargetArch::x86_64,
+	)?;
+	let compiled: BpfProgram = seccomp_filter.try_into()?;
+
+	// Applies filter (runs seccomp) to the calling thread.
+	seccompiler::apply_filter(&compiled)?;
+
+	Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+	use super::*;
+	use std::{io::ErrorKind, net::TcpListener, thread};
+
+	/// A thread can open a socket before the seccomp filter is applied, but not afterwards,
+	/// while an unrelated syscall keeps working.
+	#[test]
+	fn sandboxed_thread_cannot_use_sockets() {
+		// TODO: This would be nice: <https://github.com/rust-lang/rust/issues/68007>.
+		// Silently pass when seccomp cannot be fully enabled on this machine.
+		if check_can_fully_enable().is_err() {
+			return;
+		}
+
+		let handle = thread::spawn(|| {
+			// Open a socket, this should succeed before seccomp is applied.
+			TcpListener::bind("127.0.0.1:0").unwrap();
+
+			let status = try_restrict();
+			if !matches!(status, Ok(())) {
+				panic!("Ruleset should be enforced since we checked if seccomp is enabled");
+			}
+
+			// Try to open a socket after seccomp.
+			// Under `cfg(test)` the caught action is `Errno(EACCES)` rather than killing the
+			// process, so the failure is observable as an error here.
+			assert!(matches!(
+				TcpListener::bind("127.0.0.1:0"),
+				Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
+			));
+
+			// Other syscalls should still work.
+			unsafe {
+				assert!(libc::getppid() > 0);
+			}
+		});
+
+		// A failed assertion inside the thread surfaces here as a join error.
+		assert!(handle.join().is_ok());
+	}
+}
@@ -0,0 +1,30 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Shared functions for getting the known worker files.
+
+use std::path::{Path, PathBuf};
+
+/// File name, inside a worker directory, of the artifact used by the execute worker.
+const WORKER_EXECUTE_ARTIFACT_NAME: &str = "artifact";
+/// File name, inside a worker directory, of the temporary artifact used by the prepare worker.
+const WORKER_PREPARE_TMP_ARTIFACT_NAME: &str = "tmp-artifact";
+
+/// Returns the path of the execute artifact inside `worker_dir_path`.
+pub fn execute_artifact(worker_dir_path: &Path) -> PathBuf {
+	let mut artifact_path = worker_dir_path.to_path_buf();
+	artifact_path.push(WORKER_EXECUTE_ARTIFACT_NAME);
+	artifact_path
+}
+
+/// Returns the path of the temporary prepare artifact inside `worker_dir_path`.
+pub fn prepare_tmp_artifact(worker_dir_path: &Path) -> PathBuf {
+	let mut tmp_artifact_path = worker_dir_path.to_path_buf();
+	tmp_artifact_path.push(WORKER_PREPARE_TMP_ARTIFACT_NAME);
+	tmp_artifact_path
+}
@@ -0,0 +1,38 @@
+[package]
+name = "pezkuwi-node-core-pvf-execute-worker"
+description = "Pezkuwi crate that contains the logic for executing PVFs. Used by the pezkuwi-execute-worker binary."
+version = "7.0.0"
+authors.workspace = true
+edition.workspace = true
+license.workspace = true
+homepage.workspace = true
+repository.workspace = true
+
+[lints]
+workspace = true
+
+[dependencies]
+cfg-if = { workspace = true }
+cpu-time = { workspace = true }
+gum = { workspace = true, default-features = true }
+libc = { workspace = true }
+nix = { features = ["process", "resource", "sched"], workspace = true }
+
+codec = { features = ["derive"], workspace = true }
+
+pezkuwi-node-core-pvf-common = { workspace = true, default-features = true }
+pezkuwi-node-primitives = { workspace = true, default-features = true }
+pezkuwi-primitives = { workspace = true, default-features = true }
+pezkuwi-teyrchain-primitives = { workspace = true, default-features = true }
+
+sp-maybe-compressed-blob = { workspace = true, default-features = true }
+
+[features]
+builder = []
+runtime-benchmarks = [
+	"gum/runtime-benchmarks",
+	"pezkuwi-node-core-pvf-common/runtime-benchmarks",
+	"pezkuwi-node-primitives/runtime-benchmarks",
+	"pezkuwi-primitives/runtime-benchmarks",
+	"pezkuwi-teyrchain-primitives/runtime-benchmarks",
+]
@@ -0,0 +1,696 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Contains the logic for executing PVFs. Used by the pezkuwi-execute-worker binary.
+
+#![deny(unused_crate_dependencies)]
+#![warn(missing_docs)]
+
+pub use pezkuwi_node_core_pvf_common::{error::ExecuteError, executor_interface::execute_artifact};
+use pezkuwi_teyrchain_primitives::primitives::ValidationParams;
+
+// NOTE: Initializing logging in e.g. tests will not have an effect in the workers, as they are
+//       separate spawned processes. Run with e.g. `RUST_LOG=teyrchain::pvf-execute-worker=trace`.
+const LOG_TARGET: &str = "teyrchain::pvf-execute-worker";
+
+use codec::{Decode, Encode};
+use cpu_time::ProcessTime;
+use nix::{
+	errno::Errno,
+	sys::{
+		resource::{Usage, UsageWho},
+		wait::WaitStatus,
+	},
+	unistd::{ForkResult, Pid},
+};
+use pezkuwi_node_core_pvf_common::{
+	compute_checksum,
+	error::InternalValidationError,
+	execute::{
+		ExecuteRequest, Handshake, JobError, JobResponse, JobResult, WorkerError, WorkerResponse,
+	},
+	executor_interface::params_to_wasmtime_semantics,
+	framed_recv_blocking, framed_send_blocking,
+	worker::{
+		cpu_time_monitor_loop, get_total_cpu_usage, pipe2_cloexec, recv_child_response, run_worker,
+		send_result, stringify_errno, stringify_panic_payload,
+		thread::{self, WaitOutcome},
+		PipeFd, WorkerInfo, WorkerKind,
+	},
+	worker_dir, ArtifactChecksum,
+};
+use pezkuwi_node_primitives::{BlockData, PoV, POV_BOMB_LIMIT};
+use pezkuwi_primitives::{ExecutorParams, PersistedValidationData};
+use pezkuwi_teyrchain_primitives::primitives::ValidationResult;
+use std::{
+	io::{self, Read},
+	os::{
+		fd::{AsRawFd, FromRawFd},
+		unix::net::UnixStream,
+	},
+	path::PathBuf,
+	process,
+	sync::{mpsc::channel, Arc},
+	time::Duration,
+};
+
+/// The number of threads for the child process:
+/// 1 - Main thread
+/// 2 - Cpu monitor thread
+/// 3 - Execute thread
+///
+/// NOTE: The correctness of this value is enforced by a test. If the number of threads inside
+/// the child process changes in the future, this value must be changed as well.
+pub const EXECUTE_WORKER_THREAD_NUMBER: u32 = 3;
+
+/// Reads one framed message from `stream` and decodes it as the execute worker [`Handshake`].
+///
+/// # Errors
+///
+/// Propagates the error from receiving the frame, or returns an `io::ErrorKind::Other` error
+/// when the received bytes do not decode as a `Handshake`.
+fn recv_execute_handshake(stream: &mut UnixStream) -> io::Result<Handshake> {
+	let encoded = framed_recv_blocking(stream)?;
+	match Handshake::decode(&mut &encoded[..]) {
+		Ok(handshake) => Ok(handshake),
+		Err(_) => Err(io::Error::new(
+			io::ErrorKind::Other,
+			"execute pvf recv_execute_handshake: failed to decode Handshake".to_owned(),
+		)),
+	}
+}
+
+/// Reads one framed message from `stream`, decodes it as an [`ExecuteRequest`] and returns its
+/// parts as `(pvd, pov, execution_timeout, artifact_checksum)`.
+///
+/// # Errors
+///
+/// Propagates the error from receiving the frame, or returns an `io::ErrorKind::Other` error
+/// when the received bytes do not decode as an `ExecuteRequest`.
+fn recv_request(
+	stream: &mut UnixStream,
+) -> io::Result<(PersistedValidationData, PoV, Duration, ArtifactChecksum)> {
+	let raw = framed_recv_blocking(stream)?;
+	let decoded = match ExecuteRequest::decode(&mut &raw[..]) {
+		Ok(request) => request,
+		Err(_) =>
+			return Err(io::Error::new(
+				io::ErrorKind::Other,
+				"execute pvf recv_request: failed to decode ExecuteRequest".to_string(),
+			)),
+	};
+
+	let ExecuteRequest { pvd, pov, execution_timeout, artifact_checksum, .. } = decoded;
+	Ok((pvd, pov, execution_timeout, artifact_checksum))
+}
+
+/// Sends an error to the host and evaluates to an `io::Error` describing it.
+///
+/// `$err_constructor` is applied to the string form of `$error` and the result is converted
+/// into a [`WorkerError`]. That worker error is sent over `$stream` on a best-effort basis (a
+/// failed send is ignored), and the macro evaluates to an `io::Error` of kind `Other` whose
+/// message is the worker error's string form.
+macro_rules! map_and_send_err {
+	($error:expr, $err_constructor:expr, $stream:expr, $worker_info:expr) => {{
+		let err: WorkerError = $err_constructor($error.to_string()).into();
+		let io_err = io::Error::new(io::ErrorKind::Other, err.to_string());
+		let _ = send_result::<WorkerResponse, WorkerError>($stream, Err(err), $worker_info);
+		io_err
+	}};
+}
+
+/// The entrypoint that the spawned execute worker should start with.
+///
+/// After receiving the handshake, the worker loops over incoming requests. For each request it
+/// reads the compiled artifact from the worker directory, verifies its checksum, decompresses
+/// the PoV, runs the job in a child process (`clone` when the security status allows it on
+/// Linux, `fork` otherwise) and sends the outcome back to the host.
+///
+/// # Parameters
+///
+/// - `socket_path`: specifies the path to the socket used to communicate with the host.
+///
+/// - `worker_dir_path`: specifies the path to the worker-specific temporary directory.
+///
+/// - `node_version`: if `Some`, is checked against the `worker_version`. A mismatch results in
+///   immediate worker termination. `None` is used for tests and in other situations when version
+///   check is not necessary.
+///
+/// - `worker_version`: see above
+pub fn worker_entrypoint(
+	socket_path: PathBuf,
+	worker_dir_path: PathBuf,
+	node_version: Option<&str>,
+	worker_version: Option<&str>,
+) {
+	run_worker(
+		WorkerKind::Execute,
+		socket_path,
+		worker_dir_path,
+		node_version,
+		worker_version,
+		|mut stream, worker_info, security_status| {
+			let artifact_path = worker_dir::execute_artifact(&worker_info.worker_dir_path);
+
+			let Handshake { executor_params } =
+				recv_execute_handshake(&mut stream).map_err(|e| {
+					map_and_send_err!(
+						e,
+						InternalValidationError::HostCommunication,
+						&mut stream,
+						worker_info
+					)
+				})?;
+
+			let executor_params: Arc<ExecutorParams> = Arc::new(executor_params);
+			let execute_thread_stack_size = max_stack_size(&executor_params);
+
+			loop {
+				let (pvd, pov, execution_timeout, artifact_checksum) = recv_request(&mut stream)
+					.map_err(|e| {
+						map_and_send_err!(
+							e,
+							InternalValidationError::HostCommunication,
+							&mut stream,
+							worker_info
+						)
+					})?;
+				gum::debug!(
+					target: LOG_TARGET,
+					?worker_info,
+					?security_status,
+					"worker: validating artifact {}",
+					artifact_path.display(),
+				);
+
+				// Get the artifact bytes.
+				let compiled_artifact_blob = std::fs::read(&artifact_path).map_err(|e| {
+					map_and_send_err!(
+						e,
+						InternalValidationError::CouldNotOpenFile,
+						&mut stream,
+						worker_info
+					)
+				})?;
+
+				if artifact_checksum != compute_checksum(&compiled_artifact_blob) {
+					send_result::<WorkerResponse, WorkerError>(
+						&mut stream,
+						Ok(WorkerResponse {
+							job_response: JobResponse::CorruptedArtifact,
+							duration: Duration::ZERO,
+							pov_size: 0,
+						}),
+						worker_info,
+					)?;
+					continue;
+				}
+
+				// Decompress the PoV *before* creating the pipe. The pipe ends are raw file
+				// descriptors that nothing closes on the `continue` below, so creating them
+				// first would leak two descriptors for every PoV that fails to decompress.
+				let raw_block_data =
+					match sp_maybe_compressed_blob::decompress(&pov.block_data.0, POV_BOMB_LIMIT) {
+						Ok(data) => data,
+						Err(_) => {
+							send_result::<WorkerResponse, WorkerError>(
+								&mut stream,
+								Ok(WorkerResponse {
+									job_response: JobResponse::PoVDecompressionFailure,
+									duration: Duration::ZERO,
+									pov_size: 0,
+								}),
+								worker_info,
+							)?;
+							continue;
+						},
+					};
+
+				let pov_size = raw_block_data.len() as u32;
+
+				let params = ValidationParams {
+					parent_head: pvd.parent_head.clone(),
+					// Reuse the decompressed buffer when it is already owned instead of
+					// copying it again.
+					block_data: BlockData(raw_block_data.into_owned()),
+					relay_parent_number: pvd.relay_parent_number,
+					relay_parent_storage_root: pvd.relay_parent_storage_root,
+				};
+				let params = Arc::new(params.encode());
+
+				let compiled_artifact_blob = Arc::new(compiled_artifact_blob);
+
+				// From here on every path hands both pipe ends to `handle_clone`/`handle_fork`
+				// or terminates the request loop with an error.
+				let (pipe_read_fd, pipe_write_fd) = pipe2_cloexec().map_err(|e| {
+					map_and_send_err!(
+						e,
+						InternalValidationError::CouldNotCreatePipe,
+						&mut stream,
+						worker_info
+					)
+				})?;
+
+				// Snapshot of the children's resource usage, taken right before spawning the
+				// job so that the job's own CPU time can be isolated afterwards.
+				let usage_before = nix::sys::resource::getrusage(UsageWho::RUSAGE_CHILDREN)
+					.map_err(|errno| {
+						let e = stringify_errno("getrusage before", errno);
+						map_and_send_err!(
+							e,
+							InternalValidationError::Kernel,
+							&mut stream,
+							worker_info
+						)
+					})?;
+				let stream_fd = stream.as_raw_fd();
+
+				cfg_if::cfg_if! {
+					if #[cfg(target_os = "linux")] {
+						let result = if security_status.can_do_secure_clone {
+							handle_clone(
+								pipe_write_fd,
+								pipe_read_fd,
+								stream_fd,
+								&compiled_artifact_blob,
+								&executor_params,
+								&params,
+								execution_timeout,
+								execute_thread_stack_size,
+								worker_info,
+								security_status.can_unshare_user_namespace_and_change_root,
+								usage_before,
+								pov_size,
+							)?
+						} else {
+							// Fall back to using fork.
+							handle_fork(
+								pipe_write_fd,
+								pipe_read_fd,
+								stream_fd,
+								&compiled_artifact_blob,
+								&executor_params,
+								&params,
+								execution_timeout,
+								execute_thread_stack_size,
+								worker_info,
+								usage_before,
+								pov_size,
+							)?
+						};
+					} else {
+						let result = handle_fork(
+							pipe_write_fd,
+							pipe_read_fd,
+							stream_fd,
+							&compiled_artifact_blob,
+							&executor_params,
+							&params,
+							execution_timeout,
+							execute_thread_stack_size,
+							worker_info,
+							usage_before,
+							pov_size,
+						)?;
+					}
+				}
+
+				gum::trace!(
+					target: LOG_TARGET,
+					?worker_info,
+					"worker: sending result to host: {:?}",
+					result
+				);
+				send_result(&mut stream, result, worker_info)?;
+			}
+		},
+	);
+}
+
+/// Executes the compiled artifact with the given encoded validation parameters and turns the
+/// outcome into a [`JobResponse`].
+///
+/// A runtime construction failure and any other execution failure are mapped to their
+/// respective `JobResponse` error constructors; output that does not decode as a
+/// [`ValidationResult`] is reported as an invalid format.
+fn validate_using_artifact(
+	compiled_artifact_blob: &[u8],
+	executor_params: &ExecutorParams,
+	params: &[u8],
+) -> JobResponse {
+	// SAFETY: this should be safe since the compiled artifact passed here comes from the
+	//         file created by the prepare workers. These files are obtained by calling
+	//         [`executor_interface::prepare`].
+	let execution = unsafe { execute_artifact(compiled_artifact_blob, executor_params, params) };
+
+	let descriptor_bytes = match execution {
+		Ok(bytes) => bytes,
+		Err(ExecuteError::RuntimeConstruction(wasmerr)) =>
+			return JobResponse::runtime_construction("execute", &wasmerr.to_string()),
+		Err(other) => return JobResponse::format_invalid("execute", &other.to_string()),
+	};
+
+	match ValidationResult::decode(&mut &descriptor_bytes[..]) {
+		Ok(result_descriptor) => JobResponse::Ok { result_descriptor },
+		Err(decode_err) => JobResponse::format_invalid(
+			"validation result decoding failed",
+			&decode_err.to_string(),
+		),
+	}
+}
+
+/// Runs the execute job in a child process created through `security::clone::clone_on_worker`.
+///
+/// The child runs [`handle_child_process`] (which never returns); the parent continues in
+/// [`handle_parent_process`] with the child's pid. A failure of the clone call itself is
+/// reported as an internal kernel error rather than as an `io::Error`.
+#[cfg(target_os = "linux")]
+fn handle_clone(
+	pipe_write_fd: i32,
+	pipe_read_fd: i32,
+	stream_fd: i32,
+	compiled_artifact_blob: &Arc<Vec<u8>>,
+	executor_params: &Arc<ExecutorParams>,
+	params: &Arc<Vec<u8>>,
+	execution_timeout: Duration,
+	execute_stack_size: usize,
+	worker_info: &WorkerInfo,
+	have_unshare_newuser: bool,
+	usage_before: Usage,
+	pov_size: u32,
+) -> io::Result<Result<WorkerResponse, WorkerError>> {
+	use pezkuwi_node_core_pvf_common::worker::security;
+
+	// SAFETY: new process is spawned within a single threaded process. This invariant
+	// is enforced by tests. Stack size being specified to ensure child doesn't overflow
+	match unsafe {
+		security::clone::clone_on_worker(
+			worker_info,
+			have_unshare_newuser,
+			Box::new(|| {
+				handle_child_process(
+					pipe_write_fd,
+					pipe_read_fd,
+					stream_fd,
+					Arc::clone(compiled_artifact_blob),
+					Arc::clone(executor_params),
+					Arc::clone(params),
+					execution_timeout,
+					execute_stack_size,
+				)
+			}),
+		)
+	} {
+		// Parent side: wait for the child and collect its response.
+		Ok(child) => handle_parent_process(
+			pipe_read_fd,
+			pipe_write_fd,
+			worker_info,
+			child,
+			usage_before,
+			pov_size,
+			execution_timeout,
+		),
+		Err(security::clone::Error::Clone(errno)) =>
+			Ok(Err(internal_error_from_errno("clone", errno))),
+	}
+}
+
+/// Runs the execute job in a child process created with `fork`.
+///
+/// The child branch runs [`handle_child_process`] (which never returns); the parent branch
+/// continues in [`handle_parent_process`] with the child's pid. A failure of `fork` itself is
+/// reported as an internal kernel error rather than as an `io::Error`.
+fn handle_fork(
+	pipe_write_fd: i32,
+	pipe_read_fd: i32,
+	stream_fd: i32,
+	compiled_artifact_blob: &Arc<Vec<u8>>,
+	executor_params: &Arc<ExecutorParams>,
+	params: &Arc<Vec<u8>>,
+	execution_timeout: Duration,
+	execute_worker_stack_size: usize,
+	worker_info: &WorkerInfo,
+	usage_before: Usage,
+	pov_size: u32,
+) -> io::Result<Result<WorkerResponse, WorkerError>> {
+	// SAFETY: new process is spawned within a single threaded process. This invariant
+	// is enforced by tests.
+	match unsafe { nix::unistd::fork() } {
+		// Child side: execute the artifact and report back over the pipe.
+		Ok(ForkResult::Child) => handle_child_process(
+			pipe_write_fd,
+			pipe_read_fd,
+			stream_fd,
+			Arc::clone(compiled_artifact_blob),
+			Arc::clone(executor_params),
+			Arc::clone(params),
+			execution_timeout,
+			execute_worker_stack_size,
+		),
+		// Parent side: wait for the child and collect its response.
+		Ok(ForkResult::Parent { child }) => handle_parent_process(
+			pipe_read_fd,
+			pipe_write_fd,
+			worker_info,
+			child,
+			usage_before,
+			pov_size,
+			execution_timeout,
+		),
+		Err(errno) => Ok(Err(internal_error_from_errno("fork", errno))),
+	}
+}
+
+/// This is used to handle child process during pvf execute worker.
+/// It executes the artifact and pipes back the response to the parent process.
+///
+/// Two threads are spawned: one monitoring CPU time against `execution_timeout` and one
+/// executing the artifact with a stack of `execute_thread_stack_size` bytes. Whichever
+/// finishes first determines the response.
+///
+/// # Returns
+///
+/// - pipe back `JobResponse` to the parent process. The function itself never returns: every
+///   path ends in [`send_child_response`], which exits the process.
+fn handle_child_process(
+	pipe_write_fd: i32,
+	pipe_read_fd: i32,
+	stream_fd: i32,
+	compiled_artifact_blob: Arc<Vec<u8>>,
+	executor_params: Arc<ExecutorParams>,
+	params: Arc<Vec<u8>>,
+	execution_timeout: Duration,
+	execute_thread_stack_size: usize,
+) -> ! {
+	// SAFETY: this is an open and owned file descriptor at this point.
+	let mut pipe_write = unsafe { PipeFd::from_raw_fd(pipe_write_fd) };
+
+	// Drop the read end so we don't have too many FDs open.
+	// On failure the error is reported over the pipe and the process exits.
+	if let Err(errno) = nix::unistd::close(pipe_read_fd) {
+		send_child_response(&mut pipe_write, job_error_from_errno("closing pipe", errno));
+	}
+
+	// Dropping the stream closes the underlying socket. We want to make sure
+	// that the sandboxed child can't get any kind of information from the
+	// outside world. The only IPC it should be able to do is sending its
+	// response over the pipe.
+	if let Err(errno) = nix::unistd::close(stream_fd) {
+		send_child_response(&mut pipe_write, job_error_from_errno("closing stream", errno));
+	}
+
+	gum::debug!(
+		target: LOG_TARGET,
+		worker_job_pid = %process::id(),
+		"worker job: executing artifact",
+	);
+
+	// Conditional variable to notify us when a thread is done.
+	let condvar = thread::get_condvar();
+	let cpu_time_start = ProcessTime::now();
+
+	// Spawn a new thread that runs the CPU time monitor.
+	let (cpu_time_monitor_tx, cpu_time_monitor_rx) = channel::<()>();
+	let cpu_time_monitor_thread = thread::spawn_worker_thread(
+		"cpu time monitor thread",
+		move || cpu_time_monitor_loop(cpu_time_start, execution_timeout, cpu_time_monitor_rx),
+		Arc::clone(&condvar),
+		WaitOutcome::TimedOut,
+	)
+	.unwrap_or_else(|err| {
+		send_child_response(&mut pipe_write, Err(JobError::CouldNotSpawnThread(err.to_string())))
+	});
+
+	// Spawn the thread that actually executes the artifact, with the enlarged stack.
+	let execute_thread = thread::spawn_worker_thread_with_stack_size(
+		"execute thread",
+		move || validate_using_artifact(&compiled_artifact_blob, &executor_params, &params),
+		Arc::clone(&condvar),
+		WaitOutcome::Finished,
+		execute_thread_stack_size,
+	)
+	.unwrap_or_else(|err| {
+		send_child_response(&mut pipe_write, Err(JobError::CouldNotSpawnThread(err.to_string())))
+	});
+
+	let outcome = thread::wait_for_threads(condvar);
+
+	let response = match outcome {
+		WaitOutcome::Finished => {
+			// If the CPU thread is not selected, we signal it to end, the join handle is
+			// dropped and the thread will finish in the background.
+			let _ = cpu_time_monitor_tx.send(());
+			execute_thread.join().map_err(|e| JobError::Panic(stringify_panic_payload(e)))
+		},
+		// The CPU time monitor finished first: join it to find out whether the job really
+		// timed out or the monitor itself failed.
+		WaitOutcome::TimedOut => match cpu_time_monitor_thread.join() {
+			Ok(Some(_cpu_time_elapsed)) => Err(JobError::TimedOut),
+			Ok(None) => Err(JobError::CpuTimeMonitorThread(
+				"error communicating over finished channel".into(),
+			)),
+			Err(e) => Err(JobError::CpuTimeMonitorThread(stringify_panic_payload(e))),
+		},
+		WaitOutcome::Pending => {
+			unreachable!("we run wait_while until the outcome is no longer pending; qed")
+		},
+	};
+
+	send_child_response(&mut pipe_write, response);
+}
+
+/// Returns the stack size, in bytes, to use for executing a PVF with the given executor
+/// parameters.
+///
+/// The value is 2 MiB plus the native stack maximum derived from `executor_params` via
+/// `params_to_wasmtime_semantics`.
+///
+/// # Background
+///
+/// Wasmtime powers the Substrate Executor. It compiles the wasm bytecode into native code.
+/// That native code does not create any stacks and just reuses the stack of the thread that
+/// wasmtime was invoked from.
+///
+/// Also, we configure the executor to provide the deterministic stack and that requires
+/// supplying the amount of the native stack space that wasm is allowed to use. This is
+/// realized by supplying the limit into `wasmtime::Config::max_wasm_stack`.
+///
+/// There are quirks to that configuration knob:
+///
+/// 1. It only limits the amount of stack space consumed by wasm but does not ensure nor check that
+///    the stack space is actually available.
+///
+///    That means, if the calling thread has 1 MiB of stack space left and the wasm code consumes
+///    more, then the wasmtime limit will **not** trigger. Instead, the wasm code will hit the
+///    guard page and the Rust stack overflow handler will be triggered. That leads to an
+///    **abort**.
+///
+/// 2. It cannot and does not limit the stack space consumed by Rust code.
+///
+///    Meaning that if the wasm code leaves no stack space for Rust code, then the Rust code
+///    will abort and that will abort the process as well.
+///
+/// Typically on Linux the main thread gets the stack size specified by the `ulimit` and
+/// typically it's configured to 8 MiB. Rust's spawned threads are 2 MiB. OTOH, the
+/// DEFAULT_NATIVE_STACK_MAX is set to 256 MiB. Not nearly enough.
+///
+/// Hence we need to increase it. The simplest way to fix that is to spawn an execute thread with
+/// the desired stack limit. We must also make sure the job process has enough stack for *all* its
+/// threads. This function can be used to get the stack size of either the execute thread or execute
+/// job process.
+fn max_stack_size(executor_params: &ExecutorParams) -> usize {
+	/// Extra headroom on top of the stack that wasm is allowed to use.
+	const RUST_STACK_HEADROOM: usize = 2 * 1024 * 1024;
+
+	let (_sem, deterministic_stack_limit) = params_to_wasmtime_semantics(executor_params);
+	// Widen to `usize` before adding, so that the sum is not computed (and cannot overflow) in
+	// the narrower integer type of `native_stack_max`.
+	RUST_STACK_HEADROOM + deterministic_stack_limit.native_stack_max as usize
+}
+
+/// Waits for child process to finish and handle child response from pipe.
+///
+/// The parent closes its copy of the pipe's write end, reads everything the job wrote, reaps
+/// the job with `waitpid` and measures the job's CPU time as the difference in
+/// `RUSAGE_CHILDREN` usage relative to `usage_before`. A job whose CPU time reaches `timeout`
+/// is reported as timed out regardless of what it wrote to the pipe.
+///
+/// # Returns
+///
+/// - The response, either `Ok` or some error state. The outer `io::Result` is `Err` only when
+///   reading from the pipe or receiving the child's framed response fails.
+fn handle_parent_process(
+	pipe_read_fd: i32,
+	pipe_write_fd: i32,
+	worker_info: &WorkerInfo,
+	job_pid: Pid,
+	usage_before: Usage,
+	pov_size: u32,
+	timeout: Duration,
+) -> io::Result<Result<WorkerResponse, WorkerError>> {
+	// the read end will wait until all write ends have been closed,
+	// this drop is necessary to avoid deadlock
+	if let Err(errno) = nix::unistd::close(pipe_write_fd) {
+		return Ok(Err(internal_error_from_errno("closing pipe write fd", errno)));
+	};
+
+	// SAFETY: pipe_read_fd is an open and owned file descriptor at this point.
+	let mut pipe_read = unsafe { PipeFd::from_raw_fd(pipe_read_fd) };
+
+	// Read from the child. Don't decode unless the process exited normally, which we check later.
+	let mut received_data = Vec::new();
+	pipe_read
+		.read_to_end(&mut received_data)
+		// Could not read the job response from the pipe. There is either a bug or the job was
+		// hijacked. Should retry at any rate.
+		.map_err(|err| io::Error::new(io::ErrorKind::Other, err.to_string()))?;
+
+	let status = nix::sys::wait::waitpid(job_pid, None);
+	gum::trace!(
+		target: LOG_TARGET,
+		?worker_info,
+		%job_pid,
+		"execute worker received wait status from job: {:?}",
+		status,
+	);
+
+	let usage_after = match nix::sys::resource::getrusage(UsageWho::RUSAGE_CHILDREN) {
+		Ok(usage) => usage,
+		Err(errno) => return Ok(Err(internal_error_from_errno("getrusage after", errno))),
+	};
+
+	// Using `getrusage` is needed to check whether child has timed out since we cannot rely on
+	// child to report its own time.
+	// As `getrusage` returns resource usage from all terminated child processes,
+	// it is necessary to subtract the usage before the current child process to isolate its cpu
+	// time
+	let cpu_tv = get_total_cpu_usage(usage_after) - get_total_cpu_usage(usage_before);
+	if cpu_tv >= timeout {
+		gum::warn!(
+			target: LOG_TARGET,
+			?worker_info,
+			%job_pid,
+			"execute job took {}ms cpu time, exceeded execute timeout {}ms",
+			cpu_tv.as_millis(),
+			timeout.as_millis(),
+		);
+		return Ok(Err(WorkerError::JobTimedOut));
+	}
+
+	match status {
+		// The job exited on its own: only now decode what it wrote to the pipe.
+		Ok(WaitStatus::Exited(_, exit_status)) => {
+			let mut reader = io::BufReader::new(received_data.as_slice());
+			let result = recv_child_response(&mut reader, "execute")?;
+
+			match result {
+				Ok(job_response) => {
+					// The exit status should have been zero if no error occurred.
+					if exit_status != 0 {
+						return Ok(Err(WorkerError::JobError(JobError::UnexpectedExitStatus(
+							exit_status,
+						))));
+					}
+
+					Ok(Ok(WorkerResponse { job_response, pov_size, duration: cpu_tv }))
+				},
+				Err(job_error) => {
+					gum::warn!(
+						target: LOG_TARGET,
+						?worker_info,
+						%job_pid,
+						"execute job error: {}",
+						job_error,
+					);
+					// A timeout reported by the job itself is surfaced the same way as one
+					// detected through `getrusage` above.
+					if matches!(job_error, JobError::TimedOut) {
+						Ok(Err(WorkerError::JobTimedOut))
+					} else {
+						Ok(Err(WorkerError::JobError(job_error.into())))
+					}
+				},
+			}
+		},
+		// The job was killed by the given signal.
+		//
+		// The job gets SIGSYS on seccomp violations, but this signal may have been sent for some
+		// other reason, so we still need to check for seccomp violations elsewhere.
+		Ok(WaitStatus::Signaled(_pid, signal, _core_dump)) => Ok(Err(WorkerError::JobDied {
+			err: format!("received signal: {signal:?}"),
+			job_pid: job_pid.as_raw(),
+		})),
+		Err(errno) => Ok(Err(internal_error_from_errno("waitpid", errno))),
+
+		// It is within an attacker's power to send an unexpected exit status. So we cannot treat
+		// this as an internal error (which would make us abstain), but must vote against.
+		Ok(unexpected_wait_status) => Ok(Err(WorkerError::JobDied {
+			err: format!("unexpected status from wait: {unexpected_wait_status:?}"),
+			job_pid: job_pid.as_raw(),
+		})),
+	}
+}
+
+/// Writes the encoded job response to the pipe and then terminates the process.
+///
+/// The process exits with `EXIT_SUCCESS` when the response is `Ok` and was written
+/// successfully; it exits with `EXIT_FAILURE` when the response is an error or when writing to
+/// the pipe fails.
+///
+/// # Arguments
+///
+/// - `pipe_write`: A `PipeFd` structure, the writing end of a pipe.
+///
+/// - `response`: Child process response
+fn send_child_response(pipe_write: &mut PipeFd, response: JobResult) -> ! {
+	let exit_code = if response.is_ok() { libc::EXIT_SUCCESS } else { libc::EXIT_FAILURE };
+
+	let encoded = response.encode();
+	if framed_send_blocking(pipe_write, encoded.as_slice()).is_err() {
+		process::exit(libc::EXIT_FAILURE);
+	}
+
+	process::exit(exit_code)
+}
+
+/// Wraps `errno`, stringified together with `context`, into an internal kernel
+/// [`WorkerError`].
+fn internal_error_from_errno(context: &'static str, errno: Errno) -> WorkerError {
+	let message = stringify_errno(context, errno);
+	let kernel_error = InternalValidationError::Kernel(message);
+	WorkerError::InternalError(kernel_error)
+}
+
+/// Wraps `errno`, stringified together with `context`, into a failed [`JobResult`] carrying a
+/// kernel [`JobError`].
+fn job_error_from_errno(context: &'static str, errno: Errno) -> JobResult {
+	let message = stringify_errno(context, errno);
+	Err(JobError::Kernel(message))
+}
@@ -0,0 +1,54 @@
+[package]
+name = "pezkuwi-node-core-pvf-prepare-worker"
+description = "Pezkuwi crate that contains the logic for preparing PVFs. Used by the pezkuwi-prepare-worker binary."
+version = "7.0.0"
+authors.workspace = true
+edition.workspace = true
+license.workspace = true
+homepage.workspace = true
+repository.workspace = true
+
+[lints]
+workspace = true
+
+[[bench]]
+name = "prepare_pezkuwichain_runtime"
+harness = false
+
+[dependencies]
+cfg-if = { workspace = true }
+gum = { workspace = true, default-features = true }
+libc = { workspace = true }
+nix = { features = ["process", "resource", "sched"], workspace = true }
+tikv-jemalloc-ctl = { optional = true, workspace = true }
+tikv-jemallocator = { optional = true, workspace = true }
+tracking-allocator = { workspace = true, default-features = true }
+
+codec = { features = ["derive"], workspace = true }
+
+pezkuwi-node-core-pvf-common = { workspace = true, default-features = true }
+pezkuwi-primitives = { workspace = true, default-features = true }
+
+sp-maybe-compressed-blob = { workspace = true, default-features = true }
+
+[target.'cfg(target_os = "linux")'.dependencies]
+tikv-jemallocator = { workspace = true }
+tikv-jemalloc-ctl = { workspace = true }
+
+[dev-dependencies]
+criterion = { features = ["cargo_bench_support"], workspace = true }
+pezkuwichain-runtime = { workspace = true }
+
+[features]
+builder = []
+jemalloc-allocator = [
+	"dep:tikv-jemalloc-ctl",
+	"dep:tikv-jemallocator",
+	"pezkuwi-node-core-pvf-common/jemalloc-allocator",
+]
+runtime-benchmarks = [
+	"gum/runtime-benchmarks",
+	"pezkuwi-node-core-pvf-common/runtime-benchmarks",
+	"pezkuwi-primitives/runtime-benchmarks",
+	"pezkuwichain-runtime/runtime-benchmarks",
+]
@@ -0,0 +1,70 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+use criterion::{criterion_group, criterion_main, Criterion, SamplingMode};
+use pezkuwi_node_core_pvf_common::{
+	executor_interface::{prepare, prevalidate},
+	prepare::PrepareJobKind,
+	pvf::PvfPrepData,
+};
+use pezkuwi_primitives::ExecutorParams;
+use std::time::Duration;
+
+/// Runs one full preparation pass over `pvf`: decompresses its code, prevalidates it and
+/// prepares it with the PVF's executor parameters.
+///
+/// Panics if any of the steps fails.
+fn do_prepare_runtime(pvf: PvfPrepData) {
+	let compressed = pvf.maybe_compressed_code();
+	let decompressed = sp_maybe_compressed_blob::decompress(&compressed, usize::MAX).unwrap();
+
+	let blob = prevalidate(&decompressed).unwrap_or_else(|err| panic!("{:?}", err));
+
+	if let Err(err) = prepare(blob, &pvf.executor_params()) {
+		panic!("{:?}", err)
+	}
+}
+
+/// Criterion benchmark measuring how long it takes to prepare the Pezkuwichain runtime.
+///
+/// Panics if the runtime wasm binary is unavailable or cannot be decompressed.
+fn prepare_pezkuwichain_runtime(c: &mut Criterion) {
+	// Used both as the decompression limit and as the last argument of
+	// `PvfPrepData::from_code`.
+	const CODE_SIZE_LIMIT: usize = 64 * 1024 * 1024;
+
+	let blob = pezkuwichain_runtime::WASM_BINARY.unwrap();
+	let code = sp_maybe_compressed_blob::decompress(&blob, CODE_SIZE_LIMIT)
+		.unwrap_or_else(|e| panic!("Cannot decompress blob: {:?}", e));
+	let pvf = PvfPrepData::from_code(
+		code.into_owned(),
+		ExecutorParams::default(),
+		Duration::from_secs(360),
+		PrepareJobKind::Compilation,
+		CODE_SIZE_LIMIT as _,
+	);
+
+	let mut group = c.benchmark_group("pezkuwichain");
+	group.sampling_mode(SamplingMode::Flat);
+	group.sample_size(20);
+	group.measurement_time(Duration::from_secs(240));
+	group.bench_function("prepare Pezkuwichain runtime", |b| {
+		// `PvfPrepData` is designed to be cheap to clone, so cloning shouldn't affect the
+		// benchmark accuracy
+		b.iter(|| do_prepare_runtime(pvf.clone()))
+	});
+	group.finish();
+}
+
+criterion_group!(preparation, prepare_pezkuwichain_runtime);
+criterion_main!(preparation);
@@ -0,0 +1,785 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Contains the logic for preparing PVFs. Used by the pezkuwi-prepare-worker binary.
+
+mod memory_stats;
+
+// NOTE: Initializing logging in e.g. tests will not have an effect in the workers, as they are
+//       separate spawned processes. Run with e.g. `RUST_LOG=teyrchain::pvf-prepare-worker=trace`.
+/// Logging target used by the prepare worker's log statements.
+const LOG_TARGET: &str = "teyrchain::pvf-prepare-worker";
+
+#[cfg(target_os = "linux")]
+use crate::memory_stats::max_rss_stat::{extract_max_rss_stat, get_max_rss_thread};
+#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+use crate::memory_stats::memory_tracker::{get_memory_tracker_loop_stats, memory_tracker_loop};
+use codec::{Decode, Encode};
+use nix::{
+	errno::Errno,
+	sys::{
+		resource::{Usage, UsageWho},
+		wait::WaitStatus,
+	},
+	unistd::{ForkResult, Pid},
+};
+use pezkuwi_node_core_pvf_common::{
+	compute_checksum,
+	error::{PrepareError, PrepareWorkerResult},
+	executor_interface::{create_runtime_from_artifact_bytes, prepare, prevalidate},
+	framed_recv_blocking, framed_send_blocking,
+	prepare::{MemoryStats, PrepareJobKind, PrepareStats, PrepareWorkerSuccess},
+	pvf::PvfPrepData,
+	worker::{
+		cpu_time_monitor_loop, get_total_cpu_usage, pipe2_cloexec, recv_child_response, run_worker,
+		send_result, stringify_errno, stringify_panic_payload,
+		thread::{self, spawn_worker_thread, WaitOutcome},
+		PipeFd, WorkerInfo, WorkerKind,
+	},
+	worker_dir, ProcessTime,
+};
+use pezkuwi_primitives::ExecutorParams;
+use std::{
+	fs,
+	io::{self, Read},
+	os::{
+		fd::{AsRawFd, FromRawFd, RawFd},
+		unix::net::UnixStream,
+	},
+	path::{Path, PathBuf},
+	process,
+	sync::{mpsc::channel, Arc},
+	time::Duration,
+};
+use tracking_allocator::TrackingAllocator;
+
+// Global allocator on Linux, or whenever the `jemalloc-allocator` feature is enabled: jemalloc
+// wrapped in a `TrackingAllocator`.
+#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+#[global_allocator]
+static ALLOC: TrackingAllocator<tikv_jemallocator::Jemalloc> =
+	TrackingAllocator(tikv_jemallocator::Jemalloc);
+
+// Global allocator everywhere else: the system allocator wrapped in a `TrackingAllocator`.
+#[cfg(not(any(target_os = "linux", feature = "jemalloc-allocator")))]
+#[global_allocator]
+static ALLOC: TrackingAllocator<std::alloc::System> = TrackingAllocator(std::alloc::System);
+
+/// The number of threads for the child process:
+/// 1 - Main thread
+/// 2 - Cpu monitor thread
+/// 3 - Memory tracker thread (only spawned on Linux or with the `jemalloc-allocator` feature)
+/// 4 - Prepare thread
+///
+/// NOTE: The correctness of this value is enforced by a test. If the number of threads inside
+/// the child process changes in the future, this value must be changed as well.
+pub const PREPARE_WORKER_THREAD_NUMBER: u32 = 4;
+
+/// Newtype holding the raw bytes of a successfully compiled artifact.
+#[derive(Encode, Decode)]
+pub struct CompiledArtifact(Vec<u8>);
+
+impl CompiledArtifact {
+	/// Wraps the compiled `code` bytes into a `CompiledArtifact`.
+	pub fn new(code: Vec<u8>) -> Self {
+		CompiledArtifact(code)
+	}
+}
+
+impl AsRef<[u8]> for CompiledArtifact {
+	/// Borrows the artifact bytes.
+	fn as_ref(&self) -> &[u8] {
+		&self.0
+	}
+}
+
+/// The result of a successful artifact preparation, as produced by `prepare_artifact`.
+#[derive(Encode, Decode)]
+pub struct PrepareOutcome {
+	/// The compiled artifact bytes.
+	pub compiled_artifact: CompiledArtifact,
+	/// Length in bytes of the decompressed validation code that was compiled.
+	pub observed_wasm_code_len: u32,
+}
+
+/// Reads one framed message from the host `stream` and decodes it into the `PvfPrepData`
+/// describing the preparation job.
+///
+/// # Errors
+///
+/// Returns the I/O error from receiving the frame, or an `io::ErrorKind::Other` error if the
+/// received bytes cannot be decoded.
+fn recv_request(stream: &mut UnixStream) -> io::Result<PvfPrepData> {
+	let bytes = framed_recv_blocking(stream)?;
+	PvfPrepData::decode(&mut &bytes[..]).map_err(|e| {
+		let message = format!("prepare pvf recv_request: failed to decode PvfPrepData: {}", e);
+		io::Error::new(io::ErrorKind::Other, message)
+	})
+}
+
+/// Starts allocation tracking on the global `ALLOC` with an optional `limit` and installs a
+/// failure handler.
+///
+/// The failure handler writes the pre-encoded `OOM_PAYLOAD` to `fd`, closes `fd` and terminates
+/// the whole process with exit status 1. It never returns.
+///
+/// NOTE(review): the handler is presumably invoked by the tracking allocator once `limit` is
+/// exceeded; confirm against `TrackingAllocator::start_tracking`.
+fn start_memory_tracking(fd: RawFd, limit: Option<isize>) {
+	unsafe {
+		// SAFETY: Inside the failure handler, the allocator is locked and no allocations or
+		// deallocations are possible. For Linux, that always holds for the code below, so it's
+		// safe. For MacOS, that technically holds at the time of writing, but there are no future
+		// guarantees.
+		// The arguments of unsafe `libc` calls are valid, the payload validity is covered with
+		// a test.
+		ALLOC.start_tracking(
+			limit,
+			Some(Box::new(move || {
+				#[cfg(target_os = "linux")]
+				{
+					// Syscalls never allocate or deallocate, so this is safe.
+					libc::syscall(libc::SYS_write, fd, OOM_PAYLOAD.as_ptr(), OOM_PAYLOAD.len());
+					libc::syscall(libc::SYS_close, fd);
+					// Make sure we exit from all threads. Copied from glibc.
+					libc::syscall(libc::SYS_exit_group, 1);
+					// Fallback in case `exit_group` returned: keep exiting this thread.
+					loop {
+						libc::syscall(libc::SYS_exit, 1);
+					}
+				}
+				#[cfg(not(target_os = "linux"))]
+				{
+					// Syscalls are not available on MacOS, so we have to use `libc` wrappers.
+					// Technically, there may be allocations inside, although they shouldn't be
+					// there. In that case, we'll see deadlocks on MacOS after the OOM condition
+					// triggered. As we consider running a validator on MacOS unsafe, and this
+					// code is only run by a validator, it's a lesser evil.
+					libc::write(fd, OOM_PAYLOAD.as_ptr().cast(), OOM_PAYLOAD.len());
+					libc::close(fd);
+					libc::_exit(1);
+				}
+			})),
+		);
+	}
+}
+
+/// Stops allocation tracking on the global `ALLOC` and returns the value it reports. The caller
+/// treats this value as the peak tracked allocation in bytes.
+fn end_memory_tracking() -> isize {
+	ALLOC.end_tracking()
+}
+
+/// The entrypoint that the spawned prepare worker should start with.
+///
+/// # Parameters
+///
+/// - `socket_path`: specifies the path to the socket used to communicate with the host.
+///
+/// - `worker_dir_path`: specifies the path to the worker-specific temporary directory.
+///
+/// - `node_version`: if `Some`, is checked against the `worker_version`. A mismatch results in
+///   immediate worker termination. `None` is used for tests and in other situations when version
+///   check is not necessary.
+///
+/// - `worker_version`: see above
+///
+/// # Flow
+///
+/// This runs the following in a loop:
+///
+/// 1. Get the code and parameters for preparation from the host.
+///
+/// 2. Start a new child process
+///
+/// 3. Start the memory tracker and the actual preparation in two separate threads.
+///
+/// 4. Wait on the two threads created in step 3.
+///
+/// 5. Stop the memory tracker and get the stats.
+///
+/// 6. Pipe the result back to the parent process and exit from child process.
+///
+/// 7. If compilation succeeded, write the compiled artifact into a temporary file.
+///
+/// 8. Send the result of preparation back to the host, including the checksum of the artifact. If
+///    any error occurred in the above steps, we send that in the `PrepareWorkerResult`.
+pub fn worker_entrypoint(
+	socket_path: PathBuf,
+	worker_dir_path: PathBuf,
+	node_version: Option<&str>,
+	worker_version: Option<&str>,
+) {
+	run_worker(
+		WorkerKind::Prepare,
+		socket_path,
+		worker_dir_path,
+		node_version,
+		worker_version,
+		|mut stream, worker_info, security_status| {
+			let temp_artifact_dest = worker_dir::prepare_tmp_artifact(&worker_info.worker_dir_path);
+
+			loop {
+				let pvf = recv_request(&mut stream)?;
+				gum::debug!(
+					target: LOG_TARGET,
+					?worker_info,
+					?security_status,
+					"worker: preparing artifact",
+				);
+
+				let preparation_timeout = pvf.prep_timeout();
+				let prepare_job_kind = pvf.prep_kind();
+				let executor_params = pvf.executor_params();
+
+				// Snapshot the children's resource usage *before* creating the pipe. The pipe
+				// ends are raw file descriptors that nothing closes automatically, so skipping
+				// this job after the pipe exists would leak both of them.
+				let usage_before = match nix::sys::resource::getrusage(UsageWho::RUSAGE_CHILDREN) {
+					Ok(usage) => usage,
+					Err(errno) => {
+						let result: PrepareWorkerResult =
+							Err(error_from_errno("getrusage before", errno));
+						send_result(&mut stream, result, worker_info)?;
+						continue;
+					},
+				};
+
+				let (pipe_read_fd, pipe_write_fd) = pipe2_cloexec()?;
+
+				let stream_fd = stream.as_raw_fd();
+
+				cfg_if::cfg_if! {
+					if #[cfg(target_os = "linux")] {
+						let result = if security_status.can_do_secure_clone {
+							handle_clone(
+								&pvf,
+								pipe_write_fd,
+								pipe_read_fd,
+								stream_fd,
+								preparation_timeout,
+								prepare_job_kind,
+								&executor_params,
+								worker_info,
+								security_status.can_unshare_user_namespace_and_change_root,
+								&temp_artifact_dest,
+								usage_before,
+							)
+						} else {
+							// Fall back to using fork.
+							handle_fork(
+								&pvf,
+								pipe_write_fd,
+								pipe_read_fd,
+								stream_fd,
+								preparation_timeout,
+								prepare_job_kind,
+								&executor_params,
+								worker_info,
+								&temp_artifact_dest,
+								usage_before,
+							)
+						};
+					} else {
+						let result = handle_fork(
+							&pvf,
+							pipe_write_fd,
+							pipe_read_fd,
+							stream_fd,
+							preparation_timeout,
+							prepare_job_kind,
+							&executor_params,
+							worker_info,
+							&temp_artifact_dest,
+							usage_before,
+						);
+					}
+				}
+
+				gum::trace!(
+					target: LOG_TARGET,
+					?worker_info,
+					"worker: sending result to host: {:?}",
+					result
+				);
+				send_result(&mut stream, result, worker_info)?;
+			}
+		},
+	);
+}
+
+/// Decompresses, prevalidates and compiles the code carried by `pvf`.
+///
+/// # Errors
+///
+/// - `PrepareError::CouldNotDecompressCodeBlob` if decompression fails (including exceeding the
+///   PVF's validation code bomb limit).
+/// - `PrepareError::Prevalidation` if prevalidation rejects the code.
+/// - `PrepareError::Preparation` if compilation fails.
+fn prepare_artifact(pvf: PvfPrepData) -> Result<PrepareOutcome, PrepareError> {
+	let bomb_limit = pvf.validation_code_bomb_limit() as usize;
+	let compressed = pvf.maybe_compressed_code();
+	let code = sp_maybe_compressed_blob::decompress(&compressed, bomb_limit)
+		.map_err(|e| PrepareError::CouldNotDecompressCodeBlob(e.to_string()))?;
+	let observed_wasm_code_len = code.len() as u32;
+
+	let blob =
+		prevalidate(&code).map_err(|err| PrepareError::Prevalidation(format!("{:?}", err)))?;
+
+	prepare(blob, &pvf.executor_params())
+		.map(|compiled| PrepareOutcome {
+			compiled_artifact: CompiledArtifact::new(compiled),
+			observed_wasm_code_len,
+		})
+		.map_err(|err| PrepareError::Preparation(format!("{:?}", err)))
+}
+
+/// Attempts to build a runtime from the freshly compiled artifact so that instantiation
+/// errors surface during pre-checking. The constructed runtime itself is discarded.
+fn runtime_construction_check(
+	artifact_bytes: &[u8],
+	executor_params: &ExecutorParams,
+) -> Result<(), PrepareError> {
+	// SAFETY: We just compiled this artifact.
+	match unsafe { create_runtime_from_artifact_bytes(artifact_bytes, executor_params) } {
+		Ok(_runtime) => Ok(()),
+		Err(err) => Err(PrepareError::RuntimeConstruction(format!("{:?}", err))),
+	}
+}
+
+/// Successful response piped from the job (child) process back to the worker (parent) process.
+#[derive(Encode, Decode)]
+struct JobResponse {
+	/// The compiled artifact bytes.
+	artifact: CompiledArtifact,
+	/// Memory statistics gathered while the job was running.
+	memory_stats: MemoryStats,
+	/// Length in bytes of the decompressed validation code.
+	observed_wasm_code_len: u32,
+}
+
+/// Spawns the preparation job through `security::clone::clone_on_worker` (Linux only).
+///
+/// The spawned process runs `handle_child_process`, which never returns. The calling process
+/// continues in `handle_parent_process` and returns its result.
+///
+/// # Errors
+///
+/// Returns a `PrepareError::Kernel` if the clone itself fails. In that case both pipe ends are
+/// closed here, since no child exists and `handle_parent_process` never runs to take care of
+/// them.
+#[cfg(target_os = "linux")]
+fn handle_clone(
+	pvf: &PvfPrepData,
+	pipe_write_fd: i32,
+	pipe_read_fd: i32,
+	stream_fd: i32,
+	preparation_timeout: Duration,
+	prepare_job_kind: PrepareJobKind,
+	executor_params: &Arc<ExecutorParams>,
+	worker_info: &WorkerInfo,
+	have_unshare_newuser: bool,
+	temp_artifact_dest: &Path,
+	usage_before: Usage,
+) -> Result<PrepareWorkerSuccess, PrepareError> {
+	use pezkuwi_node_core_pvf_common::worker::security;
+
+	// SAFETY: new process is spawned within a single threaded process. This invariant
+	// is enforced by tests. Stack size being specified to ensure child doesn't overflow
+	match unsafe {
+		security::clone::clone_on_worker(
+			worker_info,
+			have_unshare_newuser,
+			Box::new(|| {
+				handle_child_process(
+					pvf.clone(),
+					pipe_write_fd,
+					pipe_read_fd,
+					stream_fd,
+					preparation_timeout,
+					prepare_job_kind,
+					Arc::clone(executor_params),
+				)
+			}),
+		)
+	} {
+		Ok(child) => handle_parent_process(
+			pipe_read_fd,
+			pipe_write_fd,
+			worker_info,
+			child,
+			temp_artifact_dest,
+			usage_before,
+			preparation_timeout,
+		),
+		Err(security::clone::Error::Clone(errno)) => {
+			// The pipe ends are raw descriptors; without a child nobody else closes them, and
+			// the worker keeps serving requests after this error. Closing is best effort: the
+			// clone failure is the error worth reporting.
+			let _ = nix::unistd::close(pipe_read_fd);
+			let _ = nix::unistd::close(pipe_write_fd);
+			Err(error_from_errno("clone", errno))
+		},
+	}
+}
+
+/// Spawns the preparation job with a plain `fork`.
+///
+/// The child runs `handle_child_process`, which never returns. The parent continues in
+/// `handle_parent_process` and returns its result.
+///
+/// # Errors
+///
+/// Returns a `PrepareError::Kernel` if the fork itself fails. In that case both pipe ends are
+/// closed here, since no child exists and `handle_parent_process` never runs to take care of
+/// them.
+fn handle_fork(
+	pvf: &PvfPrepData,
+	pipe_write_fd: i32,
+	pipe_read_fd: i32,
+	stream_fd: i32,
+	preparation_timeout: Duration,
+	prepare_job_kind: PrepareJobKind,
+	executor_params: &Arc<ExecutorParams>,
+	worker_info: &WorkerInfo,
+	temp_artifact_dest: &Path,
+	usage_before: Usage,
+) -> Result<PrepareWorkerSuccess, PrepareError> {
+	// SAFETY: new process is spawned within a single threaded process. This invariant
+	// is enforced by tests.
+	match unsafe { nix::unistd::fork() } {
+		Ok(ForkResult::Child) => handle_child_process(
+			pvf.clone(),
+			pipe_write_fd,
+			pipe_read_fd,
+			stream_fd,
+			preparation_timeout,
+			prepare_job_kind,
+			Arc::clone(executor_params),
+		),
+		Ok(ForkResult::Parent { child }) => handle_parent_process(
+			pipe_read_fd,
+			pipe_write_fd,
+			worker_info,
+			child,
+			temp_artifact_dest,
+			usage_before,
+			preparation_timeout,
+		),
+		Err(errno) => {
+			// The pipe ends are raw descriptors; without a child nobody else closes them, and
+			// the worker keeps serving requests after this error. Closing is best effort: the
+			// fork failure is the error worth reporting.
+			let _ = nix::unistd::close(pipe_read_fd);
+			let _ = nix::unistd::close(pipe_write_fd);
+			Err(error_from_errno("fork", errno))
+		},
+	}
+}
+
+/// Body of the job (child) process spawned by the prepare worker.
+///
+/// It prepares the artifact, tracks memory stats during preparation and pipes the response
+/// back to the parent process. This function never returns: every path ends in
+/// `send_child_response`, which exits the process.
+///
+/// # Returns
+///
+/// - If any error occurs, a `PrepareError` is piped back.
+///
+/// - On success, a `JobResponse` is piped back.
+fn handle_child_process(
+	pvf: PvfPrepData,
+	pipe_write_fd: i32,
+	pipe_read_fd: i32,
+	stream_fd: i32,
+	preparation_timeout: Duration,
+	prepare_job_kind: PrepareJobKind,
+	executor_params: Arc<ExecutorParams>,
+) -> ! {
+	// SAFETY: pipe_writer is an open and owned file descriptor at this point.
+	let mut pipe_write = unsafe { PipeFd::from_raw_fd(pipe_write_fd) };
+
+	// Drop the read end so we don't have too many FDs open.
+	if let Err(errno) = nix::unistd::close(pipe_read_fd) {
+		send_child_response(
+			&mut pipe_write,
+			JobResult::Err(error_from_errno("closing pipe", errno)),
+		);
+	}
+
+	// Dropping the stream closes the underlying socket. We want to make sure
+	// that the sandboxed child can't get any kind of information from the
+	// outside world. The only IPC it should be able to do is sending its
+	// response over the pipe.
+	if let Err(errno) = nix::unistd::close(stream_fd) {
+		send_child_response(
+			&mut pipe_write,
+			JobResult::Err(error_from_errno("error closing stream", errno)),
+		);
+	}
+
+	let worker_job_pid = process::id();
+	gum::debug!(
+		target: LOG_TARGET,
+		%worker_job_pid,
+		?prepare_job_kind,
+		?preparation_timeout,
+		"worker job: preparing artifact",
+	);
+
+	// Conditional variable to notify us when a thread is done.
+	let condvar = thread::get_condvar();
+
+	// Run the memory tracker in a regular, non-worker thread.
+	#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+	let condvar_memory = Arc::clone(&condvar);
+	#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+	let memory_tracker_thread = std::thread::spawn(|| memory_tracker_loop(condvar_memory));
+
+	start_memory_tracking(
+		pipe_write.as_raw_fd(),
+		executor_params.prechecking_max_memory().map(|v| {
+			v.try_into().unwrap_or_else(|_| {
+				// Log under the worker's target like every other message in this file, so
+				// that filtering on `LOG_TARGET` does not hide this warning.
+				gum::warn!(
+					target: LOG_TARGET,
+					%worker_job_pid,
+					"Illegal pre-checking max memory value {} discarded",
+					v,
+				);
+				0
+			})
+		}),
+	);
+
+	let cpu_time_start = ProcessTime::now();
+
+	// Spawn a new thread that runs the CPU time monitor.
+	let (cpu_time_monitor_tx, cpu_time_monitor_rx) = channel::<()>();
+	let cpu_time_monitor_thread = thread::spawn_worker_thread(
+		"cpu time monitor thread",
+		move || cpu_time_monitor_loop(cpu_time_start, preparation_timeout, cpu_time_monitor_rx),
+		Arc::clone(&condvar),
+		WaitOutcome::TimedOut,
+	)
+	.unwrap_or_else(|err| {
+		send_child_response(&mut pipe_write, Err(PrepareError::IoErr(err.to_string())))
+	});
+
+	let prepare_thread = spawn_worker_thread(
+		"prepare worker",
+		move || {
+			#[allow(unused_mut)]
+			let mut result = prepare_artifact(pvf).map(|o| (o,));
+
+			// Get the `ru_maxrss` stat. If supported, call getrusage for the thread.
+			#[cfg(target_os = "linux")]
+			let mut result = result.map(|outcome| (outcome.0, get_max_rss_thread()));
+
+			// If we are pre-checking, check for runtime construction errors.
+			//
+			// As pre-checking is more strict than just preparation in terms of memory
+			// and time, it is okay to do extra checks here. This takes negligible time
+			// anyway.
+			if let PrepareJobKind::Prechecking = prepare_job_kind {
+				result = result.and_then(|output| {
+					runtime_construction_check(
+						output.0.compiled_artifact.as_ref(),
+						&executor_params,
+					)?;
+					Ok(output)
+				});
+			}
+			result
+		},
+		Arc::clone(&condvar),
+		WaitOutcome::Finished,
+	)
+	.unwrap_or_else(|err| {
+		send_child_response(&mut pipe_write, Err(PrepareError::IoErr(err.to_string())))
+	});
+
+	let outcome = thread::wait_for_threads(condvar);
+
+	let peak_alloc = {
+		let peak = end_memory_tracking();
+		gum::debug!(
+			target: LOG_TARGET,
+			%worker_job_pid,
+			"prepare job peak allocation is {} bytes",
+			peak,
+		);
+		peak
+	};
+
+	let result = match outcome {
+		WaitOutcome::Finished => {
+			// The CPU thread was not selected: we signal it to end, the join handle is
+			// dropped and the thread will finish in the background.
+			let _ = cpu_time_monitor_tx.send(());
+
+			match prepare_thread.join().unwrap_or_else(|err| {
+				send_child_response(
+					&mut pipe_write,
+					Err(PrepareError::JobError(stringify_panic_payload(err))),
+				)
+			}) {
+				Err(err) => Err(err),
+				Ok(ok) => {
+					cfg_if::cfg_if! {
+						if #[cfg(target_os = "linux")] {
+							let (PrepareOutcome { compiled_artifact, observed_wasm_code_len }, max_rss) = ok;
+						} else {
+							let (PrepareOutcome { compiled_artifact, observed_wasm_code_len },) = ok;
+						}
+					}
+
+					// Stop the memory stats worker and get its observed memory stats.
+					#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+					let memory_tracker_stats = get_memory_tracker_loop_stats(memory_tracker_thread, process::id());
+
+					let memory_stats = MemoryStats {
+						#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+						memory_tracker_stats,
+						#[cfg(target_os = "linux")]
+						max_rss: extract_max_rss_stat(max_rss, process::id()),
+						// Negative peak allocation values are legit; they are narrow
+						// corner cases and shouldn't affect overall statistics
+						// significantly
+						peak_tracked_alloc: if peak_alloc > 0 { peak_alloc as u64 } else { 0u64 },
+					};
+
+					Ok(JobResponse {
+						artifact: compiled_artifact,
+						observed_wasm_code_len,
+						memory_stats,
+					})
+				},
+			}
+		},
+
+		// The CPU time monitor thread finished first; its result tells us whether the job
+		// really timed out.
+		WaitOutcome::TimedOut => match cpu_time_monitor_thread.join() {
+			Ok(Some(_cpu_time_elapsed)) => Err(PrepareError::TimedOut),
+			Ok(None) => Err(PrepareError::IoErr("error communicating over closed channel".into())),
+			Err(err) => Err(PrepareError::IoErr(stringify_panic_payload(err))),
+		},
+		WaitOutcome::Pending => {
+			unreachable!("we run wait_while until the outcome is no longer pending; qed")
+		},
+	};
+
+	send_child_response(&mut pipe_write, result);
+}
+
+/// Waits for the child process to finish and handles the child's response from the pipe.
+///
+/// The write end of the pipe is closed first, then everything the child wrote is read, the
+/// child is reaped with `waitpid`, and its CPU time is derived from `getrusage` deltas.
+///
+/// # Returns
+///
+/// - If the child sends a response without an error, this function writes the artifact to
+///   `temp_artifact_dest` and returns `Ok(PrepareWorkerSuccess)` containing the artifact
+///   checksum plus memory and CPU usage statistics.
+///
+/// - If the child sends a response with an error, it returns a `PrepareError` with that error.
+///
+/// - If the child process exceeded `timeout` in CPU time, it returns `PrepareError::TimedOut`.
+///
+/// - If the child was killed by a signal or ended with an unexpected wait status, it returns
+///   `PrepareError::JobDied`.
+fn handle_parent_process(
+	pipe_read_fd: i32,
+	pipe_write_fd: i32,
+	worker_info: &WorkerInfo,
+	job_pid: Pid,
+	temp_artifact_dest: &Path,
+	usage_before: Usage,
+	timeout: Duration,
+) -> Result<PrepareWorkerSuccess, PrepareError> {
+	// the read end will wait until all write ends have been closed,
+	// this drop is necessary to avoid deadlock
+	if let Err(errno) = nix::unistd::close(pipe_write_fd) {
+		return Err(error_from_errno("closing pipe write fd", errno));
+	};
+
+	// SAFETY: this is an open and owned file descriptor at this point.
+	let mut pipe_read = unsafe { PipeFd::from_raw_fd(pipe_read_fd) };
+
+	// Read from the child. Don't decode unless the process exited normally, which we check later.
+	let mut received_data = Vec::new();
+	pipe_read
+		.read_to_end(&mut received_data)
+		.map_err(|err| PrepareError::IoErr(err.to_string()))?;
+
+	let status = nix::sys::wait::waitpid(job_pid, None);
+	gum::trace!(
+		target: LOG_TARGET,
+		?worker_info,
+		%job_pid,
+		"prepare worker received wait status from job: {:?}",
+		status,
+	);
+
+	let usage_after = nix::sys::resource::getrusage(UsageWho::RUSAGE_CHILDREN)
+		.map_err(|errno| error_from_errno("getrusage after", errno))?;
+
+	// Using `getrusage` is needed to check whether the child has timed out, since we cannot rely
+	// on the child to report its own time.
+	// As `getrusage` returns resource usage from all terminated child processes,
+	// it is necessary to subtract the usage before the current child process to isolate its cpu
+	// time
+	let cpu_tv = get_total_cpu_usage(usage_after) - get_total_cpu_usage(usage_before);
+	if cpu_tv >= timeout {
+		gum::warn!(
+			target: LOG_TARGET,
+			?worker_info,
+			%job_pid,
+			"prepare job took {}ms cpu time, exceeded prepare timeout {}ms",
+			cpu_tv.as_millis(),
+			timeout.as_millis(),
+		);
+		return Err(PrepareError::TimedOut);
+	}
+
+	match status {
+		Ok(WaitStatus::Exited(_pid, exit_status)) => {
+			let mut reader = io::BufReader::new(received_data.as_slice());
+			let result = recv_child_response(&mut reader, "prepare")
+				.map_err(|err| PrepareError::JobError(err.to_string()))?;
+
+			match result {
+				Err(err) => Err(err),
+				Ok(JobResponse { artifact, memory_stats, observed_wasm_code_len }) => {
+					// The exit status should have been zero if no error occurred.
+					if exit_status != 0 {
+						return Err(PrepareError::JobError(format!(
+							"unexpected exit status: {}",
+							exit_status
+						)));
+					}
+
+					// Write the serialized artifact into a temp file.
+					//
+					// PVF host only keeps artifacts statuses in its memory,
+					// successfully compiled code gets stored on the disk (and
+					// consequently deserialized by execute-workers). The prepare worker
+					// is only required to send `Ok` to the pool to indicate the
+					// success.
+					gum::debug!(
+						target: LOG_TARGET,
+						?worker_info,
+						%job_pid,
+						"worker: writing artifact to {}",
+						temp_artifact_dest.display(),
+					);
+					// Write to the temp file created by the host.
+					if let Err(err) = fs::write(temp_artifact_dest, &artifact) {
+						return Err(PrepareError::IoErr(err.to_string()));
+					};
+
+					let checksum = compute_checksum(&artifact.as_ref());
+					Ok(PrepareWorkerSuccess {
+						checksum,
+						stats: PrepareStats {
+							memory_stats,
+							cpu_time_elapsed: cpu_tv,
+							observed_wasm_code_len,
+						},
+					})
+				},
+			}
+		},
+		// The job was killed by the given signal.
+		//
+		// The job gets SIGSYS on seccomp violations, but this signal may have been sent for some
+		// other reason, so we still need to check for seccomp violations elsewhere.
+		Ok(WaitStatus::Signaled(_pid, signal, _core_dump)) => Err(PrepareError::JobDied {
+			err: format!("received signal: {signal:?}"),
+			job_pid: job_pid.as_raw(),
+		}),
+		Err(errno) => Err(error_from_errno("waitpid", errno)),
+
+		// An attacker can make the child process return any exit status it wants. So we can treat
+		// all unexpected cases the same way.
+		Ok(unexpected_wait_status) => Err(PrepareError::JobDied {
+			err: format!("unexpected status from wait: {unexpected_wait_status:?}"),
+			job_pid: job_pid.as_raw(),
+		}),
+	}
+}
+
+/// Encodes `response`, sends it as one frame over the pipe and then terminates the process.
+///
+/// The process exits with `EXIT_SUCCESS` only if the frame was written and `response` is `Ok`;
+/// in every other case it exits with `EXIT_FAILURE`.
+///
+/// # Arguments
+///
+/// - `pipe_write`: A `PipeFd` structure, the writing end of a pipe.
+///
+/// - `response`: Child process response
+fn send_child_response(pipe_write: &mut PipeFd, response: JobResult) -> ! {
+	let encoded = response.encode();
+	if framed_send_blocking(pipe_write, encoded.as_slice()).is_err() {
+		process::exit(libc::EXIT_FAILURE);
+	}
+
+	let exit_code = if response.is_ok() { libc::EXIT_SUCCESS } else { libc::EXIT_FAILURE };
+	process::exit(exit_code)
+}
+
+/// Builds a `PrepareError::Kernel` from an `errno` and a short description of the operation
+/// (`context`) that produced it.
+fn error_from_errno(context: &'static str, errno: Errno) -> PrepareError {
+	let message = stringify_errno(context, errno);
+	PrepareError::Kernel(message)
+}
+
+/// What the job process pipes back to the worker: a `JobResponse` on success, a `PrepareError`
+/// otherwise.
+type JobResult = Result<JobResponse, PrepareError>;
+
+/// Pre-encoded length-prefixed `JobResult::Err(PrepareError::OutOfMemory)`
+///
+/// Layout: the length of the encoded value as little-endian bytes, followed by the encoded
+/// value itself. The `pre_encoded_payloads` test keeps these bytes in sync with the real
+/// encoding.
+const OOM_PAYLOAD: &[u8] = b"\x02\x00\x00\x00\x00\x00\x00\x00\x01\x08";
+
+#[test]
+fn pre_encoded_payloads() {
+	// NOTE: This must match the type of `response` in `send_child_response`.
+	let response: JobResult = JobResult::Err(PrepareError::OutOfMemory);
+	let encoded = response.encode();
+
+	// `framed_send` prefixes the payload with its length.
+	let mut expected = encoded.len().to_le_bytes().to_vec();
+	expected.extend(encoded);
+
+	assert_eq!(expected, OOM_PAYLOAD);
+}
@@ -0,0 +1,196 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Memory stats for preparation.
+//!
+//! Right now we gather three measurements:
+//!
+//! - `ru_maxrss` (resident set size) from `getrusage`.
+//! - `resident` memory stat provided by `tikv-jemalloc-ctl`.
+//! - `allocated` memory stat also from `tikv-jemalloc-ctl`.
+//!
+//! Currently we are only logging these for the purposes of gathering data. In the future, we may
+//! use these stats to reject PVFs during pre-checking. See
+//! <https://github.com/paritytech/polkadot/issues/6472#issuecomment-1381941762> for more
+//! background.
+
+/// Module for the memory tracker. The memory tracker runs in its own thread, where it polls
+/// jemalloc's memory statistics at an interval and keeps the maximum observed values.
+///
+/// NOTE: Requires jemalloc enabled.
+#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+pub mod memory_tracker {
+	use crate::LOG_TARGET;
+	use pezkuwi_node_core_pvf_common::{
+		prepare::MemoryAllocationStats,
+		worker::{stringify_panic_payload, thread},
+	};
+	use std::{thread::JoinHandle, time::Duration};
+	use tikv_jemalloc_ctl::{epoch, stats, Error};
+
+	/// Handles to the jemalloc statistics read on every snapshot.
+	#[derive(Clone)]
+	struct MemoryAllocationTracker {
+		epoch: tikv_jemalloc_ctl::epoch_mib,
+		allocated: stats::allocated_mib,
+		resident: stats::resident_mib,
+	}
+
+	impl MemoryAllocationTracker {
+		/// Resolves the `epoch`, `stats.allocated` and `stats.resident` handles.
+		pub fn new() -> Result<Self, Error> {
+			let epoch = epoch::mib()?;
+			let allocated = stats::allocated::mib()?;
+			let resident = stats::resident::mib()?;
+			Ok(Self { epoch, allocated, resident })
+		}
+
+		/// Advances the allocation epoch, which refreshes the stats, and reads the current
+		/// values.
+		pub fn snapshot(&self) -> Result<MemoryAllocationStats, Error> {
+			self.epoch.advance()?;
+
+			// Stored as `u64`, as `usize` is not `Encode`able.
+			Ok(MemoryAllocationStats {
+				allocated: self.allocated.read()? as u64,
+				resident: self.resident.read()? as u64,
+			})
+		}
+	}
+
+	/// Runs a thread in the background that observes memory statistics. The goal is to try to get
+	/// accurate stats during preparation.
+	///
+	/// # Algorithm
+	///
+	/// 1. Create the memory tracker.
+	///
+	/// 2. Take a snapshot, then sleep for some short interval; repeat on every wake-up.
+	///
+	/// 3. When we are notified that preparation has completed, take one last snapshot and return
+	///    the maximum observed values.
+	///
+	/// # Errors
+	///
+	/// For simplicity, any errors are returned as a string. As this is not a critical component,
+	/// errors are used for informational purposes (logging) only.
+	pub fn memory_tracker_loop(condvar: thread::Cond) -> Result<MemoryAllocationStats, String> {
+		// NOTE: This doesn't need to be too fine-grained since preparation currently takes 3-10s or
+		// more. Apart from that, there is not really a science to this number.
+		const POLL_INTERVAL: Duration = Duration::from_millis(100);
+
+		let tracker = MemoryAllocationTracker::new().map_err(|err| err.to_string())?;
+		let mut max_stats = MemoryAllocationStats::default();
+
+		// Folds a fresh snapshot into the running maxima.
+		let mut update_stats = || -> Result<(), String> {
+			let current = tracker.snapshot().map_err(|err| err.to_string())?;
+			max_stats.resident = max_stats.resident.max(current.resident);
+			max_stats.allocated = max_stats.allocated.max(current.allocated);
+			Ok(())
+		};
+
+		loop {
+			update_stats()?;
+
+			// Sleep for the poll interval, or wake up if the condvar is triggered. Note that
+			// `wait_timeout_while` is documented as not being very precise or reliable, which is
+			// fine here -- see note above.
+			let notified = thread::wait_for_threads_with_timeout(&condvar, POLL_INTERVAL);
+			if notified.is_some() {
+				// Preparation is over: one final snapshot, then report.
+				update_stats()?;
+				return Ok(max_stats);
+			}
+		}
+	}
+
+	/// Helper function to get the stats from the memory tracker. Helps isolate this error handling.
+	///
+	/// Joins the tracker thread; any failure (an error returned by the loop, or a panic in the
+	/// thread) is logged as a warning and turned into `None`.
+	pub fn get_memory_tracker_loop_stats(
+		thread: JoinHandle<Result<MemoryAllocationStats, String>>,
+		worker_pid: u32,
+	) -> Option<MemoryAllocationStats> {
+		let loop_result = match thread.join() {
+			Ok(loop_result) => loop_result,
+			Err(panic_payload) => {
+				gum::warn!(
+					target: LOG_TARGET,
+					%worker_pid,
+					"worker: error joining on memory tracker thread: {}", stringify_panic_payload(panic_payload)
+				);
+				return None;
+			},
+		};
+
+		match loop_result {
+			Ok(stats) => Some(stats),
+			Err(err) => {
+				gum::warn!(
+					target: LOG_TARGET,
+					%worker_pid,
+					"worker: error occurred in the memory tracker thread: {}", err
+				);
+				None
+			},
+		}
+	}
+}
+
+/// Module for dealing with the `ru_maxrss` (peak resident memory) stat from `getrusage`.
+///
+/// NOTE: `getrusage` with the `RUSAGE_THREAD` parameter is only supported on Linux. `RUSAGE_SELF`
+/// works on MacOS, but we need to get the max rss only for the preparation thread. Getting it for
+/// the current process would conflate the stats of previous jobs run by the process.
+#[cfg(target_os = "linux")]
+pub mod max_rss_stat {
+	use crate::LOG_TARGET;
+	use core::mem::MaybeUninit;
+	use libc::{getrusage, rusage, RUSAGE_THREAD};
+	use std::io;
+
+	/// Calls `getrusage(RUSAGE_THREAD, ..)` and returns the stats for the current thread.
+	fn getrusage_thread() -> io::Result<rusage> {
+		let mut usage = MaybeUninit::<rusage>::zeroed();
+
+		// SAFETY: `usage` is a valid pointer, so calling this is safe.
+		let ret = unsafe { getrusage(RUSAGE_THREAD, usage.as_mut_ptr()) };
+		if ret == -1 {
+			return Err(io::Error::last_os_error());
+		}
+
+		// SAFETY: `usage` was successfully initialized by `getrusage`.
+		Ok(unsafe { usage.assume_init() })
+	}
+
+	/// Gets the `ru_maxrss` for the current thread.
+	pub fn get_max_rss_thread() -> io::Result<i64> {
+		let usage = getrusage_thread()?;
+		// `c_long` is either `i32` or `i64` depending on architecture. `i64::from` always works.
+		Ok(i64::from(usage.ru_maxrss))
+	}
+
+	/// Extracts the max_rss stat, logging a warning and yielding `None` on error.
+	pub fn extract_max_rss_stat(max_rss: io::Result<i64>, worker_pid: u32) -> Option<i64> {
+		match max_rss {
+			Ok(value) => Some(value),
+			Err(err) => {
+				gum::warn!(
+					target: LOG_TARGET,
+					%worker_pid,
+					"error getting `ru_maxrss` in preparation thread: {}",
+					err
+				);
+				None
+			},
+		}
+	}
+}
@@ -0,0 +1,474 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! PVF artifacts (final compiled code blobs).
+//!
+//! # Lifecycle of an artifact
+//!
+//! 1. During node start-up, we prune all the cached artifacts, if any.
+//!
+//! 2. In order to be executed, a PVF should be prepared first. This means that artifacts should
+//!    have an [`ArtifactState::Prepared`] entry for that artifact in the table. If not, the
+//!    preparation process kicks in. The execution request is stashed until after the preparation is
+//!    done, and the artifact state in the host is set to [`ArtifactState::Preparing`]. Preparation
+//!    goes through the preparation queue and the pool.
+//!
+//!    1. If the artifact is already being processed, we add another execution request to the
+//!       existing preparation job, without starting a new one.
+//!
+//!    2. Note that if the state is [`ArtifactState::FailedToProcess`], we usually do not retry
+//!       preparation, though we may under certain conditions.
+//!
+//! 3. The pool gets an available worker and instructs it to work on the given PVF. The worker
+//!    starts compilation. When the worker finishes successfully, it writes the serialized artifact
+//!    into a temporary file and notifies the host that it's done. The host atomically moves
+//!    (renames) the temporary file to the destination filename of the artifact.
+//!
+//! 4. If the worker concluded successfully or returned an error, then the pool notifies the queue.
+//!    In both cases, the queue reports to the host that the result is ready.
+//!
+//! 5. The host will react by changing the artifact state to either [`ArtifactState::Prepared`] or
+//!    [`ArtifactState::FailedToProcess`] for the PVF in question. On success, the
+//!    `last_time_needed` will be set to the current time. It will also dispatch the pending
+//!    execution requests.
+//!
+//! 6. On success, the execution request will come through the execution queue and ultimately be
+//!    processed by an execution worker. When this worker receives the request, it will read the
+//!    requested artifact. If it doesn't exist it reports an internal error. A request for execution
+//!    will bump the `last_time_needed` to the current time.
+//!
+//! 7. There is a separate process for pruning the prepared artifacts whose `last_time_needed` is
+//!    older by a predefined parameter. This process is run very rarely (say, once a day). Once the
+//!    artifact is expired, it is eagerly and atomically removed from disk.
+
+use crate::{host::PrecheckResultSender, worker_interface::WORKER_DIR_PREFIX};
+use always_assert::always;
+use pezkuwi_node_core_pvf_common::{error::PrepareError, pvf::PvfPrepData, ArtifactChecksum};
+use pezkuwi_primitives::ExecutorParamsPrepHash;
+use pezkuwi_teyrchain_primitives::primitives::ValidationCodeHash;
+use std::{
+	collections::HashMap,
+	fs,
+	path::{Path, PathBuf},
+	time::{Duration, SystemTime},
+};
+
+/// The extension to use for cached artifacts.
+///
+/// Files in the cache directory that carry this extension are deleted on startup by
+/// [`Artifacts::new`].
+const ARTIFACT_EXTENSION: &str = "pvf";
+
+/// The prefix that artifacts used to start with under the old naming scheme.
+///
+/// It is only used to recognise leftover files so that [`Artifacts::new`] can delete them.
+const ARTIFACT_OLD_PREFIX: &str = "wasmtime_";
+
+/// Generates a path for a new artifact inside `cache_path`.
+///
+/// The file name is built from 64 random bytes, hex-encoded with the `0x` prefix argument, and
+/// gets the artifact extension (`pvf`). Nothing is created on disk by this function.
+pub fn generate_artifact_path(cache_path: &Path) -> PathBuf {
+	use array_bytes::Hex;
+	use rand::RngCore;
+
+	let mut random_bytes = [0u8; 64];
+	rand::thread_rng().fill_bytes(&mut random_bytes);
+
+	let mut path = cache_path.join(random_bytes.hex("0x"));
+	path.set_extension(ARTIFACT_EXTENSION);
+	path
+}
+
+/// Identifier of an artifact. Encodes a code hash of the PVF and a hash of preparation-related
+/// executor parameter set.
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct ArtifactId {
+	/// Hash of the PVF's validation code.
+	pub(crate) code_hash: ValidationCodeHash,
+	/// Hash of the preparation-related executor parameters.
+	pub(crate) executor_params_prep_hash: ExecutorParamsPrepHash,
+}
+
+impl ArtifactId {
+	/// Builds an artifact ID from the validation code hash and the hash of the
+	/// preparation-related executor parameters.
+	pub fn new(
+		code_hash: ValidationCodeHash,
+		executor_params_prep_hash: ExecutorParamsPrepHash,
+	) -> Self {
+		ArtifactId { code_hash, executor_params_prep_hash }
+	}
+
+	/// Derives the artifact ID for the given PVF from its code hash and the preparation hash of
+	/// its executor parameters.
+	pub fn from_pvf_prep_data(pvf: &PvfPrepData) -> Self {
+		let code_hash = pvf.code_hash();
+		let executor_params_prep_hash = pvf.executor_params().prep_hash();
+		Self { code_hash, executor_params_prep_hash }
+	}
+}
+
+/// A bundle of the artifact ID and the path.
+///
+/// Rationale for having this is two-fold:
+///
+/// - While we can derive the artifact path from the artifact id, it makes sense to carry it around
+///   sometimes to avoid extra work.
+/// - At the same time, carrying only the path would limit what we are able to log.
+#[derive(Debug, Clone)]
+pub struct ArtifactPathId {
+	/// The ID of the artifact.
+	pub(crate) id: ArtifactId,
+	/// The path of the artifact on disk.
+	pub(crate) path: PathBuf,
+	/// The checksum of the artifact.
+	pub(crate) checksum: ArtifactChecksum,
+}
+
+impl ArtifactPathId {
+	/// Bundles the artifact ID, an owned copy of `path` and the checksum.
+	pub(crate) fn new(artifact_id: ArtifactId, path: &Path, checksum: ArtifactChecksum) -> Self {
+		let path = path.to_path_buf();
+		Self { id: artifact_id, path, checksum }
+	}
+}
+
+/// The state of a single artifact as tracked in the [`Artifacts`] table.
+#[derive(Debug)]
+pub enum ArtifactState {
+	/// The artifact is ready to be used by the executor.
+	///
+	/// That means that the artifact should be accessible through the path obtained by the artifact
+	/// id (unless it was removed externally).
+	Prepared {
+		/// The checksum of the compiled artifact.
+		checksum: ArtifactChecksum,
+		/// The path of the compiled artifact.
+		path: PathBuf,
+		/// The time when the artifact was last needed.
+		///
+		/// This is updated when we get the heads up for this artifact or when we just discover
+		/// this file.
+		last_time_needed: SystemTime,
+		/// Size in bytes.
+		size: u64,
+	},
+	/// A task to prepare this artifact is scheduled.
+	Preparing {
+		/// List of result senders that are waiting for a response.
+		waiting_for_response: Vec<PrecheckResultSender>,
+		/// The number of times this artifact has failed to prepare.
+		num_failures: u32,
+	},
+	/// The code couldn't be compiled due to an error. Such artifacts
+	/// never reach the executor and stay in the host's memory.
+	FailedToProcess {
+		/// Keep track of the last time that processing this artifact failed.
+		last_time_failed: SystemTime,
+		/// The number of times this artifact has failed to prepare.
+		num_failures: u32,
+		/// The last error encountered for preparation.
+		error: PrepareError,
+	},
+}
+
+/// A container of all known artifact ids and their states.
+pub struct Artifacts {
+	// Maps each known artifact ID to its current state.
+	inner: HashMap<ArtifactId, ArtifactState>,
+}
+
+/// Parameters we use to clean up artifacts.
+///
+/// After we hit the cache limit we remove the least recently needed artifacts, but only if they
+/// have been stale for at least the minimum stale time.
+#[derive(Debug)]
+pub struct ArtifactsCleanupConfig {
+	// Max total size of the prepared artifacts in bytes. Once it is exceeded, the least recently
+	// needed artifacts are deleted.
+	cache_limit: u64,
+	// Inactive time after which an artifact is allowed to be deleted.
+	min_stale_time: Duration,
+}
+
+impl Default for ArtifactsCleanupConfig {
+	/// Defaults to a cache limit of 10 GiB and a minimum stale time of 24 hours.
+	fn default() -> Self {
+		const GIB: u64 = 1024 * 1024 * 1024;
+		const SECS_PER_DAY: u64 = 24 * 60 * 60;
+
+		let cache_limit = 10 * GIB;
+		let min_stale_time = Duration::from_secs(SECS_PER_DAY);
+		Self { cache_limit, min_stale_time }
+	}
+}
+
+#[cfg(test)]
+impl ArtifactsCleanupConfig {
+	/// Test-only constructor with an explicit cache limit (in bytes) and minimum stale time.
+	pub fn new(cache_limit: u64, min_stale_time: Duration) -> Self {
+		Self { cache_limit, min_stale_time }
+	}
+}
+
+impl Artifacts {
+	/// Test-only: creates an empty table without touching the file system.
+	#[cfg(test)]
+	pub(crate) fn empty() -> Self {
+		Self { inner: HashMap::new() }
+	}
+
+	/// Test-only: returns the number of artifacts in the table, regardless of their state.
+	#[cfg(test)]
+	fn len(&self) -> usize {
+		self.inner.len()
+	}
+
+	/// Test-only: returns a copy of every artifact ID in the table, in unspecified order.
+	#[cfg(test)]
+	fn artifact_ids(&self) -> Vec<ArtifactId> {
+		self.inner.keys().cloned().collect()
+	}
+
+	/// Replaces `checksum` with `new_checksum` in every prepared artifact that currently has it.
+	///
+	/// Artifacts in other states are left untouched.
+	#[cfg(feature = "test-utils")]
+	pub fn replace_artifact_checksum(
+		&mut self,
+		checksum: ArtifactChecksum,
+		new_checksum: ArtifactChecksum,
+	) {
+		for state in self.inner.values_mut() {
+			match state {
+				ArtifactState::Prepared { checksum: current, .. } if *current == checksum =>
+					*current = new_checksum,
+				_ => {},
+			}
+		}
+	}
+
+	/// Creates an empty table, making sure the cache directory exists and is free of leftovers.
+	///
+	/// Worker directories and artifact files (current extension or old naming prefix) found
+	/// directly inside `cache_path` are deleted. All file system errors are ignored.
+	pub async fn new(cache_path: &Path) -> Self {
+		// Create the cache directory together with any missing parents.
+		let _ = tokio::fs::create_dir_all(cache_path).await;
+
+		// Best-effort removal of whatever previous runs left behind. Only known entries are
+		// deleted rather than the whole directory, in case the user pointed the cache path at
+		// e.g. their home directory by mistake.
+		//
+		// NOTE(review): the scan below uses blocking `std::fs` calls inside an async fn.
+		if let Ok(entries) = fs::read_dir(cache_path) {
+			for entry in entries.flatten() {
+				let path = entry.path();
+				let Some(file_name) = path.file_name().and_then(|f| f.to_str()) else { continue };
+
+				if path.is_dir() && file_name.starts_with(WORKER_DIR_PREFIX) {
+					let _ = fs::remove_dir_all(&path);
+					continue;
+				}
+
+				let has_artifact_extension =
+					path.extension().map_or(false, |ext| ext == ARTIFACT_EXTENSION);
+				if has_artifact_extension || file_name.starts_with(ARTIFACT_OLD_PREFIX) {
+					let _ = fs::remove_file(&path);
+				}
+			}
+		}
+
+		Self { inner: HashMap::new() }
+	}
+
+	/// Returns a mutable reference to the state of the given artifact by its ID, or `None` if the
+	/// artifact is not in the table.
+	pub fn artifact_state_mut(&mut self, artifact_id: &ArtifactId) -> Option<&mut ArtifactState> {
+		self.inner.get_mut(artifact_id)
+	}
+
+	/// Registers the artifact with the given ID in the "preparing" state with a failure count of
+	/// zero.
+	///
+	/// This function must be used only for brand-new artifacts and should never be used for
+	/// replacing existing ones.
+	pub fn insert_preparing(
+		&mut self,
+		artifact_id: ArtifactId,
+		waiting_for_response: Vec<PrecheckResultSender>,
+	) {
+		let state = ArtifactState::Preparing { waiting_for_response, num_failures: 0 };
+		let previous = self.inner.insert(artifact_id, state);
+		// See the precondition: there must not have been an entry for this ID.
+		always!(previous.is_none());
+	}
+
+	/// Test-only: registers the artifact with the given ID in the "prepared" state.
+	///
+	/// The ID must not be present in the table yet.
+	#[cfg(test)]
+	pub(crate) fn insert_prepared(
+		&mut self,
+		artifact_id: ArtifactId,
+		path: PathBuf,
+		checksum: ArtifactChecksum,
+		last_time_needed: SystemTime,
+		size: u64,
+	) {
+		let state = ArtifactState::Prepared { path, checksum, last_time_needed, size };
+		let previous = self.inner.insert(artifact_id, state);
+		// See the precondition: there must not have been an entry for this ID.
+		always!(previous.is_none());
+	}
+
+	/// Removes the artifact with the given ID from the table.
+	///
+	/// The entry is dropped whatever its state is, but the ID and path are only returned when the
+	/// artifact was prepared. In every other case the result is `None`.
+	pub fn remove(&mut self, artifact_id: ArtifactId) -> Option<(ArtifactId, PathBuf)> {
+		match self.inner.remove(&artifact_id)? {
+			ArtifactState::Prepared { path, .. } => Some((artifact_id, path)),
+			_ => None,
+		}
+	}
+
+	/// Evicts prepared artifacts while their total size exceeds the cache limit.
+	///
+	/// Artifacts are evicted starting from the least recently needed one. Eviction stops as soon
+	/// as the total size fits the limit or the next candidate has been stale for less than the
+	/// minimum stale time. Returns the id and path of every artifact removed from the table.
+	pub fn prune(&mut self, cleanup_config: &ArtifactsCleanupConfig) -> Vec<(ArtifactId, PathBuf)> {
+		let now = SystemTime::now();
+
+		// Snapshot of all prepared artifacts: (id, path, size, last_time_needed).
+		let mut candidates: Vec<_> = self
+			.inner
+			.iter()
+			.filter_map(|(id, state)| match state {
+				ArtifactState::Prepared { path, last_time_needed, size, .. } =>
+					Some((id.clone(), path.clone(), *size, *last_time_needed)),
+				_ => None,
+			})
+			.collect();
+		let mut total_size: u64 = candidates.iter().map(|(_, _, size, _)| *size).sum();
+
+		// Most recently needed first, so that `pop` yields the least recently needed artifact.
+		candidates.sort_by(|a, b| b.3.cmp(&a.3));
+
+		let mut pruned = Vec::new();
+		while total_size > cleanup_config.cache_limit {
+			let Some((artifact_id, path, size, last_time_needed)) = candidates.pop() else {
+				break;
+			};
+
+			// A timestamp later than `now` makes `duration_since` fail; such an artifact is
+			// treated as recently used.
+			let stale_enough = matches!(
+				now.duration_since(last_time_needed),
+				Ok(stale_time) if stale_time >= cleanup_config.min_stale_time
+			);
+			if !stale_enough {
+				break;
+			}
+
+			self.inner.remove(&artifact_id);
+			pruned.push((artifact_id, path));
+			total_size -= size;
+		}
+
+		pruned
+	}
+}
+
+#[cfg(test)]
+mod tests {
+	use crate::testing::artifact_id;
+
+	use super::*;
+
+	/// Checks that `Artifacts::new` deletes leftover artifacts and worker dirs from the cache
+	/// directory, leaves unrelated entries alone and returns an empty table.
+	#[tokio::test]
+	async fn cache_cleared_on_startup() {
+		let tempdir = tempfile::tempdir().unwrap();
+		let cache_path = tempdir.path();
+
+		// These should be cleared.
+		fs::write(cache_path.join("abcd.pvf"), "test").unwrap();
+		fs::write(cache_path.join("wasmtime_..."), "test").unwrap();
+		// Presumably matches `WORKER_DIR_PREFIX`, which is defined outside this module.
+		fs::create_dir(cache_path.join("worker-dir-prepare-test")).unwrap();
+
+		// These should not be touched.
+		fs::write(cache_path.join("abcd.pvfartifact"), "test").unwrap();
+		fs::write(cache_path.join("pezkuwi_..."), "test").unwrap();
+		fs::create_dir(cache_path.join("worker-prepare-test")).unwrap();
+
+		let artifacts = Artifacts::new(cache_path).await;
+
+		// Only the three untouched entries must remain on disk.
+		let entries: Vec<String> = fs::read_dir(&cache_path)
+			.unwrap()
+			.map(|entry| entry.unwrap().file_name().into_string().unwrap())
+			.collect();
+		assert_eq!(entries.len(), 3);
+		assert!(entries.contains(&String::from("abcd.pvfartifact")));
+		assert!(entries.contains(&String::from("pezkuwi_...")));
+		assert!(entries.contains(&String::from("worker-prepare-test")));
+		assert_eq!(artifacts.len(), 0);
+	}
+
+	/// With no minimum stale time, pruning removes the least recently needed artifacts until the
+	/// total size fits the cache limit.
+	#[tokio::test]
+	async fn test_pruned_by_cache_size() {
+		let mock_now = SystemTime::now();
+		let tempdir = tempfile::tempdir().unwrap();
+		let cache_path = tempdir.path();
+
+		let path1 = generate_artifact_path(cache_path);
+		let path2 = generate_artifact_path(cache_path);
+		let path3 = generate_artifact_path(cache_path);
+		let artifact_id1 = artifact_id(1);
+		let artifact_id2 = artifact_id(2);
+		let artifact_id3 = artifact_id(3);
+
+		let mut artifacts = Artifacts::new(cache_path).await;
+		// Limit of 1500 bytes: only one of the three 1024-byte artifacts fits.
+		let cleanup_config = ArtifactsCleanupConfig::new(1500, Duration::from_secs(0));
+
+		artifacts.insert_prepared(
+			artifact_id1.clone(),
+			path1.clone(),
+			Default::default(),
+			mock_now - Duration::from_secs(5),
+			1024,
+		);
+		artifacts.insert_prepared(
+			artifact_id2.clone(),
+			path2.clone(),
+			Default::default(),
+			mock_now - Duration::from_secs(10),
+			1024,
+		);
+		artifacts.insert_prepared(
+			artifact_id3.clone(),
+			path3.clone(),
+			Default::default(),
+			mock_now - Duration::from_secs(15),
+			1024,
+		);
+
+		let pruned = artifacts.prune(&cleanup_config);
+
+		// The most recently needed artifact survives, the two older ones are pruned.
+		assert!(artifacts.artifact_ids().contains(&artifact_id1));
+		assert!(!pruned.contains(&(artifact_id1, path1)));
+		assert!(!artifacts.artifact_ids().contains(&artifact_id2));
+		assert!(pruned.contains(&(artifact_id2, path2)));
+		assert!(!artifacts.artifact_ids().contains(&artifact_id3));
+		assert!(pruned.contains(&(artifact_id3, path3)));
+	}
+
+	/// Even when the cache limit is still exceeded, pruning stops at the first artifact that has
+	/// been stale for less than the minimum stale time.
+	#[tokio::test]
+	async fn test_did_not_prune_by_cache_size_because_of_stale_time() {
+		let mock_now = SystemTime::now();
+		let tempdir = tempfile::tempdir().unwrap();
+		let cache_path = tempdir.path();
+
+		let path1 = generate_artifact_path(cache_path);
+		let path2 = generate_artifact_path(cache_path);
+		let path3 = generate_artifact_path(cache_path);
+		let artifact_id1 = artifact_id(1);
+		let artifact_id2 = artifact_id(2);
+		let artifact_id3 = artifact_id(3);
+
+		let mut artifacts = Artifacts::new(cache_path).await;
+		// Minimum stale time of 12 seconds: only the artifact last needed 15 seconds ago is
+		// eligible for removal.
+		let cleanup_config = ArtifactsCleanupConfig::new(1500, Duration::from_secs(12));
+
+		artifacts.insert_prepared(
+			artifact_id1.clone(),
+			path1.clone(),
+			Default::default(),
+			mock_now - Duration::from_secs(5),
+			1024,
+		);
+		artifacts.insert_prepared(
+			artifact_id2.clone(),
+			path2.clone(),
+			Default::default(),
+			mock_now - Duration::from_secs(10),
+			1024,
+		);
+		artifacts.insert_prepared(
+			artifact_id3.clone(),
+			path3.clone(),
+			Default::default(),
+			mock_now - Duration::from_secs(15),
+			1024,
+		);
+
+		let pruned = artifacts.prune(&cleanup_config);
+
+		// Only the oldest artifact is pruned; the other two were needed too recently.
+		assert!(artifacts.artifact_ids().contains(&artifact_id1));
+		assert!(!pruned.contains(&(artifact_id1, path1)));
+		assert!(artifacts.artifact_ids().contains(&artifact_id2));
+		assert!(!pruned.contains(&(artifact_id2, path2)));
+		assert!(!artifacts.artifact_ids().contains(&artifact_id3));
+		assert!(pruned.contains(&(artifact_id3, path3)));
+	}
+}
@@ -0,0 +1,116 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+use pezkuwi_node_core_pvf_common::error::{InternalValidationError, PrepareError};
+
+/// An error raised during validation of the candidate.
+#[derive(thiserror::Error, Debug, Clone)]
+pub enum ValidationError {
+	/// Deterministic preparation issue. In practice, most of the problems should be caught by
+	/// prechecking, so this may be a sign of internal conditions.
+	///
+	/// In principle if preparation of the `WASM` fails, the current candidate cannot be the
+	/// reason for that. So we can't say whether it is invalid or not. In addition, with
+	/// pre-checking enabled only valid runtimes should ever get enacted, so we can be
+	/// reasonably sure that this is some local problem on the current node. However, as this
+	/// particular error *seems* to indicate a deterministic error, we raise a warning.
+	///
+	/// There is no `#[from]` here: the conversion from [`PrepareError`] is implemented manually,
+	/// as it has to classify the error first.
+	#[error("candidate validation: {0}")]
+	Preparation(PrepareError),
+	/// The error was raised because the candidate is invalid. Should vote against.
+	#[error("candidate validation: {0}")]
+	Invalid(#[from] InvalidCandidate),
+	/// Possibly transient issue that may resolve after retries. Should vote against when retries
+	/// fail.
+	#[error("candidate validation: {0}")]
+	PossiblyInvalid(#[from] PossiblyInvalidError),
+	/// Preparation or execution issue caused by an internal condition. Should not vote against.
+	#[error("candidate validation: internal: {0}")]
+	Internal(#[from] InternalValidationError),
+	/// The execution deadline of allowed_ancestry_len + 1 has been reached. Jobs like backing have
+	/// a limited time to execute. Once the deadline is reached, the current candidate cannot be
+	/// backed, regardless of its validity.
+	#[error("candidate validation: execution deadline has been reached.")]
+	ExecutionDeadline,
+}
+
+/// A description of an error that was raised while executing a PVF and that can be attributed to
+/// the combination of the candidate
+/// [`pezkuwi_teyrchain_primitives::primitives::ValidationParams`] and the PVF.
+///
+/// Converts into [`ValidationError::Invalid`].
+#[derive(thiserror::Error, Debug, Clone)]
+pub enum InvalidCandidate {
+	/// The candidate is reported to be invalid by the execution worker. The string contains the
+	/// error message.
+	#[error("invalid: worker reported: {0}")]
+	WorkerReportedInvalid(String),
+	/// PVF execution (compilation is not included) took more time than was allotted.
+	#[error("invalid: hard timeout")]
+	HardTimeout,
+	/// Proof-of-validity failed to decompress correctly.
+	#[error("invalid: PoV failed to decompress")]
+	PoVDecompressionFailure,
+}
+
+/// Possibly transient issue that may resolve after retries.
+///
+/// Converts into [`ValidationError::PossiblyInvalid`].
+#[derive(thiserror::Error, Debug, Clone)]
+pub enum PossiblyInvalidError {
+	/// The worker process (not the job) has died during validation of a candidate.
+	///
+	/// It's unlikely that this is caused by malicious code since workers spawn separate job
+	/// processes, and those job processes are sandboxed. But, it is possible. We retry in this
+	/// case, and if the error persists, we assume it's caused by the candidate and vote against.
+	#[error("possibly invalid: ambiguous worker death")]
+	AmbiguousWorkerDeath,
+	/// The job process (not the worker) has died for one of the following reasons:
+	///
+	/// (a) A seccomp violation occurred, most likely due to an attempt by malicious code to
+	/// execute arbitrary code. Note that there is no foolproof way to detect this if the operator
+	/// has seccomp auditing disabled.
+	///
+	/// (b) The host machine ran out of free memory and the OOM killer started killing the
+	/// processes, and in order to save the parent it will "sacrifice child" first.
+	///
+	/// (c) Some other reason, perhaps transient or perhaps caused by malicious code.
+	///
+	/// We cannot treat this as an internal error because malicious code may have caused this.
+	/// The string carries a description of the failure.
+	#[error("possibly invalid: ambiguous job death: {0}")]
+	AmbiguousJobDeath(String),
+	/// An unexpected error occurred in the job process and we can't be sure whether the candidate
+	/// is really invalid or some internal glitch occurred. Whenever we are unsure, we can never
+	/// treat an error as internal as we would abstain from voting. This is bad because if the
+	/// issue was due to the candidate, then all validators would abstain, stalling finality on the
+	/// chain. So we will first retry the candidate, and if the issue persists we are forced to
+	/// vote invalid.
+	#[error("possibly invalid: job error: {0}")]
+	JobError(String),
+	/// Instantiation of the WASM module instance failed during an execution.
+	/// Possibly related to local issues or a dirty node update. May be retried with
+	/// re-preparation.
+	#[error("possibly invalid: runtime construction: {0}")]
+	RuntimeConstruction(String),
+	/// The artifact is corrupted, re-prepare the artifact and try again.
+	#[error("possibly invalid: artifact is corrupted")]
+	CorruptedArtifact,
+}
+
+impl From<PrepareError> for ValidationError {
+	/// Classifies a preparation error as deterministic or non-deterministic.
+	///
+	/// Deterministic errors become [`ValidationError::Preparation`]; all others are wrapped into
+	/// [`ValidationError::Internal`]. See [`PrepareError::is_deterministic`].
+	fn from(error: PrepareError) -> Self {
+		if !error.is_deterministic() {
+			return Self::Internal(InternalValidationError::NonDeterministicPrepareError(error));
+		}
+
+		Self::Preparation(error)
+	}
+}
@@ -0,0 +1,26 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Execution part of the pipeline.
+//!
+//! The validation host [runs the queue][`start`] communicating with it by sending [`ToQueue`]
+//! messages. The queue will spawn workers in new processes. Those processes should jump to
+//! `pezkuwi_node_core_pvf_worker::execute_worker_entrypoint`.
+
+mod queue;
+mod worker_interface;
+
+pub use queue::{start, FromQueue, PendingExecutionRequest, ToQueue};
@@ -0,0 +1,315 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Host interface to the execute worker.
+
+use crate::{
+	artifacts::ArtifactPathId,
+	worker_interface::{
+		clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker,
+		SpawnErr, WorkerDir, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
+	},
+	LOG_TARGET,
+};
+use codec::{Decode, Encode};
+use futures::FutureExt;
+use futures_timer::Delay;
+use pezkuwi_node_core_pvf_common::{
+	error::InternalValidationError,
+	execute::{Handshake, WorkerError, WorkerResponse},
+	worker_dir, ArtifactChecksum, SecurityStatus,
+};
+use pezkuwi_node_primitives::PoV;
+use pezkuwi_primitives::{ExecutorParams, PersistedValidationData};
+use std::{path::Path, sync::Arc, time::Duration};
+use tokio::{io, net::UnixStream};
+
+/// Spawns a new execute worker from the given program path, bounded by the spawn timeout.
+///
+/// Once the worker is spawned, a handshake carrying the executor parameters is sent to it. A
+/// failure to send the handshake is logged and reported as [`SpawnErr::Handshake`].
+pub async fn spawn(
+	program_path: &Path,
+	cache_path: &Path,
+	executor_params: ExecutorParams,
+	spawn_timeout: Duration,
+	node_version: Option<&str>,
+	security_status: SecurityStatus,
+) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
+	// The node version argument is only passed to the worker when one is given.
+	let mut extra_args = vec!["execute-worker"];
+	if let Some(version) = node_version {
+		extra_args.push("--node-impl-version");
+		extra_args.push(version);
+	}
+
+	let (mut idle_worker, worker_handle) = spawn_with_program_path(
+		"execute",
+		program_path,
+		cache_path,
+		&extra_args,
+		spawn_timeout,
+		security_status,
+	)
+	.await?;
+
+	let handshake = Handshake { executor_params };
+	if let Err(error) = send_execute_handshake(&mut idle_worker.stream, handshake).await {
+		gum::warn!(
+			target: LOG_TARGET,
+			worker_pid = %idle_worker.pid,
+			"failed to send a handshake to the spawned worker: {}",
+			error
+		);
+		return Err(SpawnErr::Handshake { err: error.to_string() });
+	}
+
+	Ok((idle_worker, worker_handle))
+}
+
+/// Outcome of PVF execution.
+///
+/// PVF execution completed and the result is returned. The worker is ready for
+/// another job.
+pub struct Response {
+	/// The response (valid/invalid) from the worker.
+	pub worker_response: WorkerResponse,
+	/// The idle worker token. Returning it means the worker can be reused.
+	pub idle_worker: IdleWorker,
+}
+/// Errors that can occur while running an execution job on a worker.
+///
+/// The idle worker token is not returned for any of these cases, meaning the worker must be
+/// terminated.
+///
+/// NOTE: Errors related to the preparation process are not expected to be encountered by the
+/// execution workers.
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+	/// The execution time exceeded the hard limit. The worker is terminated.
+	#[error("The communication with the worker exceeded the hard limit")]
+	HardTimeout,
+	/// An I/O error happened during communication with the worker. This may mean that the worker
+	/// process already died. The token is not returned in any case.
+	#[error("An I/O error happened during communication with the worker: {0}")]
+	CommunicationErr(#[from] io::Error),
+	/// The worker reported an error (can be from itself or from the job). The worker should not be
+	/// reused.
+	#[error("The worker reported an error: {0}")]
+	WorkerError(#[from] WorkerError),
+
+	/// An internal error happened during the validation. Such an error is most likely related to
+	/// some transient glitch.
+	///
+	/// Should only ever be used for errors independent of the candidate and PVF. Therefore it may
+	/// be a problem with the worker, so we terminate it.
+	#[error("An internal error occurred: {0}")]
+	InternalError(#[from] InternalValidationError),
+}
+
+/// Given the idle token of a worker and parameters of work, communicates with the worker and
+/// returns the outcome.
+///
+/// On success the [`Response`] carries the worker's response together with the idle worker token.
+///
+/// # Errors
+///
+/// - [`Error::InternalError`] if the request could not be sent to the worker.
+/// - [`Error::CommunicationErr`] if the result could not be received or decoded.
+/// - [`Error::HardTimeout`] if no result arrived within `execution_timeout` multiplied by
+///   `JOB_TIMEOUT_WALL_CLOCK_FACTOR`.
+/// - [`Error::WorkerError`] if the worker reported an error, or if the reported job duration
+///   exceeded `execution_timeout`.
+///
+/// NOTE: Not returning the idle worker token in [`Response`] will trigger the child process being
+/// killed, if it's still alive.
+pub async fn start_work(
+	worker: IdleWorker,
+	artifact: ArtifactPathId,
+	execution_timeout: Duration,
+	pvd: Arc<PersistedValidationData>,
+	pov: Arc<PoV>,
+) -> Result<Response, Error> {
+	let IdleWorker { mut stream, pid, worker_dir } = worker;
+
+	gum::debug!(
+		target: LOG_TARGET,
+		worker_pid = %pid,
+		?worker_dir,
+		validation_code_hash = ?artifact.id.code_hash,
+		"starting execute for {}",
+		artifact.path.display(),
+	);
+
+	// The artifact is linked into the worker dir before the closure runs, and the worker dir is
+	// cleared again afterwards.
+	with_worker_dir_setup(worker_dir, pid, &artifact.path, |worker_dir| async move {
+		send_request(&mut stream, pvd, pov, execution_timeout, artifact.checksum)
+			.await
+			.map_err(|error| {
+				gum::warn!(
+					target: LOG_TARGET,
+					worker_pid = %pid,
+					validation_code_hash = ?artifact.id.code_hash,
+					"failed to send an execute request: {}",
+					error,
+				);
+				Error::InternalError(InternalValidationError::HostCommunication(error.to_string()))
+			})?;
+
+		// We use a generous timeout here. This is in addition to the one in the child process, in
+		// case the child stalls. We have a wall clock timeout here in the host, but a CPU timeout
+		// in the child. We want to use CPU time because it varies less than wall clock time under
+		// load, but the CPU resources of the child can only be measured from the parent after the
+		// child process terminates.
+		let timeout = execution_timeout * JOB_TIMEOUT_WALL_CLOCK_FACTOR;
+		// Race receiving the result against the wall clock timeout. Both failure paths return
+		// early, which drops `stream` and `worker_dir` instead of handing them back.
+		let worker_result = futures::select! {
+			worker_result = recv_result(&mut stream).fuse() => {
+				match worker_result {
+					Ok(result) =>
+						handle_result(
+							result,
+							pid,
+							execution_timeout,
+						)
+							.await,
+					Err(error) => {
+						gum::warn!(
+							target: LOG_TARGET,
+							worker_pid = %pid,
+							validation_code_hash = ?artifact.id.code_hash,
+							"failed to recv an execute result: {}",
+							error,
+						);
+
+						return Err(Error::CommunicationErr(error))
+					},
+				}
+			},
+			_ = Delay::new(timeout).fuse() => {
+				gum::warn!(
+					target: LOG_TARGET,
+					worker_pid = %pid,
+					validation_code_hash = ?artifact.id.code_hash,
+					"execution worker exceeded lenient timeout for execution, child worker likely stalled",
+				);
+				return Err(Error::HardTimeout)
+			},
+		};
+
+		// Only a successful response hands the idle worker token back to the caller.
+		match worker_result {
+			Ok(worker_response) => Ok(Response {
+				worker_response,
+				idle_worker: IdleWorker { stream, pid, worker_dir },
+			}),
+			Err(worker_error) => Err(worker_error.into()),
+		}
+	})
+	.await
+}
+
+/// Handles the case where we successfully received response bytes on the host from the child.
+///
+/// A successful response whose reported duration exceeds `execution_timeout` is turned into
+/// [`WorkerError::JobTimedOut`]. Any other result is passed through unchanged.
+async fn handle_result(
+	worker_result: Result<WorkerResponse, WorkerError>,
+	worker_pid: u32,
+	execution_timeout: Duration,
+) -> Result<WorkerResponse, WorkerError> {
+	match worker_result {
+		// The job didn't complete within the timeout.
+		Ok(WorkerResponse { duration, .. }) if duration > execution_timeout => {
+			gum::warn!(
+				target: LOG_TARGET,
+				worker_pid,
+				"execute job took {}ms cpu time, exceeded execution timeout {}ms.",
+				duration.as_millis(),
+				execution_timeout.as_millis(),
+			);
+
+			Err(WorkerError::JobTimedOut)
+		},
+		other => other,
+	}
+}
+
+/// Hard-links the artifact into the worker cache, executes the given future/closure passing the
+/// worker dir in, and cleans up the worker cache.
+///
+/// # Errors
+///
+/// - [`InternalValidationError::CouldNotCreateLink`] if the hard link cannot be created. In that
+///   case `f` is never run.
+/// - [`InternalValidationError::CouldNotClearWorkerDir`] if the worker cache cannot be cleared
+///   after `f` has finished. This error takes precedence over the result of `f`.
+///
+/// Failure to clean up the worker cache results in an error - leaving any files here could be a
+/// security issue, and we should shut down the worker. This should be very rare.
+async fn with_worker_dir_setup<F, Fut>(
+	worker_dir: WorkerDir,
+	pid: u32,
+	artifact_path: &Path,
+	f: F,
+) -> Result<Response, Error>
+where
+	Fut: futures::Future<Output = Result<Response, Error>>,
+	F: FnOnce(WorkerDir) -> Fut,
+{
+	// Cheaply create a hard link to the artifact. The artifact is always at a known location in the
+	// worker cache, and the child can't access any other artifacts or gain any information from the
+	// original filename.
+	let link_path = worker_dir::execute_artifact(worker_dir.path());
+	if let Err(err) = tokio::fs::hard_link(artifact_path, link_path).await {
+		gum::warn!(
+			target: LOG_TARGET,
+			worker_pid = %pid,
+			?worker_dir,
+			"failed to create a hard link to the artifact in the worker dir: {}",
+			err,
+		);
+		return Err(InternalValidationError::CouldNotCreateLink(format!("{:?}", err)).into());
+	}
+
+	// `f` consumes the worker dir, so keep a copy of its path for the cleanup below.
+	let worker_dir_path = worker_dir.path().to_owned();
+	let result = f(worker_dir).await;
+
+	// Try to clear the worker dir.
+	if let Err(err) = clear_worker_dir_path(&worker_dir_path) {
+		gum::warn!(
+			target: LOG_TARGET,
+			worker_pid = %pid,
+			?worker_dir_path,
+			"failed to clear worker cache after the job: {:?}",
+			err,
+		);
+		return Err(InternalValidationError::CouldNotClearWorkerDir {
+			err: format!("{:?}", err),
+			path: worker_dir_path.to_str().map(String::from),
+		}
+		.into());
+	}
+
+	result
+}
+
+/// Encodes the execute worker handshake and sends it over the stream as a single frame.
+async fn send_execute_handshake(stream: &mut UnixStream, handshake: Handshake) -> io::Result<()> {
+	let payload = handshake.encode();
+	framed_send(stream, &payload).await
+}
+
+/// Builds an execute request from the given parts, encodes it and sends it over the stream as a
+/// single frame.
+async fn send_request(
+	stream: &mut UnixStream,
+	pvd: Arc<PersistedValidationData>,
+	pov: Arc<PoV>,
+	execution_timeout: Duration,
+	artifact_checksum: ArtifactChecksum,
+) -> io::Result<()> {
+	use pezkuwi_node_core_pvf_common::execute::ExecuteRequest;
+
+	// The request owns its data, so the shared PVD and PoV are cloned out of their `Arc`s.
+	let pvd: &PersistedValidationData = &pvd;
+	let pov: &PoV = &pov;
+	let payload = ExecuteRequest {
+		pvd: pvd.clone(),
+		pov: pov.clone(),
+		execution_timeout,
+		artifact_checksum,
+	}
+	.encode();
+
+	framed_send(stream, &payload).await
+}
+
+/// Receives one frame from the stream and decodes it into the worker's result.
+///
+/// A frame that cannot be decoded is reported as an I/O error of kind `Other`.
+async fn recv_result(stream: &mut UnixStream) -> io::Result<Result<WorkerResponse, WorkerError>> {
+	let frame = framed_recv(stream).await?;
+	let mut input = frame.as_slice();
+
+	match Result::<WorkerResponse, WorkerError>::decode(&mut input) {
+		Ok(decoded) => Ok(decoded),
+		Err(e) => Err(io::Error::new(
+			io::ErrorKind::Other,
+			format!("execute pvf recv_result: decode error: {:?}", e),
+		)),
+	}
+}
@@ -0,0 +1,157 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+#![warn(missing_docs)]
+
+//! The PVF validation host. Responsible for coordinating preparation and execution of PVFs.
+//!
+//! For more background, refer to the Implementer's Guide: [PVF
+//! Pre-checking](https://docs.pezkuwichain.io/sdk/book/pvf-prechecking.html), [Candidate
+//! Validation](https://docs.pezkuwichain.io/sdk/book/node/utility/candidate-validation.html)
+//! and [PVF Host and Workers](https://docs.pezkuwichain.io/sdk/book/node/utility/pvf-host-and-workers.html).
+//!
+//!
+//! # Entrypoint
+//!
+//! This crate provides a simple API. You first [`start`] the validation host, which gives you the
+//! [handle][`ValidationHost`] and the future you need to poll.
+//!
+//! Then using the handle the client can send three types of requests:
+//!
+//! (a) PVF pre-checking. This takes the `Pvf` code and tries to prepare it (verify and
+//! compile) in order to pre-check its validity.
+//!
+//! (b) PVF execution. This accepts the PVF
+//! [`params`][`pezkuwi_teyrchain_primitives::primitives::ValidationParams`] and the `Pvf`
+//! code, prepares (verifies and compiles) the code, and then executes the PVF with the `params`.
+//!
+//! (c) Heads up. This request signals that the given PVF may be needed soon and that it
+//!     should be prepared for execution.
+//!
+//! The preparation results are cached for some time after they were either used or signaled in a
+//! heads up. All requests that depend on the preparation of the same PVF are bundled together and
+//! will be executed as soon as the artifact is prepared.
+//!
+//! # Priority
+//!
+//! PVF execution requests can specify the [priority][`Priority`] with which the given request
+//! should be handled. Different priority levels have different effects. This is discussed below.
+//!
+//! Preparation started by a heads up signal always starts with the background priority. If there
+//! is already a request for that PVF preparation under way the priority is inherited. If after
+//! heads up, a new PVF execution request comes in with a higher priority, then the original task's
+//! priority will be adjusted to match the new one if it's larger.
+//!
+//! Priority can never go down, only up.
+//!
+//! # Under the hood
+//!
+//! ## The flow
+//!
+//! Under the hood, the validation host is built using a bunch of communicating processes, not
+//! dissimilar to actors. Each of such "processes" is a future task that contains an event loop that
+//! processes incoming messages, potentially delegating sub-tasks to other "processes".
+//!
+//! Two of these processes are queues. The first one is for preparation jobs and the second one is
+//! for execution. Both of the queues are backed by separate pools of workers of different kind.
+//!
+//! Preparation workers handle preparation requests by prevalidating and instrumenting PVF wasm
+//! code, and then passing it into the compiler, to prepare the artifact.
+//!
+//! ## Artifacts
+//!
+//! An artifact is the final product of preparation. If the preparation succeeded, then the artifact
+//! will contain the compiled code usable for quick execution by a worker later on. If the
+//! preparation failed, then no artifact is created.
+//!
+//! The artifact is saved on disk and is also tracked by an in memory table. This in memory table
+//! doesn't contain the artifact contents though, only a flag for the state of the given artifact
+//! and some associated data. If the artifact failed to process, this also includes the error.
+//!
+//! A pruning task will run at a fixed interval of time. This task will remove all artifacts that
+//! weren't used or received a heads up signal for a while.
+//!
+//! ## Execution
+//!
+//! The execute workers will be fed by the requests from the execution queue, which is basically a
+//! combination of a path to the compiled artifact and the
+//! [`params`][`pezkuwi_teyrchain_primitives::primitives::ValidationParams`].
+
+mod artifacts;
+mod error;
+mod execute;
+mod host;
+mod metrics;
+mod prepare;
+mod priority;
+#[cfg(target_os = "linux")]
+mod security;
+mod worker_interface;
+
+#[cfg(feature = "test-utils")]
+pub mod testing;
+
+pub use error::{InvalidCandidate, PossiblyInvalidError, ValidationError};
+pub use host::{
+	start, Config, ValidationHost, EXECUTE_BINARY_NAME, HOST_MESSAGE_QUEUE_SIZE,
+	PREPARE_BINARY_NAME,
+};
+pub use metrics::Metrics;
+pub use priority::Priority;
+pub use worker_interface::{framed_recv, framed_send, JOB_TIMEOUT_WALL_CLOCK_FACTOR};
+
+// Re-export some common types.
+pub use pezkuwi_node_core_pvf_common::{
+	error::{InternalValidationError, PrepareError},
+	prepare::{PrepareJobKind, PrepareStats},
+	pvf::PvfPrepData,
+	SecurityStatus,
+};
+
+use std::{path::Path, process::Command};
+
+/// The log target for this crate.
+///
+/// Passed as the `target:` argument of the `gum` logging macros used throughout the crate.
+pub const LOG_TARGET: &str = "teyrchain::pvf";
+
+/// Utility to get the version of a worker, used for version checks.
+///
+/// Runs `worker_path --version` and returns the process's standard output with surrounding
+/// whitespace trimmed.
+///
+/// The worker's existence at the given path must be checked separately.
+///
+/// # Errors
+///
+/// Returns the underlying I/O error if the process cannot be run, and an error of kind
+/// [`std::io::ErrorKind::InvalidData`] if the output is not valid UTF-8. The latter used to be a
+/// panic; since the output comes from an external binary it is reported as an error instead.
+pub fn get_worker_version(worker_path: &Path) -> std::io::Result<String> {
+	let output = Command::new(worker_path).args(["--version"]).output()?;
+	let version = std::str::from_utf8(&output.stdout).map_err(|err| {
+		std::io::Error::new(
+			std::io::ErrorKind::InvalidData,
+			format!("worker version output is not valid UTF-8: {}", err),
+		)
+	})?;
+	Ok(version.trim().to_string())
+}
+
+/// Message for the case where secure mode was requested and some mandatory security checks
+/// failed.
+pub(crate) const SECURE_MODE_ERROR: &'static str =
+	"🚨 Your system cannot securely run a validator. \
+\nRunning validation of malicious PVF code has a higher risk of compromising this machine.";
+/// Message for the case where some errors occurred when running insecurely, or some optional
+/// errors occurred when running securely.
+pub(crate) const SECURE_MODE_WARNING: &'static str = "🚨 Some security issues have been detected. \
+\nRunning validation of malicious PVF code has a higher risk of compromising this machine.";
+/// Tip to be printed only when running securely and mandatory errors occurred. It names the
+/// command line flag that downgrades those errors.
+pub(crate) const IGNORE_SECURE_MODE_TIP: &'static str =
+"\nYou can ignore this error with the `--insecure-validator-i-know-what-i-do` \
+command line argument if you understand and accept the risks of running insecurely. \
+With this flag, security features are enabled on a best-effort basis, but not mandatory. \
+\nMore information: https://docs.pezkuwichain.io/infrastructure/running-a-validator/operational-tasks/general-management/#secure-your-validator";
+/// Note for non-Linux targets: only Linux supports the security features. This constant is
+/// compiled only when the target OS is not Linux.
+#[cfg(not(target_os = "linux"))]
+pub(crate) const SECURE_LINUX_NOTE: &'static str = "\nSecure mode is enabled only for Linux \
+\nand a full secure mode is enabled only for Linux x86-64.";
@@ -0,0 +1,436 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Prometheus metrics related to the validation host.
+
+use pezkuwi_node_core_pvf_common::prepare::MemoryStats;
+use pezkuwi_node_metrics::metrics::{self, prometheus};
+use pezkuwi_node_subsystem::messages::PvfExecKind;
+
+/// Validation host metrics.
+///
+/// Wraps an optional set of registered collectors. The `Default` value holds `None`, in which
+/// case the recording methods on this type do nothing.
+#[derive(Default, Clone)]
+pub struct Metrics(Option<MetricsInner>);
+
+impl Metrics {
+	/// Returns a handle to submit prepare workers metrics.
+	pub(crate) fn prepare_worker(&self) -> WorkerRelatedMetrics<'_> {
+		WorkerRelatedMetrics { metrics: self, flavor: WorkerFlavor::Prepare }
+	}
+
+	/// Returns a handle to submit execute workers metrics.
+	pub(crate) fn execute_worker(&self) -> WorkerRelatedMetrics<'_> {
+		WorkerRelatedMetrics { metrics: self, flavor: WorkerFlavor::Execute }
+	}
+
+	/// When preparation pipeline had a new item enqueued.
+	pub(crate) fn prepare_enqueued(&self) {
+		if let Some(metrics) = &self.0 {
+			metrics.prepare_enqueued.inc();
+		}
+	}
+
+	/// When preparation pipeline concluded working on an item.
+	pub(crate) fn prepare_concluded(&self) {
+		if let Some(metrics) = &self.0 {
+			metrics.prepare_concluded.inc();
+		}
+	}
+
+	/// When execution pipeline had a new item enqueued.
+	pub(crate) fn execute_enqueued(&self) {
+		if let Some(metrics) = &self.0 {
+			metrics.execute_enqueued.inc();
+		}
+	}
+
+	/// When execution pipeline finished executing a request.
+	pub(crate) fn execute_finished(&self) {
+		if let Some(metrics) = &self.0 {
+			metrics.execute_finished.inc();
+		}
+	}
+
+	/// Time between sending preparation request to a worker to having the response.
+	///
+	/// Returns `None` when no metrics are registered.
+	pub(crate) fn time_preparation(
+		&self,
+	) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
+		self.0.as_ref().map(|metrics| metrics.preparation_time.start_timer())
+	}
+
+	/// Time between sending execution request to a worker to having the response.
+	///
+	/// Returns `None` when no metrics are registered.
+	pub(crate) fn time_execution(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
+		self.0.as_ref().map(|metrics| metrics.execution_time.start_timer())
+	}
+
+	/// Observe how long an execution job waited in the queue.
+	///
+	/// The duration is given in milliseconds and recorded in seconds.
+	pub(crate) fn observe_execution_queued_time(&self, queued_for_millis: u32) {
+		if let Some(metrics) = &self.0 {
+			metrics.execution_queued_time.observe(f64::from(queued_for_millis) / 1000.0);
+		}
+	}
+
+	/// Observe memory stats for preparation.
+	// NOTE(review): `memory_stats` and `metrics` are both read unconditionally at the end of the
+	// body, so this `allow` may no longer be needed - confirm before removing.
+	#[allow(unused_variables)]
+	pub(crate) fn observe_preparation_memory_metrics(&self, memory_stats: MemoryStats) {
+		if let Some(metrics) = &self.0 {
+			#[cfg(target_os = "linux")]
+			if let Some(max_rss) = memory_stats.max_rss {
+				metrics.preparation_max_rss.observe(max_rss as f64);
+			}
+
+			#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+			if let Some(tracker_stats) = memory_stats.memory_tracker_stats {
+				// We convert these stats from B to KB to match the unit of `ru_maxrss` from
+				// `getrusage`.
+				let max_resident_kb = (tracker_stats.resident / 1024) as f64;
+				let max_allocated_kb = (tracker_stats.allocated / 1024) as f64;
+
+				metrics.preparation_max_resident.observe(max_resident_kb);
+				metrics.preparation_max_allocated.observe(max_allocated_kb);
+			}
+
+			// Divided by 1024 like the tracker stats above, so all memory histograms share a unit.
+			metrics
+				.preparation_peak_tracked_allocation
+				.observe((memory_stats.peak_tracked_alloc / 1024) as f64);
+		}
+	}
+
+	/// Observe the size of a validation code blob.
+	pub(crate) fn observe_code_size(&self, code_size: usize) {
+		if let Some(metrics) = &self.0 {
+			metrics.code_size.observe(code_size as f64);
+		}
+	}
+
+	/// Observe the size of a PoV.
+	///
+	/// `compressed` selects the value of the `compressed` label (`"true"` or `"false"`).
+	pub(crate) fn observe_pov_size(&self, pov_size: usize, compressed: bool) {
+		if let Some(metrics) = &self.0 {
+			metrics
+				.pov_size
+				.with_label_values(&[if compressed { "true" } else { "false" }])
+				.observe(pov_size as f64);
+		}
+	}
+
+	/// When an execution kind was selected; increments the counter labelled with `kind`.
+	pub(crate) fn on_execute_kind(&self, kind: PvfExecKind) {
+		if let Some(metrics) = &self.0 {
+			metrics.exec_kind_selected.with_label_values(&[kind.as_str()]).inc();
+		}
+	}
+}
+
+/// The set of Prometheus collectors backing [`Metrics`].
+#[derive(Clone)]
+struct MetricsInner {
+	// Worker lifecycle counters; each is labelled by worker flavor.
+	worker_spawning: prometheus::CounterVec<prometheus::U64>,
+	worker_spawned: prometheus::CounterVec<prometheus::U64>,
+	worker_retired: prometheus::CounterVec<prometheus::U64>,
+	// Pipeline throughput counters.
+	prepare_enqueued: prometheus::Counter<prometheus::U64>,
+	prepare_concluded: prometheus::Counter<prometheus::U64>,
+	execute_enqueued: prometheus::Counter<prometheus::U64>,
+	execute_finished: prometheus::Counter<prometheus::U64>,
+	// Timing histograms.
+	preparation_time: prometheus::Histogram,
+	execution_time: prometheus::Histogram,
+	execution_queued_time: prometheus::Histogram,
+	// `ru_maxrss` observed for preparation; only compiled on Linux.
+	#[cfg(target_os = "linux")]
+	preparation_max_rss: prometheus::Histogram,
+	// Max. allocated memory, tracked by Jemallocator, polling-based
+	#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+	preparation_max_allocated: prometheus::Histogram,
+	// Max. resident memory, tracked by Jemallocator, polling-based
+	#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+	preparation_max_resident: prometheus::Histogram,
+	// Peak allocation value, tracked by tracking-allocator
+	preparation_peak_tracked_allocation: prometheus::Histogram,
+	// PoV sizes, labelled by whether the size is of the compressed form.
+	pov_size: prometheus::HistogramVec,
+	// Validation code sizes.
+	code_size: prometheus::Histogram,
+	// Number of times each execute kind was selected.
+	exec_kind_selected: prometheus::CounterVec<prometheus::U64>,
+}
+
+impl metrics::Metrics for Metrics {
+	/// Creates every collector of the validation host and registers it in `registry`.
+	///
+	/// Returns the first error reported while constructing or registering a collector.
+	fn try_register(registry: &prometheus::Registry) -> Result<Self, prometheus::PrometheusError> {
+		let inner = MetricsInner {
+			worker_spawning: prometheus::register(
+				prometheus::CounterVec::new(
+					prometheus::Opts::new(
+						"pezkuwi_pvf_worker_spawning",
+						"The total number of workers began to spawn",
+					),
+					&["flavor"],
+				)?,
+				registry,
+			)?,
+			worker_spawned: prometheus::register(
+				prometheus::CounterVec::new(
+					prometheus::Opts::new(
+						"pezkuwi_pvf_worker_spawned",
+						"The total number of workers spawned successfully",
+					),
+					&["flavor"],
+				)?,
+				registry,
+			)?,
+			worker_retired: prometheus::register(
+				prometheus::CounterVec::new(
+					prometheus::Opts::new(
+						"pezkuwi_pvf_worker_retired",
+						"The total number of workers retired, either killed by the host or died on duty",
+					),
+					&["flavor"],
+				)?,
+				registry,
+			)?,
+			prepare_enqueued: prometheus::register(
+				prometheus::Counter::new(
+					"pezkuwi_pvf_prepare_enqueued",
+					"The total number of jobs enqueued into the preparation pipeline"
+				)?,
+				registry,
+			)?,
+			prepare_concluded: prometheus::register(
+				prometheus::Counter::new(
+					"pezkuwi_pvf_prepare_concluded",
+					"The total number of jobs concluded in the preparation pipeline"
+				)?,
+				registry,
+			)?,
+			execute_enqueued: prometheus::register(
+				prometheus::Counter::new(
+					"pezkuwi_pvf_execute_enqueued",
+					"The total number of jobs enqueued into the execution pipeline"
+				)?,
+				registry,
+			)?,
+			execute_finished: prometheus::register(
+				prometheus::Counter::new(
+					"pezkuwi_pvf_execute_finished",
+					"The total number of jobs done in the execution pipeline"
+				)?,
+				registry,
+			)?,
+			preparation_time: prometheus::register(
+				prometheus::Histogram::with_opts(
+					prometheus::HistogramOpts::new(
+						"pezkuwi_pvf_preparation_time",
+						"Time spent in preparing PVF artifacts in seconds",
+					)
+					.buckets(vec![
+						// This is synchronized with the `DEFAULT_PRECHECK_PREPARATION_TIMEOUT=60s`
+						// and `DEFAULT_LENIENT_PREPARATION_TIMEOUT=360s` constants found in
+						// node/core/candidate-validation/src/lib.rs
+						0.1,
+						0.5,
+						1.0,
+						2.0,
+						3.0,
+						10.0,
+						20.0,
+						30.0,
+						60.0,
+						120.0,
+						240.0,
+						360.0,
+						480.0,
+					]),
+				)?,
+				registry,
+			)?,
+			execution_time: prometheus::register(
+				prometheus::Histogram::with_opts(
+					prometheus::HistogramOpts::new(
+						"pezkuwi_pvf_execution_time",
+						"Time spent in executing PVFs",
+					).buckets(vec![
+						// This is synchronized with `DEFAULT_APPROVAL_EXECUTION_TIMEOUT` and
+						// `DEFAULT_BACKING_EXECUTION_TIMEOUT` constants in
+						// node/core/candidate-validation/src/lib.rs
+						0.01,
+						0.025,
+						0.05,
+						0.1,
+						0.25,
+						0.5,
+						1.0,
+						2.0,
+						3.0,
+						4.0,
+						5.0,
+						6.0,
+						8.0,
+						10.0,
+						12.0,
+					]),
+				)?,
+				registry,
+			)?,
+			execution_queued_time: prometheus::register(
+				prometheus::Histogram::with_opts(
+					prometheus::HistogramOpts::new(
+						"pezkuwi_pvf_execution_queued_time",
+						"Time spent in queue waiting for PVFs execution job to be assigned",
+					).buckets(vec![
+						0.01,
+						0.025,
+						0.05,
+						0.1,
+						0.25,
+						0.5,
+						1.0,
+						2.0,
+						3.0,
+						4.0,
+						5.0,
+						6.0,
+						12.0,
+						24.0,
+						48.0,
+					]),
+				)?,
+				registry,
+			)?,
+			#[cfg(target_os = "linux")]
+			preparation_max_rss: prometheus::register(
+				prometheus::Histogram::with_opts(
+					prometheus::HistogramOpts::new(
+						"pezkuwi_pvf_preparation_max_rss",
+						"ru_maxrss (maximum resident set size) observed for preparation (in kilobytes)",
+					).buckets(
+						prometheus::exponential_buckets(8192.0, 2.0, 10)
+							.expect("arguments are always valid; qed"),
+					),
+				)?,
+				registry,
+			)?,
+			#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+			preparation_max_resident: prometheus::register(
+				prometheus::Histogram::with_opts(
+					prometheus::HistogramOpts::new(
+						"pezkuwi_pvf_preparation_max_resident",
+						"max resident memory observed for preparation (in kilobytes)",
+					).buckets(
+						prometheus::exponential_buckets(8192.0, 2.0, 10)
+							.expect("arguments are always valid; qed"),
+					),
+				)?,
+				registry,
+			)?,
+			#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+			preparation_max_allocated: prometheus::register(
+				prometheus::Histogram::with_opts(
+					prometheus::HistogramOpts::new(
+						"pezkuwi_pvf_preparation_max_allocated",
+						"max allocated memory observed for preparation (in kilobytes)",
+					).buckets(
+						prometheus::exponential_buckets(8192.0, 2.0, 10)
+							.expect("arguments are always valid; qed"),
+					),
+				)?,
+				registry,
+			)?,
+			preparation_peak_tracked_allocation: prometheus::register(
+				prometheus::Histogram::with_opts(
+					prometheus::HistogramOpts::new(
+						"pezkuwi_pvf_preparation_peak_tracked_allocation",
+						"peak allocation observed for preparation (in kilobytes)",
+					).buckets(
+						prometheus::exponential_buckets(8192.0, 2.0, 10)
+							.expect("arguments are always valid; qed"),
+					),
+				)?,
+				registry,
+			)?,
+			// The following metrics were moved here from the candidate validation subsystem.
+			// Their names are kept unchanged to avoid breaking existing dashboards.
+			pov_size: prometheus::register(
+				prometheus::HistogramVec::new(
+					prometheus::HistogramOpts::new(
+						"pezkuwi_teyrchain_candidate_validation_pov_size",
+						"The compressed and decompressed size of the proof of validity of a candidate",
+					)
+					.buckets(
+						prometheus::exponential_buckets(16384.0, 2.0, 10)
+							.expect("arguments are always valid; qed"),
+					),
+					&["compressed"],
+				)?,
+				registry,
+			)?,
+			code_size: prometheus::register(
+				prometheus::Histogram::with_opts(
+					prometheus::HistogramOpts::new(
+						"pezkuwi_teyrchain_candidate_validation_code_size",
+						"The size of the decompressed WASM validation blob used for checking a candidate",
+					)
+					.buckets(
+						prometheus::exponential_buckets(16384.0, 2.0, 10)
+							.expect("arguments are always valid; qed"),
+					),
+				)?,
+				registry,
+			)?,
+			exec_kind_selected: prometheus::register(
+				prometheus::CounterVec::new(
+					prometheus::Opts::new(
+						"pezkuwi_pvf_exec_kind_selected",
+						"The total number of selected execute kinds",
+					),
+					&["priority"],
+				)?,
+				registry,
+			)?,
+		};
+		Ok(Metrics(Some(inner)))
+	}
+}
+
+/// The kind of worker a per-worker metric refers to.
+enum WorkerFlavor {
+	/// A preparation worker.
+	Prepare,
+	/// An execution worker.
+	Execute,
+}
+
+impl WorkerFlavor {
+	/// Returns the label value that identifies this flavor on the per-worker counters.
+	fn as_label(&self) -> &'static str {
+		match self {
+			Self::Prepare => "prepare",
+			Self::Execute => "execute",
+		}
+	}
+}
+
+/// A handle for submitting worker lifecycle metrics for one worker flavor.
+///
+/// Obtained from `Metrics::prepare_worker` or `Metrics::execute_worker`.
+pub(crate) struct WorkerRelatedMetrics<'a> {
+	// The metrics the events are recorded into.
+	metrics: &'a Metrics,
+	// Which flavor label the events are recorded under.
+	flavor: WorkerFlavor,
+}
+
+impl<'a> WorkerRelatedMetrics<'a> {
+	/// Counts the start of a worker spawn attempt for this flavor.
+	pub(crate) fn on_begin_spawn(&self) {
+		if let Some(inner) = self.metrics.0.as_ref() {
+			let flavor = self.flavor.as_label();
+			inner.worker_spawning.with_label_values(&[flavor]).inc();
+		}
+	}
+
+	/// Counts a worker of this flavor that was spawned successfully.
+	pub(crate) fn on_spawned(&self) {
+		if let Some(inner) = self.metrics.0.as_ref() {
+			let flavor = self.flavor.as_label();
+			inner.worker_spawned.with_label_values(&[flavor]).inc();
+		}
+	}
+
+	/// Counts a worker of this flavor that was killed or died.
+	pub(crate) fn on_retired(&self) {
+		if let Some(inner) = self.metrics.0.as_ref() {
+			let flavor = self.flavor.as_label();
+			inner.worker_retired.with_label_values(&[flavor]).inc();
+		}
+	}
+}
@@ -0,0 +1,30 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Preparation part of pipeline
+//!
+//! The validation host spins up two processes: the queue (by running [`start_queue`]) and the pool
+//! (by running [`start_pool`]).
+//!
+//! The pool will spawn workers in new processes, and those processes should pass control to
+//! `pezkuwi_node_core_pvf_worker::prepare_worker_entrypoint`.
+
+mod pool;
+mod queue;
+mod worker_interface;
+
+pub use pool::start as start_pool;
+pub use queue::{start as start_queue, FromQueue, ToQueue};
@@ -0,0 +1,520 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+use super::worker_interface::{self, Outcome};
+use crate::{
+	metrics::Metrics,
+	worker_interface::{IdleWorker, WorkerHandle},
+	LOG_TARGET,
+};
+use always_assert::never;
+use futures::{
+	channel::mpsc, future::BoxFuture, stream::FuturesUnordered, Future, FutureExt, StreamExt,
+};
+use pezkuwi_node_core_pvf_common::{
+	error::{PrepareError, PrepareResult},
+	pvf::PvfPrepData,
+	SecurityStatus,
+};
+use slotmap::HopSlotMap;
+use std::{
+	fmt,
+	path::{Path, PathBuf},
+	task::Poll,
+	time::Duration,
+};
+
+// Key type identifying a spawned worker; it indexes the pool's `HopSlotMap` of worker data.
+slotmap::new_key_type! { pub struct Worker; }
+
+/// Messages that the pool handles.
+#[derive(Debug, PartialEq, Eq)]
+pub enum ToPool {
+	/// Request a new worker to spawn.
+	///
+	/// This request won't fail if the worker cannot be created. Instead, we consider the
+	/// failures transient and we try to spawn a worker again after a delay.
+	///
+	/// [`FromPool::Spawned`] will be returned as soon as the worker is spawned.
+	///
+	/// The client should anticipate a [`FromPool::Rip`] message, in case the spawned worker was
+	/// stopped for some reason.
+	Spawn,
+
+	/// Kill the given worker. No-op if the given worker is not running.
+	///
+	/// [`FromPool::Rip`] won't be sent in this case. However, the client should be prepared to
+	/// receive [`FromPool::Rip`] nonetheless, since the worker may have been ripped before
+	/// this message is processed.
+	Kill(Worker),
+
+	/// Request the given worker to start working on the given code.
+	///
+	/// Once the job either succeeded or failed, a [`FromPool::Concluded`] message will be sent
+	/// back. It's also possible that the worker dies before handling the message in which case
+	/// [`FromPool::Rip`] will be sent back.
+	///
+	/// In either case, the worker is considered busy and no further `StartWork` messages should be
+	/// sent until either `Concluded` or `Rip` message is received.
+	StartWork { worker: Worker, pvf: PvfPrepData, cache_path: PathBuf },
+}
+
+/// A message sent from pool to its client.
+#[derive(Debug)]
+pub enum FromPool {
+	/// The given worker was just spawned and is ready to be used.
+	Spawned(Worker),
+
+	/// The given worker either succeeded or failed the given job.
+	Concluded {
+		/// A key for retrieving the worker data from the pool.
+		worker: Worker,
+		/// Indicates whether the worker process was killed.
+		rip: bool,
+		/// [`Ok`] indicates that the compiled artifact was successfully stored on disk.
+		/// Otherwise, an [error](PrepareError) is supplied.
+		result: PrepareResult,
+	},
+
+	/// The given worker ceased to exist.
+	Rip(Worker),
+}
+
+/// Per-worker bookkeeping kept by the pool.
+struct WorkerData {
+	// The idle token. It is taken out (`None`) while the worker is occupied with a job.
+	idle: Option<IdleWorker>,
+	// Handle to the worker process; it is polled to detect that the worker has terminated.
+	handle: WorkerHandle,
+}
+
+impl fmt::Debug for WorkerData {
+	/// Formats the worker as `WorkerData(pid=<id>)`, using the id reported by its handle.
+	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+		let pid = self.handle.id();
+		f.write_fmt(format_args!("WorkerData(pid={})", pid))
+	}
+}
+
+/// The result of a task that the pool was driving in its `Mux`.
+enum PoolEvent {
+	// A worker was spawned; carries its idle token and process handle.
+	Spawn(IdleWorker, WorkerHandle),
+	// The job given to the worker finished with the given outcome.
+	StartWork(Worker, Outcome),
+}
+
+// The set of in-flight pool tasks; each one resolves to a `PoolEvent`.
+type Mux = FuturesUnordered<BoxFuture<'static, PoolEvent>>;
+
+/// The state of the prepare worker pool, consumed by the pool's event loop.
+struct Pool {
+	// Some variables related to the current session.
+	// These are forwarded to the worker spawning routine for every new worker.
+	program_path: PathBuf,
+	cache_path: PathBuf,
+	spawn_timeout: Duration,
+	node_version: Option<String>,
+	security_status: SecurityStatus,
+
+	// Incoming requests for the pool.
+	to_pool: mpsc::Receiver<ToPool>,
+	// Outgoing notifications to the pool's client.
+	from_pool: mpsc::UnboundedSender<FromPool>,
+	// All workers currently known to the pool, keyed by `Worker`.
+	spawned: HopSlotMap<Worker, WorkerData>,
+	// In-flight spawn and work tasks.
+	mux: Mux,
+
+	metrics: Metrics,
+}
+
+/// A fatal error that warrants stopping the event loop of the pool.
+///
+/// The event loop produces it itself when the incoming `ToPool` channel yields `None`.
+struct Fatal;
+
+/// The event loop of the pool.
+///
+/// Each iteration waits for either an incoming [`ToPool`] message or a finished task from `mux`,
+/// handles it, and then purges idle workers whose process has terminated. The loop ends when the
+/// `to_pool` channel yields `None` or when one of the handlers reports [`Fatal`].
+async fn run(
+	Pool {
+		program_path,
+		cache_path,
+		spawn_timeout,
+		node_version,
+		security_status,
+		to_pool,
+		mut from_pool,
+		mut spawned,
+		mut mux,
+		metrics,
+	}: Pool,
+) {
+	// Unwraps an `Ok` value, or leaves the event loop below on `Err(Fatal)`.
+	macro_rules! break_if_fatal {
+		($expr:expr) => {
+			match $expr {
+				Err(Fatal) => break,
+				Ok(v) => v,
+			}
+		};
+	}
+
+	let mut to_pool = to_pool.fuse();
+
+	loop {
+		futures::select! {
+			to_pool = to_pool.next() => {
+				// `None` from the channel is turned into `Fatal`, which ends the loop.
+				let to_pool = break_if_fatal!(to_pool.ok_or(Fatal));
+				handle_to_pool(
+					&metrics,
+					&program_path,
+					&cache_path,
+					spawn_timeout,
+					node_version.clone(),
+					security_status.clone(),
+					&mut spawned,
+					&mut mux,
+					to_pool,
+				)
+			}
+			ev = mux.select_next_some() => {
+				break_if_fatal!(handle_mux(&metrics, &mut from_pool, &mut spawned, ev))
+			}
+		}
+
+		// After every handled event, weed out idle workers that have terminated meanwhile.
+		break_if_fatal!(purge_dead(&metrics, &mut from_pool, &mut spawned).await);
+	}
+}
+
+/// Removes idle workers whose process has terminated and reports each removal to the client.
+///
+/// Every idle worker's handle is polled once; a handle that resolves marks the worker as dead.
+/// Each dead worker is passed to `attempt_retire`, and when that returns `true` a
+/// [`FromPool::Rip`] is sent. An error from `reply` is propagated to the caller.
+async fn purge_dead(
+	metrics: &Metrics,
+	from_pool: &mut mpsc::UnboundedSender<FromPool>,
+	spawned: &mut HopSlotMap<Worker, WorkerData>,
+) -> Result<(), Fatal> {
+	let mut dead = vec![];
+	for (key, data) in spawned.iter_mut() {
+		// A missing idle token means the worker is occupied. Such a worker is observed by its
+		// work task, which reports termination or a missed deadline through the mux instead, so
+		// only idle workers are polled here.
+		let is_idle = data.idle.is_some();
+
+		// A resolved handle future means that the worker process has terminated.
+		if is_idle && matches!(futures::poll!(&mut data.handle), Poll::Ready(())) {
+			dead.push(key);
+		}
+	}
+
+	for key in dead {
+		let retired = attempt_retire(metrics, spawned, key);
+		if retired {
+			reply(from_pool, FromPool::Rip(key))?;
+		}
+	}
+
+	Ok(())
+}
+
+/// Handles a single [`ToPool`] request.
+///
+/// * `Spawn` pushes a worker spawning task onto `mux`.
+/// * `StartWork` takes the worker's idle token and pushes a work task onto `mux`. An unknown
+///   worker is silently ignored.
+/// * `Kill` attempts to retire the worker and ignores whether it was still present.
+fn handle_to_pool(
+	metrics: &Metrics,
+	program_path: &Path,
+	cache_path: &Path,
+	spawn_timeout: Duration,
+	node_version: Option<String>,
+	security_status: SecurityStatus,
+	spawned: &mut HopSlotMap<Worker, WorkerData>,
+	mux: &mut Mux,
+	to_pool: ToPool,
+) {
+	match to_pool {
+		ToPool::Kill(worker) => {
+			gum::debug!(target: LOG_TARGET, ?worker, "killing prepare worker");
+			// The worker may already be gone if `purge_dead` removed it earlier.
+			let _ = attempt_retire(metrics, spawned, worker);
+		},
+		ToPool::Spawn => {
+			gum::debug!(target: LOG_TARGET, "spawning a new prepare worker");
+			metrics.prepare_worker().on_begin_spawn();
+			let task = spawn_worker_task(
+				program_path.to_owned(),
+				cache_path.to_owned(),
+				spawn_timeout,
+				node_version,
+				security_status,
+			);
+			mux.push(task.boxed());
+		},
+		ToPool::StartWork { worker, pvf, cache_path } => {
+			// Outer `Option`: is the worker known? Inner `Option`: did it hold an idle token?
+			let idle_token = spawned.get_mut(worker).map(|data| data.idle.take());
+			match idle_token {
+				Some(Some(idle)) => {
+					let preparation_timer = metrics.time_preparation();
+					let task = start_work_task(
+						metrics.clone(),
+						worker,
+						idle,
+						pvf,
+						cache_path,
+						preparation_timer,
+					);
+					mux.push(task.boxed());
+				},
+				Some(None) => {
+					// The idle token is present after spawn and after a job is concluded, and
+					// `StartWork` must only be sent once all previous work items concluded.
+					// Hence the token is expected to be `Some` here; qed.
+					never!("unexpected absence of the idle token in prepare pool");
+				},
+				None => {
+					// That's a relatively normal situation since the queue may send `start_work`
+					// and before receiving it the pool would report that the worker died.
+				},
+			}
+		},
+	}
+}
+
+/// Spawns a prepare worker, retrying until it succeeds.
+///
+/// Every failed attempt is logged as a warning and followed by a 3 second pause before the next
+/// attempt. Resolves to [`PoolEvent::Spawn`] carrying the new worker's idle token and handle.
+async fn spawn_worker_task(
+	program_path: PathBuf,
+	cache_path: PathBuf,
+	spawn_timeout: Duration,
+	node_version: Option<String>,
+	security_status: SecurityStatus,
+) -> PoolEvent {
+	use futures_timer::Delay;
+
+	loop {
+		let attempt = worker_interface::spawn(
+			&program_path,
+			&cache_path,
+			spawn_timeout,
+			node_version.as_deref(),
+			security_status.clone(),
+		)
+		.await;
+
+		let err = match attempt {
+			Ok((idle, handle)) => return PoolEvent::Spawn(idle, handle),
+			Err(err) => err,
+		};
+		gum::warn!(target: LOG_TARGET, "failed to spawn a prepare worker: {:?}", err);
+
+		// Assume that the failure is intermittent and retry after a delay.
+		Delay::new(Duration::from_secs(3)).await;
+	}
+}
+
+/// Runs one preparation job on the given idle worker and wraps the outcome for the pool.
+///
+/// `_preparation_timer` is not read; it is only kept alive until this task finishes.
+/// NOTE(review): presumably the timer records the elapsed time when it is dropped - confirm
+/// against the timer type passed by the caller.
+async fn start_work_task<Timer>(
+	metrics: Metrics,
+	worker: Worker,
+	idle: IdleWorker,
+	pvf: PvfPrepData,
+	cache_path: PathBuf,
+	_preparation_timer: Option<Timer>,
+) -> PoolEvent {
+	PoolEvent::StartWork(
+		worker,
+		worker_interface::start_work(&metrics, idle, pvf, cache_path).await,
+	)
+}
+
+/// Reacts to a single [`PoolEvent`].
+///
+/// * `PoolEvent::Spawn` registers the new worker in `spawned`, calls the `on_spawned` metric hook
+///   and announces the worker with `FromPool::Spawned`.
+/// * `PoolEvent::StartWork` converts the job outcome into a reply. Outcomes that hand the idle
+///   worker back (`Concluded`, `CreateTmpFileErr`, `RenameTmpFile`) keep the worker registered.
+///   Every other outcome first retires the worker and replies only if the worker was still
+///   registered at that point.
+///
+/// Returns `Err(Fatal)` when a reply cannot be delivered through `from_pool`.
+fn handle_mux(
+	metrics: &Metrics,
+	from_pool: &mut mpsc::UnboundedSender<FromPool>,
+	spawned: &mut HopSlotMap<Worker, WorkerData>,
+	event: PoolEvent,
+) -> Result<(), Fatal> {
+	match event {
+		PoolEvent::Spawn(idle, handle) => {
+			metrics.prepare_worker().on_spawned();
+
+			let worker = spawned.insert(WorkerData { idle: Some(idle), handle });
+
+			reply(from_pool, FromPool::Spawned(worker))?;
+
+			Ok(())
+		},
+		PoolEvent::StartWork(worker, outcome) => {
+			// Outcomes that are not the worker's fault hand the idle worker back and return right
+			// away. For all remaining outcomes this match only selects the error to report; the
+			// worker itself is retired (and thereby killed) in the shared tail below.
+			let error = match outcome {
+				Outcome::Concluded { worker: idle, result } =>
+					return handle_concluded_no_rip(from_pool, spawned, worker, idle, result),
+				// Return `Concluded`, but do not kill the worker since the error was on the host
+				// side.
+				Outcome::CreateTmpFileErr { worker: idle, err } =>
+					return handle_concluded_no_rip(
+						from_pool,
+						spawned,
+						worker,
+						idle,
+						Err(PrepareError::CreateTmpFile(err)),
+					),
+				// Return `Concluded`, but do not kill the worker since the error was on the host
+				// side.
+				Outcome::RenameTmpFile { worker: idle, err, src, dest } =>
+					return handle_concluded_no_rip(
+						from_pool,
+						spawned,
+						worker,
+						idle,
+						Err(PrepareError::RenameTmpFile { err, src, dest }),
+					),
+				// There is no result to forward for an unreachable worker; it is only reported
+				// as dead.
+				Outcome::Unreachable => {
+					if attempt_retire(metrics, spawned, worker) {
+						reply(from_pool, FromPool::Rip(worker))?;
+					}
+
+					return Ok(());
+				},
+				// Could not clear worker cache. Kill the worker so other jobs can't see the data.
+				Outcome::ClearWorkerDir { err } => PrepareError::ClearWorkerDir(err),
+				Outcome::IoErr(err) => PrepareError::IoErr(err),
+				// The worker might still be usable, but we kill it just in case.
+				Outcome::JobDied { err, job_pid } => PrepareError::JobDied { err, job_pid },
+				Outcome::TimedOut => PrepareError::TimedOut,
+				Outcome::OutOfMemory => PrepareError::OutOfMemory,
+			};
+
+			// Only reply if the worker was still registered; otherwise it has been retired
+			// already and nothing is sent for it here.
+			if attempt_retire(metrics, spawned, worker) {
+				reply(from_pool, FromPool::Concluded { worker, rip: true, result: Err(error) })?;
+			}
+
+			Ok(())
+		},
+	}
+}
+
+/// Sends `m` through `from_pool`; a send that fails is reported as [`Fatal`].
+fn reply(from_pool: &mut mpsc::UnboundedSender<FromPool>, m: FromPool) -> Result<(), Fatal> {
+	match from_pool.unbounded_send(m) {
+		Ok(()) => Ok(()),
+		Err(_) => Err(Fatal),
+	}
+}
+
+/// Removes the given worker from the registry if it is there. Removing the entry drops it, which
+/// in turn leads to killing the worker process.
+///
+/// Returns `true` if the worker was registered and has been removed, `false` otherwise.
+///
+/// The retired-workers metric is updated here, so callers must not count it again.
+fn attempt_retire(
+	metrics: &Metrics,
+	spawned: &mut HopSlotMap<Worker, WorkerData>,
+	worker: Worker,
+) -> bool {
+	let was_registered = spawned.remove(worker).is_some();
+	if was_registered {
+		metrics.prepare_worker().on_retired();
+	}
+	was_registered
+}
+
+/// Handles a response for which the worker is not to blame (as far as we know), so the worker
+/// must stay alive even if the result is an error.
+///
+/// The idle worker is put back into its slot in `spawned` and `FromPool::Concluded` with
+/// `rip: false` is sent. If the worker is no longer registered, nothing is sent at all.
+fn handle_concluded_no_rip(
+	from_pool: &mut mpsc::UnboundedSender<FromPool>,
+	spawned: &mut HopSlotMap<Worker, WorkerData>,
+	worker: Worker,
+	idle: IdleWorker,
+	result: PrepareResult,
+) -> Result<(), Fatal> {
+	let data = if let Some(data) = spawned.get_mut(worker) {
+		data
+	} else {
+		// Perhaps the worker was killed meanwhile and the result is no longer relevant. We
+		// already send `Rip` when purging if we detect that the worker is dead.
+		return Ok(());
+	};
+
+	// The idle worker was loaned out of this option when the work was started; hand it back.
+	let old = data.idle.replace(idle);
+	never!(
+		old.is_some(),
+		"old idle worker was taken out when starting work; we only replace it here; qed"
+	);
+
+	reply(from_pool, FromPool::Concluded { worker, rip: false, result })
+}
+
+/// Spins up the pool.
+///
+/// Returns the sender for requests to the pool, the receiver for the pool's responses and the
+/// future that has to be polled for the pool to make any progress.
+pub fn start(
+	metrics: Metrics,
+	program_path: PathBuf,
+	cache_path: PathBuf,
+	spawn_timeout: Duration,
+	node_version: Option<String>,
+	security_status: SecurityStatus,
+) -> (mpsc::Sender<ToPool>, mpsc::UnboundedReceiver<FromPool>, impl Future<Output = ()>) {
+	let (to_pool_tx, to_pool) = mpsc::channel(10);
+	let (from_pool, from_pool_rx) = mpsc::unbounded();
+
+	let pool = Pool {
+		metrics,
+		program_path,
+		cache_path,
+		spawn_timeout,
+		node_version,
+		security_status,
+		to_pool,
+		from_pool,
+		spawned: HopSlotMap::with_capacity_and_key(20),
+		mux: Mux::new(),
+	};
+
+	(to_pool_tx, from_pool_rx, run(pool))
+}
@@ -0,0 +1,796 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! A queue that handles requests for PVF preparation.
+
+use super::pool::{self, Worker};
+use crate::{artifacts::ArtifactId, metrics::Metrics, Priority, LOG_TARGET};
+use always_assert::{always, never};
+use futures::{channel::mpsc, stream::StreamExt as _, Future, SinkExt};
+use pezkuwi_node_core_pvf_common::{error::PrepareResult, pvf::PvfPrepData};
+use std::{
+	collections::{HashMap, VecDeque},
+	path::PathBuf,
+};
+
+#[cfg(test)]
+use std::time::Duration;
+
+/// A request to the queue.
+#[derive(Debug)]
+pub enum ToQueue {
+	/// This schedules preparation of the given PVF.
+	///
+	/// Note that it is incorrect to enqueue the same PVF again without first receiving the
+	/// [`FromQueue`] response.
+	Enqueue { priority: Priority, pvf: PvfPrepData },
+}
+
+/// A response from the queue, sent once the preparation job for an artifact has concluded.
+#[derive(Debug)]
+pub struct FromQueue {
+	/// Identifier of an artifact.
+	pub(crate) artifact_id: ArtifactId,
+	/// Outcome of the PVF processing. [`Ok`] indicates that compiled artifact
+	/// is successfully stored on disk. Otherwise, an
+	/// [error](pezkuwi_node_core_pvf_common::error::PrepareError) is supplied.
+	pub(crate) result: PrepareResult,
+}
+
+/// Bounds on the number of workers the queue is allowed to keep around.
+#[derive(Default)]
+struct Limits {
+	/// The maximum number of workers this pool can ever host. This is expected to be a small
+	/// number, e.g. within a dozen.
+	hard_capacity: usize,
+
+	/// The number of workers we aim to have. If there is a critical job and we are already
+	/// at `soft_capacity`, we are allowed to grow up to `hard_capacity`. Thus this should be equal
+	/// or smaller than `hard_capacity`.
+	soft_capacity: usize,
+}
+
+impl Limits {
+	/// Returns `true` if the queue is allowed to request one more worker.
+	///
+	/// `spawned_num` is checked against `hard_capacity` when `critical` is set and against
+	/// `soft_capacity` otherwise.
+	fn can_afford_one_more(&self, spawned_num: usize, critical: bool) -> bool {
+		let cap = if critical { self.hard_capacity } else { self.soft_capacity };
+		spawned_num < cap
+	}
+
+	/// Returns `true` if `spawned_num` exceeds `soft_capacity`, i.e. there are more workers than
+	/// we aim to have and one of them should be killed.
+	fn should_cull(&self, spawned_num: usize) -> bool {
+		spawned_num > self.soft_capacity
+	}
+}
+
+// Key type identifying a job; it is the key of the `jobs` slot map in `Queue`.
+slotmap::new_key_type! { pub struct Job; }
+
+/// Everything the queue tracks about a single preparation job.
+struct JobData {
+	/// The priority of this job. Can be bumped.
+	priority: Priority,
+	/// The PVF to prepare; a clone of it is sent to the pool when the job is assigned.
+	pvf: PvfPrepData,
+	/// The worker this job was handed to by `assign`; `None` until then.
+	worker: Option<Worker>,
+}
+
+/// Per-worker bookkeeping of the queue.
+#[derive(Default)]
+struct WorkerData {
+	/// The job the worker is currently assigned to, or `None` if it has none.
+	job: Option<Job>,
+}
+
+impl WorkerData {
+	/// Returns `true` when no job is assigned to the worker.
+	fn is_idle(&self) -> bool {
+		matches!(self.job, None)
+	}
+}
+
+/// Jobs that wait for a worker, kept in one FIFO per priority.
+///
+/// A queue structured like this is prone to starving, however, we don't care that much since we
+/// expect there is going to be a limited number of critical jobs and we don't really care if
+/// lower-priority jobs starve.
+#[derive(Default)]
+struct Unscheduled {
+	/// Waiting jobs of `Priority::Normal`.
+	normal: VecDeque<Job>,
+	/// Waiting jobs of `Priority::Critical`; these are handed out before the normal ones.
+	critical: VecDeque<Job>,
+}
+
+impl Unscheduled {
+	/// Picks the FIFO that stores jobs of the priority `prio`.
+	fn queue_mut(&mut self, prio: Priority) -> &mut VecDeque<Job> {
+		match prio {
+			Priority::Critical => &mut self.critical,
+			Priority::Normal => &mut self.normal,
+		}
+	}
+
+	/// Appends `job` at the back of the FIFO for `prio`.
+	fn add(&mut self, prio: Priority, job: Job) {
+		let fifo = self.queue_mut(prio);
+		fifo.push_back(job);
+	}
+
+	/// Puts `job` at the front of the FIFO for `prio`, so it is the next one of that priority
+	/// to be handed out.
+	fn readd(&mut self, prio: Priority, job: Job) {
+		let fifo = self.queue_mut(prio);
+		fifo.push_front(job);
+	}
+
+	/// Returns `true` if no job of any priority is waiting.
+	fn is_empty(&self) -> bool {
+		self.critical.is_empty() && self.normal.is_empty()
+	}
+
+	/// Takes the next job to run: the oldest critical one if any, otherwise the oldest normal one.
+	fn next(&mut self) -> Option<Job> {
+		self.critical.pop_front().or_else(|| self.normal.pop_front())
+	}
+}
+
+/// State of the preparation queue: the channels it talks through plus job and worker bookkeeping.
+struct Queue {
+	/// Metrics handle; notified when a job is enqueued and when one concludes.
+	metrics: Metrics,
+
+	/// Incoming requests to the queue.
+	to_queue_rx: mpsc::Receiver<ToQueue>,
+	/// Outgoing responses of the queue.
+	from_queue_tx: mpsc::UnboundedSender<FromQueue>,
+
+	/// Commands sent to the pool (spawn, start work, kill).
+	to_pool_tx: mpsc::Sender<pool::ToPool>,
+	/// Notifications received from the pool.
+	from_pool_rx: mpsc::UnboundedReceiver<pool::FromPool>,
+
+	/// Passed along to the pool with every `StartWork` command.
+	cache_path: PathBuf,
+	/// Soft and hard limits on the number of workers.
+	limits: Limits,
+
+	/// All jobs known to the queue: inserted on enqueue, removed when the job concludes.
+	jobs: slotmap::SlotMap<Job, JobData>,
+
+	/// A mapping from artifact id to a job.
+	artifact_id_to_job: HashMap<ArtifactId, Job>,
+	/// The registry of all workers.
+	workers: slotmap::SparseSecondaryMap<Worker, WorkerData>,
+	/// The number of workers requested to spawn but not yet spawned.
+	spawn_inflight: usize,
+
+	/// The jobs that are not yet scheduled. These are waiting until the next `poll` where they are
+	/// processed all at once.
+	unscheduled: Unscheduled,
+}
+
+/// A fatal error that warrants stopping the queue.
+///
+/// It is produced when a message cannot be sent to the host or to the pool.
+struct Fatal;
+
+impl Queue {
+	/// Creates a queue with no jobs, no registered workers and no spawn requests in flight.
+	fn new(
+		metrics: Metrics,
+		soft_capacity: usize,
+		hard_capacity: usize,
+		cache_path: PathBuf,
+		to_queue_rx: mpsc::Receiver<ToQueue>,
+		from_queue_tx: mpsc::UnboundedSender<FromQueue>,
+		to_pool_tx: mpsc::Sender<pool::ToPool>,
+		from_pool_rx: mpsc::UnboundedReceiver<pool::FromPool>,
+	) -> Self {
+		Self {
+			metrics,
+			to_queue_rx,
+			from_queue_tx,
+			to_pool_tx,
+			from_pool_rx,
+			cache_path,
+			spawn_inflight: 0,
+			limits: Limits { hard_capacity, soft_capacity },
+			jobs: slotmap::SlotMap::with_key(),
+			unscheduled: Unscheduled::default(),
+			artifact_id_to_job: HashMap::new(),
+			workers: slotmap::SparseSecondaryMap::new(),
+		}
+	}
+
+	/// The main loop of the queue: handles requests to the queue and notifications from the pool
+	/// one at a time, and stops as soon as a handler reports [`Fatal`].
+	async fn run(mut self) {
+		// Leaves the surrounding `loop` when the given handler result is `Err(Fatal)`.
+		macro_rules! break_if_fatal {
+			($expr:expr) => {
+				if let Err(Fatal) = $expr {
+					break;
+				}
+			};
+		}
+
+		loop {
+			// biased to make it behave deterministically for tests.
+			futures::select_biased! {
+				to_queue = self.to_queue_rx.select_next_some() =>
+					break_if_fatal!(handle_to_queue(&mut self, to_queue).await),
+				from_pool = self.from_pool_rx.select_next_some() =>
+					break_if_fatal!(handle_from_pool(&mut self, from_pool).await),
+			}
+		}
+	}
+}
+
+/// Dispatches a request sent to the queue to its handler.
+async fn handle_to_queue(queue: &mut Queue, to_queue: ToQueue) -> Result<(), Fatal> {
+	match to_queue {
+		ToQueue::Enqueue { priority, pvf } => handle_enqueue(queue, priority, pvf).await,
+	}
+}
+
+/// Registers a new preparation job for `pvf`.
+///
+/// If an idle worker exists the job is assigned to it right away. Otherwise an extra worker is
+/// requested (subject to the limits) and the job is parked in the unscheduled queue.
+///
+/// A duplicate request for an artifact that already has a job is logged and otherwise ignored.
+async fn handle_enqueue(
+	queue: &mut Queue,
+	priority: Priority,
+	pvf: PvfPrepData,
+) -> Result<(), Fatal> {
+	gum::debug!(
+		target: LOG_TARGET,
+		validation_code_hash = ?pvf.code_hash(),
+		?priority,
+		preparation_timeout = ?pvf.prep_timeout(),
+		"PVF is enqueued for preparation.",
+	);
+	// Note: the metric is bumped before the duplicate check below.
+	queue.metrics.prepare_enqueued();
+
+	let artifact_id = ArtifactId::from_pvf_prep_data(&pvf);
+	if never!(
+		queue.artifact_id_to_job.contains_key(&artifact_id),
+		"second Enqueue sent for a known artifact"
+	) {
+		// This function is called in response to a `Enqueue` message;
+		// Precondition for `Enqueue` is that it is sent only once for a PVF;
+		// Thus this should always be `false`;
+		// qed.
+		gum::warn!(
+			target: LOG_TARGET,
+			"duplicate `enqueue` command received for {:?}",
+			artifact_id,
+		);
+		return Ok(());
+	}
+
+	let job = queue.jobs.insert(JobData { priority, pvf, worker: None });
+	queue.artifact_id_to_job.insert(artifact_id, job);
+
+	if let Some(available) = find_idle_worker(queue) {
+		// This may seem not fair (w.r.t priority) on the first glance, but it should be. This is
+		// because as soon as a worker finishes with the job it's immediately given the next one.
+		assign(queue, available, job).await?;
+	} else {
+		spawn_extra_worker(queue, priority.is_critical()).await?;
+		queue.unscheduled.add(priority, job);
+	}
+
+	Ok(())
+}
+
+/// Returns the first registered worker (in registry iteration order) that has no job, if any.
+fn find_idle_worker(queue: &mut Queue) -> Option<Worker> {
+	queue.workers.iter().find(|(_, data)| data.is_idle()).map(|(worker, _)| worker)
+}
+
+/// Dispatches a notification received from the pool to its handler.
+async fn handle_from_pool(queue: &mut Queue, from_pool: pool::FromPool) -> Result<(), Fatal> {
+	match from_pool {
+		pool::FromPool::Spawned(worker) => handle_worker_spawned(queue, worker).await,
+		pool::FromPool::Concluded { worker, rip, result } =>
+			handle_worker_concluded(queue, worker, rip, result).await,
+		pool::FromPool::Rip(worker) => handle_worker_rip(queue, worker).await,
+	}
+}
+
+/// Registers a freshly spawned worker and hands it the next unscheduled job, if there is one.
+///
+/// A `Spawned` notification is expected to answer an earlier spawn request, so the number of
+/// in-flight spawns is decremented. Should the counter already be zero, the invariant violation
+/// is reported through `never!` and the counter is left untouched instead of wrapping around.
+async fn handle_worker_spawned(queue: &mut Queue, worker: Worker) -> Result<(), Fatal> {
+	queue.workers.insert(worker, WorkerData::default());
+
+	// Every spawn request increments `spawn_inflight`, so it must be positive here. Guard the
+	// decrement anyway: a wrapped counter would corrupt every later capacity computation.
+	if !never!(
+		queue.spawn_inflight == 0,
+		"a worker was reported as spawned without a pending spawn request"
+	) {
+		queue.spawn_inflight -= 1;
+	}
+
+	if let Some(job) = queue.unscheduled.next() {
+		assign(queue, worker, job).await?;
+	}
+
+	Ok(())
+}
+
+/// Handles the conclusion of a job on `worker`.
+///
+/// The job is removed from the bookkeeping and its result is forwarded to the host. Afterwards
+/// the worker is dealt with:
+/// * `rip == true`: the worker is unregistered and, if jobs are still waiting, a replacement is
+///   requested;
+/// * the number of workers is above the soft capacity: the worker is unregistered and killed;
+/// * otherwise the worker is given the next unscheduled job, if any.
+async fn handle_worker_concluded(
+	queue: &mut Queue,
+	worker: Worker,
+	rip: bool,
+	result: PrepareResult,
+) -> Result<(), Fatal> {
+	queue.metrics.prepare_concluded();
+
+	// Unwraps an `Option` that is expected to always be `Some`; on `None` the invariant violation
+	// is reported via `never!` and the function returns early with `Ok(())`.
+	macro_rules! never_none {
+		($expr:expr) => {
+			match $expr {
+				Some(v) => v,
+				None => {
+					// Precondition of calling this is that the `$expr` is never none;
+					// Assuming the condition holds, this `never!` is not hit;
+					// qed.
+					never!("never_none, {}", stringify!($expr));
+					return Ok(());
+				},
+			}
+		};
+	}
+
+	// Find out on which artifact was the worker working.
+
+	// workers are registered upon spawn and removed in one of the following cases:
+	//   1. received rip signal
+	//   2. received concluded signal with rip=true;
+	// concluded signal only comes from a spawned worker and only once;
+	// rip signal is not sent after conclusion with rip=true;
+	// the worker should be registered;
+	// this can't be None;
+	// qed.
+	let worker_data = never_none!(queue.workers.get_mut(worker));
+
+	// worker_data.job is set only by `assign` and removed only here for a worker;
+	// concluded signal only comes for a worker that was previously assigned and only once;
+	// the worker should have the job;
+	// this can't be None;
+	// qed.
+	let job = never_none!(worker_data.job.take());
+
+	// job_data is inserted upon enqueue and removed only here;
+	// as was established above, this worker was previously `assign`ed to the job;
+	// that implies that the job was enqueued;
+	// conclude signal only comes once;
+	// we are just to remove the job for the first and the only time;
+	// this can't be None;
+	// qed.
+	let job_data = never_none!(queue.jobs.remove(job));
+	let artifact_id = ArtifactId::from_pvf_prep_data(&job_data.pvf);
+
+	queue.artifact_id_to_job.remove(&artifact_id);
+
+	gum::debug!(
+		target: LOG_TARGET,
+		validation_code_hash = ?artifact_id.code_hash,
+		?worker,
+		?rip,
+		"prepare worker concluded",
+	);
+
+	reply(&mut queue.from_queue_tx, FromQueue { artifact_id, result })?;
+
+	// Figure out what to do with the worker.
+	if rip {
+		let worker_data = queue.workers.remove(worker);
+		// worker should exist, it's asserted above;
+		// qed.
+		always!(worker_data.is_some());
+
+		if !queue.unscheduled.is_empty() {
+			// That is unconditionally not critical just to not accidentally fill up
+			// the pool up to the hard cap.
+			spawn_extra_worker(queue, false).await?;
+		}
+	} else if queue.limits.should_cull(queue.workers.len() + queue.spawn_inflight) {
+		// We no longer need services of this worker. Kill it.
+		queue.workers.remove(worker);
+		send_pool(&mut queue.to_pool_tx, pool::ToPool::Kill(worker)).await?;
+	} else {
+		// See if there is more work available and schedule it.
+		if let Some(job) = queue.unscheduled.next() {
+			assign(queue, worker, job).await?;
+		}
+	}
+
+	Ok(())
+}
+
+/// Handles the notification that `worker` has died.
+///
+/// The worker is unregistered. If it still had a job assigned, that job is put back at the front
+/// of the unscheduled queue of its priority. If the worker was registered and jobs are waiting,
+/// a (non-critical) replacement worker is requested.
+async fn handle_worker_rip(queue: &mut Queue, worker: Worker) -> Result<(), Fatal> {
+	gum::debug!(target: LOG_TARGET, ?worker, "prepare worker ripped");
+
+	let worker_data = queue.workers.remove(worker);
+	if let Some(WorkerData { job: Some(job), .. }) = worker_data {
+		// This is an edge case where the worker ripped after we sent assignment but before it
+		// was received by the pool.
+		match queue.jobs.get(job).map(|data| data.priority) {
+			Some(priority) => queue.unscheduled.readd(priority, job),
+			None => {
+				// job is inserted upon enqueue and removed on concluded signal;
+				// this is enclosed in the if statement that narrows the situation to before
+				// conclusion;
+				// that means that the job still exists and is known;
+				// this path cannot be hit;
+				// qed.
+				//
+				// Should it be hit nevertheless, the job is deliberately NOT re-queued: a key
+				// without an entry in `queue.jobs` would make a later `assign` panic on indexing.
+				never!("the job of the ripped worker must be known but it is not");
+			},
+		}
+	}
+
+	// If there are still jobs left, spawn another worker to replace the ripped one (but only if it
+	// was indeed removed). That is unconditionally not critical just to not accidentally fill up
+	// the pool up to the hard cap.
+	if worker_data.is_some() && !queue.unscheduled.is_empty() {
+		spawn_extra_worker(queue, false).await?;
+	}
+	Ok(())
+}
+
+/// Requests one more worker from the pool, provided the limits allow it.
+///
+/// Workers that are registered and workers whose spawn is still in flight both count towards the
+/// limit. `critical` selects the hard capacity instead of the soft one.
+async fn spawn_extra_worker(queue: &mut Queue, critical: bool) -> Result<(), Fatal> {
+	let occupied = queue.workers.len() + queue.spawn_inflight;
+	if !queue.limits.can_afford_one_more(occupied, critical) {
+		return Ok(());
+	}
+
+	queue.spawn_inflight += 1;
+	send_pool(&mut queue.to_pool_tx, pool::ToPool::Spawn).await
+}
+
+/// Attaches the job to the given worker and tells the pool to start working on it.
+///
+/// Both `job` and `worker` must be registered; unknown keys make the indexing below panic.
+async fn assign(queue: &mut Queue, worker: Worker, job: Job) -> Result<(), Fatal> {
+	// Link the job to the worker and grab what the pool needs to know about the PVF.
+	let pvf = {
+		let job_data = &mut queue.jobs[job];
+		job_data.worker = Some(worker);
+		job_data.pvf.clone()
+	};
+
+	// ... and the worker to the job.
+	queue.workers[worker].job = Some(job);
+
+	let cache_path = queue.cache_path.clone();
+	send_pool(&mut queue.to_pool_tx, pool::ToPool::StartWork { worker, pvf, cache_path }).await
+}
+
+/// Sends a response to the host; a send that fails is reported as [`Fatal`].
+fn reply(from_queue_tx: &mut mpsc::UnboundedSender<FromQueue>, m: FromQueue) -> Result<(), Fatal> {
+	match from_queue_tx.unbounded_send(m) {
+		Ok(()) => Ok(()),
+		// The host has hung up and thus it's fatal and we should shutdown ourselves.
+		Err(_) => Err(Fatal),
+	}
+}
+
+/// Sends a command to the pool, waiting for channel capacity if needed; a send that fails is
+/// reported as [`Fatal`].
+async fn send_pool(
+	to_pool_tx: &mut mpsc::Sender<pool::ToPool>,
+	m: pool::ToPool,
+) -> Result<(), Fatal> {
+	match to_pool_tx.send(m).await {
+		Ok(()) => Ok(()),
+		// The pool has hung up and thus we are no longer able to fulfill our duties. Shutdown.
+		Err(_) => Err(Fatal),
+	}
+}
+
+/// Spins up the queue.
+///
+/// Returns the sender for requests to the queue, the receiver for the queue's responses and the
+/// future that has to be polled for the queue to make any progress.
+pub fn start(
+	metrics: Metrics,
+	soft_capacity: usize,
+	hard_capacity: usize,
+	cache_path: PathBuf,
+	to_pool_tx: mpsc::Sender<pool::ToPool>,
+	from_pool_rx: mpsc::UnboundedReceiver<pool::FromPool>,
+) -> (mpsc::Sender<ToQueue>, mpsc::UnboundedReceiver<FromQueue>, impl Future<Output = ()>) {
+	let (to_queue_tx, to_queue_rx) = mpsc::channel(150);
+	let (from_queue_tx, from_queue_rx) = mpsc::unbounded();
+
+	let queue = Queue::new(
+		metrics,
+		soft_capacity,
+		hard_capacity,
+		cache_path,
+		to_queue_rx,
+		from_queue_tx,
+		to_pool_tx,
+		from_pool_rx,
+	);
+
+	(to_queue_tx, from_queue_rx, queue.run())
+}
+
+#[cfg(test)]
+mod tests {
+	use super::*;
+	use crate::host::tests::TEST_PREPARATION_TIMEOUT;
+	use assert_matches::assert_matches;
+	use futures::{future::BoxFuture, FutureExt};
+	use pezkuwi_node_core_pvf_common::{error::PrepareError, prepare::PrepareSuccess};
+	use slotmap::SlotMap;
+	use std::task::Poll;
+
+	/// Creates a new PVF whose artifact id can be uniquely identified by the given number.
+	fn pvf(discriminator: u32) -> PvfPrepData {
+		PvfPrepData::from_discriminator(discriminator)
+	}
+
+	/// Polls `fut` and `task` in turns until `fut` resolves, and returns its output.
+	///
+	/// `task` is the future that drives the code under test; it is not expected to finish.
+	///
+	/// # Panics
+	///
+	/// Panics if `fut` has not resolved after one second, or if `task` finishes first.
+	async fn run_until<R>(
+		task: &mut (impl Future<Output = ()> + Unpin),
+		mut fut: (impl Future<Output = R> + Unpin),
+	) -> R {
+		let start = std::time::Instant::now();
+		let fut = &mut fut;
+		loop {
+			if start.elapsed() > std::time::Duration::from_secs(1) {
+				// We expect that this will take only a couple of iterations and thus to take way
+				// less than a second.
+				panic!("timeout");
+			}
+
+			if let Poll::Ready(r) = futures::poll!(&mut *fut) {
+				break r;
+			}
+
+			if futures::poll!(&mut *task).is_ready() {
+				panic!("the driven task finished before the awaited future resolved")
+			}
+		}
+	}
+
+	/// Test harness: owns the queue future plus the far ends of all its channels, so a test can
+	/// play both the host and the pool.
+	struct Test {
+		// Kept only so that the temporary directory lives as long as the harness.
+		_tempdir: tempfile::TempDir,
+		// The queue's main loop; it only makes progress while a helper polls it.
+		run: BoxFuture<'static, ()>,
+		// Used to mint fresh `Worker` keys for fake workers.
+		workers: SlotMap<Worker, ()>,
+		// Pool side: notifications to the queue and commands from it.
+		from_pool_tx: mpsc::UnboundedSender<pool::FromPool>,
+		to_pool_rx: mpsc::Receiver<pool::ToPool>,
+		// Host side: requests to the queue and responses from it.
+		to_queue_tx: mpsc::Sender<ToQueue>,
+		from_queue_rx: mpsc::UnboundedReceiver<FromQueue>,
+	}
+
+	impl Test {
+		/// Starts a queue with the given capacities, using a fresh temporary directory as the
+		/// cache path and default metrics.
+		fn new(soft_capacity: usize, hard_capacity: usize) -> Self {
+			let tempdir = tempfile::tempdir().unwrap();
+
+			let (to_pool_tx, to_pool_rx) = mpsc::channel(10);
+			let (from_pool_tx, from_pool_rx) = mpsc::unbounded();
+
+			let workers: SlotMap<Worker, ()> = SlotMap::with_key();
+
+			let (to_queue_tx, from_queue_rx, run) = start(
+				Metrics::default(),
+				soft_capacity,
+				hard_capacity,
+				tempdir.path().to_path_buf(),
+				to_pool_tx,
+				from_pool_rx,
+			);
+
+			Self {
+				_tempdir: tempdir,
+				run: run.boxed(),
+				workers,
+				from_pool_tx,
+				to_pool_rx,
+				to_queue_tx,
+				from_queue_rx,
+			}
+		}
+
+		/// Sends a request to the queue; panics if the send cannot complete immediately.
+		fn send_queue(&mut self, to_queue: ToQueue) {
+			self.to_queue_tx.send(to_queue).now_or_never().unwrap().unwrap();
+		}
+
+		/// Drives the queue until it emits a response to the host and returns that response.
+		async fn poll_and_recv_from_queue(&mut self) -> FromQueue {
+			let from_queue_rx = &mut self.from_queue_rx;
+			run_until(&mut self.run, async { from_queue_rx.next().await.unwrap() }.boxed()).await
+		}
+
+		/// Sends a pool notification to the queue; panics if the send cannot complete
+		/// immediately.
+		fn send_from_pool(&mut self, from_pool: pool::FromPool) {
+			self.from_pool_tx.send(from_pool).now_or_never().unwrap().unwrap();
+		}
+
+		/// Drives the queue until it emits a command for the pool and returns that command.
+		async fn poll_and_recv_to_pool(&mut self) -> pool::ToPool {
+			let to_pool_rx = &mut self.to_pool_rx;
+			run_until(&mut self.run, async { to_pool_rx.next().await.unwrap() }.boxed()).await
+		}
+
+		/// Drives the queue for 500 ms and panics if it emits any command for the pool meanwhile.
+		async fn poll_ensure_to_pool_is_empty(&mut self) {
+			use futures_timer::Delay;
+
+			let to_pool_rx = &mut self.to_pool_rx;
+			run_until(
+				&mut self.run,
+				async {
+					futures::select! {
+						_ = Delay::new(Duration::from_millis(500)).fuse() => (),
+						_ = to_pool_rx.next().fuse() => {
+							panic!("to pool supposed to be empty")
+						}
+					}
+				}
+				.boxed(),
+			)
+			.await
+		}
+	}
+
+	/// A single job: the queue requests a worker, and once the fake pool reports the worker as
+	/// spawned and the job as concluded, the host receives a response for the right artifact.
+	#[tokio::test]
+	async fn properly_concludes() {
+		let mut test = Test::new(2, 2);
+
+		test.send_queue(ToQueue::Enqueue { priority: Priority::Normal, pvf: pvf(1) });
+		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
+
+		let w = test.workers.insert(());
+		test.send_from_pool(pool::FromPool::Spawned(w));
+		test.send_from_pool(pool::FromPool::Concluded {
+			worker: w,
+			rip: false,
+			result: Ok(PrepareSuccess::default()),
+		});
+
+		assert_eq!(
+			test.poll_and_recv_from_queue().await.artifact_id,
+			ArtifactId::from_pvf_prep_data(&pvf(1))
+		);
+	}
+
+	/// With soft capacity 2 and hard capacity 3, three normal jobs lead to only two spawn
+	/// requests; a later critical job is allowed to trigger a third one.
+	#[tokio::test]
+	async fn dont_spawn_over_soft_limit_unless_critical() {
+		let mut test = Test::new(2, 3);
+
+		let priority = Priority::Normal;
+		test.send_queue(ToQueue::Enqueue { priority, pvf: PvfPrepData::from_discriminator(1) });
+		test.send_queue(ToQueue::Enqueue { priority, pvf: PvfPrepData::from_discriminator(2) });
+		// Start a non-precheck preparation for this one.
+		test.send_queue(ToQueue::Enqueue {
+			priority,
+			pvf: PvfPrepData::from_discriminator_and_timeout(3, TEST_PREPARATION_TIMEOUT * 3),
+		});
+
+		// Receive only two spawns.
+		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
+		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
+
+		let w1 = test.workers.insert(());
+		let w2 = test.workers.insert(());
+
+		test.send_from_pool(pool::FromPool::Spawned(w1));
+		test.send_from_pool(pool::FromPool::Spawned(w2));
+
+		// Get two start works.
+		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
+		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
+
+		test.send_from_pool(pool::FromPool::Concluded {
+			worker: w1,
+			rip: false,
+			result: Ok(PrepareSuccess::default()),
+		});
+
+		// The freed worker immediately picks up the third job.
+		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
+
+		// Enqueue a critical job.
+		test.send_queue(ToQueue::Enqueue {
+			priority: Priority::Critical,
+			pvf: PvfPrepData::from_discriminator(4),
+		});
+
+		// 2 out of 2 are working, but there is a critical job incoming. That means that spawning
+		// another worker is warranted.
+		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
+	}
+
+	/// When the number of workers (including in-flight spawns) exceeds the soft capacity, a
+	/// worker that finishes its job is killed instead of being reused.
+	#[tokio::test]
+	async fn cull_unwanted() {
+		let mut test = Test::new(1, 2);
+
+		test.send_queue(ToQueue::Enqueue {
+			priority: Priority::Normal,
+			pvf: PvfPrepData::from_discriminator(1),
+		});
+		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
+		let w1 = test.workers.insert(());
+		test.send_from_pool(pool::FromPool::Spawned(w1));
+		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
+
+		// Enqueue a critical job, which warrants spawning over the soft limit.
+		test.send_queue(ToQueue::Enqueue {
+			priority: Priority::Critical,
+			pvf: PvfPrepData::from_discriminator(2),
+		});
+		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
+
+		// However, before the new worker had a chance to spawn, the first worker finishes with its
+		// job. The old worker will be killed while the new worker will be let live, even though
+		// it's not instantiated.
+		//
+		// That's a bit silly in this context, but in production there will be an entire pool up
+		// to the `soft_capacity` of workers and it doesn't matter which one to cull. Either way,
+		// we just check that edge case of an edge case works.
+		test.send_from_pool(pool::FromPool::Concluded {
+			worker: w1,
+			rip: false,
+			result: Ok(PrepareSuccess::default()),
+		});
+		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Kill(w1));
+	}
+
+	/// If a worker concludes with `rip: true` while jobs are still waiting, the queue requests a
+	/// replacement worker and still reports the concluded job to the host.
+	#[tokio::test]
+	async fn worker_mass_die_out_doesnt_stall_queue() {
+		let mut test = Test::new(2, 2);
+
+		let priority = Priority::Normal;
+		test.send_queue(ToQueue::Enqueue { priority, pvf: PvfPrepData::from_discriminator(1) });
+		test.send_queue(ToQueue::Enqueue { priority, pvf: PvfPrepData::from_discriminator(2) });
+		// Start a non-precheck preparation for this one.
+		test.send_queue(ToQueue::Enqueue {
+			priority,
+			pvf: PvfPrepData::from_discriminator_and_timeout(3, TEST_PREPARATION_TIMEOUT * 3),
+		});
+
+		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
+		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
+
+		let w1 = test.workers.insert(());
+		let w2 = test.workers.insert(());
+
+		test.send_from_pool(pool::FromPool::Spawned(w1));
+		test.send_from_pool(pool::FromPool::Spawned(w2));
+
+		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
+		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
+
+		// Conclude worker 1 and rip it.
+		test.send_from_pool(pool::FromPool::Concluded {
+			worker: w1,
+			rip: true,
+			result: Ok(PrepareSuccess::default()),
+		});
+
+		// Since there is still work, the queue requested one extra worker to spawn to handle the
+		// remaining enqueued work items.
+		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
+		assert_eq!(
+			test.poll_and_recv_from_queue().await.artifact_id,
+			ArtifactId::from_pvf_prep_data(&pvf(1))
+		);
+	}
+
+	/// If a worker concludes with `rip: true` and no jobs are waiting, the queue sends nothing
+	/// further to the pool (in particular, no spawn request).
+	#[tokio::test]
+	async fn doesnt_resurrect_ripped_worker_if_no_work() {
+		let mut test = Test::new(2, 2);
+
+		test.send_queue(ToQueue::Enqueue {
+			priority: Priority::Normal,
+			pvf: PvfPrepData::from_discriminator(1),
+		});
+
+		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
+
+		let w1 = test.workers.insert(());
+		test.send_from_pool(pool::FromPool::Spawned(w1));
+
+		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
+
+		test.send_from_pool(pool::FromPool::Concluded {
+			worker: w1,
+			rip: true,
+			result: Err(PrepareError::IoErr("test".into())),
+		});
+		test.poll_ensure_to_pool_is_empty().await;
+	}
+
+	/// A worker that is reported dead while it still has a job assigned: the queue requests a
+	/// replacement worker and hands the job to it.
+	#[tokio::test]
+	async fn rip_for_start_work() {
+		let mut test = Test::new(2, 2);
+
+		test.send_queue(ToQueue::Enqueue {
+			priority: Priority::Normal,
+			pvf: PvfPrepData::from_discriminator(1),
+		});
+
+		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
+
+		let w1 = test.workers.insert(());
+		test.send_from_pool(pool::FromPool::Spawned(w1));
+
+		// Now, to the interesting part. After the queue normally issues the `start_work` command to
+		// the pool, before receiving the command the pool may report that the worker ripped.
+		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
+		test.send_from_pool(pool::FromPool::Rip(w1));
+
+		// In this case, the queue should request a new worker and ask it to work on the item.
+		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
+
+		let w2 = test.workers.insert(());
+		test.send_from_pool(pool::FromPool::Spawned(w2));
+		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
+	}
+}
@@ -0,0 +1,376 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Host interface to the prepare worker.
+
+use crate::{
+	artifacts::generate_artifact_path,
+	metrics::Metrics,
+	worker_interface::{
+		clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker,
+		SpawnErr, WorkerDir, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
+	},
+	LOG_TARGET,
+};
+use codec::{Decode, Encode};
+use pezkuwi_node_core_pvf_common::{
+	error::{PrepareError, PrepareResult, PrepareWorkerResult},
+	prepare::{PrepareStats, PrepareSuccess, PrepareWorkerSuccess},
+	pvf::PvfPrepData,
+	worker_dir, SecurityStatus,
+};
+
+use sp_core::hexdisplay::HexDisplay;
+use std::{
+	path::{Path, PathBuf},
+	time::Duration,
+};
+use tokio::{io, net::UnixStream};
+
+/// Spawns a new prepare worker from the binary at `program_path`.
+///
+/// The worker is launched with the `prepare-worker` subcommand. When `node_version` is given it is
+/// forwarded as `--node-impl-version <version>`. All other parameters are passed on unchanged to
+/// [`spawn_with_program_path`].
+///
+/// Sends a handshake message to the worker as soon as it is spawned.
+pub async fn spawn(
+	program_path: &Path,
+	cache_path: &Path,
+	spawn_timeout: Duration,
+	node_version: Option<&str>,
+	security_status: SecurityStatus,
+) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
+	// The subcommand always comes first, optionally followed by the version flag and its value.
+	let extra_args: Vec<&str> = std::iter::once("prepare-worker")
+		.chain(node_version.into_iter().flat_map(|version| ["--node-impl-version", version]))
+		.collect();
+
+	spawn_with_program_path(
+		"prepare",
+		program_path,
+		cache_path,
+		&extra_args,
+		spawn_timeout,
+		security_status,
+	)
+	.await
+}
+
+/// Outcome of PVF preparation.
+///
+/// If the idle worker token is not returned, it means the worker must be terminated.
+pub enum Outcome {
+	/// The worker has finished the work assigned to it.
+	///
+	/// `result` carries either the successful preparation or the error reported for the job.
+	Concluded { worker: IdleWorker, result: PrepareResult },
+	/// The host tried to reach the worker but failed. This is most likely because the worker was
+	/// killed by the system.
+	Unreachable,
+	/// The temporary file for the artifact could not be created at the given cache path.
+	CreateTmpFileErr { worker: IdleWorker, err: String },
+	/// The response from the worker is received, but the tmp file cannot be renamed (moved) to the
+	/// final destination location.
+	RenameTmpFile {
+		worker: IdleWorker,
+		/// Debug rendering of the rename error.
+		err: String,
+		// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
+		// conversion to `Option<String>`.
+		/// Path of the temporary file, if it is valid UTF-8.
+		src: Option<String>,
+		/// Path of the final artifact location, if it is valid UTF-8.
+		dest: Option<String>,
+	},
+	/// The worker cache could not be cleared for the given reason.
+	ClearWorkerDir { err: String },
+	/// The worker failed to finish the job until the given deadline.
+	///
+	/// The worker is no longer usable and should be killed.
+	TimedOut,
+	/// An IO error occurred while receiving the result from the worker process.
+	///
+	/// This doesn't return an idle worker instance, thus this worker is no longer usable.
+	IoErr(String),
+	/// The worker ran out of memory and is aborting. The worker should be ripped.
+	OutOfMemory,
+	/// The preparation job process died, due to OOM, a seccomp violation, or some other factor.
+	///
+	/// The worker might still be usable, but we kill it just in case.
+	JobDied { err: String, job_pid: i32 },
+}
+
+/// Given the idle token of a worker and parameters of work, communicates with the worker and
+/// returns the outcome.
+///
+/// The exchange runs inside [`with_worker_dir_setup`], which creates the temporary artifact file
+/// before the request is sent and clears the worker directory once the exchange is over.
+///
+/// NOTE: Returning the `TimedOut`, `IoErr` or `Unreachable` outcomes will trigger the child process
+/// being killed.
+pub async fn start_work(
+	metrics: &Metrics,
+	worker: IdleWorker,
+	pvf: PvfPrepData,
+	cache_path: PathBuf,
+) -> Outcome {
+	// Take the idle worker apart; it is reassembled only if the job produces a response.
+	let IdleWorker { stream, pid, worker_dir } = worker;
+
+	gum::debug!(
+		target: LOG_TARGET,
+		worker_pid = %pid,
+		?worker_dir,
+		"starting prepare for {:?}",
+		pvf,
+	);
+
+	with_worker_dir_setup(
+		worker_dir,
+		stream,
+		pid,
+		|tmp_artifact_file, mut stream, worker_dir| async move {
+			let preparation_timeout = pvf.prep_timeout();
+
+			// Failing to even send the request means the worker cannot be reached.
+			if let Err(err) = send_request(&mut stream, &pvf).await {
+				gum::warn!(
+					target: LOG_TARGET,
+					worker_pid = %pid,
+					"failed to send a prepare request: {:?}",
+					err,
+				);
+				return Outcome::Unreachable;
+			}
+
+			// Wait for the result from the worker, keeping in mind that there may be a timeout, the
+			// worker may get killed, or something along these lines. In that case we should
+			// propagate the error to the pool.
+			//
+			// We use a generous timeout here. This is in addition to the one in the child process,
+			// in case the child stalls. We have a wall clock timeout here in the host, but a CPU
+			// timeout in the child. We want to use CPU time because it varies less than wall clock
+			// time under load, but the CPU resources of the child can only be measured from the
+			// parent after the child process terminates.
+			let timeout = preparation_timeout * JOB_TIMEOUT_WALL_CLOCK_FACTOR;
+			let result = tokio::time::timeout(timeout, recv_response(&mut stream, pid)).await;
+
+			// The outer `Result` reports the host-side timeout, the inner one the communication
+			// with the worker.
+			match result {
+				// Received bytes from worker within the time limit.
+				Ok(Ok(prepare_worker_result)) =>
+					handle_response(
+						metrics,
+						IdleWorker { stream, pid, worker_dir },
+						prepare_worker_result,
+						pid,
+						tmp_artifact_file,
+						&cache_path,
+						preparation_timeout,
+					)
+					.await,
+				Ok(Err(err)) => {
+					// Communication error within the time limit.
+					gum::warn!(
+						target: LOG_TARGET,
+						worker_pid = %pid,
+						"failed to recv a prepare response: {}",
+						err,
+					);
+					Outcome::IoErr(err.to_string())
+				},
+				Err(_) => {
+					// Timed out here on the host.
+					gum::warn!(
+						target: LOG_TARGET,
+						worker_pid = %pid,
+						"did not recv a prepare response within the time limit",
+					);
+					Outcome::TimedOut
+				},
+			}
+		},
+	)
+	.await
+}
+
+/// Handles the case where we successfully received response bytes on the host from the child.
+///
+/// Here we know the artifact exists, but is still located in a temporary file which will be cleared
+/// by [`with_worker_dir_setup`].
+///
+/// On success the temporary file is renamed to its final location inside `cache_path` and the
+/// outcome carries the artifact path, its size in bytes and the preparation statistics.
+///
+/// # Parameters
+///
+/// - `metrics`: Receives the observed code size and the memory statistics of the job.
+/// - `worker`: The idle worker token; returned inside the outcome when the worker stays usable.
+/// - `result`: The decoded response of the worker.
+/// - `worker_pid`: Process id of the worker, used for logging only.
+/// - `tmp_file`: The temporary file the artifact is expected to be in.
+/// - `cache_path`: The artifact cache directory the artifact is promoted into.
+/// - `preparation_timeout`: The CPU time budget of the job.
+async fn handle_response(
+	metrics: &Metrics,
+	worker: IdleWorker,
+	result: PrepareWorkerResult,
+	worker_pid: u32,
+	tmp_file: PathBuf,
+	cache_path: &Path,
+	preparation_timeout: Duration,
+) -> Outcome {
+	// TODO: Add `checksum` to `ArtifactPathId`. See:
+	//       https://github.com/pezkuwichain/pezkuwi-sdk/issues/122
+	let PrepareWorkerSuccess {
+		checksum,
+		stats: PrepareStats { cpu_time_elapsed, memory_stats, observed_wasm_code_len },
+	} = match result {
+		Ok(result) => result,
+		// Timed out on the child. This should already be logged by the child.
+		Err(PrepareError::TimedOut) => return Outcome::TimedOut,
+		Err(PrepareError::JobDied { err, job_pid }) => return Outcome::JobDied { err, job_pid },
+		Err(PrepareError::OutOfMemory) => return Outcome::OutOfMemory,
+		Err(err) => return Outcome::Concluded { worker, result: Err(err) },
+	};
+
+	metrics.observe_code_size(observed_wasm_code_len as usize);
+
+	if cpu_time_elapsed > preparation_timeout {
+		// The job didn't complete within the timeout.
+		gum::warn!(
+			target: LOG_TARGET,
+			%worker_pid,
+			"prepare job took {}ms cpu time, exceeded preparation timeout {}ms. Clearing WIP artifact {}",
+			cpu_time_elapsed.as_millis(),
+			preparation_timeout.as_millis(),
+			tmp_file.display(),
+		);
+		return Outcome::TimedOut;
+	}
+
+	// Measure the artifact itself. It still lives in the temporary file at this point; the metadata
+	// of `cache_path` would describe the cache directory rather than the artifact.
+	let size = match tokio::fs::metadata(&tmp_file).await {
+		Ok(metadata) => metadata.len(),
+		Err(err) => {
+			gum::warn!(
+				target: LOG_TARGET,
+				?tmp_file,
+				"failed to read size of the artifact: {}",
+				err,
+			);
+			return Outcome::IoErr(err.to_string());
+		},
+	};
+
+	// The file name should uniquely identify the artifact even across restarts. In case the cache
+	// for some reason is not cleared correctly, we cannot
+	// accidentally execute an artifact compiled under a different wasmtime version, host
+	// environment, etc.
+	let artifact_path = generate_artifact_path(cache_path);
+
+	gum::debug!(
+		target: LOG_TARGET,
+		%worker_pid,
+		"promoting WIP artifact {} to {}",
+		tmp_file.display(),
+		artifact_path.display(),
+	);
+
+	let outcome = match tokio::fs::rename(&tmp_file, &artifact_path).await {
+		Ok(()) => Outcome::Concluded {
+			worker,
+			result: Ok(PrepareSuccess {
+				checksum,
+				path: artifact_path,
+				size,
+				stats: PrepareStats {
+					cpu_time_elapsed,
+					memory_stats: memory_stats.clone(),
+					observed_wasm_code_len,
+				},
+			}),
+		},
+		Err(err) => {
+			gum::warn!(
+				target: LOG_TARGET,
+				%worker_pid,
+				"failed to rename the artifact from {} to {}: {:?}",
+				tmp_file.display(),
+				artifact_path.display(),
+				err,
+			);
+			Outcome::RenameTmpFile {
+				worker,
+				err: format!("{:?}", err),
+				src: tmp_file.to_str().map(String::from),
+				dest: artifact_path.to_str().map(String::from),
+			}
+		},
+	};
+
+	// Log the memory stats of the preparation, if available. Note that this point is reached
+	// whether or not the rename above succeeded.
+	metrics.observe_preparation_memory_metrics(memory_stats);
+
+	outcome
+}
+
+/// Runs `f` with a freshly created temporary artifact file and clears the worker cache afterwards.
+///
+/// The temporary file is created on the host so that the child doesn't need any file creation
+/// rights. If it cannot be created, `f` is never called and the worker is handed back inside
+/// [`Outcome::CreateTmpFileErr`].
+///
+/// Failure to clean up the worker cache results in an error - leaving any files here could be a
+/// security issue, and we should shut down the worker. This should be very rare.
+async fn with_worker_dir_setup<F, Fut>(
+	worker_dir: WorkerDir,
+	stream: UnixStream,
+	pid: u32,
+	f: F,
+) -> Outcome
+where
+	Fut: futures::Future<Output = Outcome>,
+	F: FnOnce(PathBuf, UnixStream, WorkerDir) -> Fut,
+{
+	// This file is removed again when the worker dir is cleared at the end of this function.
+	let tmp_file = worker_dir::prepare_tmp_artifact(worker_dir.path());
+	match tokio::fs::File::create(&tmp_file).await {
+		Ok(_) => {},
+		Err(err) => {
+			gum::warn!(
+				target: LOG_TARGET,
+				worker_pid = %pid,
+				?worker_dir,
+				"failed to create a temp file for the artifact: {:?}",
+				err,
+			);
+			return Outcome::CreateTmpFileErr {
+				worker: IdleWorker { stream, pid, worker_dir },
+				err: format!("{:?}", err),
+			};
+		},
+	}
+
+	// Remember the path now: `worker_dir` itself is moved into `f`.
+	let worker_dir_path = worker_dir.path().to_owned();
+	let outcome = f(tmp_file, stream, worker_dir).await;
+
+	// A cleanup failure takes precedence over whatever the job itself reported.
+	match clear_worker_dir_path(&worker_dir_path) {
+		Ok(_) => outcome,
+		Err(err) => {
+			gum::warn!(
+				target: LOG_TARGET,
+				worker_pid = %pid,
+				?worker_dir_path,
+				"failed to clear worker cache after the job: {:?}",
+				err,
+			);
+			Outcome::ClearWorkerDir { err: format!("{:?}", err) }
+		},
+	}
+}
+
+/// Encodes `pvf` and sends it to the worker as a single framed message.
+async fn send_request(stream: &mut UnixStream, pvf: &PvfPrepData) -> io::Result<()> {
+	let payload = pvf.encode();
+	framed_send(stream, &payload).await?;
+	Ok(())
+}
+
+/// Receives one framed message from the worker and decodes it as a [`PrepareWorkerResult`].
+///
+/// # Errors
+///
+/// Propagates the error of the framed read. A payload that cannot be decoded is logged (first four
+/// bytes at most) and reported as an `io::Error` of kind `Other`.
+async fn recv_response(stream: &mut UnixStream, pid: u32) -> io::Result<PrepareWorkerResult> {
+	let raw = framed_recv(stream).await?;
+	match PrepareWorkerResult::decode(&mut &raw[..]) {
+		Ok(response) => Ok(response),
+		Err(decode_err) => {
+			// We received invalid bytes from the worker.
+			let bound_bytes = &raw[..raw.len().min(4)];
+			gum::warn!(
+				target: LOG_TARGET,
+				worker_pid = %pid,
+				"received unexpected response from the prepare worker: {}",
+				HexDisplay::from(&bound_bytes),
+			);
+			Err(io::Error::new(
+				io::ErrorKind::Other,
+				format!("prepare pvf recv_response: failed to decode result: {:?}", decode_err),
+			))
+		},
+	}
+}
@@ -0,0 +1,50 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+use pezkuwi_node_subsystem::messages::PvfExecKind;
+
+/// A priority assigned to preparation of a PVF.
+///
+/// The derived ordering follows the declaration order of the variants, so `Normal < Critical`.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub enum Priority {
+	/// Normal priority for things that do not require immediate response, but still need to be
+	/// done pretty quick.
+	///
+	/// Backing falls into this category.
+	Normal,
+	/// This priority is used for requests that are required to be processed as soon as possible.
+	///
+	/// Disputes and approvals are on a critical path and require execution as soon as
+	/// possible to not delay finality.
+	Critical,
+}
+
+impl Priority {
+	/// Tells whether this priority is [`Priority::Critical`].
+	pub fn is_critical(self) -> bool {
+		matches!(self, Priority::Critical)
+	}
+}
+
+impl From<PvfExecKind> for Priority {
+	/// Maps an execution kind to the priority of the corresponding preparation: disputes and
+	/// approvals are critical, both kinds of backing are normal.
+	fn from(exec_kind: PvfExecKind) -> Self {
+		match exec_kind {
+			PvfExecKind::Dispute | PvfExecKind::Approval => Priority::Critical,
+			PvfExecKind::BackingSystemParas(_) | PvfExecKind::Backing(_) => Priority::Normal,
+		}
+	}
+}
@@ -0,0 +1,379 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+use crate::{Config, SecurityStatus, LOG_TARGET};
+use futures::join;
+use std::{fmt, path::Path};
+
+/// Probes which security features are supported and decides whether the node may proceed.
+///
+/// The four probes (landlock, seccomp, unshare-and-change-root, secure clone) are polled
+/// concurrently, each one against the prepare worker binary.
+///
+/// # Returns
+///
+/// Returns the set of security features that we were able to enable. If an error occurs while
+/// enabling a security feature the corresponding status is `false`.
+///
+/// # Errors
+///
+/// Returns an error only if we could not fully enforce the security level required by the current
+/// configuration.
+pub async fn check_security_status(config: &Config) -> Result<SecurityStatus, String> {
+	let Config { prepare_worker_program_path, secure_validator_mode, cache_path, .. } = config;
+
+	let (landlock, seccomp, change_root, secure_clone) = join!(
+		check_landlock(prepare_worker_program_path),
+		check_seccomp(prepare_worker_program_path),
+		check_can_unshare_user_namespace_and_change_root(prepare_worker_program_path, cache_path),
+		check_can_do_secure_clone(prepare_worker_program_path),
+	);
+
+	let full_status = FullSecurityStatus::new(
+		*secure_validator_mode,
+		landlock,
+		seccomp,
+		change_root,
+		secure_clone,
+	);
+
+	// Any failed probe is reported; only a failure that is not tolerated aborts.
+	if full_status.err_occurred() {
+		print_secure_mode_error_or_warning(&full_status);
+		if !full_status.all_errs_allowed() {
+			return Err("could not enable Secure Validator Mode; check logs".into());
+		}
+	}
+
+	let security_status = full_status.as_partial();
+	if security_status.secure_validator_mode {
+		gum::info!(
+			target: LOG_TARGET,
+			"👮‍♀️ Running in Secure Validator Mode. \
+			 It is highly recommended that you operate according to our security guidelines. \
+			 \nMore information: https://wiki.network.pezkuwichain.io/docs/maintain-guides-secure-validator#secure-validator-mode"
+		);
+	}
+
+	Ok(security_status)
+}
+
+/// Contains the full security status including error states.
+struct FullSecurityStatus {
+	/// Which security features can be enabled, plus whether Secure Validator Mode is on.
+	partial: SecurityStatus,
+	/// The errors of all security checks that failed.
+	errs: Vec<SecureModeError>,
+}
+
+impl FullSecurityStatus {
+	/// Builds the status from the results of the individual security checks.
+	///
+	/// A feature counts as available exactly when its check returned `Ok`; the errors of the
+	/// failed checks are collected in the order landlock, seccomp, change root, secure clone.
+	fn new(
+		secure_validator_mode: bool,
+		landlock: SecureModeResult,
+		seccomp: SecureModeResult,
+		change_root: SecureModeResult,
+		secure_clone: SecureModeResult,
+	) -> Self {
+		let partial = SecurityStatus {
+			secure_validator_mode,
+			can_enable_landlock: landlock.is_ok(),
+			can_enable_seccomp: seccomp.is_ok(),
+			can_unshare_user_namespace_and_change_root: change_root.is_ok(),
+			can_do_secure_clone: secure_clone.is_ok(),
+		};
+
+		let mut errs = Vec::new();
+		for outcome in [landlock, seccomp, change_root, secure_clone] {
+			if let Err(err) = outcome {
+				errs.push(err);
+			}
+		}
+
+		Self { partial, errs }
+	}
+
+	/// Returns a copy of the status without the error details.
+	fn as_partial(&self) -> SecurityStatus {
+		self.partial.clone()
+	}
+
+	/// Whether at least one security check failed.
+	fn err_occurred(&self) -> bool {
+		!self.errs.is_empty()
+	}
+
+	/// Whether every recorded error is tolerated. Outside of Secure Validator Mode this is always
+	/// the case.
+	fn all_errs_allowed(&self) -> bool {
+		if !self.partial.secure_validator_mode {
+			return true;
+		}
+		self.errs.iter().all(|err| err.is_allowed_in_secure_mode(&self.partial))
+	}
+
+	/// Renders all errors as a bullet list, one per line, marking the tolerated ones as optional.
+	fn errs_string(&self) -> String {
+		let mut rendered = String::new();
+		for err in &self.errs {
+			let prefix =
+				if err.is_allowed_in_secure_mode(&self.partial) { "Optional: " } else { "" };
+			rendered.push_str(&format!("\n  - {}{}", prefix, err));
+		}
+		rendered
+	}
+}
+
+/// Result of a single security check: `Ok(())` if the feature is available, otherwise the reason
+/// why it is not.
+type SecureModeResult = std::result::Result<(), SecureModeError>;
+
+/// Errors related to enabling Secure Validator Mode.
+#[derive(Debug)]
+enum SecureModeError {
+	/// Landlock could not be enabled; `abi` is the landlock ABI version that was checked.
+	CannotEnableLandlock { err: String, abi: u8 },
+	/// Seccomp could not be enabled.
+	CannotEnableSeccomp(String),
+	/// The user namespace could not be unshared or the root could not be changed.
+	CannotUnshareUserNamespaceAndChangeRoot(String),
+	/// `clone` could not be called with all sandboxing flags.
+	CannotDoSecureClone(String),
+}
+
+impl SecureModeError {
+	/// Tells whether this error may be tolerated while Secure Validator Mode is enabled, given
+	/// which other features are available according to `security_status`.
+	fn is_allowed_in_secure_mode(&self, security_status: &SecurityStatus) -> bool {
+		match self {
+			// Landlock is present on relatively recent Linuxes. This is optional if the unshare
+			// capability is present, providing FS sandboxing a different way.
+			Self::CannotEnableLandlock { .. } =>
+				security_status.can_unshare_user_namespace_and_change_root,
+			// seccomp should be present on all modern Linuxes unless it's been disabled.
+			Self::CannotEnableSeccomp(_) => false,
+			// Should always be present on modern Linuxes. If not, Landlock also provides FS
+			// sandboxing, so don't enforce this.
+			Self::CannotUnshareUserNamespaceAndChangeRoot(_) =>
+				security_status.can_enable_landlock,
+			// We have not determined the kernel requirements for this capability, and it's also not
+			// necessary for FS or networking restrictions.
+			Self::CannotDoSecureClone(_) => true,
+		}
+	}
+}
+
+impl fmt::Display for SecureModeError {
+	/// Writes a one-line, human-readable description of the unavailable security feature followed
+	/// by the underlying error.
+	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+		match self {
+			Self::CannotEnableLandlock { err, abi } => write!(
+				f,
+				"Cannot enable landlock (ABI {abi}), a Linux 5.13+ kernel security feature: {err}"
+			),
+			Self::CannotEnableSeccomp(err) => write!(
+				f,
+				"Cannot enable seccomp, a Linux-specific kernel security feature: {err}"
+			),
+			Self::CannotUnshareUserNamespaceAndChangeRoot(err) => write!(
+				f,
+				"Cannot unshare user namespace and change root, which are Linux-specific kernel security features: {err}"
+			),
+			Self::CannotDoSecureClone(err) => write!(
+				f,
+				"Cannot call clone with all sandboxing flags, a Linux-specific kernel security features: {err}"
+			),
+		}
+	}
+}
+
+/// Print an error if Secure Validator Mode and some mandatory errors occurred, warn otherwise.
+fn print_secure_mode_error_or_warning(security_status: &FullSecurityStatus) {
+	let all_errs_allowed = security_status.all_errs_allowed();
+	let errs_string = security_status.errs_string();
+
+	if all_errs_allowed {
+		gum::warn!(
+			target: LOG_TARGET,
+			"{}{}",
+			crate::SECURE_MODE_WARNING,
+			errs_string,
+		);
+	} else {
+		gum::error!(
+			target: LOG_TARGET,
+			"{}{}{}",
+			crate::SECURE_MODE_ERROR,
+			errs_string,
+			crate::IGNORE_SECURE_MODE_TIP
+		);
+	}
+}
+
+/// Check if we can change root to a new, sandboxed root and return an error if not.
+///
+/// A temporary directory is created inside `cache_path` and passed to the check as an extra
+/// argument. It is kept alive until the check has finished.
+///
+/// We do this check by spawning a new process and trying to sandbox it. To get as close as possible
+/// to running the check in a worker, we try it... in a worker. The expected return status is 0 on
+/// success and -1 on failure.
+async fn check_can_unshare_user_namespace_and_change_root(
+	prepare_worker_program_path: &Path,
+	cache_path: &Path,
+) -> SecureModeResult {
+	let tempdir_result =
+		tempfile::Builder::new().prefix("check-can-unshare-").tempdir_in(cache_path);
+	let cache_dir_tempdir = match tempdir_result {
+		Ok(dir) => dir,
+		Err(err) =>
+			return Err(SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(format!(
+				"could not create a temporary directory in {:?}: {}",
+				cache_path, err
+			))),
+	};
+
+	let outcome = spawn_process_for_security_check(
+		prepare_worker_program_path,
+		"--check-can-unshare-user-namespace-and-change-root",
+		&[cache_dir_tempdir.path()],
+	)
+	.await;
+
+	outcome.map_err(SecureModeError::CannotUnshareUserNamespaceAndChangeRoot)
+}
+
+/// Check if landlock is supported and return an error if not.
+///
+/// The error carries the landlock ABI version (`LANDLOCK_ABI`) next to the failure reason.
+///
+/// We do this check by spawning a new process and trying to sandbox it. To get as close as possible
+/// to running the check in a worker, we try it... in a worker. The expected return status is 0 on
+/// success and -1 on failure.
+async fn check_landlock(prepare_worker_program_path: &Path) -> SecureModeResult {
+	let outcome = spawn_process_for_security_check(
+		prepare_worker_program_path,
+		"--check-can-enable-landlock",
+		std::iter::empty::<&str>(),
+	)
+	.await;
+
+	outcome.map_err(|err| SecureModeError::CannotEnableLandlock {
+		err,
+		abi: pezkuwi_node_core_pvf_common::worker::security::landlock::LANDLOCK_ABI as u8,
+	})
+}
+
+/// Check if seccomp is supported and return an error if not.
+///
+/// This variant is compiled on `x86_64` only.
+///
+/// We do this check by spawning a new process and trying to sandbox it. To get as close as possible
+/// to running the check in a worker, we try it... in a worker. The expected return status is 0 on
+/// success and -1 on failure.
+#[cfg(target_arch = "x86_64")]
+async fn check_seccomp(prepare_worker_program_path: &Path) -> SecureModeResult {
+	let outcome = spawn_process_for_security_check(
+		prepare_worker_program_path,
+		"--check-can-enable-seccomp",
+		std::iter::empty::<&str>(),
+	)
+	.await;
+
+	outcome.map_err(SecureModeError::CannotEnableSeccomp)
+}
+
+/// Variant for every architecture other than `x86_64`: no process is spawned and seccomp is
+/// always reported as unavailable.
+#[cfg(not(target_arch = "x86_64"))]
+async fn check_seccomp(_prepare_worker_program_path: &Path) -> SecureModeResult {
+	let reason = "only supported on CPUs from the x86_64 family (usually Intel or AMD)";
+	Err(SecureModeError::CannotEnableSeccomp(reason.into()))
+}
+
+/// Check if we can call `clone` with all sandboxing flags, and return an error if not.
+///
+/// We do this check by spawning a new process and trying to sandbox it. To get as close as possible
+/// to running the check in a worker, we try it... in a worker. The expected return status is 0 on
+/// success and -1 on failure.
+async fn check_can_do_secure_clone(prepare_worker_program_path: &Path) -> SecureModeResult {
+	let outcome = spawn_process_for_security_check(
+		prepare_worker_program_path,
+		"--check-can-do-secure-clone",
+		std::iter::empty::<&str>(),
+	)
+	.await;
+
+	outcome.map_err(SecureModeError::CannotDoSecureClone)
+}
+
+/// Runs the prepare worker binary with `check_arg` (followed by `extra_args`) and reports whether
+/// it exited successfully.
+///
+/// The child runs with a cleared environment; only `RUST_LOG` is passed through when it is set.
+///
+/// # Errors
+///
+/// Returns a description of the failure: either that the child could not be started, or
+/// `not available`, followed by the trimmed stderr of the child when it wrote anything. Bytes in
+/// stderr that are not valid UTF-8 are replaced rather than causing a panic.
+async fn spawn_process_for_security_check<I, S>(
+	prepare_worker_program_path: &Path,
+	check_arg: &'static str,
+	extra_args: I,
+) -> Result<(), String>
+where
+	I: IntoIterator<Item = S>,
+	S: AsRef<std::ffi::OsStr>,
+{
+	let mut command = tokio::process::Command::new(prepare_worker_program_path);
+	// Clear env vars. (In theory, running checks with different env vars could result in different
+	// outcomes of the checks.)
+	command.env_clear();
+	// Add back any env vars we want to keep.
+	if let Ok(value) = std::env::var("RUST_LOG") {
+		command.env("RUST_LOG", value);
+	}
+
+	match command.arg(check_arg).args(extra_args).output().await {
+		Ok(output) if output.status.success() => Ok(()),
+		Ok(output) => {
+			// The child is expected to write UTF-8, but its output is external data: decode it
+			// lossily so that unexpected bytes cannot bring down the host.
+			let stderr = String::from_utf8_lossy(&output.stderr);
+			let stderr = stderr.trim();
+			if stderr.is_empty() {
+				Err("not available".into())
+			} else {
+				Err(format!("not available: {}", stderr))
+			}
+		},
+		Err(err) => Err(format!("could not start child process: {}", err)),
+	}
+}
+
+#[cfg(test)]
+mod tests {
+	use super::*;
+
+	/// Checks for every error variant under which security statuses it is tolerated in Secure
+	/// Validator Mode.
+	#[test]
+	fn test_secure_mode_error_optionality() {
+		// A landlock failure is tolerated exactly when unshare-and-change-root is available.
+		let err = SecureModeError::CannotEnableLandlock { err: String::new(), abi: 3 };
+		assert!(err.is_allowed_in_secure_mode(&SecurityStatus {
+			secure_validator_mode: true,
+			can_enable_landlock: false,
+			can_enable_seccomp: false,
+			can_unshare_user_namespace_and_change_root: true,
+			can_do_secure_clone: true,
+		}));
+		assert!(!err.is_allowed_in_secure_mode(&SecurityStatus {
+			secure_validator_mode: true,
+			can_enable_landlock: false,
+			can_enable_seccomp: true,
+			can_unshare_user_namespace_and_change_root: false,
+			can_do_secure_clone: false,
+		}));
+
+		// A seccomp failure is never tolerated, whatever else is available.
+		let err = SecureModeError::CannotEnableSeccomp(String::new());
+		assert!(!err.is_allowed_in_secure_mode(&SecurityStatus {
+			secure_validator_mode: true,
+			can_enable_landlock: false,
+			can_enable_seccomp: false,
+			can_unshare_user_namespace_and_change_root: true,
+			can_do_secure_clone: true,
+		}));
+		assert!(!err.is_allowed_in_secure_mode(&SecurityStatus {
+			secure_validator_mode: true,
+			can_enable_landlock: false,
+			can_enable_seccomp: true,
+			can_unshare_user_namespace_and_change_root: false,
+			can_do_secure_clone: false,
+		}));
+
+		// An unshare-and-change-root failure is tolerated exactly when landlock is available.
+		let err = SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(String::new());
+		assert!(err.is_allowed_in_secure_mode(&SecurityStatus {
+			secure_validator_mode: true,
+			can_enable_landlock: true,
+			can_enable_seccomp: false,
+			can_unshare_user_namespace_and_change_root: false,
+			can_do_secure_clone: false,
+		}));
+		assert!(!err.is_allowed_in_secure_mode(&SecurityStatus {
+			secure_validator_mode: true,
+			can_enable_landlock: false,
+			can_enable_seccomp: true,
+			can_unshare_user_namespace_and_change_root: false,
+			can_do_secure_clone: false,
+		}));
+
+		// A secure clone failure is always tolerated.
+		let err = SecureModeError::CannotDoSecureClone(String::new());
+		assert!(err.is_allowed_in_secure_mode(&SecurityStatus {
+			secure_validator_mode: true,
+			can_enable_landlock: true,
+			can_enable_seccomp: true,
+			can_unshare_user_namespace_and_change_root: true,
+			can_do_secure_clone: true,
+		}));
+		assert!(err.is_allowed_in_secure_mode(&SecurityStatus {
+			secure_validator_mode: false,
+			can_enable_landlock: false,
+			can_enable_seccomp: false,
+			can_unshare_user_namespace_and_change_root: false,
+			can_do_secure_clone: false,
+		}));
+	}
+}
@@ -0,0 +1,134 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Various utilities for testing.
+
+pub use crate::{
+	host::{EXECUTE_BINARY_NAME, PREPARE_BINARY_NAME},
+	worker_interface::{spawn_with_program_path, SpawnErr},
+};
+
+use crate::{artifacts::ArtifactId, get_worker_version};
+use is_executable::IsExecutable;
+use pezkuwi_node_core_pvf_common::pvf::PvfPrepData;
+use pezkuwi_node_primitives::NODE_VERSION;
+use pezkuwi_primitives::ExecutorParams;
+use std::{
+	path::PathBuf,
+	sync::{Mutex, OnceLock},
+};
+
+/// Emulates the preparation worker and the execution worker by stitching their behaviors together
+/// in a single synchronous function.
+///
+/// The code is decompressed (up to 10 MiB), prevalidated and prepared with default executor
+/// parameters; the resulting artifact is then executed with `params`.
+///
+/// # Panics
+///
+/// Panics if `code` cannot be decompressed.
+pub fn validate_candidate(
+	code: &[u8],
+	params: &[u8],
+) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
+	use pezkuwi_node_core_pvf_common::executor_interface::{prepare, prevalidate};
+	use pezkuwi_node_core_pvf_execute_worker::execute_artifact;
+
+	let raw_code = sp_maybe_compressed_blob::decompress(code, 10 * 1024 * 1024)
+		.expect("Decompressing code failed");
+
+	let runtime_blob = prevalidate(&raw_code)?;
+	let executor_params = ExecutorParams::default();
+	let artifact = prepare(runtime_blob, &executor_params)?;
+
+	// SAFETY: This is trivially safe since the artifact is obtained by calling `prepare`
+	//         and is passed on in an unmodified state.
+	let output = unsafe { execute_artifact(&artifact, &executor_params, params)? };
+
+	Ok(output)
+}
+
+/// Builds the workers and retrieves their paths.
+///
+/// The first call in a process runs `cargo build` for both worker binaries and caches the paths;
+/// later calls only return the cached paths. Before building, warnings are printed if a worker
+/// binary is missing, not executable, or reports a version different from the node version.
+///
+/// Returns `(prepare_worker_path, execute_worker_path)`. If the build fails, the process exits
+/// with status 1.
+///
+/// NOTE: This should only be called in dev code (tests, benchmarks) as it relies on the relative
+/// paths of the built workers.
+pub fn build_workers_and_get_paths() -> (PathBuf, PathBuf) {
+	// Only needs to be called once for the current process.
+	static WORKER_PATHS: OnceLock<Mutex<(PathBuf, PathBuf)>> = OnceLock::new();
+
+	fn build_workers() {
+		let mut build_args = vec![
+			"build",
+			"--package=pezkuwi",
+			"--bin=pezkuwi-prepare-worker",
+			"--bin=pezkuwi-execute-worker",
+		];
+
+		if cfg!(build_profile = "release") {
+			build_args.push("--release");
+		}
+
+		// NOTE(review): stdout is piped but never read, since only `status()` is awaited below.
+		// Confirm that discarding cargo's stdout is intended.
+		let mut cargo = std::process::Command::new("cargo");
+		let cmd = cargo
+			// wasm runtime not needed
+			.env("SKIP_WASM_BUILD", "1")
+			.args(build_args)
+			.stdout(std::process::Stdio::piped());
+
+		println!("INFO: calling `{cmd:?}`");
+		let exit_status = cmd.status().expect("Failed to run the build program");
+
+		if !exit_status.success() {
+			// There is no exit code when the build was terminated by a signal; print the whole
+			// status in that case instead of panicking.
+			match exit_status.code() {
+				Some(code) => eprintln!("ERROR: Failed to build workers: {}", code),
+				None => eprintln!("ERROR: Failed to build workers: {}", exit_status),
+			}
+			std::process::exit(1);
+		}
+	}
+
+	let mutex = WORKER_PATHS.get_or_init(|| {
+		// The workers are expected two levels above the current executable.
+		let mut workers_path = std::env::current_exe().unwrap();
+		workers_path.pop();
+		workers_path.pop();
+		let mut prepare_worker_path = workers_path.clone();
+		prepare_worker_path.push(PREPARE_BINARY_NAME);
+		let mut execute_worker_path = workers_path.clone();
+		execute_worker_path.push(EXECUTE_BINARY_NAME);
+
+		// explain why a build happens
+		if !prepare_worker_path.is_executable() {
+			println!("WARN: Prepare worker does not exist or is not executable. Workers directory: {:?}", workers_path);
+		}
+		if !execute_worker_path.is_executable() {
+			println!("WARN: Execute worker does not exist or is not executable. Workers directory: {:?}", workers_path);
+		}
+		if let Ok(ver) = get_worker_version(&prepare_worker_path) {
+			if ver != NODE_VERSION {
+				println!("WARN: Prepare worker version {ver} does not match node version {NODE_VERSION}; worker path: {prepare_worker_path:?}");
+			}
+		}
+		if let Ok(ver) = get_worker_version(&execute_worker_path) {
+			if ver != NODE_VERSION {
+				println!("WARN: Execute worker version {ver} does not match node version {NODE_VERSION}; worker path: {execute_worker_path:?}");
+			}
+		}
+
+		build_workers();
+
+		Mutex::new((prepare_worker_path, execute_worker_path))
+	});
+
+	let guard = mutex.lock().unwrap();
+	(guard.0.clone(), guard.1.clone())
+}
+
+/// Returns the artifact id of the PVF that is created from the given discriminator.
+pub fn artifact_id(discriminator: u32) -> ArtifactId {
+	let pvf = PvfPrepData::from_discriminator(discriminator);
+	ArtifactId::from_pvf_prep_data(&pvf)
+}
@@ -0,0 +1,431 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Common logic for implementation of worker processes.
+
+use crate::LOG_TARGET;
+use codec::Encode;
+use futures::FutureExt as _;
+use futures_timer::Delay;
+use pezkuwi_node_core_pvf_common::{SecurityStatus, WorkerHandshake};
+use pin_project::pin_project;
+use rand::Rng;
+use std::{
+	fmt, mem,
+	path::{Path, PathBuf},
+	pin::Pin,
+	task::{Context, Poll},
+	time::Duration,
+};
+use tokio::{
+	io::{self, AsyncRead, AsyncReadExt as _, AsyncWrite, AsyncWriteExt as _, ReadBuf},
+	net::{UnixListener, UnixStream},
+	process,
+};
+
+/// The factor by which a job's timeout (measured in CPU time) is multiplied to obtain the amount
+/// of wall clock time we are willing to wait on the host. This is lenient because CPU time may go
+/// slower than wall clock time.
+pub const JOB_TIMEOUT_WALL_CLOCK_FACTOR: u32 = 4;
+
+/// Spawns a worker process and performs the initial rendezvous with it over a Unix socket.
+///
+/// This is publicly exposed only for integration tests.
+///
+/// A fresh [`WorkerDir`] is created under `cache_path`, a listener is bound to a transient socket
+/// path, and the program is started with the socket path and the worker dir passed as CLI
+/// arguments. Once the worker connects, a [`WorkerHandshake`] carrying `security_status` is sent
+/// to it.
+///
+/// # Parameters
+///
+/// - `debug_id`: An identifier for the process (e.g. "execute" or "prepare").
+///
+/// - `program_path`: The path to the program.
+///
+/// - `cache_path`: The path to the artifact cache.
+///
+/// - `extra_args`: Optional extra CLI arguments to the program. NOTE: Should only contain data
+///   required before the handshake, like node/worker versions for the version check. Other data
+///   should go through the handshake.
+///
+/// - `spawn_timeout`: The amount of time to wait for the child process to spawn.
+///
+/// - `security_status`: contains the detected status of security features.
+///
+/// # Errors
+///
+/// Returns a [`SpawnErr`] if the worker dir or the socket path cannot be obtained, the socket
+/// cannot be bound, the process cannot be spawned, accepting the connection fails or does not
+/// complete within `spawn_timeout`, or the handshake cannot be sent. Errors raised after the
+/// worker dir has been created are additionally logged as warnings.
+#[doc(hidden)]
+pub async fn spawn_with_program_path(
+	debug_id: &'static str,
+	program_path: impl Into<PathBuf>,
+	cache_path: &Path,
+	extra_args: &[&str],
+	spawn_timeout: Duration,
+	security_status: SecurityStatus,
+) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
+	let program_path = program_path.into();
+	let worker_dir = WorkerDir::new(debug_id, cache_path).await?;
+	let extra_args: Vec<String> = extra_args.iter().map(|arg| arg.to_string()).collect();
+	// The originals are moved into the `async move` block below, so keep copies around for the
+	// warning that is logged if spawning fails.
+	let program_path_clone = program_path.clone();
+	let worker_dir_clone = worker_dir.path().to_owned();
+	let extra_args_clone = extra_args.clone();
+
+	with_transient_socket_path(debug_id, |socket_path| {
+		let socket_path = socket_path.to_owned();
+
+		async move {
+			// Bind before spawning so that the listener exists by the time the worker connects.
+			let listener = match UnixListener::bind(&socket_path) {
+				Ok(ok) => ok,
+				Err(err) => return Err(SpawnErr::Bind { socket_path, err: err.to_string() }),
+			};
+
+			let handle =
+				WorkerHandle::spawn(&program_path, &extra_args, &socket_path, &worker_dir.path())
+					.map_err(|err| SpawnErr::ProcessSpawn { program_path, err: err.to_string() })?;
+
+			// Race the incoming connection against the spawn timeout.
+			futures::select! {
+				accept_result = listener.accept().fuse() => {
+					let (mut stream, _) = accept_result
+						.map_err(|err| SpawnErr::Accept { socket_path, err: err.to_string() })?;
+					send_worker_handshake(&mut stream, WorkerHandshake { security_status })
+						.await
+						.map_err(|err| SpawnErr::Handshake { err: err.to_string() })?;
+					Ok((IdleWorker { stream, pid: handle.id(), worker_dir }, handle))
+				}
+				_ = Delay::new(spawn_timeout).fuse() => Err(SpawnErr::AcceptTimeout{spawn_timeout}),
+			}
+		}
+	})
+	.await
+	.map_err(|err| {
+		gum::warn!(
+			target: LOG_TARGET,
+			%debug_id,
+			program_path = ?program_path_clone,
+			extra_args = ?extra_args_clone,
+			worker_dir = ?worker_dir_clone,
+			"error spawning worker: {}",
+			err,
+		);
+		err
+	})
+}
+
+/// Runs `f` with a temporary, random, currently unused path that is necessary only to establish
+/// socket communications. Once the future returned by `f` has completed, a best-effort attempt is
+/// made to remove the file at that path; the result of `f` is returned unchanged.
+///
+/// Fails with [`SpawnErr::TmpPath`] if no unused path could be found.
+async fn with_transient_socket_path<T, F, Fut>(debug_id: &'static str, f: F) -> Result<T, SpawnErr>
+where
+	F: FnOnce(&Path) -> Fut,
+	Fut: futures::Future<Output = Result<T, SpawnErr>> + 'static,
+{
+	/// Returns a path under [`std::env::temp_dir`]. The path name will start with the given prefix.
+	///
+	/// The path is not created or reserved; it is only checked not to exist at the time of the
+	/// call.
+	///
+	/// There is only a certain number of retries. If exceeded this function will give up and return
+	/// an error.
+	pub async fn tmppath(prefix: &str) -> io::Result<PathBuf> {
+		/// Builds `<dir>/<prefix><random alphanumeric suffix>`.
+		fn make_tmppath(prefix: &str, dir: &Path) -> PathBuf {
+			use rand::distributions::Alphanumeric;
+
+			// Number of random characters appended to the prefix.
+			const DISCRIMINATOR_LEN: usize = 10;
+
+			let mut buf = Vec::with_capacity(prefix.len() + DISCRIMINATOR_LEN);
+			buf.extend(prefix.as_bytes());
+			buf.extend(rand::thread_rng().sample_iter(&Alphanumeric).take(DISCRIMINATOR_LEN));
+
+			let s = std::str::from_utf8(&buf)
+				.expect("the string is collected from a valid utf-8 sequence; qed");
+
+			let mut path = dir.to_owned();
+			path.push(s);
+			path
+		}
+
+		const NUM_RETRIES: usize = 50;
+
+		let dir = std::env::temp_dir();
+		for _ in 0..NUM_RETRIES {
+			let tmp_path = make_tmppath(prefix, &dir);
+			if !tmp_path.exists() {
+				return Ok(tmp_path);
+			}
+		}
+
+		Err(io::Error::new(io::ErrorKind::Other, "failed to create a temporary path"))
+	}
+
+	let socket_path = tmppath(&format!("pvf-host-{}-", debug_id))
+		.await
+		.map_err(|_| SpawnErr::TmpPath)?;
+	let result = f(&socket_path).await;
+
+	// Best effort to remove the socket file. Under normal circumstances the socket will be removed
+	// by the worker. We make sure that it is removed here, just in case a failed rendezvous.
+	let _ = tokio::fs::remove_file(socket_path).await;
+
+	result
+}
+
+/// A struct that represents an idle worker.
+///
+/// This struct is supposed to be used as a token that is passed by move into a subroutine that
+/// initiates a job. If the worker dies while on duty, the token is not returned.
+#[derive(Debug)]
+pub struct IdleWorker {
+	/// The stream to which the child process is connected.
+	pub stream: UnixStream,
+
+	/// The identifier of this process. Used to reset the niceness.
+	pub pid: u32,
+
+	/// The temporary per-worker path. We clean up the worker dir between jobs and delete it when
+	/// the worker dies.
+	pub worker_dir: WorkerDir,
+}
+
+/// This is publicly exposed only for integration tests.
+///
+/// An error happened during spawning a worker process.
+#[derive(thiserror::Error, Clone, Debug)]
+#[doc(hidden)]
+pub enum SpawnErr {
+	/// No unused transient socket path could be found, or the worker dir could not be created.
+	#[error("cannot obtain a temporary path location")]
+	TmpPath,
+	/// Binding the Unix listener to `socket_path` failed.
+	#[error("cannot bind the socket to the given path {socket_path:?}: {err}")]
+	Bind { socket_path: PathBuf, err: String },
+	/// Accepting the worker's connection on the listener bound to `socket_path` failed.
+	#[error(
+		"an error happened during accepting a connection to the socket {socket_path:?}: {err}"
+	)]
+	Accept { socket_path: PathBuf, err: String },
+	/// Spawning the worker process from `program_path` failed.
+	#[error("an error happened during spawning the process at path {program_path:?}: {err}")]
+	ProcessSpawn { program_path: PathBuf, err: String },
+	/// The worker did not connect to the socket within `spawn_timeout`.
+	#[error("the deadline {}ms allotted for the worker spawning and connecting to the socket has elapsed", .spawn_timeout.as_millis())]
+	AcceptTimeout { spawn_timeout: Duration },
+	/// The worker connected, but sending the handshake to it failed.
+	#[error("failed to send handshake after successful spawning was signaled: {err}")]
+	Handshake { err: String },
+}
+
+/// This is a representation of a potentially running worker. Drop it and the process will be
+/// killed.
+///
+/// A worker's handle is also a future that resolves when it's detected that the worker's process
+/// has been terminated. Since the worker is running in another process it is obviously not
+/// necessary to poll this future to make the worker run, it's only for termination detection.
+///
+/// This future relies on the fact that a child process's stdout `fd` is closed upon its
+/// termination.
+#[pin_project]
+pub struct WorkerHandle {
+	/// The spawned child process; queried for its exit status when logging a read error.
+	child: process::Child,
+	/// The process id, captured right after spawning.
+	child_id: u32,
+	/// The child's piped stdout, polled only to detect EOF.
+	#[pin]
+	stdout: process::ChildStdout,
+	/// Path of the spawned program, kept for log messages.
+	program: PathBuf,
+	/// Scratch buffer that receives (and discards) anything read from `stdout`.
+	drop_box: Box<[u8]>,
+}
+
+impl WorkerHandle {
+	/// Spawns the worker binary at `program` as a child process.
+	///
+	/// The child is started with a cleared environment (only `RUST_LOG` is set, to the current
+	/// logging directives), receives `extra_args` followed by `--socket-path <socket_path>` and
+	/// `--worker-dir-path <worker_dir_path>`, has its stdout piped so that termination can be
+	/// detected, and is killed when the returned handle is dropped.
+	///
+	/// # Errors
+	///
+	/// Returns an error if the process cannot be spawned or its process id cannot be obtained.
+	fn spawn(
+		program: impl AsRef<Path>,
+		extra_args: &[String],
+		socket_path: impl AsRef<Path>,
+		worker_dir_path: impl AsRef<Path>,
+	) -> io::Result<Self> {
+		let mut command = process::Command::new(program.as_ref());
+		// Clear all env vars from the spawned process.
+		command.env_clear();
+
+		// Forward the logging directives so that the worker logs like the host does.
+		command.env("RUST_LOG", sc_tracing::logging::get_directives().join(","));
+
+		let mut child = command
+			.args(extra_args)
+			.arg("--socket-path")
+			.arg(socket_path.as_ref().as_os_str())
+			.arg("--worker-dir-path")
+			.arg(worker_dir_path.as_ref().as_os_str())
+			.stdout(std::process::Stdio::piped())
+			.kill_on_drop(true)
+			.spawn()?;
+
+		// Build the error lazily: it is only needed when the id is unavailable.
+		let child_id = child.id().ok_or_else(|| {
+			io::Error::new(io::ErrorKind::Other, "could not get id of spawned process")
+		})?;
+		let stdout = child
+			.stdout
+			.take()
+			.expect("the process spawned with piped stdout should have the stdout handle");
+
+		Ok(WorkerHandle {
+			child,
+			child_id,
+			stdout,
+			program: program.as_ref().to_path_buf(),
+			// We don't expect the bytes to be ever read. But in case we do, we should not use a
+			// buffer of a small size, because otherwise if the child process does return any data
+			// we will end up issuing a syscall for each byte. We also prefer not to allocate
+			// that on the stack, since each poll the buffer will be allocated and initialized (and
+			// that's due `poll_read` takes &mut [u8] and there are no guarantees that a `poll_read`
+			// won't ever read from there even though that's unlikely).
+			//
+			// OTOH, we also don't want to be super smart here and we could just afford to allocate
+			// a buffer for that here.
+			drop_box: vec![0; 8192].into_boxed_slice(),
+		})
+	}
+
+	/// Returns the process id of this worker.
+	pub fn id(&self) -> u32 {
+		self.child_id
+	}
+}
+
+impl futures::Future for WorkerHandle {
+	type Output = ();
+
+	/// Resolves once the child's stdout reports EOF or a read error, both of which are taken to
+	/// mean that the worker process has terminated.
+	fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+		let this = self.project();
+		// The `ReadBuf` is built on every poll rather than stored in `WorkerHandle`, which would
+		// require a lifetime parameter on the struct. Building it is fairly cheap.
+		let mut scratch = ReadBuf::new(&mut *this.drop_box);
+		let outcome = futures::ready!(AsyncRead::poll_read(this.stdout, cx, &mut scratch));
+
+		if let Err(err) = outcome {
+			// `WouldBlock` and `Interrupted` are guaranteed not to surface here, so whatever is
+			// left is a genuine error which we attribute to the process having terminated.
+
+			// Log the status code.
+			gum::debug!(
+				target: LOG_TARGET,
+				worker_pid = %this.child_id,
+				status_code = ?this.child.try_wait().ok().flatten().map(|c| c.to_string()),
+				"pvf worker ({}): {:?}",
+				this.program.display(),
+				err,
+			);
+			return Poll::Ready(());
+		}
+
+		if scratch.filled().is_empty() {
+			// An empty read is `EOF`, i.e. the child was terminated. Resolve.
+			return Poll::Ready(());
+		}
+
+		// Unexpectedly got some bytes. Discard them and ask to be polled again.
+		cx.waker().wake_by_ref();
+		Poll::Pending
+	}
+}
+
+impl fmt::Debug for WorkerHandle {
+	/// Prints only the worker's process id.
+	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+		let pid = self.id();
+		f.write_fmt(format_args!("WorkerHandle(pid={})", pid))
+	}
+}
+
+/// Writes `buf` into `w` as one frame: the length of `buf` as a little-endian `usize`, followed
+/// by the bytes themselves.
+pub async fn framed_send(w: &mut (impl AsyncWrite + Unpin), buf: &[u8]) -> io::Result<()> {
+	let header = usize::to_le_bytes(buf.len());
+	w.write_all(&header).await?;
+	w.write_all(buf).await
+}
+
+/// Reads one frame from `r`: a little-endian `usize` length, followed by that many bytes, which
+/// are returned.
+pub async fn framed_recv(r: &mut (impl AsyncRead + Unpin)) -> io::Result<Vec<u8>> {
+	let mut header = [0u8; mem::size_of::<usize>()];
+	r.read_exact(&mut header).await?;
+
+	let mut payload = vec![0; usize::from_le_bytes(header)];
+	r.read_exact(&mut payload).await?;
+	Ok(payload)
+}
+
+/// Encodes `handshake` and sends it to the worker over `stream` as a single frame.
+async fn send_worker_handshake(
+	stream: &mut UnixStream,
+	handshake: WorkerHandshake,
+) -> io::Result<()> {
+	let encoded = handshake.encode();
+	framed_send(stream, &encoded).await
+}
+
+/// A temporary worker dir that contains only files needed by the worker. The worker will change its
+/// root (the `/` directory) to this directory; it should have access to no other paths on its
+/// filesystem.
+///
+/// NOTE: This struct cleans up its associated directory when it is dropped. Therefore it should not
+/// implement `Clone`.
+///
+/// # File structure
+///
+/// The overall file structure for the PVF system is as follows. The `worker-dir-X`s are managed by
+/// this struct.
+///
+/// ```nocompile
+/// + /<cache_path>/
+///   - artifact-1
+///   - artifact-2
+///   - [...]
+///   - worker-dir-1/  (new `/` for worker-1)
+///     + socket                            (created by host)
+///     + tmp-artifact                      (created by host) (prepare-only)
+///     + artifact     (link -> artifact-1) (created by host) (execute-only)
+///   - worker-dir-2/  (new `/` for worker-2)
+///     + [...]
+/// ```
+// NOTE(review): the spawning code in this file places the rendezvous socket under
+// `std::env::temp_dir()`, not inside the worker dir as the diagram above suggests — confirm and
+// update the diagram if it is stale.
+#[derive(Debug)]
+pub struct WorkerDir {
+	/// Handle to the on-disk directory backing this worker dir.
+	tempdir: tempfile::TempDir,
+}
+
+/// The leading part of the name of every directory created by [`WorkerDir::new`].
+pub const WORKER_DIR_PREFIX: &str = "worker-dir";
+
+impl WorkerDir {
+	/// Creates a new, empty worker dir inside `cache_dir`. Its name starts with
+	/// `{WORKER_DIR_PREFIX}-{debug_id}-`; the rest of the name is chosen by `tempfile`.
+	///
+	/// Any failure to create the directory is reported as [`SpawnErr::TmpPath`].
+	pub async fn new(debug_id: &'static str, cache_dir: &Path) -> Result<Self, SpawnErr> {
+		let name_prefix = format!("{WORKER_DIR_PREFIX}-{debug_id}-");
+		match tempfile::Builder::new().prefix(&name_prefix).tempdir_in(cache_dir) {
+			Ok(tempdir) => Ok(Self { tempdir }),
+			Err(_) => Err(SpawnErr::TmpPath),
+		}
+	}
+
+	/// Returns the path of the worker dir.
+	pub fn path(&self) -> &Path {
+		self.tempdir.path()
+	}
+}
+
+// Deliberately synchronous: async recursion is awkward in Rust, and only a few files are expected
+// here anyway.
+//
+/// Empties the temporary worker dir while leaving the directory itself in place. The directory must
+/// survive because the worker has mounted its own separate filesystem here.
+///
+/// Meant to be called right after a job has finished, so that a job never gets access to
+/// artifacts left behind by a previous one.
+pub fn clear_worker_dir_path(worker_dir_path: &Path) -> io::Result<()> {
+	/// Recursively deletes everything inside `dir`, but not `dir` itself.
+	fn purge(dir: &Path) -> io::Result<()> {
+		std::fs::read_dir(dir)?.try_for_each(|item| {
+			let item = item?;
+			let child = item.path();
+
+			if !item.file_type()?.is_dir() {
+				return std::fs::remove_file(child);
+			}
+			purge(&child)?;
+			std::fs::remove_dir(child)
+		})
+	}
+
+	// The worker dir may already be gone if the worker died and was cleaned up; that is fine.
+	match purge(worker_dir_path) {
+		Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(()),
+		other => other,
+	}
+}
@@ -0,0 +1,205 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! PVF host integration tests checking the chain production pipeline.
+
+use super::TestHost;
+use codec::{Decode, Encode};
+use pezkuwi_node_primitives::PoV;
+use pezkuwi_primitives::PersistedValidationData;
+use pezkuwi_teyrchain_primitives::primitives::{
+	BlockData as GenericBlockData, HeadData as GenericHeadData,
+};
+use sp_core::H256;
+use test_teyrchain_adder::{hash_state, BlockData, HeadData};
+
+// Validates a single adder block built on top of the zero state and checks the resulting head.
+#[tokio::test]
+async fn execute_good_block_on_parent() {
+	let host = TestHost::new().await;
+
+	let parent_head = HeadData { number: 0, parent_hash: [0; 32], post_state: hash_state(0) };
+	let encoded_block = BlockData { state: 0, add: 512 }.encode();
+	let validation_data = PersistedValidationData {
+		parent_head: GenericHeadData(parent_head.encode()),
+		relay_parent_number: 1u32,
+		relay_parent_storage_root: H256::default(),
+		max_pov_size: 4096 * 1024,
+	};
+
+	let outcome = host
+		.validate_candidate(
+			test_teyrchain_adder::wasm_binary_unwrap(),
+			validation_data,
+			PoV { block_data: GenericBlockData(encoded_block) },
+			Default::default(),
+			H256::default(),
+		)
+		.await
+		.unwrap();
+
+	let produced = HeadData::decode(&mut &outcome.head_data.0[..]).unwrap();
+
+	assert_eq!(produced.number, 1);
+	assert_eq!(produced.parent_hash, parent_head.hash());
+	assert_eq!(produced.post_state, hash_state(512));
+}
+
+// Validates a chain of ten adder blocks, each one built on the head produced by the previous one.
+#[tokio::test]
+async fn execute_good_chain_on_parent() {
+	let host = TestHost::new().await;
+
+	// Chain state carried over from one iteration to the next.
+	let mut parent_hash = [0; 32];
+	let mut last_state = 0;
+
+	// The block with parent number `number` adds `add`, which has the same value as `number`.
+	for (number, add) in (0u64..).zip(0..10) {
+		let parent_head = HeadData { number, parent_hash, post_state: hash_state(last_state) };
+		let encoded_block = BlockData { state: last_state, add }.encode();
+		let validation_data = PersistedValidationData {
+			parent_head: GenericHeadData(parent_head.encode()),
+			relay_parent_number: 1u32,
+			relay_parent_storage_root: H256::default(),
+			max_pov_size: 4096 * 1024,
+		};
+
+		let outcome = host
+			.validate_candidate(
+				test_teyrchain_adder::wasm_binary_unwrap(),
+				validation_data,
+				PoV { block_data: GenericBlockData(encoded_block) },
+				Default::default(),
+				H256::default(),
+			)
+			.await
+			.unwrap();
+
+		let produced = HeadData::decode(&mut &outcome.head_data.0[..]).unwrap();
+
+		assert_eq!(produced.number, number + 1);
+		assert_eq!(produced.parent_hash, parent_head.hash());
+		assert_eq!(produced.post_state, hash_state(last_state + add));
+
+		parent_hash = produced.hash();
+		last_state += add;
+	}
+}
+
+// A block whose claimed start state does not match the parent head must fail validation.
+#[tokio::test]
+async fn execute_bad_block_on_parent() {
+	let host = TestHost::new().await;
+
+	let parent_head = HeadData { number: 0, parent_hash: [0; 32], post_state: hash_state(0) };
+	// The start state is wrong: the parent's post state is `hash_state(0)`.
+	let encoded_block = BlockData { state: 256, add: 256 }.encode();
+	let validation_data = PersistedValidationData {
+		parent_head: GenericHeadData(parent_head.encode()),
+		relay_parent_number: 1u32,
+		relay_parent_storage_root: H256::default(),
+		max_pov_size: 4096 * 1024,
+	};
+
+	let outcome = host
+		.validate_candidate(
+			test_teyrchain_adder::wasm_binary_unwrap(),
+			validation_data,
+			PoV { block_data: GenericBlockData(encoded_block) },
+			Default::default(),
+			H256::default(),
+		)
+		.await;
+
+	let _err = outcome.unwrap_err();
+}
+
+// Submits 100 identical validation requests concurrently to a host with the default test config.
+#[tokio::test]
+async fn stress_spawn() {
+	/// Validates one good adder block and checks the head it produces.
+	async fn validate_once(host: std::sync::Arc<TestHost>) {
+		let parent_head = HeadData { number: 0, parent_hash: [0; 32], post_state: hash_state(0) };
+		let encoded_block = BlockData { state: 0, add: 512 }.encode();
+		let validation_data = PersistedValidationData {
+			parent_head: GenericHeadData(parent_head.encode()),
+			relay_parent_number: 1u32,
+			relay_parent_storage_root: H256::default(),
+			max_pov_size: 4096 * 1024,
+		};
+		let outcome = host
+			.validate_candidate(
+				test_teyrchain_adder::wasm_binary_unwrap(),
+				validation_data,
+				PoV { block_data: GenericBlockData(encoded_block) },
+				Default::default(),
+				H256::default(),
+			)
+			.await
+			.unwrap();
+
+		let produced = HeadData::decode(&mut &outcome.head_data.0[..]).unwrap();
+
+		assert_eq!(produced.number, 1);
+		assert_eq!(produced.parent_hash, parent_head.hash());
+		assert_eq!(produced.post_state, hash_state(512));
+	}
+
+	let host = std::sync::Arc::new(TestHost::new().await);
+
+	let jobs = std::iter::repeat_with(|| validate_once(std::sync::Arc::clone(&host))).take(100);
+	futures::future::join_all(jobs).await;
+}
+
+// With the execute worker limit set to one, submit several execution jobs. They should not
+// conflict.
+#[tokio::test]
+async fn execute_can_run_serially() {
+	/// Validates one good adder block and checks the head it produces.
+	async fn validate_once(host: std::sync::Arc<TestHost>) {
+		let parent_head = HeadData { number: 0, parent_hash: [0; 32], post_state: hash_state(0) };
+		let encoded_block = BlockData { state: 0, add: 512 }.encode();
+		let validation_data = PersistedValidationData {
+			parent_head: GenericHeadData(parent_head.encode()),
+			relay_parent_number: 1u32,
+			relay_parent_storage_root: H256::default(),
+			max_pov_size: 4096 * 1024,
+		};
+		let outcome = host
+			.validate_candidate(
+				test_teyrchain_adder::wasm_binary_unwrap(),
+				validation_data,
+				PoV { block_data: GenericBlockData(encoded_block) },
+				Default::default(),
+				H256::default(),
+			)
+			.await
+			.unwrap();
+
+		let produced = HeadData::decode(&mut &outcome.head_data.0[..]).unwrap();
+
+		assert_eq!(produced.number, 1);
+		assert_eq!(produced.parent_hash, parent_head.hash());
+		assert_eq!(produced.post_state, hash_state(512));
+	}
+
+	let single_worker_host = TestHost::new_with_config(|cfg| {
+		cfg.execute_workers_max_num = 1;
+	})
+	.await;
+	let host = std::sync::Arc::new(single_worker_host);
+
+	let jobs = std::iter::repeat_with(|| validate_once(std::sync::Arc::clone(&host))).take(5);
+	futures::future::join_all(jobs).await;
+}
@@ -0,0 +1,861 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! General PVF host integration tests checking the functionality of the PVF host itself.
+
+use assert_matches::assert_matches;
+#[cfg(all(feature = "ci-only-tests", target_os = "linux"))]
+use pezkuwi_node_core_pvf::SecurityStatus;
+use pezkuwi_node_core_pvf::{
+	start, testing::build_workers_and_get_paths, Config, InvalidCandidate, Metrics,
+	PossiblyInvalidError, PrepareError, PrepareJobKind, PvfPrepData, ValidationError,
+	ValidationHost, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
+};
+use pezkuwi_node_core_pvf_common::{compute_checksum, ArtifactChecksum};
+use pezkuwi_node_primitives::{PoV, POV_BOMB_LIMIT};
+use pezkuwi_node_subsystem::messages::PvfExecKind;
+use pezkuwi_primitives::{
+	ExecutorParam, ExecutorParams, Hash, PersistedValidationData, PvfExecKind as RuntimePvfExecKind,
+};
+use pezkuwi_teyrchain_primitives::primitives::{BlockData, ValidationResult};
+use sp_core::H256;
+
+/// The validation code bomb limit (30 MiB) handed to `PvfPrepData::from_code` by the test host.
+const VALIDATION_CODE_BOMB_LIMIT: u32 = 30 * 1024 * 1024;
+
+use std::{io::Write, sync::Arc, time::Duration};
+use tokio::sync::Mutex;
+
+mod adder;
+#[cfg(target_os = "linux")]
+mod process;
+mod worker_common;
+
+/// Execution timeout passed to `execute_pvf` for every candidate validated through [`TestHost`].
+const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(6);
+/// Preparation timeout put into the `PvfPrepData` of every PVF submitted through [`TestHost`].
+const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(6);
+
+/// A validation host started against a fresh temporary cache directory, together with helpers
+/// that submit requests to it and wait for their results.
+struct TestHost {
+	// Keep a reference to the tempdir as it gets deleted on drop.
+	cache_dir: tempfile::TempDir,
+	// The host handle; the mutex lets the `&self` helpers below share it.
+	host: Mutex<ValidationHost>,
+}
+
+impl TestHost {
+	/// Starts a host with the default test configuration.
+	async fn new() -> Self {
+		Self::new_with_config(|_| ()).await
+	}
+
+	/// Builds the workers (if necessary), starts a host with a temporary cache directory and
+	/// spawns its background task. `f` may adjust the configuration before the host is started.
+	async fn new_with_config<F>(f: F) -> Self
+	where
+		F: FnOnce(&mut Config),
+	{
+		let (prepare_worker_path, execute_worker_path) = build_workers_and_get_paths();
+
+		let cache_dir = tempfile::tempdir().unwrap();
+		// NOTE(review): the arguments are positional; the meaning of `None`, `false` and the
+		// trailing `2, 1, 2` is defined by `Config::new` — check its signature before changing
+		// them.
+		let mut config = Config::new(
+			cache_dir.path().to_owned(),
+			None,
+			false,
+			prepare_worker_path,
+			execute_worker_path,
+			2,
+			1,
+			2,
+		);
+		f(&mut config);
+		let (host, task) = start(config, Metrics::default()).await.unwrap();
+		// The join handle is deliberately dropped: the task keeps running detached.
+		let _ = tokio::task::spawn(task);
+		Self { cache_dir, host: Mutex::new(host) }
+	}
+
+	/// Submits `code` for prechecking with the given executor params and waits for the result.
+	///
+	/// Panics if the request cannot be submitted or the result channel is closed.
+	async fn precheck_pvf(
+		&self,
+		code: &[u8],
+		executor_params: ExecutorParams,
+	) -> Result<(), PrepareError> {
+		let (result_tx, result_rx) = futures::channel::oneshot::channel();
+
+		// The lock guard is a temporary of this statement, so it is released as soon as the
+		// request has been submitted and is not held while waiting for the result below.
+		self.host
+			.lock()
+			.await
+			.precheck_pvf(
+				PvfPrepData::from_code(
+					code.into(),
+					executor_params,
+					TEST_PREPARATION_TIMEOUT,
+					PrepareJobKind::Prechecking,
+					VALIDATION_CODE_BOMB_LIMIT,
+				),
+				result_tx,
+			)
+			.await
+			.unwrap();
+		result_rx.await.unwrap()
+	}
+
+	/// Submits a backing-kind execution request for `code` with the given validation data and
+	/// PoV at normal priority, and waits for the validation result.
+	///
+	/// Panics if the request cannot be submitted or the result channel is closed.
+	async fn validate_candidate(
+		&self,
+		code: &[u8],
+		pvd: PersistedValidationData,
+		pov: PoV,
+		executor_params: ExecutorParams,
+		relay_parent: Hash,
+	) -> Result<ValidationResult, ValidationError> {
+		let (result_tx, result_rx) = futures::channel::oneshot::channel();
+
+		// As above, the lock is only held while submitting the request; this is what allows
+		// several validations issued through the same `TestHost` to be in flight at once.
+		self.host
+			.lock()
+			.await
+			.execute_pvf(
+				PvfPrepData::from_code(
+					code.into(),
+					executor_params,
+					TEST_PREPARATION_TIMEOUT,
+					PrepareJobKind::Compilation,
+					VALIDATION_CODE_BOMB_LIMIT,
+				),
+				TEST_EXECUTION_TIMEOUT,
+				Arc::new(pvd),
+				Arc::new(pov),
+				pezkuwi_node_core_pvf::Priority::Normal,
+				PvfExecKind::Backing(relay_parent),
+				result_tx,
+			)
+			.await
+			.unwrap();
+		result_rx.await.unwrap()
+	}
+
+	/// Asks the host to replace the artifact checksum `checksum` with `new_checksum`.
+	///
+	/// Panics if the request fails.
+	async fn replace_artifact_checksum(
+		&self,
+		checksum: ArtifactChecksum,
+		new_checksum: ArtifactChecksum,
+	) {
+		self.host
+			.lock()
+			.await
+			.replace_artifact_checksum(checksum, new_checksum)
+			.await
+			.unwrap();
+	}
+
+	/// Returns a copy of the security status stored on the host.
+	#[cfg(all(feature = "ci-only-tests", target_os = "linux"))]
+	async fn security_status(&self) -> SecurityStatus {
+		self.host.lock().await.security_status.clone()
+	}
+}
+
+// Prechecking the full runtime is expected to hit the preparation timeout: the job must fail with
+// `TimedOut`, no earlier than the timeout and before the lenient wall clock limit.
+#[tokio::test]
+async fn prepare_job_terminates_on_timeout() {
+	let host = TestHost::new().await;
+
+	let started = std::time::Instant::now();
+	let outcome = host
+		.precheck_pvf(pezkuwichain_runtime::WASM_BINARY.unwrap(), Default::default())
+		.await;
+
+	if !matches!(outcome, Err(PrepareError::TimedOut)) {
+		panic!("{:?}", outcome);
+	}
+
+	let elapsed = started.elapsed();
+	assert!(elapsed >= TEST_PREPARATION_TIMEOUT);
+	assert!(elapsed < TEST_PREPARATION_TIMEOUT * JOB_TIMEOUT_WALL_CLOCK_FACTOR);
+}
+
+// Executing the `halt` PVF is expected to hit the execution timeout: the job must fail with
+// `HardTimeout`, no earlier than the timeout and before the lenient wall clock limit.
+#[tokio::test]
+async fn execute_job_terminates_on_timeout() {
+	let host = TestHost::new().await;
+	let validation_data = PersistedValidationData {
+		parent_head: Default::default(),
+		relay_parent_number: 1u32,
+		relay_parent_storage_root: H256::default(),
+		max_pov_size: 4096 * 1024,
+	};
+	let empty_pov = PoV { block_data: BlockData(Vec::new()) };
+
+	let started = std::time::Instant::now();
+	let outcome = host
+		.validate_candidate(
+			test_teyrchain_halt::wasm_binary_unwrap(),
+			validation_data,
+			empty_pov,
+			Default::default(),
+			H256::default(),
+		)
+		.await;
+
+	if !matches!(outcome, Err(ValidationError::Invalid(InvalidCandidate::HardTimeout))) {
+		panic!("{:?}", outcome);
+	}
+
+	let elapsed = started.elapsed();
+	assert!(elapsed >= TEST_EXECUTION_TIMEOUT);
+	assert!(elapsed < TEST_EXECUTION_TIMEOUT * JOB_TIMEOUT_WALL_CLOCK_FACTOR);
+}
+
+// Two jobs that never complete are submitted together; if they run in parallel, both time out in
+// less than twice the execution timeout.
+#[cfg(feature = "ci-only-tests")]
+#[tokio::test]
+async fn ensure_parallel_execution() {
+	let host = TestHost::new().await;
+	let validation_data = PersistedValidationData {
+		parent_head: Default::default(),
+		relay_parent_number: 1u32,
+		relay_parent_storage_root: H256::default(),
+		max_pov_size: 4096 * 1024,
+	};
+	let empty_pov = PoV { block_data: BlockData(Vec::new()) };
+
+	// Futures are lazy: nothing is submitted until they are polled by `join!` below.
+	let first_job = host.validate_candidate(
+		test_teyrchain_halt::wasm_binary_unwrap(),
+		validation_data.clone(),
+		empty_pov.clone(),
+		Default::default(),
+		H256::default(),
+	);
+	let second_job = host.validate_candidate(
+		test_teyrchain_halt::wasm_binary_unwrap(),
+		validation_data,
+		empty_pov,
+		Default::default(),
+		H256::default(),
+	);
+
+	let started = std::time::Instant::now();
+	let outcomes = futures::join!(first_job, second_job);
+	assert_matches!(
+		outcomes,
+		(
+			Err(ValidationError::Invalid(InvalidCandidate::HardTimeout)),
+			Err(ValidationError::Invalid(InvalidCandidate::HardTimeout))
+		)
+	);
+
+	// Total time should be < 2 x TEST_EXECUTION_TIMEOUT (two workers run in parallel).
+	let elapsed = started.elapsed();
+	let upper_bound = 2 * TEST_EXECUTION_TIMEOUT;
+	assert!(
+		elapsed < upper_bound,
+		"Expected duration {}ms to be less than {}ms",
+		elapsed.as_millis(),
+		upper_bound.as_millis()
+	);
+}
+
+// Test that the execute queue keeps serving queued jobs after its workers have been killed
+// because of timeouts.
+#[tokio::test]
+async fn execute_queue_doesnt_stall_if_workers_died() {
+	let host = TestHost::new_with_config(|cfg| {
+		cfg.execute_workers_max_num = 5;
+	})
+	.await;
+	let pvd = PersistedValidationData {
+		parent_head: Default::default(),
+		relay_parent_number: 1u32,
+		relay_parent_storage_root: H256::default(),
+		max_pov_size: 4096 * 1024,
+	};
+	let pov = PoV { block_data: BlockData(Vec::new()) };
+
+	// Here we spawn 9 validation jobs (`0..=8`) for the `halt` PVF and share those between 5
+	// workers. The first five jobs should timeout and the workers killed. For the remaining 4
+	// jobs a new batch of workers should be spun up.
+	let start = std::time::Instant::now();
+	futures::future::join_all((0u8..=8).map(|_| {
+		host.validate_candidate(
+			test_teyrchain_halt::wasm_binary_unwrap(),
+			pvd.clone(),
+			pov.clone(),
+			Default::default(),
+			H256::default(),
+		)
+	}))
+	.await;
+
+	// Total time should be >= 2 x TEST_EXECUTION_TIMEOUT (two separate sets of workers that should
+	// both timeout).
+	let duration = start.elapsed();
+	let min_duration = 2 * TEST_EXECUTION_TIMEOUT;
+	assert!(
+		duration >= min_duration,
+		"Expected duration {}ms to be greater than or equal to {}ms",
+		duration.as_millis(),
+		min_duration.as_millis()
+	);
+}
+
+// Test that the execute queue does not stall when queued jobs require different executor
+// parameters.
+#[cfg(feature = "ci-only-tests")]
+#[tokio::test]
+async fn execute_queue_doesnt_stall_with_varying_executor_params() {
+	let host = TestHost::new_with_config(|cfg| {
+		cfg.execute_workers_max_num = 2;
+	})
+	.await;
+	let validation_data = PersistedValidationData {
+		parent_head: Default::default(),
+		relay_parent_number: 1u32,
+		relay_parent_storage_root: H256::default(),
+		max_pov_size: 4096 * 1024,
+	};
+	let empty_pov = PoV { block_data: BlockData(Vec::new()) };
+
+	let default_params = ExecutorParams::default();
+	let custom_params = ExecutorParams::from(&[ExecutorParam::StackLogicalMax(1024)][..]);
+
+	// Six validation jobs for the `halt` PVF are shared between 2 workers. Jobs 0 and 3 use the
+	// default executor parameters, all others use the custom ones. All the workers should be
+	// killed, and the queue should then respawn workers with the needed executor environment
+	// without waiting. The jobs will be executed in 3 batches, each running two jobs in parallel,
+	// so the execution time would be roughly 3 * TEST_EXECUTION_TIMEOUT.
+	let started = std::time::Instant::now();
+	let jobs = (0u8..6).map(|i| {
+		let params = if i % 3 == 0 { default_params.clone() } else { custom_params.clone() };
+		host.validate_candidate(
+			test_teyrchain_halt::wasm_binary_unwrap(),
+			validation_data.clone(),
+			empty_pov.clone(),
+			params,
+			H256::default(),
+		)
+	});
+	futures::future::join_all(jobs).await;
+
+	let elapsed = started.elapsed();
+	let lower_bound = 3 * TEST_EXECUTION_TIMEOUT;
+	let upper_bound = 4 * TEST_EXECUTION_TIMEOUT;
+	assert!(
+		elapsed >= lower_bound,
+		"Expected duration {}ms to be greater than or equal to {}ms",
+		elapsed.as_millis(),
+		lower_bound.as_millis()
+	);
+	assert!(
+		elapsed <= upper_bound,
+		"Expected duration {}ms to be less than or equal to {}ms",
+		elapsed.as_millis(),
+		upper_bound.as_millis()
+	);
+}
+
+// Test that deleting a prepared artifact does not lead to a dispute when we try to execute it.
+//
+// The `halt` PVF is prechecked, its artifact file is removed from the cache directory behind the
+// host's back, and the PVF is then executed. The expected outcome is the `HardTimeout` that
+// executing `halt` normally produces, rather than an error caused by the missing artifact.
+#[tokio::test]
+async fn deleting_prepared_artifact_does_not_dispute() {
+	let host = TestHost::new().await;
+	let cache_dir = host.cache_dir.path();
+	let pvd = PersistedValidationData {
+		parent_head: Default::default(),
+		relay_parent_number: 1u32,
+		relay_parent_storage_root: H256::default(),
+		max_pov_size: 4096 * 1024,
+	};
+	let pov = PoV { block_data: BlockData(Vec::new()) };
+
+	// Prepare the artifact; a failed precheck aborts the test here.
+	let _stats = host
+		.precheck_pvf(test_teyrchain_halt::wasm_binary_unwrap(), Default::default())
+		.await
+		.unwrap();
+
+	// Manually delete the prepared artifact from disk. The in-memory artifacts table won't change.
+	{
+		// Get the artifact path (asserting it exists).
+		let mut cache_dir: Vec<_> = std::fs::read_dir(cache_dir).unwrap().collect();
+		// Should contain the artifact and the worker dir.
+		assert_eq!(cache_dir.len(), 2);
+		// Directory listing order is not guaranteed: if the entry taken first turns out to be the
+		// worker dir, the remaining entry is the artifact.
+		let mut artifact_path = cache_dir.pop().unwrap().unwrap();
+		if artifact_path.path().is_dir() {
+			artifact_path = cache_dir.pop().unwrap().unwrap();
+		}
+
+		// Delete the artifact.
+		std::fs::remove_file(artifact_path.path()).unwrap();
+	}
+
+	// Try to validate, artifact should get recreated.
+	let result = host
+		.validate_candidate(
+			test_teyrchain_halt::wasm_binary_unwrap(),
+			pvd,
+			pov,
+			Default::default(),
+			H256::default(),
+		)
+		.await;
+
+	assert_matches!(result, Err(ValidationError::Invalid(InvalidCandidate::HardTimeout)));
+}
+
+// A prepared artifact that got corrupted on disk must not lead to a dispute when the PVF is
+// executed afterwards.
+#[tokio::test]
+async fn corrupted_on_disk_prepared_artifact_does_not_dispute() {
+	let host = TestHost::new().await;
+	let pov = PoV { block_data: BlockData(Vec::new()) };
+	let pvd = PersistedValidationData {
+		parent_head: Default::default(),
+		relay_parent_number: 1u32,
+		relay_parent_storage_root: H256::default(),
+		max_pov_size: 4096 * 1024,
+	};
+
+	// Prepare the `halt` PVF so that an artifact exists in the cache.
+	host.precheck_pvf(test_teyrchain_halt::wasm_binary_unwrap(), Default::default())
+		.await
+		.unwrap();
+
+	// Overwrite the artifact on disk with garbage. The in-memory artifacts table is not touched.
+	let corrupted_entry = {
+		let mut entries: Vec<_> = std::fs::read_dir(host.cache_dir.path()).unwrap().collect();
+		// Exactly two entries are expected: the artifact and the worker dir.
+		assert_eq!(entries.len(), 2);
+
+		// If the last entry turns out to be the worker dir, the artifact is the other entry.
+		let last = entries.pop().unwrap().unwrap();
+		let artifact = if last.path().is_dir() {
+			entries.pop().unwrap().unwrap()
+		} else {
+			last
+		};
+
+		// Truncate the file and replace its contents.
+		let mut file = std::fs::OpenOptions::new()
+			.write(true)
+			.truncate(true)
+			.open(artifact.path())
+			.unwrap();
+		file.write_all(b"corrupted wasm").unwrap();
+		file.flush().unwrap();
+		artifact
+	};
+	let corrupted_path = corrupted_entry.path();
+
+	assert!(corrupted_path.exists());
+
+	// Validation must report the corruption as a possibly-invalid (retryable) outcome.
+	let outcome = host
+		.validate_candidate(
+			test_teyrchain_halt::wasm_binary_unwrap(),
+			pvd,
+			pov,
+			Default::default(),
+			H256::default(),
+		)
+		.await;
+
+	assert_matches!(
+		outcome,
+		Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::CorruptedArtifact))
+	);
+
+	// `CorruptedArtifact` allows a retry, so preparing again has to succeed.
+	host.precheck_pvf(test_teyrchain_halt::wasm_binary_unwrap(), Default::default())
+		.await
+		.unwrap();
+
+	// The artifact is removed concurrently with the execution result being sent, so poll for
+	// up to four seconds. This does not interfere with re-preparation because artifact
+	// filenames are random.
+	let mut checks_left = 4;
+	while checks_left > 0 && corrupted_path.exists() {
+		tokio::time::sleep(Duration::from_secs(1)).await;
+		checks_left -= 1;
+	}
+
+	assert!(
+		!corrupted_path.exists(),
+		"the corrupted artifact ({}) should be deleted by the host",
+		corrupted_path.display()
+	);
+}
+
+// Test that corruption of a prepared artifact does not lead to a dispute when we try to execute it.
+//
+// Unlike `corrupted_on_disk_prepared_artifact_does_not_dispute`, this test also swaps the checksum
+// known to the host for the checksum of the corrupted contents (presumably so that the checksum
+// comparison passes -- confirm against the host implementation). The failure is then expected to
+// surface as a `RuntimeConstruction` error instead of `CorruptedArtifact`.
+#[tokio::test]
+async fn corrupted_prepared_artifact_does_not_dispute() {
+	let host = TestHost::new().await;
+	let cache_dir = host.cache_dir.path();
+	let pvd = PersistedValidationData {
+		parent_head: Default::default(),
+		relay_parent_number: 1u32,
+		relay_parent_storage_root: H256::default(),
+		max_pov_size: 4096 * 1024,
+	};
+	let pov = PoV { block_data: BlockData(Vec::new()) };
+
+	// Prepare the `halt` PVF so that an artifact exists in the cache.
+	let _stats = host
+		.precheck_pvf(test_teyrchain_halt::wasm_binary_unwrap(), Default::default())
+		.await
+		.unwrap();
+
+	// Manually corrupting the prepared artifact from disk. The in-memory artifacts table won't
+	// change.
+	let (artifact_path, checksum, new_checksum) = {
+		// Get the artifact path (asserting it exists).
+		let mut cache_dir: Vec<_> = std::fs::read_dir(cache_dir).unwrap().collect();
+		// Should contain the artifact and the worker dir.
+		assert_eq!(cache_dir.len(), 2);
+		let mut artifact_path = cache_dir.pop().unwrap().unwrap();
+		// If the last entry is the worker dir, the artifact is the remaining entry.
+		if artifact_path.path().is_dir() {
+			artifact_path = cache_dir.pop().unwrap().unwrap();
+		}
+
+		// The checksum of the intact artifact has to be captured before it is overwritten.
+		let checksum =
+			compute_checksum(&std::fs::read(artifact_path.path()).expect("artifact exists"));
+		let new_artifact = b"corrupted wasm";
+		let new_checksum = compute_checksum(new_artifact);
+
+		// Corrupt the artifact.
+		let mut f = std::fs::OpenOptions::new()
+			.write(true)
+			.truncate(true)
+			.open(artifact_path.path())
+			.unwrap();
+		f.write_all(new_artifact).unwrap();
+		f.flush().unwrap();
+		(artifact_path, checksum, new_checksum)
+	};
+
+	assert!(artifact_path.path().exists());
+
+	// Replace the old checksum with the checksum of the corrupted contents in the host.
+	host.replace_artifact_checksum(checksum, new_checksum).await;
+
+	// Try to validate, artifact should get removed because of the corruption.
+	let result = host
+		.validate_candidate(
+			test_teyrchain_halt::wasm_binary_unwrap(),
+			pvd,
+			pov,
+			Default::default(),
+			H256::default(),
+		)
+		.await;
+
+	assert_matches!(
+		result,
+		Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::RuntimeConstruction(_)))
+	);
+
+	// because of RuntimeConstruction we may retry
+	host.precheck_pvf(test_teyrchain_halt::wasm_binary_unwrap(), Default::default())
+		.await
+		.unwrap();
+
+	// The actual artifact removal is done concurrently
+	// with sending of the result of the execution
+	// it is not a problem for further re-preparation as
+	// artifact filenames are random
+	//
+	// Poll up to four times, one second apart, until the file is gone.
+	for _ in 1..5 {
+		if !artifact_path.path().exists() {
+			break;
+		}
+		tokio::time::sleep(Duration::from_secs(1)).await;
+	}
+
+	assert!(
+		!artifact_path.path().exists(),
+		"the corrupted artifact ({}) should be deleted by the host",
+		artifact_path.path().display()
+	);
+}
+
+// Starting a new host on an existing cache directory must clear what the previous host left there.
+#[tokio::test]
+async fn cache_cleared_on_startup() {
+	// This host owns the `TempDir`, which is cleared on drop, so it has to stay alive until the
+	// end of the test.
+	let first_host = TestHost::new().await;
+
+	first_host
+		.precheck_pvf(test_teyrchain_halt::wasm_binary_unwrap(), Default::default())
+		.await
+		.unwrap();
+
+	let cache_dir = first_host.cache_dir.path().to_owned();
+	let entry_count = || std::fs::read_dir(&cache_dir).unwrap().count();
+
+	// One artifact and one worker dir are expected after preparation.
+	assert_eq!(entry_count(), 2);
+
+	// A second host pointed at the same cache path should wipe the previous contents.
+	let _second_host =
+		TestHost::new_with_config(|config| config.cache_path = cache_dir.clone()).await;
+	assert_eq!(entry_count(), 0);
+}
+
+// Checks that the adder teyrchain runtime can be prepared while a 10 MiB preparation memory
+// limit is enforced. At the moment of writing, the limit is far enough to prepare the PVF. If
+// this test starts failing, either the Wasmtime version has changed, or the PVF code itself has
+// changed, and more memory is required now. Multi-threaded preparation, if ever enabled, may
+// also affect memory consumption.
+#[tokio::test]
+async fn prechecking_within_memory_limits() {
+	let memory_limited_params =
+		ExecutorParams::from(&[ExecutorParam::PrecheckingMaxMemory(10 * 1024 * 1024)][..]);
+
+	let host = TestHost::new().await;
+	let result = host
+		.precheck_pvf(::test_teyrchain_adder::wasm_binary_unwrap(), memory_limited_params)
+		.await;
+
+	assert_matches!(result, Ok(_));
+}
+
+// Checks that preparing the adder teyrchain runtime fails while a 512 KiB preparation memory
+// limit is enforced. At the moment of writing, the limit is not enough to prepare the PVF, and
+// the preparation is supposed to generate an error. If this test starts failing, either the
+// Wasmtime version has changed, or the PVF code itself has changed, and less memory is required
+// now.
+#[tokio::test]
+async fn prechecking_out_of_memory() {
+	use pezkuwi_node_core_pvf::PrepareError;
+
+	let memory_limited_params =
+		ExecutorParams::from(&[ExecutorParam::PrecheckingMaxMemory(512 * 1024)][..]);
+
+	let host = TestHost::new().await;
+	let result = host
+		.precheck_pvf(::test_teyrchain_adder::wasm_binary_unwrap(), memory_limited_params)
+		.await;
+
+	assert_matches!(result, Err(PrepareError::OutOfMemory));
+}
+
+// With a single prepare worker, several preparation jobs run one after another and must not
+// conflict.
+#[tokio::test]
+async fn prepare_can_run_serially() {
+	let host = TestHost::new_with_config(|config| config.prepare_workers_hard_max_num = 1).await;
+
+	// Two different wasm blobs are used so that the second preparation is not skipped.
+	let blobs = [
+		::test_teyrchain_adder::wasm_binary_unwrap(),
+		test_teyrchain_halt::wasm_binary_unwrap(),
+	];
+	for blob in blobs {
+		host.precheck_pvf(blob, Default::default()).await.unwrap();
+	}
+}
+
+// CI machines should be able to enable all the security features.
+//
+// Only compiled with the `ci-only-tests` feature on Linux. Landlock availability is probed
+// directly via a syscall, and the host's reported `SecurityStatus` is compared against it; all
+// other features are expected to be available unconditionally.
+#[cfg(all(feature = "ci-only-tests", target_os = "linux"))]
+#[tokio::test]
+async fn all_security_features_work() {
+	let can_enable_landlock = {
+		// SAFETY: the syscall is given a null attribute pointer and a zero size, so no memory
+		// is passed to the kernel. Per landlock_create_ruleset(2), flag value 1
+		// (`LANDLOCK_CREATE_RULESET_VERSION`) only queries the supported ABI version.
+		let res = unsafe { libc::syscall(libc::SYS_landlock_create_ruleset, 0usize, 0usize, 1u32) };
+		if res == -1 {
+			let err = std::io::Error::last_os_error().raw_os_error().unwrap();
+			// `ENOSYS` means the syscall does not exist, i.e. landlock is unavailable. Any other
+			// errno is unexpected and fails the test.
+			if err == libc::ENOSYS {
+				false
+			} else {
+				panic!("Unexpected errno from landlock check: {err}");
+			}
+		} else {
+			true
+		}
+	};
+
+	let host = TestHost::new().await;
+
+	assert_eq!(
+		host.security_status().await,
+		SecurityStatus {
+			// Disabled in tests to not enforce the presence of security features. This CI-only test
+			// is the only one that tests them.
+			secure_validator_mode: false,
+			can_enable_landlock,
+			can_enable_seccomp: true,
+			can_unshare_user_namespace_and_change_root: true,
+			can_do_secure_clone: true,
+		}
+	);
+}
+
+// Regression test: the unshare-pivot-root capability must not depend on the PVF artifacts cache
+// directory already existing.
+#[cfg(all(feature = "ci-only-tests", target_os = "linux"))]
+#[tokio::test]
+async fn nonexistent_cache_dir() {
+	// Point the host at a subdirectory of the default cache path that has not been created.
+	let host = TestHost::new_with_config(|config| {
+		let missing_dir = config.cache_path.join("nonexistent_cache_dir");
+		config.cache_path = missing_dir;
+	})
+	.await;
+
+	let security_status = host.security_status().await;
+	assert!(security_status.can_unshare_user_namespace_and_change_root);
+
+	// Preparation still has to work with that cache path.
+	host.precheck_pvf(::test_teyrchain_adder::wasm_binary_unwrap(), Default::default())
+		.await
+		.unwrap();
+}
+
+// Checks that the artifact is not re-prepared when the executor environment parameters change
+// in a way that does not affect the preparation.
+#[tokio::test]
+async fn artifact_does_not_reprepare_on_non_meaningful_exec_parameter_change() {
+	let host = TestHost::new_with_config(|config| config.prepare_workers_hard_max_num = 1).await;
+	let cache_dir = host.cache_dir.path();
+
+	// Reads the filesystem metadata of the artifact, asserting that the cache holds exactly two
+	// entries (the artifact and one directory).
+	let artifact_metadata = || {
+		let mut entries: Vec<_> = std::fs::read_dir(cache_dir).unwrap().collect();
+		assert_eq!(entries.len(), 2);
+		// If the last entry turns out to be a directory, the artifact is the other entry.
+		let last = entries.pop().unwrap().unwrap();
+		let artifact = if last.path().is_dir() {
+			entries.pop().unwrap().unwrap()
+		} else {
+			last
+		};
+		std::fs::metadata(artifact.path()).unwrap()
+	};
+
+	let default_params = ExecutorParams::default();
+	// An execution timeout is the parameter changed here; the test expects it not to trigger a
+	// new preparation.
+	let timeout_params = ExecutorParams::from(
+		&[ExecutorParam::PvfExecTimeout(RuntimePvfExecKind::Backing, 2500)][..],
+	);
+
+	host.precheck_pvf(test_teyrchain_halt::wasm_binary_unwrap(), default_params)
+		.await
+		.unwrap();
+	let metadata_before = artifact_metadata();
+
+	// FS times are not monotonic, so wait 2 seconds to be sure that a second artifact, if one
+	// were created, would get a different creation time.
+	tokio::time::sleep(Duration::from_secs(2)).await;
+
+	host.precheck_pvf(test_teyrchain_halt::wasm_binary_unwrap(), timeout_params)
+		.await
+		.unwrap();
+	let metadata_after = artifact_metadata();
+
+	// The same creation time means the artifact was reused.
+	assert_eq!(metadata_before.created().unwrap(), metadata_after.created().unwrap());
+}
+
+// Checks that the artifact is re-prepared when the change of the execution environment
+// parameters requires it.
+#[tokio::test]
+async fn artifact_does_reprepare_on_meaningful_exec_parameter_change() {
+	let host = TestHost::new_with_config(|config| config.prepare_workers_hard_max_num = 1).await;
+	let cache_dir = host.cache_dir.path();
+	let cache_entry_count = || std::fs::read_dir(cache_dir).unwrap().count();
+
+	let default_params = ExecutorParams::default();
+	let memory_params = ExecutorParams::from(&[ExecutorParam::MaxMemoryPages(128)][..]);
+
+	host.precheck_pvf(test_teyrchain_halt::wasm_binary_unwrap(), default_params)
+		.await
+		.unwrap();
+	// Two entries are expected after the first preparation.
+	assert_eq!(cache_entry_count(), 2);
+
+	host.precheck_pvf(test_teyrchain_halt::wasm_binary_unwrap(), memory_params)
+		.await
+		.unwrap();
+	// A third entry means that a new artifact has been added.
+	assert_eq!(cache_entry_count(), 3);
+}
+
+// Checks that compressed code which decompresses to more than `VALIDATION_CODE_BOMB_LIMIT` bytes
+// is rejected by prechecking.
+#[tokio::test]
+async fn invalid_compressed_code_fails_prechecking() {
+	// One byte above the bomb limit.
+	let oversized_len = VALIDATION_CODE_BOMB_LIMIT as usize + 1;
+	let raw_code = vec![2u8; oversized_len];
+	let validation_code =
+		sp_maybe_compressed_blob::compress_strongly(&raw_code, oversized_len).unwrap();
+
+	let host = TestHost::new().await;
+	let outcome = host.precheck_pvf(&validation_code, Default::default()).await;
+
+	assert_matches!(outcome, Err(PrepareError::CouldNotDecompressCodeBlob(_)));
+}
+
+// Checks that compressed code which decompresses to more than `VALIDATION_CODE_BOMB_LIMIT` bytes
+// is rejected by validation with a preparation error.
+#[tokio::test]
+async fn invalid_compressed_code_fails_validation() {
+	// One byte above the bomb limit.
+	let oversized_len = VALIDATION_CODE_BOMB_LIMIT as usize + 1;
+	let raw_code = vec![2u8; oversized_len];
+	let validation_code =
+		sp_maybe_compressed_blob::compress_strongly(&raw_code, oversized_len).unwrap();
+
+	let pov = PoV { block_data: BlockData(Vec::new()) };
+	let pvd = PersistedValidationData {
+		parent_head: Default::default(),
+		relay_parent_number: 1u32,
+		relay_parent_storage_root: H256::default(),
+		max_pov_size: 4096 * 1024,
+	};
+
+	let host = TestHost::new().await;
+	let outcome = host
+		.validate_candidate(&validation_code, pvd, pov, Default::default(), H256::default())
+		.await;
+
+	assert_matches!(
+		outcome,
+		Err(ValidationError::Preparation(PrepareError::CouldNotDecompressCodeBlob(_)))
+	);
+}
+
+// Checks that a PoV whose block data decompresses to more than `POV_BOMB_LIMIT` bytes is rejected
+// by validation.
+#[tokio::test]
+async fn invalid_compressed_pov_fails_validation() {
+	// One byte above the bomb limit.
+	let oversized_len = POV_BOMB_LIMIT + 1;
+	let raw_block_data = vec![1u8; oversized_len];
+	let compressed_block_data =
+		sp_maybe_compressed_blob::compress_weakly(&raw_block_data, oversized_len).unwrap();
+	let pov = PoV { block_data: BlockData(compressed_block_data) };
+
+	let pvd = PersistedValidationData {
+		parent_head: Default::default(),
+		relay_parent_number: 1u32,
+		relay_parent_storage_root: H256::default(),
+		max_pov_size: 4096 * 1024,
+	};
+
+	let host = TestHost::new().await;
+	let outcome = host
+		.validate_candidate(
+			test_teyrchain_halt::wasm_binary_unwrap(),
+			pvd,
+			pov,
+			Default::default(),
+			H256::default(),
+		)
+		.await;
+
+	assert_matches!(
+		outcome,
+		Err(ValidationError::Invalid(InvalidCandidate::PoVDecompressionFailure))
+	);
+}
@@ -0,0 +1,403 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Test unexpected behaviors of the spawned processes. We test both worker processes (directly
+//! spawned by the host) and job processes (spawned by the workers to securely perform PVF jobs).
+
+use super::TestHost;
+use assert_matches::assert_matches;
+use codec::Encode;
+use pezkuwi_node_core_pvf::{
+	InvalidCandidate, PossiblyInvalidError, PrepareError, ValidationError,
+};
+use pezkuwi_node_primitives::PoV;
+use pezkuwi_primitives::PersistedValidationData;
+use pezkuwi_teyrchain_primitives::primitives::{
+	BlockData as GenericBlockData, HeadData as GenericHeadData,
+};
+use procfs::process;
+use rusty_fork::rusty_fork_test;
+use sp_core::H256;
+use std::{future::Future, sync::Arc, time::Duration};
+use test_teyrchain_adder::{hash_state, BlockData, HeadData};
+
+// Substrings expected in the executable paths of the spawned worker processes.
+const PREPARE_PROCESS_NAME: &str = "pezkuwi-prepare-worker";
+const EXECUTE_PROCESS_NAME: &str = "pezkuwi-execute-worker";
+
+// Signal numbers are taken from `libc` instead of being hard-coded: `SIGSTOP` in particular is
+// not 19 on every Linux architecture.
+const SIGNAL_KILL: i32 = libc::SIGKILL;
+const SIGNAL_STOP: i32 = libc::SIGSTOP;
+
+/// Sends `signal` to the single process matching the session ID, executable name and
+/// direct-child flag.
+///
+/// Panics if no such process is found or if `kill` does not return 0.
+fn send_signal_by_sid_and_name(
+	sid: i32,
+	exe_name: &'static str,
+	is_direct_child: bool,
+	signal: i32,
+) {
+	let target = find_process_by_sid_and_name(sid, exe_name, is_direct_child)
+		.expect("Should have found the expected process");
+	// SAFETY: `kill` only takes plain integers and touches no memory owned by this process.
+	let return_code = unsafe { libc::kill(target.pid(), signal) };
+	assert_eq!(return_code, 0);
+}
+/// Returns the `num_threads` value from the stat of the single process matching the session ID,
+/// executable name and direct-child flag.
+///
+/// Panics if no such process is found or if its stat cannot be read.
+fn get_num_threads_by_sid_and_name(sid: i32, exe_name: &'static str, is_direct_child: bool) -> i64 {
+	find_process_by_sid_and_name(sid, exe_name, is_direct_child)
+		.expect("Should have found the expected process")
+		.stat()
+		.unwrap()
+		.num_threads
+}
+
+/// Looks up the single process that belongs to session `sid`, whose executable path contains
+/// `exe_name`, and whose parent either is (`is_direct_child == true`) or is not
+/// (`is_direct_child == false`) the current process.
+///
+/// Returns `None` if there is no such process. Processes whose stat or executable path cannot be
+/// read (e.g. because they exited in the meantime) are skipped.
+///
+/// # Panics
+///
+/// Panics if `/proc` cannot be read, if iterating over it fails with anything other than
+/// `NotFound`, or if more than one process matches.
+fn find_process_by_sid_and_name(
+	sid: i32,
+	exe_name: &'static str,
+	is_direct_child: bool,
+) -> Option<process::Process> {
+	let own_pid = std::process::id();
+	let mut found = None;
+
+	for entry in process::all_processes().expect("Can't read /proc") {
+		let process = match entry {
+			Ok(process) => process, // happy path
+			// process vanished during iteration, ignore it
+			Err(procfs::ProcError::NotFound(_)) => continue,
+			Err(err) => panic!("some unknown error: {}", err),
+		};
+
+		let Ok(stat) = process.stat() else {
+			continue;
+		};
+		if stat.session != sid {
+			continue;
+		}
+
+		// The executable link can become unreadable between the two reads, exactly like the stat
+		// above, so treat a failure as "not the process we are looking for" instead of panicking.
+		let Ok(exe) = process.exe() else {
+			continue;
+		};
+		// A lossy conversion is enough for a substring check and cannot panic on non-UTF-8 paths.
+		if !exe.to_string_lossy().contains(exe_name) {
+			continue;
+		}
+
+		// The workers are direct children of the current process, the worker job processes are not
+		// (they are children of the workers).
+		let process_is_direct_child = stat.ppid as u32 == own_pid;
+		if is_direct_child != process_is_direct_child {
+			continue;
+		}
+
+		if found.is_some() {
+			panic!("Found more than one process")
+		}
+		found = Some(process);
+	}
+
+	found
+}
+
+/// Common setup for the tests below: starts a tokio runtime, creates a [`TestHost`], moves the
+/// current process into a new session and runs `f` with the host and the new session ID.
+///
+/// The runtime is driven manually because `#[tokio::test]` doesn't work in `rusty_fork_test!`.
+fn test_wrapper<F, Fut>(f: F)
+where
+	F: FnOnce(Arc<TestHost>, i32) -> Fut,
+	Fut: Future<Output = ()>,
+{
+	tokio::runtime::Runtime::new().unwrap().block_on(async {
+		let host = Arc::new(TestHost::new().await);
+
+		// SAFETY: `setsid` takes no arguments and touches no memory of this process.
+		let session_id = unsafe { libc::setsid() };
+		// A new session must have been created for the process lookups by session ID to work.
+		assert!(session_id > 0);
+
+		// Only a clone is handed out, so the host itself stays alive until `f` has finished.
+		f(Arc::clone(&host), session_id).await;
+	});
+}
+
+// Run these tests in their own processes with rusty-fork. They work by each creating a new session,
+// then finding the child process that matches the session ID and expected process name and doing
+// something with that child.
+rusty_fork_test! {
+	// The happy path: validating a candidate of the adder PVF succeeds.
+	#[test]
+	fn successful_prepare_and_validate() {
+		test_wrapper(|host, _sid| async move {
+			let block_data = BlockData { state: 0, add: 512 };
+			let pov = PoV { block_data: GenericBlockData(block_data.encode()) };
+
+			let parent_head = HeadData { number: 0, parent_hash: [0; 32], post_state: hash_state(0) };
+			let pvd = PersistedValidationData {
+				parent_head: GenericHeadData(parent_head.encode()),
+				relay_parent_number: 1u32,
+				relay_parent_storage_root: H256::default(),
+				max_pov_size: 4096 * 1024,
+			};
+
+			let outcome = host
+				.validate_candidate(
+					test_teyrchain_adder::wasm_binary_unwrap(),
+					pvd,
+					pov,
+					Default::default(),
+					H256::default(),
+				)
+				.await;
+			outcome.unwrap();
+		})
+	}
+
+	// What happens when the prepare worker (not the job) times out?
+	//
+	// The worker process (a direct child of the test process) is paused with a stop signal one
+	// second after the preparation was started; prechecking must then fail with `TimedOut`.
+	#[test]
+	fn prepare_worker_timeout() {
+		test_wrapper(|host, sid| async move {
+			// Both futures are polled concurrently; only the prechecking result is inspected.
+			let (result, _) = futures::join!(
+				// Choose a job that would normally take the entire timeout.
+				host.precheck_pvf(pezkuwichain_runtime::WASM_BINARY.unwrap(), Default::default()),
+				// Send a stop signal to pause the worker.
+				async {
+					// Presumably gives the worker time to be spawned -- the delay is not
+					// synchronized with the host in any way.
+					tokio::time::sleep(Duration::from_secs(1)).await;
+					send_signal_by_sid_and_name(sid, PREPARE_PROCESS_NAME, true, SIGNAL_STOP);
+				}
+			);
+
+			assert_matches!(result, Err(PrepareError::TimedOut));
+		})
+	}
+
+	// What happens when the execute worker (not the job) times out?
+	#[test]
+	fn execute_worker_timeout() {
+		test_wrapper(|host, sid| async move {
+			// The artifact is prepared up front, so only the execution is in flight below.
+			let halt_wasm = test_teyrchain_halt::wasm_binary_unwrap();
+			host.precheck_pvf(halt_wasm, Default::default()).await.unwrap();
+
+			let pov = PoV { block_data: GenericBlockData(Vec::new()) };
+			let pvd = PersistedValidationData {
+				parent_head: GenericHeadData(HeadData::default().encode()),
+				relay_parent_number: 1u32,
+				relay_parent_storage_root: H256::default(),
+				max_pov_size: 4096 * 1024,
+			};
+
+			// A job that would normally take the entire timeout.
+			let validation = host.validate_candidate(
+				halt_wasm,
+				pvd,
+				pov,
+				Default::default(),
+				H256::default(),
+			);
+			// After one second, pause the worker process with a stop signal.
+			let pause_worker = async {
+				tokio::time::sleep(Duration::from_secs(1)).await;
+				send_signal_by_sid_and_name(sid, EXECUTE_PROCESS_NAME, true, SIGNAL_STOP);
+			};
+
+			let (result, _) = futures::join!(validation, pause_worker);
+
+			assert_matches!(
+				result,
+				Err(ValidationError::Invalid(InvalidCandidate::HardTimeout))
+			);
+		})
+	}
+
+	// What happens when the prepare worker dies in the middle of a job?
+	//
+	// The worker process (a direct child of the test process) is killed one second after the
+	// preparation was started; prechecking must then fail with an `IoErr`.
+	#[test]
+	fn prepare_worker_killed_during_job() {
+		test_wrapper(|host, sid| async move {
+			// Both futures are polled concurrently; only the prechecking result is inspected.
+			let (result, _) = futures::join!(
+				// Choose a job that would normally take the entire timeout.
+				host.precheck_pvf(pezkuwichain_runtime::WASM_BINARY.unwrap(), Default::default()),
+				// Run a future that kills the worker while the job is running.
+				async {
+					tokio::time::sleep(Duration::from_secs(1)).await;
+					send_signal_by_sid_and_name(sid, PREPARE_PROCESS_NAME, true, SIGNAL_KILL);
+				}
+			);
+
+			assert_matches!(result, Err(PrepareError::IoErr(_)));
+		})
+	}
+
+	// What happens when the execute worker dies in the middle of a job?
+	#[test]
+	fn execute_worker_killed_during_job() {
+		test_wrapper(|host, sid| async move {
+			// The artifact is prepared up front, so only the execution is in flight below.
+			let halt_wasm = test_teyrchain_halt::wasm_binary_unwrap();
+			host.precheck_pvf(halt_wasm, Default::default()).await.unwrap();
+
+			let pov = PoV { block_data: GenericBlockData(Vec::new()) };
+			let pvd = PersistedValidationData {
+				parent_head: GenericHeadData(HeadData::default().encode()),
+				relay_parent_number: 1u32,
+				relay_parent_storage_root: H256::default(),
+				max_pov_size: 4096 * 1024,
+			};
+
+			// A job that would normally take the entire timeout.
+			let validation = host.validate_candidate(
+				halt_wasm,
+				pvd,
+				pov,
+				Default::default(),
+				H256::default(),
+			);
+			// After one second, kill the worker process (a direct child of the test process).
+			let kill_worker = async {
+				tokio::time::sleep(Duration::from_secs(1)).await;
+				send_signal_by_sid_and_name(sid, EXECUTE_PROCESS_NAME, true, SIGNAL_KILL);
+			};
+
+			let (result, _) = futures::join!(validation, kill_worker);
+
+			assert_matches!(
+				result,
+				Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath))
+			);
+		})
+	}
+
+	// What happens when the forked prepare job dies in the middle of its job?
+	//
+	// Here the signal goes to the job process (not a direct child of the test process) instead of
+	// the worker; prechecking must then fail with `JobDied` carrying the SIGKILL description.
+	#[test]
+	fn forked_prepare_job_killed_during_job() {
+		test_wrapper(|host, sid| async move {
+			// Both futures are polled concurrently; only the prechecking result is inspected.
+			let (result, _) = futures::join!(
+				// Choose a job that would normally take the entire timeout.
+				host.precheck_pvf(pezkuwichain_runtime::WASM_BINARY.unwrap(), Default::default()),
+				// Run a future that kills the job while it's running.
+				async {
+					tokio::time::sleep(Duration::from_secs(1)).await;
+					// `false`: target the process that is not a direct child, i.e. the job.
+					send_signal_by_sid_and_name(sid, PREPARE_PROCESS_NAME, false, SIGNAL_KILL);
+				}
+			);
+
+			// Note that we get a more specific error if the job died than if the whole worker died.
+			assert_matches!(
+				result,
+				Err(PrepareError::JobDied{ err, job_pid: _ }) if err == "received signal: SIGKILL"
+			);
+		})
+	}
+
+	// What happens when the forked execute job dies in the middle of its job?
+	#[test]
+	fn forked_execute_job_killed_during_job() {
+		test_wrapper(|host, sid| async move {
+			// The artifact is prepared up front, so only the execution is in flight below.
+			let halt_wasm = test_teyrchain_halt::wasm_binary_unwrap();
+			host.precheck_pvf(halt_wasm, Default::default()).await.unwrap();
+
+			let pov = PoV { block_data: GenericBlockData(Vec::new()) };
+			let pvd = PersistedValidationData {
+				parent_head: GenericHeadData(HeadData::default().encode()),
+				relay_parent_number: 1u32,
+				relay_parent_storage_root: H256::default(),
+				max_pov_size: 4096 * 1024,
+			};
+
+			// A job that would normally take the entire timeout.
+			let validation = host.validate_candidate(
+				halt_wasm,
+				pvd,
+				pov,
+				Default::default(),
+				H256::default(),
+			);
+			// After one second, kill the job process (the one that is not a direct child of the
+			// test process).
+			let kill_job = async {
+				tokio::time::sleep(Duration::from_secs(1)).await;
+				send_signal_by_sid_and_name(sid, EXECUTE_PROCESS_NAME, false, SIGNAL_KILL);
+			};
+
+			let (result, _) = futures::join!(validation, kill_job);
+
+			// The error is more specific when only the job died than when the whole worker died.
+			assert_matches!(
+				result,
+				Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousJobDeath(err)))
+					if err == "received signal: SIGKILL"
+			);
+		})
+	}
+
+	// Ensure that the spawned prepare worker is single-threaded, and that its job process has
+	// exactly `PREPARE_WORKER_THREAD_NUMBER` threads.
+	//
+	// See `run_worker` for why we need this invariant.
+	#[test]
+	fn ensure_prepare_processes_have_correct_num_threads() {
+		test_wrapper(|host, sid| async move {
+			// The prechecking result is irrelevant here: the worker is killed at the end.
+			let _ = futures::join!(
+				// Choose a job that would normally take the entire timeout.
+				host.precheck_pvf(pezkuwichain_runtime::WASM_BINARY.unwrap(), Default::default()),
+				// Run a future that checks the thread counts while the job is running.
+				async {
+					tokio::time::sleep(Duration::from_secs(1)).await;
+					// The worker itself (direct child of the test process) must have one thread.
+					assert_eq!(
+						get_num_threads_by_sid_and_name(sid, PREPARE_PROCESS_NAME, true),
+						1
+					);
+					// The child job's thread count is pinned by `PREPARE_WORKER_THREAD_NUMBER`.
+					// Presumably these are the main thread, the prepare thread, the CPU time
+					// monitor, and memory tracking -- TODO confirm against the worker code.
+					assert_eq!(
+						get_num_threads_by_sid_and_name(sid, PREPARE_PROCESS_NAME, false),
+						pezkuwi_node_core_pvf_prepare_worker::PREPARE_WORKER_THREAD_NUMBER as i64,
+					);
+
+					// End the test.
+					send_signal_by_sid_and_name(sid, PREPARE_PROCESS_NAME, true, SIGNAL_KILL);
+				}
+			);
+		})
+	}
+
+	// Ensure that the spawned execute worker is single-threaded, and that its job process has
+	// exactly `EXECUTE_WORKER_THREAD_NUMBER` threads.
+	//
+	// See `run_worker` for why we need this invariant.
+	#[test]
+	fn ensure_execute_processes_have_correct_num_threads() {
+		test_wrapper(|host, sid| async move {
+			// Prepare the artifact ahead of time.
+			let binary = test_teyrchain_halt::wasm_binary_unwrap();
+			host.precheck_pvf(binary, Default::default()).await.unwrap();
+			let pvd = PersistedValidationData {
+				parent_head: GenericHeadData(HeadData::default().encode()),
+				relay_parent_number: 1u32,
+				relay_parent_storage_root: H256::default(),
+				max_pov_size: 4096 * 1024,
+			};
+			let pov = PoV { block_data: GenericBlockData(Vec::new()) };
+
+			// The validation result is irrelevant here: the worker is killed at the end.
+			let _ = futures::join!(
+				// Choose a job that would normally take the entire timeout.
+				host.validate_candidate(
+					binary,
+					pvd,
+					pov,
+					Default::default(),
+					H256::default(),
+				),
+				// Run a future that tests the thread count while the worker is running.
+				async {
+					tokio::time::sleep(Duration::from_secs(1)).await;
+					// The worker itself (direct child of the test process) must have one thread.
+					assert_eq!(
+						get_num_threads_by_sid_and_name(sid, EXECUTE_PROCESS_NAME, true),
+						1
+					);
+					// Child job should have three threads: main thread, execute thread, and CPU
+					// time monitor. The exact count is pinned by `EXECUTE_WORKER_THREAD_NUMBER`.
+					assert_eq!(
+						get_num_threads_by_sid_and_name(sid, EXECUTE_PROCESS_NAME, false),
+						pezkuwi_node_core_pvf_execute_worker::EXECUTE_WORKER_THREAD_NUMBER as i64,
+					);
+
+					// End the test.
+					send_signal_by_sid_and_name(sid, EXECUTE_PROCESS_NAME, true, SIGNAL_KILL);
+				}
+			);
+		})
+	}
+}
@@ -0,0 +1,78 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Pezkuwi.
+
+// Pezkuwi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Pezkuwi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Pezkuwi.  If not, see <http://www.gnu.org/licenses/>.
+
+use pezkuwi_node_core_pvf::{
+	testing::{build_workers_and_get_paths, spawn_with_program_path, SpawnErr},
+	SecurityStatus,
+};
+use std::{env, time::Duration};
+
+// Test spawning a program that immediately exits with a failure code.
+//
+// The worker never connects back, so the spawn is expected to fail with `AcceptTimeout` carrying
+// the very timeout that was passed in.
+#[tokio::test]
+async fn spawn_immediate_exit() {
+	let (prepare_worker_path, _) = build_workers_and_get_paths();
+
+	// There's no explicit `exit` subcommand in the worker; it will panic on an unknown
+	// subcommand anyway
+	let spawn_timeout = Duration::from_secs(2);
+	let result = spawn_with_program_path(
+		"integration-test",
+		prepare_worker_path,
+		&env::temp_dir(),
+		&["exit"],
+		// Pass the same value the assertion below compares against, rather than a separate
+		// literal that only happens to be equal.
+		spawn_timeout,
+		SecurityStatus::default(),
+	)
+	.await;
+	assert!(
+		matches!(result, Err(SpawnErr::AcceptTimeout { spawn_timeout: s }) if s == spawn_timeout)
+	);
+}
+
+// Spawning the execute worker with the `test-sleep` subcommand is expected to run into the accept
+// timeout, which must be reported with the timeout value that was passed in.
+#[tokio::test]
+async fn spawn_timeout() {
+	let accept_timeout = Duration::from_secs(2);
+	let (_, execute_worker_path) = build_workers_and_get_paths();
+
+	let spawn_result = spawn_with_program_path(
+		"integration-test",
+		execute_worker_path,
+		&env::temp_dir(),
+		&["test-sleep"],
+		accept_timeout,
+		SecurityStatus::default(),
+	)
+	.await;
+
+	assert!(matches!(
+		spawn_result,
+		Err(SpawnErr::AcceptTimeout { spawn_timeout: reported }) if reported == accept_timeout
+	));
+}
+
+// Spawning the prepare worker with its regular `prepare-worker` subcommand must succeed within
+// the timeout.
+#[tokio::test]
+async fn should_connect() {
+	let accept_timeout = Duration::from_secs(2);
+	let (prepare_worker_path, _) = build_workers_and_get_paths();
+
+	let spawn_result = spawn_with_program_path(
+		"integration-test",
+		prepare_worker_path,
+		&env::temp_dir(),
+		&["prepare-worker"],
+		accept_timeout,
+		SecurityStatus::default(),
+	)
+	.await;
+
+	// Only success matters; the spawned handles are dropped right away.
+	let _ = spawn_result.unwrap();
+}