PVF: Refactor workers into separate crates, remove host dependency (#7253)

* PVF: Refactor workers into separate crates, remove host dependency * Fix compile error * Remove some leftover code * Fix compile errors * Update Cargo.lock * Remove worker main.rs files I accidentally copied these from the other PR. This PR isn't intended to introduce standalone workers yet. * Address review comments * cargo fmt * Update a couple of comments * Update log targets
2026-06-16 22:31:03 +00:00 · 2023-05-25 16:29:13 -04:00
parent 4146c26f3c
commit 8782dde411
50 changed files with 777 additions and 519 deletions
@@ -0,0 +1,106 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Polkadot.
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
+
+use crate::prepare::PrepareStats;
+use parity_scale_codec::{Decode, Encode};
+use std::fmt;
+
+/// Result of PVF preparation performed by the validation host. Contains stats about the preparation if
+/// successful
+pub type PrepareResult = Result<PrepareStats, PrepareError>;
+
+/// An error that occurred during the prepare part of the PVF pipeline.
+#[derive(Debug, Clone, Encode, Decode)]
+pub enum PrepareError {
+	/// During the prevalidation stage of preparation an issue was found with the PVF.
+	Prevalidation(String),
+	/// Compilation failed for the given PVF.
+	Preparation(String),
+	/// An unexpected panic has occurred in the preparation worker.
+	Panic(String),
+	/// Failed to prepare the PVF due to the time limit.
+	TimedOut,
+	/// An IO error occurred. This state is reported by either the validation host or by the worker.
+	IoErr(String),
+	/// The temporary file for the artifact could not be created at the given cache path. This state is reported by the
+	/// validation host (not by the worker).
+	CreateTmpFileErr(String),
+	/// The response from the worker is received, but the file cannot be renamed (moved) to the final destination
+	/// location. This state is reported by the validation host (not by the worker).
+	RenameTmpFileErr(String),
+}
+
+impl PrepareError {
+	/// Returns whether this is a deterministic error, i.e. one that should trigger reliably. Those
+	/// errors depend on the PVF itself and the sc-executor/wasmtime logic.
+	///
+	/// Non-deterministic errors can happen spuriously. Typically, they occur due to resource
+	/// starvation, e.g. under heavy load or memory pressure. Those errors are typically transient
+	/// but may persist e.g. if the node is run by overwhelmingly underpowered machine.
+	pub fn is_deterministic(&self) -> bool {
+		use PrepareError::*;
+		match self {
+			Prevalidation(_) | Preparation(_) | Panic(_) => true,
+			TimedOut | IoErr(_) | CreateTmpFileErr(_) | RenameTmpFileErr(_) => false,
+		}
+	}
+}
+
+impl fmt::Display for PrepareError {
+	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+		use PrepareError::*;
+		match self {
+			Prevalidation(err) => write!(f, "prevalidation: {}", err),
+			Preparation(err) => write!(f, "preparation: {}", err),
+			Panic(err) => write!(f, "panic: {}", err),
+			TimedOut => write!(f, "prepare: timeout"),
+			IoErr(err) => write!(f, "prepare: io error while receiving response: {}", err),
+			CreateTmpFileErr(err) => write!(f, "prepare: error creating tmp file: {}", err),
+			RenameTmpFileErr(err) => write!(f, "prepare: error renaming tmp file: {}", err),
+		}
+	}
+}
+
+/// Some internal error occurred.
+///
+/// Should only ever be used for validation errors independent of the candidate and PVF, or for errors we ruled out
+/// during pre-checking (so preparation errors are fine).
+#[derive(Debug, Clone, Encode, Decode)]
+pub enum InternalValidationError {
+	/// Some communication error occurred with the host.
+	HostCommunication(String),
+	/// Could not find or open compiled artifact file.
+	CouldNotOpenFile(String),
+	/// An error occurred in the CPU time monitor thread. Should be totally unrelated to validation.
+	CpuTimeMonitorThread(String),
+	/// Some non-deterministic preparation error occurred.
+	NonDeterministicPrepareError(PrepareError),
+}
+
+impl fmt::Display for InternalValidationError {
+	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+		use InternalValidationError::*;
+		match self {
+			HostCommunication(err) =>
+				write!(f, "validation: some communication error occurred with the host: {}", err),
+			CouldNotOpenFile(err) =>
+				write!(f, "validation: could not find or open compiled artifact file: {}", err),
+			CpuTimeMonitorThread(err) =>
+				write!(f, "validation: an error occurred in the CPU time monitor thread: {}", err),
+			NonDeterministicPrepareError(err) => write!(f, "validation: prepare: {}", err),
+		}
+	}
+}
@@ -0,0 +1,60 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Polkadot.
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
+
+use crate::error::InternalValidationError;
+use parity_scale_codec::{Decode, Encode};
+use polkadot_parachain::primitives::ValidationResult;
+use polkadot_primitives::ExecutorParams;
+use std::time::Duration;
+
+/// The payload of the one-time handshake that is done when a worker process is created. Carries
+/// data from the host to the worker.
+#[derive(Encode, Decode)]
+pub struct Handshake {
+	/// The executor parameters.
+	pub executor_params: ExecutorParams,
+}
+
+/// The response from an execution job on the worker.
+#[derive(Encode, Decode)]
+pub enum Response {
+	/// The job completed successfully.
+	Ok {
+		/// The result of parachain validation.
+		result_descriptor: ValidationResult,
+		/// The amount of CPU time taken by the job.
+		duration: Duration,
+	},
+	/// The candidate is invalid.
+	InvalidCandidate(String),
+	/// The job timed out.
+	TimedOut,
+	/// An unexpected panic has occurred in the execution worker.
+	Panic(String),
+	/// Some internal error occurred.
+	InternalError(InternalValidationError),
+}
+
+impl Response {
+	/// Creates an invalid response from a context `ctx` and a message `msg` (which can be empty).
+	pub fn format_invalid(ctx: &'static str, msg: &str) -> Self {
+		if msg.is_empty() {
+			Self::InvalidCandidate(ctx.to_string())
+		} else {
+			Self::InvalidCandidate(format!("{}: {}", ctx, msg))
+		}
+	}
+}
@@ -0,0 +1,114 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Polkadot.
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Interface to the Substrate Executor
+
+use polkadot_primitives::{ExecutorParam, ExecutorParams};
+use sc_executor_common::wasm_runtime::HeapAllocStrategy;
+use sc_executor_wasmtime::{Config, DeterministicStackLimit, Semantics};
+
+// Memory configuration
+//
+// When Substrate Runtime is instantiated, a number of WASM pages are allocated for the Substrate
+// Runtime instance's linear memory. The exact number of pages is a sum of whatever the WASM blob
+// itself requests (by default at least enough to hold the data section as well as have some space
+// left for the stack; this is, of course, overridable at link time when compiling the runtime)
+// plus the number of pages specified in the `extra_heap_pages` passed to the executor.
+//
+// By default, rustc (or `lld` specifically) should allocate 1 MiB for the shadow stack, or 16 pages.
+// The data section for runtimes are typically rather small and can fit in a single digit number of
+// WASM pages, so let's say an extra 16 pages. Thus let's assume that 32 pages or 2 MiB are used for
+// these needs by default.
+const DEFAULT_HEAP_PAGES_ESTIMATE: u32 = 32;
+const EXTRA_HEAP_PAGES: u32 = 2048;
+
+/// The number of bytes devoted for the stack during wasm execution of a PVF.
+pub const NATIVE_STACK_MAX: u32 = 256 * 1024 * 1024;
+
+// VALUES OF THE DEFAULT CONFIGURATION SHOULD NEVER BE CHANGED
+// They are used as base values for the execution environment parametrization.
+// To overwrite them, add new ones to `EXECUTOR_PARAMS` in the `session_info` pallet and perform
+// a runtime upgrade to make them active.
+pub const DEFAULT_CONFIG: Config = Config {
+	allow_missing_func_imports: true,
+	cache_path: None,
+	semantics: Semantics {
+		heap_alloc_strategy: sc_executor_common::wasm_runtime::HeapAllocStrategy::Dynamic {
+			maximum_pages: Some(DEFAULT_HEAP_PAGES_ESTIMATE + EXTRA_HEAP_PAGES),
+		},
+
+		instantiation_strategy:
+			sc_executor_wasmtime::InstantiationStrategy::RecreateInstanceCopyOnWrite,
+
+		// Enable deterministic stack limit to pin down the exact number of items the wasmtime stack
+		// can contain before it traps with stack overflow.
+		//
+		// Here is how the values below were chosen.
+		//
+		// At the moment of writing, the default native stack size limit is 1 MiB. Assuming a logical item
+		// (see the docs about the field and the instrumentation algorithm) is 8 bytes, 1 MiB can
+		// fit 2x 65536 logical items.
+		//
+		// Since reaching the native stack limit is undesirable, we halve the logical item limit and
+		// also increase the native 256x. This hopefully should preclude wasm code from reaching
+		// the stack limit set by the wasmtime.
+		deterministic_stack_limit: Some(DeterministicStackLimit {
+			logical_max: 65536,
+			native_stack_max: NATIVE_STACK_MAX,
+		}),
+		canonicalize_nans: true,
+		// Rationale for turning the multi-threaded compilation off is to make the preparation time
+		// easily reproducible and as deterministic as possible.
+		//
+		// Currently the prepare queue doesn't distinguish between precheck and prepare requests.
+		// On the one hand, it simplifies the code, on the other, however, slows down compile times
+		// for execute requests. This behavior may change in future.
+		parallel_compilation: false,
+
+		// WASM extensions. Only those that are meaningful to us may be controlled here. By default,
+		// we're using WASM MVP, which means all the extensions are disabled. Nevertheless, some
+		// extensions (e.g., sign extension ops) are enabled by Wasmtime and cannot be disabled.
+		wasm_reference_types: false,
+		wasm_simd: false,
+		wasm_bulk_memory: false,
+		wasm_multi_value: false,
+	},
+};
+
+pub fn params_to_wasmtime_semantics(par: &ExecutorParams) -> Result<Semantics, String> {
+	let mut sem = DEFAULT_CONFIG.semantics.clone();
+	let mut stack_limit = if let Some(stack_limit) = sem.deterministic_stack_limit.clone() {
+		stack_limit
+	} else {
+		return Err("No default stack limit set".to_owned())
+	};
+
+	for p in par.iter() {
+		match p {
+			ExecutorParam::MaxMemoryPages(max_pages) =>
+				sem.heap_alloc_strategy =
+					HeapAllocStrategy::Dynamic { maximum_pages: Some(*max_pages) },
+			ExecutorParam::StackLogicalMax(slm) => stack_limit.logical_max = *slm,
+			ExecutorParam::StackNativeMax(snm) => stack_limit.native_stack_max = *snm,
+			ExecutorParam::WasmExtBulkMemory => sem.wasm_bulk_memory = true,
+			// TODO: Not implemented yet; <https://github.com/paritytech/polkadot/issues/6472>.
+			ExecutorParam::PrecheckingMaxMemory(_) => (),
+			ExecutorParam::PvfPrepTimeout(_, _) | ExecutorParam::PvfExecTimeout(_, _) => (), // Not used here
+		}
+	}
+	sem.deterministic_stack_limit = Some(stack_limit);
+	Ok(sem)
+}
@@ -0,0 +1,57 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Polkadot.
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Functionality that is shared by the host and the workers.
+
+pub mod error;
+pub mod execute;
+pub mod executor_intf;
+pub mod prepare;
+pub mod pvf;
+pub mod worker;
+
+pub use cpu_time::ProcessTime;
+
+const LOG_TARGET: &str = "parachain::pvf-common";
+
+use std::mem;
+use tokio::io::{self, AsyncRead, AsyncReadExt as _, AsyncWrite, AsyncWriteExt as _};
+
+#[doc(hidden)]
+pub mod tests {
+	use std::time::Duration;
+
+	pub const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(3);
+	pub const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(30);
+}
+
+/// Write some data prefixed by its length into `w`.
+pub async fn framed_send(w: &mut (impl AsyncWrite + Unpin), buf: &[u8]) -> io::Result<()> {
+	let len_buf = buf.len().to_le_bytes();
+	w.write_all(&len_buf).await?;
+	w.write_all(buf).await?;
+	Ok(())
+}
+
+/// Read some data prefixed by its length from `r`.
+pub async fn framed_recv(r: &mut (impl AsyncRead + Unpin)) -> io::Result<Vec<u8>> {
+	let mut len_buf = [0u8; mem::size_of::<usize>()];
+	r.read_exact(&mut len_buf).await?;
+	let len = usize::from_le_bytes(len_buf);
+	let mut buf = vec![0; len];
+	r.read_exact(&mut buf).await?;
+	Ok(buf)
+}
@@ -0,0 +1,48 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Polkadot.
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
+
+use parity_scale_codec::{Decode, Encode};
+
+/// Preparation statistics, including the CPU time and memory taken.
+#[derive(Debug, Clone, Default, Encode, Decode)]
+pub struct PrepareStats {
+	/// The CPU time that elapsed for the preparation job.
+	pub cpu_time_elapsed: std::time::Duration,
+	/// The observed memory statistics for the preparation job.
+	pub memory_stats: MemoryStats,
+}
+
+/// Helper struct to contain all the memory stats, including `MemoryAllocationStats` and, if
+/// supported by the OS, `ru_maxrss`.
+#[derive(Clone, Debug, Default, Encode, Decode)]
+pub struct MemoryStats {
+	/// Memory stats from `tikv_jemalloc_ctl`.
+	#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+	pub memory_tracker_stats: Option<MemoryAllocationStats>,
+	/// `ru_maxrss` from `getrusage`. `None` if an error occurred.
+	#[cfg(target_os = "linux")]
+	pub max_rss: Option<i64>,
+}
+
+/// Statistics of collected memory metrics.
+#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+#[derive(Clone, Debug, Default, Encode, Decode)]
+pub struct MemoryAllocationStats {
+	/// Total resident memory, in bytes.
+	pub resident: u64,
+	/// Total allocated memory, in bytes.
+	pub allocated: u64,
+}
@@ -0,0 +1,108 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Polkadot.
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
+
+use parity_scale_codec::{Decode, Encode};
+use polkadot_parachain::primitives::ValidationCodeHash;
+use polkadot_primitives::ExecutorParams;
+use sp_core::blake2_256;
+use std::{
+	cmp::{Eq, PartialEq},
+	fmt,
+	sync::Arc,
+	time::Duration,
+};
+
+/// A struct that carries the exhaustive set of data to prepare an artifact out of plain
+/// Wasm binary
+///
+/// Should be cheap to clone.
+#[derive(Clone, Encode, Decode)]
+pub struct PvfPrepData {
+	/// Wasm code (uncompressed)
+	code: Arc<Vec<u8>>,
+	/// Wasm code hash
+	code_hash: ValidationCodeHash,
+	/// Executor environment parameters for the session for which artifact is prepared
+	executor_params: Arc<ExecutorParams>,
+	/// Preparation timeout
+	prep_timeout: Duration,
+}
+
+impl PvfPrepData {
+	/// Returns an instance of the PVF out of the given PVF code and executor params.
+	pub fn from_code(
+		code: Vec<u8>,
+		executor_params: ExecutorParams,
+		prep_timeout: Duration,
+	) -> Self {
+		let code = Arc::new(code);
+		let code_hash = blake2_256(&code).into();
+		let executor_params = Arc::new(executor_params);
+		Self { code, code_hash, executor_params, prep_timeout }
+	}
+
+	/// Returns validation code hash for the PVF
+	pub fn code_hash(&self) -> ValidationCodeHash {
+		self.code_hash
+	}
+
+	/// Returns PVF code
+	pub fn code(&self) -> Arc<Vec<u8>> {
+		self.code.clone()
+	}
+
+	/// Returns executor params
+	pub fn executor_params(&self) -> Arc<ExecutorParams> {
+		self.executor_params.clone()
+	}
+
+	/// Returns preparation timeout.
+	pub fn prep_timeout(&self) -> Duration {
+		self.prep_timeout
+	}
+
+	/// Creates a structure for tests.
+	#[doc(hidden)]
+	pub fn from_discriminator_and_timeout(num: u32, timeout: Duration) -> Self {
+		let descriminator_buf = num.to_le_bytes().to_vec();
+		Self::from_code(descriminator_buf, ExecutorParams::default(), timeout)
+	}
+
+	/// Creates a structure for tests.
+	#[doc(hidden)]
+	pub fn from_discriminator(num: u32) -> Self {
+		Self::from_discriminator_and_timeout(num, crate::tests::TEST_PREPARATION_TIMEOUT)
+	}
+}
+
+impl fmt::Debug for PvfPrepData {
+	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+		write!(
+			f,
+			"Pvf {{ code, code_hash: {:?}, executor_params: {:?}, prep_timeout: {:?} }}",
+			self.code_hash, self.executor_params, self.prep_timeout
+		)
+	}
+}
+
+impl PartialEq for PvfPrepData {
+	fn eq(&self, other: &Self) -> bool {
+		self.code_hash == other.code_hash &&
+			self.executor_params.hash() == other.executor_params.hash()
+	}
+}
+
+impl Eq for PvfPrepData {}
@@ -0,0 +1,312 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Polkadot.
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Functionality common to both prepare and execute workers.
+
+use crate::LOG_TARGET;
+use cpu_time::ProcessTime;
+use futures::never::Never;
+use std::{
+	any::Any,
+	path::PathBuf,
+	sync::mpsc::{Receiver, RecvTimeoutError},
+	time::Duration,
+};
+use tokio::{io, net::UnixStream, runtime::Runtime};
+
+/// Use this macro to declare a `fn main() {}` that will create an executable that can be used for
+/// spawning the desired worker.
+#[macro_export]
+macro_rules! decl_worker_main {
+	($expected_command:expr, $entrypoint:expr) => {
+		fn main() {
+			::sp_tracing::try_init_simple();
+
+			let args = std::env::args().collect::<Vec<_>>();
+			if args.len() < 3 {
+				panic!("wrong number of arguments");
+			}
+
+			let mut version = None;
+			let mut socket_path: &str = "";
+
+			for i in 2..args.len() {
+				match args[i].as_ref() {
+					"--socket-path" => socket_path = args[i + 1].as_str(),
+					"--node-version" => version = Some(args[i + 1].as_str()),
+					_ => (),
+				}
+			}
+
+			let subcommand = &args[1];
+			if subcommand != $expected_command {
+				panic!(
+					"trying to run {} binary with the {} subcommand",
+					$expected_command, subcommand
+				)
+			}
+			$entrypoint(&socket_path, version);
+		}
+	};
+}
+
+/// Some allowed overhead that we account for in the "CPU time monitor" thread's sleeps, on the
+/// child process.
+pub const JOB_TIMEOUT_OVERHEAD: Duration = Duration::from_millis(50);
+
+/// Interprets the given bytes as a path. Returns `None` if the given bytes do not constitute a
+/// a proper utf-8 string.
+pub fn bytes_to_path(bytes: &[u8]) -> Option<PathBuf> {
+	std::str::from_utf8(bytes).ok().map(PathBuf::from)
+}
+
+pub fn worker_event_loop<F, Fut>(
+	debug_id: &'static str,
+	socket_path: &str,
+	node_version: Option<&str>,
+	mut event_loop: F,
+) where
+	F: FnMut(UnixStream) -> Fut,
+	Fut: futures::Future<Output = io::Result<Never>>,
+{
+	let worker_pid = std::process::id();
+	gum::debug!(target: LOG_TARGET, %worker_pid, "starting pvf worker ({})", debug_id);
+
+	// Check for a mismatch between the node and worker versions.
+	if let Some(version) = node_version {
+		if version != env!("SUBSTRATE_CLI_IMPL_VERSION") {
+			gum::error!(
+				target: LOG_TARGET,
+				%worker_pid,
+				"Node and worker version mismatch, node needs restarting, forcing shutdown",
+			);
+			kill_parent_node_in_emergency();
+			let err: io::Result<Never> =
+				Err(io::Error::new(io::ErrorKind::Unsupported, "Version mismatch"));
+			gum::debug!(target: LOG_TARGET, %worker_pid, "quitting pvf worker({}): {:?}", debug_id, err);
+			return
+		}
+	}
+
+	// Run the main worker loop.
+	let rt = Runtime::new().expect("Creates tokio runtime. If this panics the worker will die and the host will detect that and deal with it.");
+	let err = rt
+		.block_on(async move {
+			let stream = UnixStream::connect(socket_path).await?;
+			let _ = tokio::fs::remove_file(socket_path).await;
+
+			let result = event_loop(stream).await;
+
+			result
+		})
+		// It's never `Ok` because it's `Ok(Never)`.
+		.unwrap_err();
+
+	gum::debug!(target: LOG_TARGET, %worker_pid, "quitting pvf worker ({}): {:?}", debug_id, err);
+
+	// We don't want tokio to wait for the tasks to finish. We want to bring down the worker as fast
+	// as possible and not wait for stalled validation to finish. This isn't strictly necessary now,
+	// but may be in the future.
+	rt.shutdown_background();
+}
+
+/// Loop that runs in the CPU time monitor thread on prepare and execute jobs. Continuously wakes up
+/// and then either blocks for the remaining CPU time, or returns if we exceed the CPU timeout.
+///
+/// Returning `Some` indicates that we should send a `TimedOut` error to the host. Will return
+/// `None` if the other thread finishes first, without us timing out.
+///
+/// NOTE: Sending a `TimedOut` error to the host will cause the worker, whether preparation or
+/// execution, to be killed by the host. We do not kill the process here because it would interfere
+/// with the proper handling of this error.
+pub fn cpu_time_monitor_loop(
+	cpu_time_start: ProcessTime,
+	timeout: Duration,
+	finished_rx: Receiver<()>,
+) -> Option<Duration> {
+	loop {
+		let cpu_time_elapsed = cpu_time_start.elapsed();
+
+		// Treat the timeout as CPU time, which is less subject to variance due to load.
+		if cpu_time_elapsed <= timeout {
+			// Sleep for the remaining CPU time, plus a bit to account for overhead. (And we don't
+			// want to wake up too often -- so, since we just want to halt the worker thread if it
+			// stalled, we can sleep longer than necessary.) Note that the sleep is wall clock time.
+			// The CPU clock may be slower than the wall clock.
+			let sleep_interval = timeout.saturating_sub(cpu_time_elapsed) + JOB_TIMEOUT_OVERHEAD;
+			match finished_rx.recv_timeout(sleep_interval) {
+				// Received finish signal.
+				Ok(()) => return None,
+				// Timed out, restart loop.
+				Err(RecvTimeoutError::Timeout) => continue,
+				Err(RecvTimeoutError::Disconnected) => return None,
+			}
+		}
+
+		return Some(cpu_time_elapsed)
+	}
+}
+
+/// Attempt to convert an opaque panic payload to a string.
+///
+/// This is a best effort, and is not guaranteed to provide the most accurate value.
+pub fn stringify_panic_payload(payload: Box<dyn Any + Send + 'static>) -> String {
+	match payload.downcast::<&'static str>() {
+		Ok(msg) => msg.to_string(),
+		Err(payload) => match payload.downcast::<String>() {
+			Ok(msg) => *msg,
+			// At least we tried...
+			Err(_) => "unknown panic payload".to_string(),
+		},
+	}
+}
+
+/// In case of node and worker version mismatch (as a result of in-place upgrade), send `SIGTERM`
+/// to the node to tear it down and prevent it from raising disputes on valid candidates. Node
+/// restart should be handled by the node owner. As node exits, unix sockets opened to workers
+/// get closed by the OS and other workers receive error on socket read and also exit. Preparation
+/// jobs are written to the temporary files that are renamed to real artifacts on the node side, so
+/// no leftover artifacts are possible.
+fn kill_parent_node_in_emergency() {
+	unsafe {
+		// SAFETY: `getpid()` never fails but may return "no-parent" (0) or "parent-init" (1) in
+		// some corner cases, which is checked. `kill()` never fails.
+		let ppid = libc::getppid();
+		if ppid > 1 {
+			libc::kill(ppid, libc::SIGTERM);
+		}
+	}
+}
+
+/// Functionality related to threads spawned by the workers.
+///
+/// The motivation for this module is to coordinate worker threads without using async Rust.
+pub mod thread {
+	use std::{
+		panic,
+		sync::{Arc, Condvar, Mutex},
+		thread,
+		time::Duration,
+	};
+
+	/// Contains the outcome of waiting on threads, or `Pending` if none are ready.
+	#[derive(Clone, Copy)]
+	pub enum WaitOutcome {
+		Finished,
+		TimedOut,
+		Pending,
+	}
+
+	impl WaitOutcome {
+		pub fn is_pending(&self) -> bool {
+			matches!(self, Self::Pending)
+		}
+	}
+
+	/// Helper type.
+	pub type Cond = Arc<(Mutex<WaitOutcome>, Condvar)>;
+
+	/// Gets a condvar initialized to `Pending`.
+	pub fn get_condvar() -> Cond {
+		Arc::new((Mutex::new(WaitOutcome::Pending), Condvar::new()))
+	}
+
+	/// Runs a thread, afterwards notifying the threads waiting on the condvar. Catches panics and
+	/// resumes them after triggering the condvar, so that the waiting thread is notified on panics.
+	pub fn spawn_worker_thread<F, R>(
+		name: &str,
+		f: F,
+		cond: Cond,
+		outcome: WaitOutcome,
+	) -> std::io::Result<thread::JoinHandle<R>>
+	where
+		F: FnOnce() -> R,
+		F: Send + 'static + panic::UnwindSafe,
+		R: Send + 'static,
+	{
+		thread::Builder::new()
+			.name(name.into())
+			.spawn(move || cond_notify_on_done(f, cond, outcome))
+	}
+
+	/// Runs a worker thread with the given stack size. See [`spawn_worker_thread`].
+	pub fn spawn_worker_thread_with_stack_size<F, R>(
+		name: &str,
+		f: F,
+		cond: Cond,
+		outcome: WaitOutcome,
+		stack_size: usize,
+	) -> std::io::Result<thread::JoinHandle<R>>
+	where
+		F: FnOnce() -> R,
+		F: Send + 'static + panic::UnwindSafe,
+		R: Send + 'static,
+	{
+		thread::Builder::new()
+			.name(name.into())
+			.stack_size(stack_size)
+			.spawn(move || cond_notify_on_done(f, cond, outcome))
+	}
+
+	/// Runs a function, afterwards notifying the threads waiting on the condvar. Catches panics and
+	/// resumes them after triggering the condvar, so that the waiting thread is notified on panics.
+	fn cond_notify_on_done<F, R>(f: F, cond: Cond, outcome: WaitOutcome) -> R
+	where
+		F: FnOnce() -> R,
+		F: panic::UnwindSafe,
+	{
+		let result = panic::catch_unwind(|| f());
+		cond_notify_all(cond, outcome);
+		match result {
+			Ok(inner) => return inner,
+			Err(err) => panic::resume_unwind(err),
+		}
+	}
+
+	/// Helper function to notify all threads waiting on this condvar.
+	fn cond_notify_all(cond: Cond, outcome: WaitOutcome) {
+		let (lock, cvar) = &*cond;
+		let mut flag = lock.lock().unwrap();
+		if !flag.is_pending() {
+			// Someone else already triggered the condvar.
+			return
+		}
+		*flag = outcome;
+		cvar.notify_all();
+	}
+
+	/// Block the thread while it waits on the condvar.
+	pub fn wait_for_threads(cond: Cond) -> WaitOutcome {
+		let (lock, cvar) = &*cond;
+		let guard = cvar.wait_while(lock.lock().unwrap(), |flag| flag.is_pending()).unwrap();
+		*guard
+	}
+
+	/// Block the thread while it waits on the condvar or on a timeout. If the timeout is hit,
+	/// returns `None`.
+	#[cfg_attr(not(any(target_os = "linux", feature = "jemalloc-allocator")), allow(dead_code))]
+	pub fn wait_for_threads_with_timeout(cond: &Cond, dur: Duration) -> Option<WaitOutcome> {
+		let (lock, cvar) = &**cond;
+		let result = cvar
+			.wait_timeout_while(lock.lock().unwrap(), dur, |flag| flag.is_pending())
+			.unwrap();
+		if result.1.timed_out() {
+			None
+		} else {
+			Some(*result.0)
+		}
+	}
+}