change prepare worker to use fork instead of threads (#1685)

Co-authored-by: Marcin S <marcin@realemail.net>
2026-06-14 05:11:09 +00:00 · 2023-11-14 14:50:18 -03:00
parent 3a87390b30
commit 54f84285bf
24 changed files with 1468 additions and 534 deletions
@@ -33,36 +33,37 @@ pub enum ValidationError {
 pub enum InvalidCandidate {
 	/// PVF preparation ended up with a deterministic error.
 	PrepareError(String),
-	/// The failure is reported by the execution worker. The string contains the error message.
-	WorkerReportedError(String),
-	/// The worker has died during validation of a candidate. That may fall in one of the following
-	/// categories, which we cannot distinguish programmatically:
+	/// The candidate is reported to be invalid by the execution worker. The string contains the
+	/// error message.
+	WorkerReportedInvalid(String),
+	/// The worker process (not the job) has died during validation of a candidate.
 	///
-	/// (a) Some sort of transient glitch caused the worker process to abort. An example would be
-	/// that the host machine ran out of free memory and the OOM killer started killing the
-	/// processes, and in order to save the parent it will "sacrifice child" first.
-	///
-	/// (b) The candidate triggered a code path that has lead to the process death. For example,
-	///     the PVF found a way to consume unbounded amount of resources and then it either
-	///     exceeded an `rlimit` (if set) or, again, invited OOM killer. Another possibility is a
-	///     bug in wasmtime allowed the PVF to gain control over the execution worker.
-	///
-	/// We attribute such an event to an *invalid candidate* in either case.
-	///
-	/// The rationale for this is that a glitch may lead to unfair rejecting candidate by a single
-	/// validator. If the glitch is somewhat more persistent the validator will reject all
-	/// candidate thrown at it and hopefully the operator notices it by decreased reward
-	/// performance of the validator. On the other hand, if the worker died because of (b) we would
-	/// have better chances to stop the attack.
+	/// This is unlikely to be caused by malicious code, since workers spawn separate job
+	/// processes and those are sandboxed, but it is still possible. We retry in this case, and
+	/// if the error persists, we assume it is caused by the candidate and vote against it.
 	AmbiguousWorkerDeath,
 	/// PVF execution (compilation is not included) took more time than was allotted.
 	HardTimeout,
-	/// A panic occurred and we can't be sure whether the candidate is really invalid or some
-	/// internal glitch occurred. Whenever we are unsure, we can never treat an error as internal
-	/// as we would abstain from voting. This is bad because if the issue was due to the candidate,
-	/// then all validators would abstain, stalling finality on the chain. So we will first retry
-	/// the candidate, and if the issue persists we are forced to vote invalid.
-	Panic(String),
+	/// The job process (not the worker) has died for one of the following reasons:
+	///
+	/// (a) A seccomp violation occurred, most likely due to an attempt by malicious code to
+	/// execute arbitrary code. Note that there is no foolproof way to detect this if the operator
+	/// has seccomp auditing disabled.
+	///
+	/// (b) The host machine ran out of free memory and the OOM killer started killing
+	/// processes; to save the parent, it will "sacrifice child" first.
+	///
+	/// (c) Some other reason, perhaps transient or perhaps caused by malicious code.
+	///
+	/// We cannot treat this as an internal error because malicious code may have caused this.
+	AmbiguousJobDeath(String),
+	/// An unexpected error occurred in the job process, and we can't be sure whether the
+	/// candidate is really invalid or some internal glitch occurred. Whenever we are unsure, we
+	/// must never treat an error as internal, because then we would abstain from voting. That
+	/// is bad: if the issue was due to the candidate, all validators would abstain, stalling
+	/// finality on the chain. So we first retry the candidate, and if the issue persists, we are
+	/// forced to vote invalid.
+	JobError(String),
 }

 impl From<InternalValidationError> for ValidationError {
@@ -342,20 +342,27 @@ fn handle_job_finish(
 		},
 		Outcome::InvalidCandidate { err, idle_worker } => (
 			Some(idle_worker),
-			Err(ValidationError::InvalidCandidate(InvalidCandidate::WorkerReportedError(err))),
+			Err(ValidationError::InvalidCandidate(InvalidCandidate::WorkerReportedInvalid(err))),
 			None,
 		),
 		Outcome::InternalError { err } => (None, Err(ValidationError::InternalError(err)), None),
+		// Either the worker or the job timed out. Kill the worker in either case. Treated as
+		// definitely-invalid, because if we timed out, there's no time left for a retry.
 		Outcome::HardTimeout =>
 			(None, Err(ValidationError::InvalidCandidate(InvalidCandidate::HardTimeout)), None),
 		// "Maybe invalid" errors (will retry).
-		Outcome::IoErr => (
+		Outcome::WorkerIntfErr => (
 			None,
 			Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbiguousWorkerDeath)),
 			None,
 		),
-		Outcome::Panic { err } =>
-			(None, Err(ValidationError::InvalidCandidate(InvalidCandidate::Panic(err))), None),
+		Outcome::JobDied { err } => (
+			None,
+			Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbiguousJobDeath(err))),
+			None,
+		),
+		Outcome::JobError { err } =>
+			(None, Err(ValidationError::InvalidCandidate(InvalidCandidate::JobError(err))), None),
 	};

 	queue.metrics.execute_finished();
@@ -30,7 +30,7 @@ use futures_timer::Delay;
 use parity_scale_codec::{Decode, Encode};
 use polkadot_node_core_pvf_common::{
 	error::InternalValidationError,
-	execute::{Handshake, Response},
+	execute::{Handshake, WorkerResponse},
 	worker_dir, SecurityStatus,
 };
 use polkadot_parachain_primitives::primitives::ValidationResult;
@@ -88,19 +88,26 @@ pub enum Outcome {
 	/// a trap. Errors related to the preparation process are not expected to be encountered by the
 	/// execution workers.
 	InvalidCandidate { err: String, idle_worker: IdleWorker },
+	/// The execution time exceeded the hard limit. The worker is terminated.
+	HardTimeout,
+	/// An I/O error happened during communication with the worker. This may mean that the worker
+	/// process already died. The token is not returned in any case.
+	WorkerIntfErr,
+	/// The job process has died. We must kill the worker just in case.
+	///
+	/// We cannot treat this as an internal error because malicious code may have caused this.
+	JobDied { err: String },
+	/// An unexpected error occurred in the job process.
+	///
+	/// Because malicious code can cause a job error, we must not treat it as an internal error.
+	JobError { err: String },
+
 	/// An internal error happened during the validation. Such an error is most likely related to
 	/// some transient glitch.
 	///
 	/// Should only ever be used for errors independent of the candidate and PVF. Therefore it may
 	/// be a problem with the worker, so we terminate it.
 	InternalError { err: InternalValidationError },
-	/// The execution time exceeded the hard limit. The worker is terminated.
-	HardTimeout,
-	/// An I/O error happened during communication with the worker. This may mean that the worker
-	/// process already died. The token is not returned in any case.
-	IoErr,
-	/// An unexpected panic has occurred in the execution worker.
-	Panic { err: String },
 }

 /// Given the idle token of a worker and parameters of work, communicates with the worker and
@@ -137,7 +144,7 @@ pub async fn start_work(
 				?error,
 				"failed to send an execute request",
 			);
-			return Outcome::IoErr
+			return Outcome::WorkerIntfErr
 		}

 		// We use a generous timeout here. This is in addition to the one in the child process, in
@@ -173,7 +180,7 @@ pub async fn start_work(
 							);
 						}

-						return Outcome::IoErr
+						return Outcome::WorkerIntfErr
 					},
 					Ok(response) => {
 						// Check if any syscall violations occurred during the job. For now this is
@@ -189,7 +196,7 @@ pub async fn start_work(
 							);
 						}

-						if let Response::Ok{duration, ..} = response {
+						if let WorkerResponse::Ok{duration, ..} = response {
 							if duration > execution_timeout {
 								// The job didn't complete within the timeout.
 								gum::warn!(
@@ -201,7 +208,7 @@ pub async fn start_work(
 								);

 								// Return a timeout error.
-								return Outcome::HardTimeout;
+								return Outcome::HardTimeout
 							}
 						}

@@ -216,23 +223,25 @@ pub async fn start_work(
 					validation_code_hash = ?artifact.id.code_hash,
 					"execution worker exceeded lenient timeout for execution, child worker likely stalled",
 				);
-				Response::TimedOut
+				WorkerResponse::JobTimedOut
 			},
 		};

 		match response {
-			Response::Ok { result_descriptor, duration } => Outcome::Ok {
+			WorkerResponse::Ok { result_descriptor, duration } => Outcome::Ok {
 				result_descriptor,
 				duration,
 				idle_worker: IdleWorker { stream, pid, worker_dir },
 			},
-			Response::InvalidCandidate(err) => Outcome::InvalidCandidate {
+			WorkerResponse::InvalidCandidate(err) => Outcome::InvalidCandidate {
 				err,
 				idle_worker: IdleWorker { stream, pid, worker_dir },
 			},
-			Response::TimedOut => Outcome::HardTimeout,
-			Response::Panic(err) => Outcome::Panic { err },
-			Response::InternalError(err) => Outcome::InternalError { err },
+			WorkerResponse::JobTimedOut => Outcome::HardTimeout,
+			WorkerResponse::JobDied(err) => Outcome::JobDied { err },
+			WorkerResponse::JobError(err) => Outcome::JobError { err },
+
+			WorkerResponse::InternalError(err) => Outcome::InternalError { err },
 		}
 	})
 	.await
@@ -306,9 +315,9 @@ async fn send_request(
 	framed_send(stream, &execution_timeout.encode()).await
 }

-async fn recv_response(stream: &mut UnixStream) -> io::Result<Response> {
+async fn recv_response(stream: &mut UnixStream) -> io::Result<WorkerResponse> {
 	let response_bytes = framed_recv(stream).await?;
-	Response::decode(&mut &response_bytes[..]).map_err(|e| {
+	WorkerResponse::decode(&mut response_bytes.as_slice()).map_err(|e| {
 		io::Error::new(
 			io::ErrorKind::Other,
 			format!("execute pvf recv_response: decode error: {:?}", e),
@@ -339,17 +339,17 @@ fn handle_mux(
 					spawned,
 					worker,
 					idle,
-					Err(PrepareError::CreateTmpFileErr(err)),
+					Err(PrepareError::CreateTmpFile(err)),
 				),
 				// Return `Concluded`, but do not kill the worker since the error was on the host
 				// side.
-				Outcome::RenameTmpFileErr { worker: idle, result: _, err, src, dest } =>
+				Outcome::RenameTmpFile { worker: idle, result: _, err, src, dest } =>
 					handle_concluded_no_rip(
 						from_pool,
 						spawned,
 						worker,
 						idle,
-						Err(PrepareError::RenameTmpFileErr { err, src, dest }),
+						Err(PrepareError::RenameTmpFile { err, src, dest }),
 					),
 				// Could not clear worker cache. Kill the worker so other jobs can't see the data.
 				Outcome::ClearWorkerDir { err } => {
@@ -387,6 +387,21 @@ fn handle_mux(

 					Ok(())
 				},
+				// The worker might still be usable, but we kill it just in case.
+				Outcome::JobDied(err) => {
+					if attempt_retire(metrics, spawned, worker) {
+						reply(
+							from_pool,
+							FromPool::Concluded {
+								worker,
+								rip: true,
+								result: Err(PrepareError::JobDied(err)),
+							},
+						)?;
+					}
+
+					Ok(())
+				},
 				Outcome::TimedOut => {
 					if attempt_retire(metrics, spawned, worker) {
 						reply(
@@ -79,7 +79,7 @@ pub enum Outcome {
 	CreateTmpFileErr { worker: IdleWorker, err: String },
 	/// The response from the worker is received, but the tmp file cannot be renamed (moved) to the
 	/// final destination location.
-	RenameTmpFileErr {
+	RenameTmpFile {
 		worker: IdleWorker,
 		result: PrepareResult,
 		err: String,
@@ -100,6 +100,10 @@ pub enum Outcome {
 	IoErr(String),
 	/// The worker ran out of memory and is aborting. The worker should be ripped.
 	OutOfMemory,
+	/// The preparation job process died, due to OOM, a seccomp violation, or some other factor.
+	///
+	/// The worker might still be usable, but we kill it just in case.
+	JobDied(String),
 }

 /// Given the idle token of a worker and parameters of work, communicates with the worker and
@@ -187,21 +191,6 @@ pub async fn start_work(
 						"failed to recv a prepare response: {:?}",
 						err,
 					);
-
-					// The worker died. Check if it was due to a seccomp violation.
-					//
-					// NOTE: Log, but don't change the outcome. Not all validators may have auditing
-					// enabled, so we don't want attackers to abuse a non-deterministic outcome.
-					for syscall in security::check_seccomp_violations_for_worker(audit_log_file, pid).await {
-						gum::error!(
-							target: LOG_TARGET,
-							worker_pid = %pid,
-							%syscall,
-							?pvf,
-							"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
-						);
-					}
-
 					Outcome::IoErr(err.to_string())
 				},
 				Err(_) => {
@@ -236,6 +225,7 @@ async fn handle_response(
 		Ok(result) => result,
 		// Timed out on the child. This should already be logged by the child.
 		Err(PrepareError::TimedOut) => return Outcome::TimedOut,
+		Err(PrepareError::JobDied(err)) => return Outcome::JobDied(err),
 		Err(PrepareError::OutOfMemory) => return Outcome::OutOfMemory,
 		Err(_) => return Outcome::Concluded { worker, result },
 	};
@@ -272,7 +262,7 @@ async fn handle_response(
 				artifact_path.display(),
 				err,
 			);
-			Outcome::RenameTmpFileErr {
+			Outcome::RenameTmpFile {
 				worker,
 				result,
 				err: format!("{:?}", err),
@@ -14,7 +14,7 @@
 // You should have received a copy of the GNU General Public License
 // along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.

-//! Various things for testing other crates.
+//! Various utilities for testing.

 pub use crate::{
 	host::{EXECUTE_BINARY_NAME, PREPARE_BINARY_NAME},
@@ -59,27 +59,33 @@ pub fn validate_candidate(
 ///
 /// NOTE: This should only be called in dev code (tests, benchmarks) as it relies on the relative
 /// paths of the built workers.
-pub fn get_and_check_worker_paths() -> (PathBuf, PathBuf) {
+pub fn build_workers_and_get_paths(is_bench: bool) -> (PathBuf, PathBuf) {
 	// Only needs to be called once for the current process.
 	static WORKER_PATHS: OnceLock<Mutex<(PathBuf, PathBuf)>> = OnceLock::new();

-	fn build_workers() {
-		let build_args = vec![
+	fn build_workers(is_bench: bool) {
+		let mut build_args = vec![
 			"build",
 			"--package=polkadot",
 			"--bin=polkadot-prepare-worker",
 			"--bin=polkadot-execute-worker",
 		];
-		let exit_status = std::process::Command::new("cargo")
+		if is_bench {
+			// Benches require --release. Regular tests are debug (no flag needed).
+			build_args.push("--release");
+		}
+		let mut cargo = std::process::Command::new("cargo");
+		let cmd = cargo
 			// wasm runtime not needed
 			.env("SKIP_WASM_BUILD", "1")
 			.args(build_args)
-			.stdout(std::process::Stdio::piped())
-			.status()
-			.expect("Failed to run the build program");
+			.stdout(std::process::Stdio::piped());
+
+		println!("INFO: calling `{cmd:?}`");
+		let exit_status = cmd.status().expect("Failed to run the build program");

 		if !exit_status.success() {
-			eprintln!("Failed to build workers: {}", exit_status.code().unwrap());
+			eprintln!("ERROR: Failed to build workers: {}", exit_status.code().unwrap());
 			std::process::exit(1);
 		}
 	}
@@ -95,23 +101,23 @@ pub fn get_and_check_worker_paths() -> (PathBuf, PathBuf) {

 		// explain why a build happens
 		if !prepare_worker_path.is_executable() {
-			eprintln!("Prepare worker does not exist or is not executable. Workers directory: {:?}", workers_path);
+			println!("WARN: Prepare worker does not exist or is not executable. Workers directory: {:?}", workers_path);
 		}
 		if !execute_worker_path.is_executable() {
-			eprintln!("Execute worker does not exist or is not executable. Workers directory: {:?}", workers_path);
+			println!("WARN: Execute worker does not exist or is not executable. Workers directory: {:?}", workers_path);
 		}
 		if let Ok(ver) = get_worker_version(&prepare_worker_path) {
 			if ver != NODE_VERSION {
-				eprintln!("Prepare worker version {ver} does not match node version {NODE_VERSION}; worker path: {prepare_worker_path:?}");
+				println!("WARN: Prepare worker version {ver} does not match node version {NODE_VERSION}; worker path: {prepare_worker_path:?}");
 			}
 		}
 		if let Ok(ver) = get_worker_version(&execute_worker_path) {
 			if ver != NODE_VERSION {
-				eprintln!("Execute worker version {ver} does not match node version {NODE_VERSION}; worker path: {execute_worker_path:?}");
+				println!("WARN: Execute worker version {ver} does not match node version {NODE_VERSION}; worker path: {execute_worker_path:?}");
 			}
 		}

-		build_workers();
+		build_workers(is_bench);

 		Mutex::new((prepare_worker_path, execute_worker_path))
 	});