PVF: Refactor workers into separate crates, remove host dependency (#7253)

* PVF: Refactor workers into separate crates, remove host dependency

* Fix compile error

* Remove some leftover code

* Fix compile errors

* Update Cargo.lock

* Remove worker main.rs files

I accidentally copied these from the other PR. This PR isn't intended to
introduce standalone workers yet.

* Address review comments

* cargo fmt

* Update a couple of comments

* Update log targets
This commit is contained in:
Marcin S
2023-05-25 16:29:13 -04:00
committed by GitHub
parent 4146c26f3c
commit 8782dde411
50 changed files with 777 additions and 519 deletions
+106
View File
@@ -0,0 +1,106 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
use crate::prepare::PrepareStats;
use parity_scale_codec::{Decode, Encode};
use std::fmt;
/// Result of PVF preparation performed by the validation host. Contains stats about the preparation if
/// successful
pub type PrepareResult = Result<PrepareStats, PrepareError>;
/// An error that occurred during the prepare part of the PVF pipeline.
#[derive(Debug, Clone, Encode, Decode)]
pub enum PrepareError {
/// During the prevalidation stage of preparation an issue was found with the PVF.
Prevalidation(String),
/// Compilation failed for the given PVF.
Preparation(String),
/// An unexpected panic has occurred in the preparation worker.
Panic(String),
/// Failed to prepare the PVF due to the time limit.
TimedOut,
/// An IO error occurred. This state is reported by either the validation host or by the worker.
IoErr(String),
/// The temporary file for the artifact could not be created at the given cache path. This state is reported by the
/// validation host (not by the worker).
CreateTmpFileErr(String),
/// The response from the worker is received, but the file cannot be renamed (moved) to the final destination
/// location. This state is reported by the validation host (not by the worker).
RenameTmpFileErr(String),
}
impl PrepareError {
/// Returns whether this is a deterministic error, i.e. one that should trigger reliably. Those
/// errors depend on the PVF itself and the sc-executor/wasmtime logic.
///
/// Non-deterministic errors can happen spuriously. Typically, they occur due to resource
/// starvation, e.g. under heavy load or memory pressure. Those errors are typically transient
/// but may persist e.g. if the node is run by overwhelmingly underpowered machine.
pub fn is_deterministic(&self) -> bool {
use PrepareError::*;
match self {
Prevalidation(_) | Preparation(_) | Panic(_) => true,
TimedOut | IoErr(_) | CreateTmpFileErr(_) | RenameTmpFileErr(_) => false,
}
}
}
impl fmt::Display for PrepareError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use PrepareError::*;
match self {
Prevalidation(err) => write!(f, "prevalidation: {}", err),
Preparation(err) => write!(f, "preparation: {}", err),
Panic(err) => write!(f, "panic: {}", err),
TimedOut => write!(f, "prepare: timeout"),
IoErr(err) => write!(f, "prepare: io error while receiving response: {}", err),
CreateTmpFileErr(err) => write!(f, "prepare: error creating tmp file: {}", err),
RenameTmpFileErr(err) => write!(f, "prepare: error renaming tmp file: {}", err),
}
}
}
/// Some internal error occurred.
///
/// Should only ever be used for validation errors independent of the candidate and PVF, or for errors we ruled out
/// during pre-checking (so preparation errors are fine).
#[derive(Debug, Clone, Encode, Decode)]
pub enum InternalValidationError {
/// Some communication error occurred with the host.
HostCommunication(String),
/// Could not find or open compiled artifact file.
CouldNotOpenFile(String),
/// An error occurred in the CPU time monitor thread. Should be totally unrelated to validation.
CpuTimeMonitorThread(String),
/// Some non-deterministic preparation error occurred.
NonDeterministicPrepareError(PrepareError),
}
impl fmt::Display for InternalValidationError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use InternalValidationError::*;
match self {
HostCommunication(err) =>
write!(f, "validation: some communication error occurred with the host: {}", err),
CouldNotOpenFile(err) =>
write!(f, "validation: could not find or open compiled artifact file: {}", err),
CpuTimeMonitorThread(err) =>
write!(f, "validation: an error occurred in the CPU time monitor thread: {}", err),
NonDeterministicPrepareError(err) => write!(f, "validation: prepare: {}", err),
}
}
}
@@ -0,0 +1,60 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
use crate::error::InternalValidationError;
use parity_scale_codec::{Decode, Encode};
use polkadot_parachain::primitives::ValidationResult;
use polkadot_primitives::ExecutorParams;
use std::time::Duration;
/// The payload of the one-time handshake that is done when a worker process is created. Carries
/// data from the host to the worker.
#[derive(Encode, Decode)]
pub struct Handshake {
/// The executor parameters.
pub executor_params: ExecutorParams,
}
/// The response from an execution job on the worker.
#[derive(Encode, Decode)]
pub enum Response {
/// The job completed successfully.
Ok {
/// The result of parachain validation.
result_descriptor: ValidationResult,
/// The amount of CPU time taken by the job.
duration: Duration,
},
/// The candidate is invalid.
InvalidCandidate(String),
/// The job timed out.
TimedOut,
/// An unexpected panic has occurred in the execution worker.
Panic(String),
/// Some internal error occurred.
InternalError(InternalValidationError),
}
impl Response {
/// Creates an invalid response from a context `ctx` and a message `msg` (which can be empty).
pub fn format_invalid(ctx: &'static str, msg: &str) -> Self {
if msg.is_empty() {
Self::InvalidCandidate(ctx.to_string())
} else {
Self::InvalidCandidate(format!("{}: {}", ctx, msg))
}
}
}
@@ -0,0 +1,114 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Interface to the Substrate Executor
use polkadot_primitives::{ExecutorParam, ExecutorParams};
use sc_executor_common::wasm_runtime::HeapAllocStrategy;
use sc_executor_wasmtime::{Config, DeterministicStackLimit, Semantics};
// Memory configuration
//
// When Substrate Runtime is instantiated, a number of WASM pages are allocated for the Substrate
// Runtime instance's linear memory. The exact number of pages is a sum of whatever the WASM blob
// itself requests (by default at least enough to hold the data section as well as have some space
// left for the stack; this is, of course, overridable at link time when compiling the runtime)
// plus the number of pages specified in the `extra_heap_pages` passed to the executor.
//
// By default, rustc (or `lld` specifically) should allocate 1 MiB for the shadow stack, or 16 pages.
// The data section for runtimes are typically rather small and can fit in a single digit number of
// WASM pages, so let's say an extra 16 pages. Thus let's assume that 32 pages or 2 MiB are used for
// these needs by default.
const DEFAULT_HEAP_PAGES_ESTIMATE: u32 = 32;
const EXTRA_HEAP_PAGES: u32 = 2048;
/// The number of bytes devoted for the stack during wasm execution of a PVF.
pub const NATIVE_STACK_MAX: u32 = 256 * 1024 * 1024;
// VALUES OF THE DEFAULT CONFIGURATION SHOULD NEVER BE CHANGED
// They are used as base values for the execution environment parametrization.
// To overwrite them, add new ones to `EXECUTOR_PARAMS` in the `session_info` pallet and perform
// a runtime upgrade to make them active.
pub const DEFAULT_CONFIG: Config = Config {
allow_missing_func_imports: true,
cache_path: None,
semantics: Semantics {
heap_alloc_strategy: sc_executor_common::wasm_runtime::HeapAllocStrategy::Dynamic {
maximum_pages: Some(DEFAULT_HEAP_PAGES_ESTIMATE + EXTRA_HEAP_PAGES),
},
instantiation_strategy:
sc_executor_wasmtime::InstantiationStrategy::RecreateInstanceCopyOnWrite,
// Enable deterministic stack limit to pin down the exact number of items the wasmtime stack
// can contain before it traps with stack overflow.
//
// Here is how the values below were chosen.
//
// At the moment of writing, the default native stack size limit is 1 MiB. Assuming a logical item
// (see the docs about the field and the instrumentation algorithm) is 8 bytes, 1 MiB can
// fit 2x 65536 logical items.
//
// Since reaching the native stack limit is undesirable, we halve the logical item limit and
// also increase the native 256x. This hopefully should preclude wasm code from reaching
// the stack limit set by the wasmtime.
deterministic_stack_limit: Some(DeterministicStackLimit {
logical_max: 65536,
native_stack_max: NATIVE_STACK_MAX,
}),
canonicalize_nans: true,
// Rationale for turning the multi-threaded compilation off is to make the preparation time
// easily reproducible and as deterministic as possible.
//
// Currently the prepare queue doesn't distinguish between precheck and prepare requests.
// On the one hand, it simplifies the code, on the other, however, slows down compile times
// for execute requests. This behavior may change in future.
parallel_compilation: false,
// WASM extensions. Only those that are meaningful to us may be controlled here. By default,
// we're using WASM MVP, which means all the extensions are disabled. Nevertheless, some
// extensions (e.g., sign extension ops) are enabled by Wasmtime and cannot be disabled.
wasm_reference_types: false,
wasm_simd: false,
wasm_bulk_memory: false,
wasm_multi_value: false,
},
};
pub fn params_to_wasmtime_semantics(par: &ExecutorParams) -> Result<Semantics, String> {
let mut sem = DEFAULT_CONFIG.semantics.clone();
let mut stack_limit = if let Some(stack_limit) = sem.deterministic_stack_limit.clone() {
stack_limit
} else {
return Err("No default stack limit set".to_owned())
};
for p in par.iter() {
match p {
ExecutorParam::MaxMemoryPages(max_pages) =>
sem.heap_alloc_strategy =
HeapAllocStrategy::Dynamic { maximum_pages: Some(*max_pages) },
ExecutorParam::StackLogicalMax(slm) => stack_limit.logical_max = *slm,
ExecutorParam::StackNativeMax(snm) => stack_limit.native_stack_max = *snm,
ExecutorParam::WasmExtBulkMemory => sem.wasm_bulk_memory = true,
// TODO: Not implemented yet; <https://github.com/paritytech/polkadot/issues/6472>.
ExecutorParam::PrecheckingMaxMemory(_) => (),
ExecutorParam::PvfPrepTimeout(_, _) | ExecutorParam::PvfExecTimeout(_, _) => (), // Not used here
}
}
sem.deterministic_stack_limit = Some(stack_limit);
Ok(sem)
}
+57
View File
@@ -0,0 +1,57 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Functionality that is shared by the host and the workers.
pub mod error;
pub mod execute;
pub mod executor_intf;
pub mod prepare;
pub mod pvf;
pub mod worker;
pub use cpu_time::ProcessTime;
const LOG_TARGET: &str = "parachain::pvf-common";
use std::mem;
use tokio::io::{self, AsyncRead, AsyncReadExt as _, AsyncWrite, AsyncWriteExt as _};
#[doc(hidden)]
pub mod tests {
use std::time::Duration;
pub const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(3);
pub const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(30);
}
/// Write some data prefixed by its length into `w`.
pub async fn framed_send(w: &mut (impl AsyncWrite + Unpin), buf: &[u8]) -> io::Result<()> {
let len_buf = buf.len().to_le_bytes();
w.write_all(&len_buf).await?;
w.write_all(buf).await?;
Ok(())
}
/// Read some data prefixed by its length from `r`.
pub async fn framed_recv(r: &mut (impl AsyncRead + Unpin)) -> io::Result<Vec<u8>> {
let mut len_buf = [0u8; mem::size_of::<usize>()];
r.read_exact(&mut len_buf).await?;
let len = usize::from_le_bytes(len_buf);
let mut buf = vec![0; len];
r.read_exact(&mut buf).await?;
Ok(buf)
}
@@ -0,0 +1,48 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
use parity_scale_codec::{Decode, Encode};
/// Preparation statistics, including the CPU time and memory taken.
#[derive(Debug, Clone, Default, Encode, Decode)]
pub struct PrepareStats {
/// The CPU time that elapsed for the preparation job.
pub cpu_time_elapsed: std::time::Duration,
/// The observed memory statistics for the preparation job.
pub memory_stats: MemoryStats,
}
/// Helper struct to contain all the memory stats, including `MemoryAllocationStats` and, if
/// supported by the OS, `ru_maxrss`.
#[derive(Clone, Debug, Default, Encode, Decode)]
pub struct MemoryStats {
/// Memory stats from `tikv_jemalloc_ctl`.
#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
pub memory_tracker_stats: Option<MemoryAllocationStats>,
/// `ru_maxrss` from `getrusage`. `None` if an error occurred.
#[cfg(target_os = "linux")]
pub max_rss: Option<i64>,
}
/// Statistics of collected memory metrics.
#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
#[derive(Clone, Debug, Default, Encode, Decode)]
pub struct MemoryAllocationStats {
/// Total resident memory, in bytes.
pub resident: u64,
/// Total allocated memory, in bytes.
pub allocated: u64,
}
+108
View File
@@ -0,0 +1,108 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
use parity_scale_codec::{Decode, Encode};
use polkadot_parachain::primitives::ValidationCodeHash;
use polkadot_primitives::ExecutorParams;
use sp_core::blake2_256;
use std::{
cmp::{Eq, PartialEq},
fmt,
sync::Arc,
time::Duration,
};
/// A struct that carries the exhaustive set of data to prepare an artifact out of plain
/// Wasm binary
///
/// Should be cheap to clone.
#[derive(Clone, Encode, Decode)]
pub struct PvfPrepData {
/// Wasm code (uncompressed)
code: Arc<Vec<u8>>,
/// Wasm code hash
code_hash: ValidationCodeHash,
/// Executor environment parameters for the session for which artifact is prepared
executor_params: Arc<ExecutorParams>,
/// Preparation timeout
prep_timeout: Duration,
}
impl PvfPrepData {
/// Returns an instance of the PVF out of the given PVF code and executor params.
pub fn from_code(
code: Vec<u8>,
executor_params: ExecutorParams,
prep_timeout: Duration,
) -> Self {
let code = Arc::new(code);
let code_hash = blake2_256(&code).into();
let executor_params = Arc::new(executor_params);
Self { code, code_hash, executor_params, prep_timeout }
}
/// Returns validation code hash for the PVF
pub fn code_hash(&self) -> ValidationCodeHash {
self.code_hash
}
/// Returns PVF code
pub fn code(&self) -> Arc<Vec<u8>> {
self.code.clone()
}
/// Returns executor params
pub fn executor_params(&self) -> Arc<ExecutorParams> {
self.executor_params.clone()
}
/// Returns preparation timeout.
pub fn prep_timeout(&self) -> Duration {
self.prep_timeout
}
/// Creates a structure for tests.
#[doc(hidden)]
pub fn from_discriminator_and_timeout(num: u32, timeout: Duration) -> Self {
let descriminator_buf = num.to_le_bytes().to_vec();
Self::from_code(descriminator_buf, ExecutorParams::default(), timeout)
}
/// Creates a structure for tests.
#[doc(hidden)]
pub fn from_discriminator(num: u32) -> Self {
Self::from_discriminator_and_timeout(num, crate::tests::TEST_PREPARATION_TIMEOUT)
}
}
impl fmt::Debug for PvfPrepData {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"Pvf {{ code, code_hash: {:?}, executor_params: {:?}, prep_timeout: {:?} }}",
self.code_hash, self.executor_params, self.prep_timeout
)
}
}
impl PartialEq for PvfPrepData {
fn eq(&self, other: &Self) -> bool {
self.code_hash == other.code_hash &&
self.executor_params.hash() == other.executor_params.hash()
}
}
impl Eq for PvfPrepData {}
+312
View File
@@ -0,0 +1,312 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Functionality common to both prepare and execute workers.
use crate::LOG_TARGET;
use cpu_time::ProcessTime;
use futures::never::Never;
use std::{
any::Any,
path::PathBuf,
sync::mpsc::{Receiver, RecvTimeoutError},
time::Duration,
};
use tokio::{io, net::UnixStream, runtime::Runtime};
/// Use this macro to declare a `fn main() {}` that will create an executable that can be used for
/// spawning the desired worker.
#[macro_export]
macro_rules! decl_worker_main {
($expected_command:expr, $entrypoint:expr) => {
fn main() {
::sp_tracing::try_init_simple();
let args = std::env::args().collect::<Vec<_>>();
if args.len() < 3 {
panic!("wrong number of arguments");
}
let mut version = None;
let mut socket_path: &str = "";
for i in 2..args.len() {
match args[i].as_ref() {
"--socket-path" => socket_path = args[i + 1].as_str(),
"--node-version" => version = Some(args[i + 1].as_str()),
_ => (),
}
}
let subcommand = &args[1];
if subcommand != $expected_command {
panic!(
"trying to run {} binary with the {} subcommand",
$expected_command, subcommand
)
}
$entrypoint(&socket_path, version);
}
};
}
/// Some allowed overhead that we account for in the "CPU time monitor" thread's sleeps, on the
/// child process.
pub const JOB_TIMEOUT_OVERHEAD: Duration = Duration::from_millis(50);
/// Interprets the given bytes as a path. Returns `None` if the given bytes do not constitute a
/// a proper utf-8 string.
pub fn bytes_to_path(bytes: &[u8]) -> Option<PathBuf> {
std::str::from_utf8(bytes).ok().map(PathBuf::from)
}
pub fn worker_event_loop<F, Fut>(
debug_id: &'static str,
socket_path: &str,
node_version: Option<&str>,
mut event_loop: F,
) where
F: FnMut(UnixStream) -> Fut,
Fut: futures::Future<Output = io::Result<Never>>,
{
let worker_pid = std::process::id();
gum::debug!(target: LOG_TARGET, %worker_pid, "starting pvf worker ({})", debug_id);
// Check for a mismatch between the node and worker versions.
if let Some(version) = node_version {
if version != env!("SUBSTRATE_CLI_IMPL_VERSION") {
gum::error!(
target: LOG_TARGET,
%worker_pid,
"Node and worker version mismatch, node needs restarting, forcing shutdown",
);
kill_parent_node_in_emergency();
let err: io::Result<Never> =
Err(io::Error::new(io::ErrorKind::Unsupported, "Version mismatch"));
gum::debug!(target: LOG_TARGET, %worker_pid, "quitting pvf worker({}): {:?}", debug_id, err);
return
}
}
// Run the main worker loop.
let rt = Runtime::new().expect("Creates tokio runtime. If this panics the worker will die and the host will detect that and deal with it.");
let err = rt
.block_on(async move {
let stream = UnixStream::connect(socket_path).await?;
let _ = tokio::fs::remove_file(socket_path).await;
let result = event_loop(stream).await;
result
})
// It's never `Ok` because it's `Ok(Never)`.
.unwrap_err();
gum::debug!(target: LOG_TARGET, %worker_pid, "quitting pvf worker ({}): {:?}", debug_id, err);
// We don't want tokio to wait for the tasks to finish. We want to bring down the worker as fast
// as possible and not wait for stalled validation to finish. This isn't strictly necessary now,
// but may be in the future.
rt.shutdown_background();
}
/// Loop that runs in the CPU time monitor thread on prepare and execute jobs. Continuously wakes up
/// and then either blocks for the remaining CPU time, or returns if we exceed the CPU timeout.
///
/// Returning `Some` indicates that we should send a `TimedOut` error to the host. Will return
/// `None` if the other thread finishes first, without us timing out.
///
/// NOTE: Sending a `TimedOut` error to the host will cause the worker, whether preparation or
/// execution, to be killed by the host. We do not kill the process here because it would interfere
/// with the proper handling of this error.
pub fn cpu_time_monitor_loop(
cpu_time_start: ProcessTime,
timeout: Duration,
finished_rx: Receiver<()>,
) -> Option<Duration> {
loop {
let cpu_time_elapsed = cpu_time_start.elapsed();
// Treat the timeout as CPU time, which is less subject to variance due to load.
if cpu_time_elapsed <= timeout {
// Sleep for the remaining CPU time, plus a bit to account for overhead. (And we don't
// want to wake up too often -- so, since we just want to halt the worker thread if it
// stalled, we can sleep longer than necessary.) Note that the sleep is wall clock time.
// The CPU clock may be slower than the wall clock.
let sleep_interval = timeout.saturating_sub(cpu_time_elapsed) + JOB_TIMEOUT_OVERHEAD;
match finished_rx.recv_timeout(sleep_interval) {
// Received finish signal.
Ok(()) => return None,
// Timed out, restart loop.
Err(RecvTimeoutError::Timeout) => continue,
Err(RecvTimeoutError::Disconnected) => return None,
}
}
return Some(cpu_time_elapsed)
}
}
/// Attempt to convert an opaque panic payload to a string.
///
/// This is a best effort, and is not guaranteed to provide the most accurate value.
pub fn stringify_panic_payload(payload: Box<dyn Any + Send + 'static>) -> String {
match payload.downcast::<&'static str>() {
Ok(msg) => msg.to_string(),
Err(payload) => match payload.downcast::<String>() {
Ok(msg) => *msg,
// At least we tried...
Err(_) => "unknown panic payload".to_string(),
},
}
}
/// In case of node and worker version mismatch (as a result of in-place upgrade), send `SIGTERM`
/// to the node to tear it down and prevent it from raising disputes on valid candidates. Node
/// restart should be handled by the node owner. As node exits, unix sockets opened to workers
/// get closed by the OS and other workers receive error on socket read and also exit. Preparation
/// jobs are written to the temporary files that are renamed to real artifacts on the node side, so
/// no leftover artifacts are possible.
fn kill_parent_node_in_emergency() {
unsafe {
// SAFETY: `getpid()` never fails but may return "no-parent" (0) or "parent-init" (1) in
// some corner cases, which is checked. `kill()` never fails.
let ppid = libc::getppid();
if ppid > 1 {
libc::kill(ppid, libc::SIGTERM);
}
}
}
/// Functionality related to threads spawned by the workers.
///
/// The motivation for this module is to coordinate worker threads without using async Rust.
pub mod thread {
use std::{
panic,
sync::{Arc, Condvar, Mutex},
thread,
time::Duration,
};
/// Contains the outcome of waiting on threads, or `Pending` if none are ready.
#[derive(Clone, Copy)]
pub enum WaitOutcome {
Finished,
TimedOut,
Pending,
}
impl WaitOutcome {
pub fn is_pending(&self) -> bool {
matches!(self, Self::Pending)
}
}
/// Helper type.
pub type Cond = Arc<(Mutex<WaitOutcome>, Condvar)>;
/// Gets a condvar initialized to `Pending`.
pub fn get_condvar() -> Cond {
Arc::new((Mutex::new(WaitOutcome::Pending), Condvar::new()))
}
/// Runs a thread, afterwards notifying the threads waiting on the condvar. Catches panics and
/// resumes them after triggering the condvar, so that the waiting thread is notified on panics.
pub fn spawn_worker_thread<F, R>(
name: &str,
f: F,
cond: Cond,
outcome: WaitOutcome,
) -> std::io::Result<thread::JoinHandle<R>>
where
F: FnOnce() -> R,
F: Send + 'static + panic::UnwindSafe,
R: Send + 'static,
{
thread::Builder::new()
.name(name.into())
.spawn(move || cond_notify_on_done(f, cond, outcome))
}
/// Runs a worker thread with the given stack size. See [`spawn_worker_thread`].
pub fn spawn_worker_thread_with_stack_size<F, R>(
name: &str,
f: F,
cond: Cond,
outcome: WaitOutcome,
stack_size: usize,
) -> std::io::Result<thread::JoinHandle<R>>
where
F: FnOnce() -> R,
F: Send + 'static + panic::UnwindSafe,
R: Send + 'static,
{
thread::Builder::new()
.name(name.into())
.stack_size(stack_size)
.spawn(move || cond_notify_on_done(f, cond, outcome))
}
/// Runs a function, afterwards notifying the threads waiting on the condvar. Catches panics and
/// resumes them after triggering the condvar, so that the waiting thread is notified on panics.
fn cond_notify_on_done<F, R>(f: F, cond: Cond, outcome: WaitOutcome) -> R
where
F: FnOnce() -> R,
F: panic::UnwindSafe,
{
let result = panic::catch_unwind(|| f());
cond_notify_all(cond, outcome);
match result {
Ok(inner) => return inner,
Err(err) => panic::resume_unwind(err),
}
}
/// Helper function to notify all threads waiting on this condvar.
fn cond_notify_all(cond: Cond, outcome: WaitOutcome) {
let (lock, cvar) = &*cond;
let mut flag = lock.lock().unwrap();
if !flag.is_pending() {
// Someone else already triggered the condvar.
return
}
*flag = outcome;
cvar.notify_all();
}
/// Block the thread while it waits on the condvar.
pub fn wait_for_threads(cond: Cond) -> WaitOutcome {
let (lock, cvar) = &*cond;
let guard = cvar.wait_while(lock.lock().unwrap(), |flag| flag.is_pending()).unwrap();
*guard
}
/// Block the thread while it waits on the condvar or on a timeout. If the timeout is hit,
/// returns `None`.
#[cfg_attr(not(any(target_os = "linux", feature = "jemalloc-allocator")), allow(dead_code))]
pub fn wait_for_threads_with_timeout(cond: &Cond, dur: Duration) -> Option<WaitOutcome> {
let (lock, cvar) = &**cond;
let result = cvar
.wait_timeout_while(lock.lock().unwrap(), dur, |flag| flag.is_pending())
.unwrap();
if result.1.timed_out() {
None
} else {
Some(*result.0)
}
}
}