Tracking/limiting memory allocator (#1192)

This commit is contained in:
s0me0ne-unkn0wn
2023-11-03 16:48:41 +01:00
committed by GitHub
parent 8cfbee706d
commit cd2d5d2579
19 changed files with 569 additions and 58 deletions
+26 -1
View File
@@ -23,27 +23,36 @@ use std::fmt;
pub type PrepareResult = Result<PrepareStats, PrepareError>;
/// An error that occurred during the prepare part of the PVF pipeline.
// Codec indexes are intended to stabilize pre-encoded payloads (see `OOM_PAYLOAD` below)
#[derive(Debug, Clone, Encode, Decode)]
pub enum PrepareError {
/// During the prevalidation stage of preparation an issue was found with the PVF.
#[codec(index = 0)]
Prevalidation(String),
/// Compilation failed for the given PVF.
#[codec(index = 1)]
Preparation(String),
/// Instantiation of the WASM module instance failed.
#[codec(index = 2)]
RuntimeConstruction(String),
/// An unexpected panic has occurred in the preparation worker.
#[codec(index = 3)]
Panic(String),
/// Failed to prepare the PVF due to the time limit.
#[codec(index = 4)]
TimedOut,
/// An IO error occurred. This state is reported by either the validation host or by the
/// worker.
#[codec(index = 5)]
IoErr(String),
/// The temporary file for the artifact could not be created at the given cache path. This
/// state is reported by the validation host (not by the worker).
#[codec(index = 6)]
CreateTmpFileErr(String),
/// The response from the worker is received, but the file cannot be renamed (moved) to the
/// final destination location. This state is reported by the validation host (not by the
/// worker).
#[codec(index = 7)]
RenameTmpFileErr {
err: String,
// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
@@ -51,12 +60,19 @@ pub enum PrepareError {
src: Option<String>,
dest: Option<String>,
},
/// Memory limit reached
#[codec(index = 8)]
OutOfMemory,
/// The response from the worker is received, but the worker cache could not be cleared. The
/// worker has to be killed to avoid jobs having access to data from other jobs. This state is
/// reported by the validation host (not by the worker).
#[codec(index = 9)]
ClearWorkerDir(String),
}
/// Pre-encoded length-prefixed `PrepareResult::Err(PrepareError::OutOfMemory)`
pub const OOM_PAYLOAD: &[u8] = b"\x02\x00\x00\x00\x00\x00\x00\x00\x01\x08";
impl PrepareError {
/// Returns whether this is a deterministic error, i.e. one that should trigger reliably. Those
/// errors depend on the PVF itself and the sc-executor/wasmtime logic.
@@ -67,7 +83,7 @@ impl PrepareError {
pub fn is_deterministic(&self) -> bool {
use PrepareError::*;
match self {
Prevalidation(_) | Preparation(_) | Panic(_) => true,
Prevalidation(_) | Preparation(_) | Panic(_) | OutOfMemory => true,
TimedOut |
IoErr(_) |
CreateTmpFileErr(_) |
@@ -92,6 +108,7 @@ impl fmt::Display for PrepareError {
CreateTmpFileErr(err) => write!(f, "prepare: error creating tmp file: {}", err),
RenameTmpFileErr { err, src, dest } =>
write!(f, "prepare: error renaming tmp file ({:?} -> {:?}): {}", src, dest, err),
OutOfMemory => write!(f, "prepare: out of memory"),
ClearWorkerDir(err) => write!(f, "prepare: error clearing worker cache: {}", err),
}
}
@@ -147,3 +164,11 @@ impl fmt::Display for InternalValidationError {
}
}
}
#[test]
fn pre_encoded_payloads() {
let oom_enc = PrepareResult::Err(PrepareError::OutOfMemory).encode();
let mut oom_payload = oom_enc.len().to_le_bytes().to_vec();
oom_payload.extend(oom_enc);
assert_eq!(oom_payload, OOM_PAYLOAD);
}
@@ -166,15 +166,36 @@ pub fn params_to_wasmtime_semantics(par: &ExecutorParams) -> Result<Semantics, S
ExecutorParam::StackLogicalMax(slm) => stack_limit.logical_max = *slm,
ExecutorParam::StackNativeMax(snm) => stack_limit.native_stack_max = *snm,
ExecutorParam::WasmExtBulkMemory => sem.wasm_bulk_memory = true,
// TODO: Not implemented yet; <https://github.com/paritytech/polkadot/issues/6472>.
ExecutorParam::PrecheckingMaxMemory(_) => (),
ExecutorParam::PvfPrepTimeout(_, _) | ExecutorParam::PvfExecTimeout(_, _) => (), /* Not used here */
ExecutorParam::PrecheckingMaxMemory(_) |
ExecutorParam::PvfPrepTimeout(_, _) |
ExecutorParam::PvfExecTimeout(_, _) => (), /* Not used here */
}
}
sem.deterministic_stack_limit = Some(stack_limit);
Ok(sem)
}
/// Runs the prevalidation on the given code. Returns a [`RuntimeBlob`] if it succeeds.
pub fn prevalidate(code: &[u8]) -> Result<RuntimeBlob, sc_executor_common::error::WasmError> {
let blob = RuntimeBlob::new(code)?;
// It's assumed this function will take care of any prevalidation logic
// that needs to be done.
//
// Do nothing for now.
Ok(blob)
}
/// Runs preparation on the given runtime blob. If successful, it returns a serialized compiled
/// artifact which can then be used to pass into `Executor::execute` after writing it to the disk.
pub fn prepare(
blob: RuntimeBlob,
executor_params: &ExecutorParams,
) -> Result<Vec<u8>, sc_executor_common::error::WasmError> {
let semantics = params_to_wasmtime_semantics(executor_params)
.map_err(|e| sc_executor_common::error::WasmError::Other(e))?;
sc_executor_wasmtime::prepare_runtime_artifact(blob, &semantics)
}
/// Available host functions. We leave out:
///
/// 1. storage related stuff (PVF doesn't have a notion of a persistent storage/trie)
+3 -1
View File
@@ -29,12 +29,14 @@ pub struct PrepareStats {
/// supported by the OS, `ru_maxrss`.
#[derive(Clone, Debug, Default, Encode, Decode)]
pub struct MemoryStats {
/// Memory stats from `tikv_jemalloc_ctl`.
/// Memory stats from `tikv_jemalloc_ctl`, polling-based and not very precise.
#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
pub memory_tracker_stats: Option<MemoryAllocationStats>,
/// `ru_maxrss` from `getrusage`. `None` if an error occurred.
#[cfg(target_os = "linux")]
pub max_rss: Option<i64>,
/// Peak allocation in bytes measured by tracking allocator
pub peak_tracked_alloc: u64,
}
/// Statistics of collected memory metrics.