PVF: NaN canonicalization & deterministic stack (#9069)

* NaN canonicalization

* Introduce a simple stack depth metering

* Be explicit about the wasm features we enable

* Pull the latest fix for the pwasm-utils crate

* Disable `wasm_threads` as well.

* Factor out deterministic stack params

* Add more docs

* Remove redundant dep

* Refine comments

* Typo

Co-authored-by: Andronik Ordian <write@reusable.software>

Co-authored-by: Andronik Ordian <write@reusable.software>
This commit is contained in:
Sergei Shulepov
2021-07-07 11:29:39 +03:00
committed by GitHub
parent d80e1bc978
commit f388b66ab5
9 changed files with 2610 additions and 66 deletions
+102 -25
View File
@@ -232,10 +232,75 @@ directory = \"{cache_dir}\"
Ok(())
}
fn common_config() -> wasmtime::Config {
fn common_config(semantics: &Semantics) -> std::result::Result<wasmtime::Config, WasmError> {
let mut config = wasmtime::Config::new();
config.cranelift_opt_level(wasmtime::OptLevel::SpeedAndSize);
config
config.cranelift_nan_canonicalization(semantics.canonicalize_nans);
if let Some(DeterministicStackLimit {
native_stack_max, ..
}) = semantics.deterministic_stack_limit
{
config
.max_wasm_stack(native_stack_max as usize)
.map_err(|e| WasmError::Other(format!("cannot set max wasm stack: {}", e)))?;
}
// Be clear and specific about the extensions we support. If an update brings new features
// they should be introduced here as well.
config.wasm_reference_types(false);
config.wasm_simd(false);
config.wasm_bulk_memory(false);
config.wasm_multi_value(false);
config.wasm_multi_memory(false);
config.wasm_module_linking(false);
config.wasm_threads(false);
Ok(config)
}
/// Knobs for deterministic stack height limiting.
///
/// The WebAssembly standard defines a call/value stack but it doesn't say anything about its
/// size except that it has to be finite. The implementations are free to choose their own notion
/// of limit: some may count the number of calls or values, others would rely on the host machine
/// stack and trap on reaching a guard page.
///
/// This obviously is a source of non-determinism during execution. This feature can be used
/// to instrument the code so that it will count the depth of execution in some deterministic
/// way (the machine stack limit should be so high that the deterministic limit always triggers
/// first).
///
/// The deterministic stack height limiting feature allows instrumenting the code so that it will
/// count the number of items that may be on the stack. This counting will only act as a rough
/// estimate of the actual stack limit in wasmtime. This is because wasmtime measures its stack
/// usage in bytes.
///
/// The actual number of bytes consumed by a function is not trivial to compute without going through
/// full compilation. Therefore, it's expected that `native_stack_max` is greatly overestimated and
/// thus never reached in practice. The stack overflow check introduced by the instrumentation and
/// that relies on the logical item count should be reached first.
///
/// See [here][stack_height] for more details of the instrumentation.
///
/// [stack_height]: https://github.com/paritytech/wasm-utils/blob/d9432baf/src/stack_height/mod.rs#L1-L50
pub struct DeterministicStackLimit {
/// A number of logical "values" that can be pushed on the wasm stack. A trap will be triggered
/// if exceeded.
///
/// A logical value is a local, an argument or a value pushed on operand stack.
pub logical_max: u32,
/// The maximum number of bytes for stack used by wasmtime JITed code.
///
/// It's not specified how many bytes will be consumed by a stack frame for a given wasm function
/// after translation into machine code. It is also not quite trivial to compute.
///
/// Therefore, this number should be chosen conservatively. It must be large enough to
/// fit the [`logical_max`] logical values on the stack, according to the current instrumentation
/// algorithm.
///
/// This value cannot be 0.
pub native_stack_max: u32,
}
pub struct Semantics {
@@ -254,24 +319,30 @@ pub struct Semantics {
/// is used.
pub fast_instance_reuse: bool,
/// The WebAssembly standard defines a call/value stack but it doesn't say anything about its
/// size except that it has to be finite. The implementations are free to choose their own notion
/// of limit: some may count the number of calls or values, others would rely on the host machine
/// stack and trap on reaching a guard page.
/// Specifying `Some` will enable deterministic stack height. That is, all executor invocations
/// will reach stack overflow at exactly the same point across different wasmtime versions and
/// architectures.
///
/// This obviously is a source of non-determinism during execution. This feature can be used
/// to instrument the code so that it will count the depth of execution in some deterministic
/// way (the machine stack limit should be so high that the deterministic limit always triggers
/// first).
///
/// See [here][stack_height] for more details of the instrumentation
/// This is achieved by a combination of running an instrumentation pass on input code and
/// configuring wasmtime accordingly.
///
/// Since this feature depends on instrumentation, it can be set only if [`CodeSupplyMode::Verbatim`]
/// is used.
pub deterministic_stack_limit: Option<DeterministicStackLimit>,
/// Controls whether wasmtime should compile floating point in a way that doesn't allow for
/// non-determinism.
///
/// [stack_height]: https://github.com/paritytech/wasm-utils/blob/d9432baf/src/stack_height/mod.rs#L1-L50
pub stack_depth_metering: bool,
// Other things like nan canonicalization can be added here.
/// By default, the wasm spec allows some local non-determinism wrt. certain floating point
/// operations. Specifically, those operations that are not defined to operate on bits (e.g. fneg)
/// can produce NaN values. The exact bit pattern for those is not specified and may depend
/// on the particular machine that executes wasmtime generated JITed machine code. That is
/// a source of non-deterministic values.
///
/// The classical runtime environment for Substrate allowed it and punted this on the runtime
/// developers. For PVFs, we want to ensure that execution is deterministic though. Therefore,
/// for PVF execution this flag is meant to be turned on.
pub canonicalize_nans: bool,
}
pub struct Config {
@@ -355,7 +426,7 @@ unsafe fn do_create_runtime(
host_functions: Vec<&'static dyn Function>,
) -> std::result::Result<WasmtimeRuntime, WasmError> {
// Create the engine, store and finally the module from the given code.
let mut wasmtime_config = common_config();
let mut wasmtime_config = common_config(&config.semantics)?;
if let Some(ref cache_path) = config.cache_path {
if let Err(reason) = setup_wasmtime_caching(cache_path, &mut wasmtime_config) {
log::warn!(
@@ -369,8 +440,8 @@ unsafe fn do_create_runtime(
.map_err(|e| WasmError::Other(format!("cannot create the engine for runtime: {}", e)))?;
let (module, snapshot_data) = match code_supply_mode {
CodeSupplyMode::Verbatim { mut blob } => {
instrument(&mut blob, &config.semantics);
CodeSupplyMode::Verbatim { blob } => {
let blob = instrument(blob, &config.semantics)?;
if config.semantics.fast_instance_reuse {
let data_segments_snapshot = DataSegmentsSnapshot::take(&blob).map_err(|e| {
@@ -412,25 +483,31 @@ unsafe fn do_create_runtime(
})
}
fn instrument(blob: &mut RuntimeBlob, semantics: &Semantics) {
fn instrument(
mut blob: RuntimeBlob,
semantics: &Semantics,
) -> std::result::Result<RuntimeBlob, WasmError> {
if let Some(DeterministicStackLimit { logical_max, .. }) = semantics.deterministic_stack_limit {
blob = blob.inject_stack_depth_metering(logical_max)?;
}
// If enabled, this should happen after all other passes that may introduce global variables.
if semantics.fast_instance_reuse {
blob.expose_mutable_globals();
}
if semantics.stack_depth_metering {
// TODO: implement deterministic stack metering https://github.com/paritytech/substrate/issues/8393
}
Ok(blob)
}
/// Takes a [`RuntimeBlob`] and precompiles it returning the serialized result of compilation. It
/// can then be used for calling [`create_runtime`] avoiding long compilation times.
pub fn prepare_runtime_artifact(
mut blob: RuntimeBlob,
blob: RuntimeBlob,
semantics: &Semantics,
) -> std::result::Result<Vec<u8>, WasmError> {
instrument(&mut blob, semantics);
let blob = instrument(blob, semantics)?;
let engine = Engine::new(&common_config())
let engine = Engine::new(&common_config(semantics)?)
.map_err(|e| WasmError::Other(format!("cannot create the engine: {}", e)))?;
engine