Switch to pooling copy-on-write instantiation strategy for WASM (#11232)

* Switch to pooling copy-on-write instantiation strategy for WASM
* Fix benchmark compilation
* Fix `cargo fmt`
* Fix compilation of another benchmark I've missed
* Cleanups according to review comments
* Move `max_memory_size` to `Semantics`
* Set `memory_guaranteed_dense_image_size` to `max_memory_size`
* Rename `wasm_instantiation_strategy` to `wasmtime_instantiation_strategy`
* Update the doc-comments regarding the instantiation strategy
* Extend the integration tests to test every instantiation strategy
* Don't drop the temporary directory until the runtime is dropped in benchmarks
* Don't drop the temporary directory until the runtime is dropped in tests
2026-04-27 01:07:57 +00:00 · 2022-05-19 16:32:53 +09:00
parent b3b7b4ddc7
commit dd854c16e2
21 changed files with 726 additions and 236 deletions
@@ -80,7 +80,7 @@ impl StoreData {
 pub(crate) type Store = wasmtime::Store<StoreData>;

 enum Strategy {
-	FastInstanceReuse {
+	LegacyInstanceReuse {
 		instance_wrapper: InstanceWrapper,
 		globals_snapshot: GlobalsSnapshot<wasmtime::Global>,
 		data_segments_snapshot: Arc<DataSegmentsSnapshot>,
@@ -136,41 +136,42 @@ struct InstanceSnapshotData {
 pub struct WasmtimeRuntime {
 	engine: wasmtime::Engine,
 	instance_pre: Arc<wasmtime::InstancePre<StoreData>>,
-	snapshot_data: Option<InstanceSnapshotData>,
+	instantiation_strategy: InternalInstantiationStrategy,
 	config: Config,
 }

 impl WasmModule for WasmtimeRuntime {
 	fn new_instance(&self) -> Result<Box<dyn WasmInstance>> {
-		let strategy = if let Some(ref snapshot_data) = self.snapshot_data {
-			let mut instance_wrapper = InstanceWrapper::new(
-				&self.engine,
-				&self.instance_pre,
-				self.config.max_memory_size,
-			)?;
-			let heap_base = instance_wrapper.extract_heap_base()?;
+		let strategy = match self.instantiation_strategy {
+			InternalInstantiationStrategy::LegacyInstanceReuse(ref snapshot_data) => {
+				let mut instance_wrapper = InstanceWrapper::new(
+					&self.engine,
+					&self.instance_pre,
+					self.config.semantics.max_memory_size,
+				)?;
+				let heap_base = instance_wrapper.extract_heap_base()?;

-			// This function panics if the instance was created from a runtime blob different from
-			// which the mutable globals were collected. Here, it is easy to see that there is only
-			// a single runtime blob and thus it's the same that was used for both creating the
-			// instance and collecting the mutable globals.
-			let globals_snapshot = GlobalsSnapshot::take(
-				&snapshot_data.mutable_globals,
-				&mut InstanceGlobals { instance: &mut instance_wrapper },
-			);
+				// This function panics if the instance was created from a runtime blob different
+				// from which the mutable globals were collected. Here, it is easy to see that there
+				// is only a single runtime blob and thus it's the same that was used for both
+				// creating the instance and collecting the mutable globals.
+				let globals_snapshot = GlobalsSnapshot::take(
+					&snapshot_data.mutable_globals,
+					&mut InstanceGlobals { instance: &mut instance_wrapper },
+				);

-			Strategy::FastInstanceReuse {
-				instance_wrapper,
-				globals_snapshot,
-				data_segments_snapshot: snapshot_data.data_segments_snapshot.clone(),
-				heap_base,
-			}
-		} else {
-			Strategy::RecreateInstance(InstanceCreator {
+				Strategy::LegacyInstanceReuse {
+					instance_wrapper,
+					globals_snapshot,
+					data_segments_snapshot: snapshot_data.data_segments_snapshot.clone(),
+					heap_base,
+				}
+			},
+			InternalInstantiationStrategy::Builtin => Strategy::RecreateInstance(InstanceCreator {
 				engine: self.engine.clone(),
 				instance_pre: self.instance_pre.clone(),
-				max_memory_size: self.config.max_memory_size,
-			})
+				max_memory_size: self.config.semantics.max_memory_size,
+			}),
 		};

 		Ok(Box::new(WasmtimeInstance { strategy }))
@@ -186,7 +187,7 @@ pub struct WasmtimeInstance {
 impl WasmInstance for WasmtimeInstance {
 	fn call(&mut self, method: InvokeMethod, data: &[u8]) -> Result<Vec<u8>> {
 		match &mut self.strategy {
-			Strategy::FastInstanceReuse {
+			Strategy::LegacyInstanceReuse {
 				ref mut instance_wrapper,
 				globals_snapshot,
 				data_segments_snapshot,
@@ -225,7 +226,7 @@ impl WasmInstance for WasmtimeInstance {

 	fn get_global_const(&mut self, name: &str) -> Result<Option<Value>> {
 		match &mut self.strategy {
-			Strategy::FastInstanceReuse { instance_wrapper, .. } =>
+			Strategy::LegacyInstanceReuse { instance_wrapper, .. } =>
 				instance_wrapper.get_global_val(name),
 			Strategy::RecreateInstance(ref mut instance_creator) =>
 				instance_creator.instantiate()?.get_global_val(name),
@@ -239,7 +240,7 @@ impl WasmInstance for WasmtimeInstance {
 				// associated with it.
 				None
 			},
-			Strategy::FastInstanceReuse { instance_wrapper, .. } =>
+			Strategy::LegacyInstanceReuse { instance_wrapper, .. } =>
 				Some(instance_wrapper.base_ptr()),
 		}
 	}
@@ -326,6 +327,48 @@ fn common_config(semantics: &Semantics) -> std::result::Result<wasmtime::Config,
 	config.wasm_threads(false);
 	config.wasm_memory64(false);

+	let (use_pooling, use_cow) = match semantics.instantiation_strategy {
+		InstantiationStrategy::PoolingCopyOnWrite => (true, true),
+		InstantiationStrategy::Pooling => (true, false),
+		InstantiationStrategy::RecreateInstanceCopyOnWrite => (false, true),
+		InstantiationStrategy::RecreateInstance => (false, false),
+		InstantiationStrategy::LegacyInstanceReuse => (false, false),
+	};
+
+	config.memory_init_cow(use_cow);
+	config.memory_guaranteed_dense_image_size(
+		semantics.max_memory_size.map(|max| max as u64).unwrap_or(u64::MAX),
+	);
+
+	if use_pooling {
+		config.allocation_strategy(wasmtime::InstanceAllocationStrategy::Pooling {
+			strategy: wasmtime::PoolingAllocationStrategy::ReuseAffinity,
+
+			// Pooling needs a bunch of hard limits to be set; if we go over
+			// any of these then the instantiation will fail.
+			instance_limits: wasmtime::InstanceLimits {
+				// Current minimum values for kusama (as of 2022-04-14):
+				//   size: 32384
+				//   table_elements: 1249
+				//   memory_pages: 2070
+				size: 64 * 1024,
+				table_elements: 2048,
+				memory_pages: 4096,
+
+				// We can only have a single one of each of those.
+				tables: 1,
+				memories: 1,
+
+				// This determines how many instances of the module can be
+				// instantiated in parallel from the same `Module`.
+				//
+				// This includes nested instances spawned with `sp_tasks::spawn`
+				// from *within* the runtime.
+				count: 32,
+			},
+		});
+	}
+
 	Ok(config)
 }

@@ -373,18 +416,47 @@ pub struct DeterministicStackLimit {
 	pub native_stack_max: u32,
 }

+/// The instantiation strategy to use for the WASM executor.
+///
+/// All of the CoW strategies (with `CopyOnWrite` suffix) are only supported when either:
+///   a) we're running on Linux,
+///   b) we're running on a Unix-like system and we're precompiling
+///      our module beforehand.
+///
+/// If the CoW variant of a strategy is unsupported the executor will
+/// fall back to the non-CoW equivalent.
+#[non_exhaustive]
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+pub enum InstantiationStrategy {
+	/// Pool the instances to avoid initializing everything from scratch
+	/// on each instantiation. Use copy-on-write memory when possible.
+	///
+	/// This is the fastest instantiation strategy.
+	PoolingCopyOnWrite,
+
+	/// Recreate the instance from scratch on every instantiation.
+	/// Use copy-on-write memory when possible.
+	RecreateInstanceCopyOnWrite,
+
+	/// Pool the instances to avoid initializing everything from scratch
+	/// on each instantiation.
+	Pooling,
+
+	/// Recreate the instance from scratch on every instantiation. Very slow.
+	RecreateInstance,
+
+	/// Legacy instance reuse mechanism. DEPRECATED. Will be removed. Do not use.
+	LegacyInstanceReuse,
+}
+
+enum InternalInstantiationStrategy {
+	LegacyInstanceReuse(InstanceSnapshotData),
+	Builtin,
+}
+
 pub struct Semantics {
-	/// Enabling this will lead to some optimization shenanigans that make calling [`WasmInstance`]
-	/// extremely fast.
-	///
-	/// Primarily this is achieved by not recreating the instance for each call and performing a
-	/// bare minimum clean up: reapplying the data segments and restoring the values for global
-	/// variables.
-	///
-	/// Since this feature depends on instrumentation, it can be set only if runtime is
-	/// instantiated using the runtime blob, e.g. using [`create_runtime`].
-	// I.e. if [`CodeSupplyMode::Verbatim`] is used.
-	pub fast_instance_reuse: bool,
+	/// The instantiation strategy to use.
+	pub instantiation_strategy: InstantiationStrategy,

 	/// Specifying `Some` will enable deterministic stack height. That is, all executor
 	/// invocations will reach stack overflow at the exactly same point across different wasmtime
@@ -418,9 +490,7 @@ pub struct Semantics {
 	/// The number of extra WASM pages which will be allocated
 	/// on top of what is requested by the WASM blob itself.
 	pub extra_heap_pages: u64,
-}

-pub struct Config {
 	/// The total amount of memory in bytes an instance can request.
 	///
 	/// If specified, the runtime will be able to allocate only that much of wasm memory.
@@ -436,7 +506,9 @@ pub struct Config {
 	///
 	/// The default is `None`.
 	pub max_memory_size: Option<usize>,
+}

+pub struct Config {
 	/// The WebAssembly standard requires all imports of an instantiated module to be resolved,
 	/// otherwise, the instantiation fails. If this option is set to `true`, then this behavior is
 	/// overridden and imports that are requested by the module and not provided by the host
@@ -452,24 +524,16 @@ pub struct Config {

 enum CodeSupplyMode<'a> {
 	/// The runtime is instantiated using the given runtime blob.
-	Verbatim {
-		// Rationale to take the `RuntimeBlob` here is so that the client will be able to reuse
-		// the blob e.g. if they did a prevalidation. If they didn't they can pass a `RuntimeBlob`
-		// instance and it will be used anyway in most cases, because we are going to do at least
-		// some instrumentations for both anticipated paths: substrate execution and PVF execution.
-		//
-		// Should there raise a need in performing no instrumentation and the client doesn't need
-		// to do any checks, then we can provide a `Cow` like semantics here: if we need the blob
-		// and  the user got `RuntimeBlob` then extract it, or otherwise create it from the given
-		// bytecode.
-		blob: RuntimeBlob,
-	},
+	Fresh(RuntimeBlob),

-	/// The code is supplied in a form of a compiled artifact.
+	/// The runtime is instantiated using a precompiled module.
 	///
 	/// This assumes that the code is already prepared for execution and the same `Config` was
 	/// used.
-	Artifact { compiled_artifact: &'a [u8] },
+	///
+	/// We use a `Path` here instead of simply passing a byte slice to allow `wasmtime` to
+	/// map the runtime's linear memory on supported platforms in a copy-on-write fashion.
+	Precompiled(&'a Path),
 }

 /// Create a new `WasmtimeRuntime` given the code. This function performs translation from Wasm to
@@ -484,29 +548,34 @@ pub fn create_runtime<H>(
 where
 	H: HostFunctions,
 {
-	// SAFETY: this is safe because it doesn't use `CodeSupplyMode::Artifact`.
-	unsafe { do_create_runtime::<H>(CodeSupplyMode::Verbatim { blob }, config) }
+	// SAFETY: this is safe because it doesn't use `CodeSupplyMode::Precompiled`.
+	unsafe { do_create_runtime::<H>(CodeSupplyMode::Fresh(blob), config) }
 }

-/// The same as [`create_runtime`] but takes a precompiled artifact, which makes this function
-/// considerably faster than [`create_runtime`].
+/// The same as [`create_runtime`] but takes a path to a precompiled artifact,
+/// which makes this function considerably faster than [`create_runtime`].
 ///
 /// # Safety
 ///
-/// The caller must ensure that the compiled artifact passed here was produced by
-/// [`prepare_runtime_artifact`]. Otherwise, there is a risk of arbitrary code execution with all
-/// implications.
+/// The caller must ensure that the compiled artifact passed here:
+///   1) was produced by [`prepare_runtime_artifact`],
+///   2) was written to the disk as a file,
+///   3) was not modified,
+///   4) will not be modified while any runtime using this artifact is alive, or is being
+///      instantiated.
 ///
-/// It is ok though if the `compiled_artifact` was created by code of another version or with
+/// Failure to adhere to these requirements might lead to crashes and arbitrary code execution.
+///
+/// It is ok though if the compiled artifact was created by code of another version or with
 /// different configuration flags. In such case the caller will receive an `Err` deterministically.
 pub unsafe fn create_runtime_from_artifact<H>(
-	compiled_artifact: &[u8],
+	compiled_artifact_path: &Path,
 	config: Config,
 ) -> std::result::Result<WasmtimeRuntime, WasmError>
 where
 	H: HostFunctions,
 {
-	do_create_runtime::<H>(CodeSupplyMode::Artifact { compiled_artifact }, config)
+	do_create_runtime::<H>(CodeSupplyMode::Precompiled(compiled_artifact_path), config)
 }

 /// # Safety
@@ -520,7 +589,6 @@ unsafe fn do_create_runtime<H>(
 where
 	H: HostFunctions,
 {
-	// Create the engine, store and finally the module from the given code.
 	let mut wasmtime_config = common_config(&config.semantics)?;
 	if let Some(ref cache_path) = config.cache_path {
 		if let Err(reason) = setup_wasmtime_caching(cache_path, &mut wasmtime_config) {
@@ -534,45 +602,71 @@ where
 	let engine = Engine::new(&wasmtime_config)
 		.map_err(|e| WasmError::Other(format!("cannot create the wasmtime engine: {}", e)))?;

-	let (module, snapshot_data) = match code_supply_mode {
-		CodeSupplyMode::Verbatim { blob } => {
+	let (module, instantiation_strategy) = match code_supply_mode {
+		CodeSupplyMode::Fresh(blob) => {
 			let blob = prepare_blob_for_compilation(blob, &config.semantics)?;
 			let serialized_blob = blob.clone().serialize();

 			let module = wasmtime::Module::new(&engine, &serialized_blob)
 				.map_err(|e| WasmError::Other(format!("cannot create module: {}", e)))?;

-			if config.semantics.fast_instance_reuse {
-				let data_segments_snapshot = DataSegmentsSnapshot::take(&blob).map_err(|e| {
-					WasmError::Other(format!("cannot take data segments snapshot: {}", e))
-				})?;
-				let data_segments_snapshot = Arc::new(data_segments_snapshot);
-				let mutable_globals = ExposedMutableGlobalsSet::collect(&blob);
+			match config.semantics.instantiation_strategy {
+				InstantiationStrategy::LegacyInstanceReuse => {
+					let data_segments_snapshot =
+						DataSegmentsSnapshot::take(&blob).map_err(|e| {
+							WasmError::Other(format!("cannot take data segments snapshot: {}", e))
+						})?;
+					let data_segments_snapshot = Arc::new(data_segments_snapshot);
+					let mutable_globals = ExposedMutableGlobalsSet::collect(&blob);

-				(module, Some(InstanceSnapshotData { data_segments_snapshot, mutable_globals }))
-			} else {
-				(module, None)
+					(
+						module,
+						InternalInstantiationStrategy::LegacyInstanceReuse(InstanceSnapshotData {
+							data_segments_snapshot,
+							mutable_globals,
+						}),
+					)
+				},
+				InstantiationStrategy::Pooling |
+				InstantiationStrategy::PoolingCopyOnWrite |
+				InstantiationStrategy::RecreateInstance |
+				InstantiationStrategy::RecreateInstanceCopyOnWrite =>
+					(module, InternalInstantiationStrategy::Builtin),
 			}
 		},
-		CodeSupplyMode::Artifact { compiled_artifact } => {
-			// SAFETY: The unsafity of `deserialize` is covered by this function. The
+		CodeSupplyMode::Precompiled(compiled_artifact_path) => {
+			if let InstantiationStrategy::LegacyInstanceReuse =
+				config.semantics.instantiation_strategy
+			{
+				return Err(WasmError::Other("the legacy instance reuse instantiation strategy is incompatible with precompiled modules".into()));
+			}
+
+			// SAFETY: The unsafety of `deserialize_file` is covered by this function. The
 			//         responsibilities to maintain the invariants are passed to the caller.
-			let module = wasmtime::Module::deserialize(&engine, compiled_artifact)
+			//
+			//         See [`create_runtime_from_artifact`] for more details.
+			let module = wasmtime::Module::deserialize_file(&engine, compiled_artifact_path)
 				.map_err(|e| WasmError::Other(format!("cannot deserialize module: {}", e)))?;

-			(module, None)
+			(module, InternalInstantiationStrategy::Builtin)
 		},
 	};

 	let mut linker = wasmtime::Linker::new(&engine);
 	crate::imports::prepare_imports::<H>(&mut linker, &module, config.allow_missing_func_imports)?;

-	let mut store = crate::instance_wrapper::create_store(module.engine(), config.max_memory_size);
+	let mut store =
+		crate::instance_wrapper::create_store(module.engine(), config.semantics.max_memory_size);
 	let instance_pre = linker
 		.instantiate_pre(&mut store, &module)
 		.map_err(|e| WasmError::Other(format!("cannot preinstantiate module: {}", e)))?;

-	Ok(WasmtimeRuntime { engine, instance_pre: Arc::new(instance_pre), snapshot_data, config })
+	Ok(WasmtimeRuntime {
+		engine,
+		instance_pre: Arc::new(instance_pre),
+		instantiation_strategy,
+		config,
+	})
 }

 fn prepare_blob_for_compilation(
@@ -583,16 +677,17 @@ fn prepare_blob_for_compilation(
 		blob = blob.inject_stack_depth_metering(logical_max)?;
 	}

-	// If enabled, this should happen after all other passes that may introduce global variables.
-	if semantics.fast_instance_reuse {
+	if let InstantiationStrategy::LegacyInstanceReuse = semantics.instantiation_strategy {
+		// When this strategy is used this must be called after all other passes which may introduce
+		// new global variables, otherwise they will not be reset when we call into the runtime
+		// again.
 		blob.expose_mutable_globals();
 	}

 	// We don't actually need the memory to be imported so we can just convert any memory
 	// import into an export with impunity. This simplifies our code since `wasmtime` will
-	// now automatically take care of creating the memory for us, and it also allows us
-	// to potentially enable `wasmtime`'s instance pooling at a later date. (Imported
-	// memories are ineligible for pooling.)
+	// now automatically take care of creating the memory for us, and it is also necessary
+	// to enable `wasmtime`'s instance pooling. (Imported memories are ineligible for pooling.)
 	blob.convert_memory_import_into_export()?;
 	blob.add_extra_heap_pages_to_memory_section(
 		semantics