Files
pezkuwi-subxt/substrate/client/executor/benches/bench.rs
T
yjh e38998801e Executor: Remove LegacyInstanceReuse strategy (#1486)
It seems the old strategy have been depracted more than one year. 
So maybe it's time to clean up old strategy for wasm executor.


---
polkadot address: 15ouFh2SHpGbHtDPsJ6cXQfes9Cx1gEFnJJsJVqPGzBSTudr

---------

Co-authored-by: Bastian Köcher <git@kchr.de>
Co-authored-by: Koute <koute@users.noreply.github.com>
2023-09-18 14:53:06 +09:00

253 lines
7.3 KiB
Rust

// This file is part of Substrate.
// Copyright (C) Parity Technologies (UK) Ltd.
// SPDX-License-Identifier: Apache-2.0
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use criterion::{criterion_group, criterion_main, Criterion};
use codec::Encode;
use sc_executor_common::{
runtime_blob::RuntimeBlob,
wasm_runtime::{WasmInstance, WasmModule, DEFAULT_HEAP_ALLOC_STRATEGY},
};
use sc_executor_wasmtime::InstantiationStrategy;
use sc_runtime_test::wasm_binary_unwrap as test_runtime;
use std::sync::{
atomic::{AtomicBool, AtomicUsize, Ordering},
Arc,
};
#[derive(Clone)]
enum Method {
Compiled { instantiation_strategy: InstantiationStrategy, precompile: bool },
}
// This is just a bog-standard Kusama runtime with an extra
// `test_empty_return` and `test_dirty_plenty_memory` functions
// copy-pasted from the test runtime.
fn kusama_runtime() -> &'static [u8] {
include_bytes!("kusama_runtime.wasm")
}
fn initialize(
_tmpdir: &mut Option<tempfile::TempDir>,
runtime: &[u8],
method: Method,
) -> Box<dyn WasmModule> {
let blob = RuntimeBlob::uncompress_if_needed(runtime).unwrap();
let allow_missing_func_imports = true;
match method {
Method::Compiled { instantiation_strategy, precompile } => {
let config = sc_executor_wasmtime::Config {
allow_missing_func_imports,
cache_path: None,
semantics: sc_executor_wasmtime::Semantics {
heap_alloc_strategy: DEFAULT_HEAP_ALLOC_STRATEGY,
instantiation_strategy,
deterministic_stack_limit: None,
canonicalize_nans: false,
parallel_compilation: true,
wasm_multi_value: false,
wasm_bulk_memory: false,
wasm_reference_types: false,
wasm_simd: false,
},
};
if precompile {
let precompiled_blob =
sc_executor_wasmtime::prepare_runtime_artifact(blob, &config.semantics)
.unwrap();
// Create a fresh temporary directory to make absolutely sure
// we'll use the right module.
*_tmpdir = Some(tempfile::tempdir().unwrap());
let tmpdir = _tmpdir.as_ref().unwrap();
let path = tmpdir.path().join("module.bin");
std::fs::write(&path, &precompiled_blob).unwrap();
unsafe {
sc_executor_wasmtime::create_runtime_from_artifact::<
sp_io::SubstrateHostFunctions,
>(&path, config)
}
} else {
sc_executor_wasmtime::create_runtime::<sp_io::SubstrateHostFunctions>(blob, config)
}
.map(|runtime| -> Box<dyn WasmModule> { Box::new(runtime) })
},
}
.unwrap()
}
fn run_benchmark(
c: &mut Criterion,
benchmark_name: &str,
thread_count: usize,
runtime: &dyn WasmModule,
testcase: impl Fn(&mut Box<dyn WasmInstance>) + Copy + Send + 'static,
) {
c.bench_function(benchmark_name, |b| {
// Here we deliberately start a bunch of extra threads which will just
// keep on independently instantiating the runtime over and over again.
//
// We don't really have to measure how much time those take since the
// work done is essentially the same on each thread, and what we're
// interested in here is only how those extra threads affect the execution
// on the current thread.
//
// In an ideal case assuming we have enough CPU cores those extra threads
// shouldn't affect the main thread's runtime at all, however in practice
// they're not completely independent. There might be per-process
// locks in the kernel which are briefly held during instantiation, etc.,
// and how much those affect the execution here is what we want to measure.
let is_benchmark_running = Arc::new(AtomicBool::new(true));
let threads_running = Arc::new(AtomicUsize::new(0));
let aux_threads: Vec<_> = (0..thread_count - 1)
.map(|_| {
let mut instance = runtime.new_instance().unwrap();
let is_benchmark_running = is_benchmark_running.clone();
let threads_running = threads_running.clone();
std::thread::spawn(move || {
threads_running.fetch_add(1, Ordering::SeqCst);
while is_benchmark_running.load(Ordering::Relaxed) {
testcase(&mut instance);
}
})
})
.collect();
while threads_running.load(Ordering::SeqCst) != (thread_count - 1) {
std::thread::yield_now();
}
let mut instance = runtime.new_instance().unwrap();
b.iter(|| testcase(&mut instance));
is_benchmark_running.store(false, Ordering::SeqCst);
for thread in aux_threads {
thread.join().unwrap();
}
});
}
fn bench_call_instance(c: &mut Criterion) {
let _ = env_logger::try_init();
let strategies = [
(
"recreate_instance_vanilla",
Method::Compiled {
instantiation_strategy: InstantiationStrategy::RecreateInstance,
precompile: false,
},
),
(
"recreate_instance_cow_fresh",
Method::Compiled {
instantiation_strategy: InstantiationStrategy::RecreateInstanceCopyOnWrite,
precompile: false,
},
),
(
"recreate_instance_cow_precompiled",
Method::Compiled {
instantiation_strategy: InstantiationStrategy::RecreateInstanceCopyOnWrite,
precompile: true,
},
),
(
"pooling_vanilla_fresh",
Method::Compiled {
instantiation_strategy: InstantiationStrategy::Pooling,
precompile: false,
},
),
(
"pooling_vanilla_precompiled",
Method::Compiled {
instantiation_strategy: InstantiationStrategy::Pooling,
precompile: true,
},
),
(
"pooling_cow_fresh",
Method::Compiled {
instantiation_strategy: InstantiationStrategy::PoolingCopyOnWrite,
precompile: false,
},
),
(
"pooling_cow_precompiled",
Method::Compiled {
instantiation_strategy: InstantiationStrategy::PoolingCopyOnWrite,
precompile: true,
},
),
];
let runtimes = [("kusama_runtime", kusama_runtime()), ("test_runtime", test_runtime())];
let thread_counts = [1, 2, 4, 8, 16];
fn test_call_empty_function(instance: &mut Box<dyn WasmInstance>) {
instance.call_export("test_empty_return", &[0]).unwrap();
}
fn test_dirty_1mb_of_memory(instance: &mut Box<dyn WasmInstance>) {
instance.call_export("test_dirty_plenty_memory", &(0, 16).encode()).unwrap();
}
let testcases = [
("call_empty_function", test_call_empty_function as fn(&mut Box<dyn WasmInstance>)),
("dirty_1mb_of_memory", test_dirty_1mb_of_memory),
];
let num_cpus = num_cpus::get_physical();
let mut tmpdir = None;
for (strategy_name, strategy) in strategies {
for (runtime_name, runtime) in runtimes {
let runtime = initialize(&mut tmpdir, runtime, strategy.clone());
for (testcase_name, testcase) in testcases {
for thread_count in thread_counts {
if thread_count > num_cpus {
// If there are not enough cores available the benchmark is pointless.
continue
}
let benchmark_name = format!(
"{}_from_{}_with_{}_on_{}_threads",
testcase_name, runtime_name, strategy_name, thread_count
);
run_benchmark(c, &benchmark_name, thread_count, &*runtime, testcase);
}
}
}
}
}
criterion_group! {
name = benches;
config = Criterion::default();
targets = bench_call_instance
}
criterion_main!(benches);