mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-09 20:11:09 +00:00
Update Wasm benchmarks (#2957)
In https://github.com/paritytech/polkadot-sdk/pull/2941 we found out that the new Wasmi (register) is very effective at optimizing away certain benchmark bytecode constructs. This gave it an unfair advantage over Wasmi (stack) and rendered our former benchmarks ineffective at properly measuring the performance impact. This PR adjusts both affected benchmarks to fix the stated problems. Affected are - `instr_i64const` -> `instr_i64add`: Renamed since it now measures the performance impact of the Wasm `i64.add` instruction with locals as inputs and outputs. This makes it impossible for Wasmi (register) to aggressively optimize away the entire function body (as it previously did) but still provides a way for Wasmi (register) to shine with its register-based execution model. - `call_with_code_per_byte`: Now uses `local.get` instead of `i32.const` for the `if` condition, which prevents Wasmi (register) from aggressively optimizing away whole parts of the `if` and thereby gaining an unfair advantage. cc @athei --------- Co-authored-by: command-bot <> Co-authored-by: Alexander Theißen <alex.theissen@me.com> Co-authored-by: Ignacio Palacios <ignacio.palacios.santos@gmail.com>
This commit is contained in:
@@ -31,8 +31,8 @@ use sp_std::{borrow::ToOwned, prelude::*};
|
||||
use wasm_instrument::parity_wasm::{
|
||||
builder,
|
||||
elements::{
|
||||
self, BlockType, CustomSection, External, FuncBody, Instruction, Instructions, Module,
|
||||
Section, ValueType,
|
||||
self, BlockType, CustomSection, External, FuncBody, Instruction, Instructions, Local,
|
||||
Module, Section, ValueType,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -281,17 +281,21 @@ impl<T: Config> WasmModule<T> {
|
||||
/// instrumentation runtime by nesting blocks as deeply as possible given the byte budget.
|
||||
/// `code_location`: Whether to place the code into `deploy` or `call`.
|
||||
pub fn sized(target_bytes: u32, code_location: Location) -> Self {
|
||||
use self::elements::Instruction::{End, I32Const, If, Return};
|
||||
use self::elements::Instruction::{End, GetLocal, If, Return};
|
||||
// Base size of a contract is 63 bytes and each expansion adds 6 bytes.
|
||||
// We do one expansion less to account for the code section and function body
|
||||
// size fields inside the binary wasm module representation which are leb128 encoded
|
||||
// and therefore grow in size when the contract grows. We are not allowed to overshoot
|
||||
// because of the maximum code size that is enforced by `instantiate_with_code`.
|
||||
let expansions = (target_bytes.saturating_sub(63) / 6).saturating_sub(1);
|
||||
const EXPANSION: [Instruction; 4] = [I32Const(0), If(BlockType::NoResult), Return, End];
|
||||
const EXPANSION: [Instruction; 4] = [GetLocal(0), If(BlockType::NoResult), Return, End];
|
||||
let mut module =
|
||||
ModuleDefinition { memory: Some(ImportedMemory::max::<T>()), ..Default::default() };
|
||||
let body = Some(body::repeated(expansions, &EXPANSION));
|
||||
let body = Some(body::repeated_with_locals(
|
||||
&[Local::new(1, ValueType::I32)],
|
||||
expansions,
|
||||
&EXPANSION,
|
||||
));
|
||||
match code_location {
|
||||
Location::Call => module.call_body = body,
|
||||
Location::Deploy => module.deploy_body = body,
|
||||
@@ -373,8 +377,6 @@ pub mod body {
|
||||
/// Insert a I32Const with incrementing value for each insertion.
|
||||
/// (start_at, increment_by)
|
||||
Counter(u32, u32),
|
||||
/// Insert the specified amount of I64Const with a random value.
|
||||
RandomI64Repeated(usize),
|
||||
}
|
||||
|
||||
pub fn plain(instructions: Vec<Instruction>) -> FuncBody {
|
||||
@@ -382,6 +384,14 @@ pub mod body {
|
||||
}
|
||||
|
||||
pub fn repeated(repetitions: u32, instructions: &[Instruction]) -> FuncBody {
|
||||
repeated_with_locals(&[], repetitions, instructions)
|
||||
}
|
||||
|
||||
pub fn repeated_with_locals(
|
||||
locals: &[Local],
|
||||
repetitions: u32,
|
||||
instructions: &[Instruction],
|
||||
) -> FuncBody {
|
||||
let instructions = Instructions::new(
|
||||
instructions
|
||||
.iter()
|
||||
@@ -391,15 +401,23 @@ pub mod body {
|
||||
.chain(sp_std::iter::once(Instruction::End))
|
||||
.collect(),
|
||||
);
|
||||
FuncBody::new(Vec::new(), instructions)
|
||||
FuncBody::new(locals.to_vec(), instructions)
|
||||
}
|
||||
|
||||
pub fn repeated_with_locals_using<const N: usize>(
|
||||
locals: &[Local],
|
||||
repetitions: u32,
|
||||
mut f: impl FnMut() -> [Instruction; N],
|
||||
) -> FuncBody {
|
||||
let mut instructions = Vec::new();
|
||||
for _ in 0..repetitions {
|
||||
instructions.extend(f());
|
||||
}
|
||||
instructions.push(Instruction::End);
|
||||
FuncBody::new(locals.to_vec(), Instructions::new(instructions))
|
||||
}
|
||||
|
||||
pub fn repeated_dyn(repetitions: u32, mut instructions: Vec<DynInstr>) -> FuncBody {
|
||||
use rand::{distributions::Standard, prelude::*};
|
||||
|
||||
// We do not need to be secure here.
|
||||
let mut rng = rand_pcg::Pcg32::seed_from_u64(8446744073709551615);
|
||||
|
||||
// We need to iterate over indices because we cannot cycle over mutable references
|
||||
let body = (0..instructions.len())
|
||||
.cycle()
|
||||
@@ -411,8 +429,6 @@ pub mod body {
|
||||
*offset += *increment_by;
|
||||
vec![Instruction::I32Const(current as i32)]
|
||||
},
|
||||
DynInstr::RandomI64Repeated(num) =>
|
||||
(&mut rng).sample_iter(Standard).take(*num).map(Instruction::I64Const).collect(),
|
||||
})
|
||||
.chain(sp_std::iter::once(Instruction::End))
|
||||
.collect();
|
||||
|
||||
@@ -48,7 +48,7 @@ use pallet_balances;
|
||||
use pallet_contracts_uapi::CallFlags;
|
||||
use sp_runtime::traits::{Bounded, Hash};
|
||||
use sp_std::prelude::*;
|
||||
use wasm_instrument::parity_wasm::elements::{BlockType, Instruction, ValueType};
|
||||
use wasm_instrument::parity_wasm::elements::{BlockType, Instruction, Local, ValueType};
|
||||
|
||||
/// How many runs we do per API benchmark.
|
||||
///
|
||||
@@ -2582,19 +2582,45 @@ benchmarks! {
|
||||
let origin = RawOrigin::Signed(instance.caller.clone());
|
||||
}: call(origin, instance.addr, 0u32.into(), Weight::MAX, None, vec![])
|
||||
|
||||
// We make the assumption that pushing a constant and dropping a value takes roughly
|
||||
// the same amount of time. We call this weight `w_base`.
|
||||
// The weight that would result from the respective benchmark we call: `w_bench`.
|
||||
// We load `i64` values from random linear memory locations and store the loaded
|
||||
// values back into yet another random linear memory location.
|
||||
// The random addresses are uniformly distributed across the entire span of the linear memory.
|
||||
// We do this to enforce random memory accesses which are particularly expensive.
|
||||
//
|
||||
// w_base = w_i{32,64}const = w_drop = w_bench / 2
|
||||
// The combination of this computation is our weight base `w_base`.
|
||||
#[pov_mode = Ignored]
|
||||
instr_i64const {
|
||||
instr_i64_load_store {
|
||||
let r in 0 .. INSTR_BENCHMARK_RUNS;
|
||||
|
||||
use rand::prelude::*;
|
||||
|
||||
// We do not need to be secure here. Fixed seed allows for deterministic results.
|
||||
let mut rng = rand_pcg::Pcg32::seed_from_u64(8446744073709551615);
|
||||
|
||||
let memory = ImportedMemory::max::<T>();
|
||||
let bytes_per_page = 65536;
|
||||
let bytes_per_memory = memory.max_pages * bytes_per_page;
|
||||
let mut sbox = Sandbox::from(&WasmModule::<T>::from(ModuleDefinition {
|
||||
call_body: Some(body::repeated_dyn(r, vec![
|
||||
RandomI64Repeated(1),
|
||||
Regular(Instruction::Drop),
|
||||
])),
|
||||
memory: Some(memory),
|
||||
call_body: Some(body::repeated_with_locals_using(
|
||||
&[Local::new(1, ValueType::I64)],
|
||||
r,
|
||||
|| {
|
||||
// Instruction sequence to load a `i64` from linear memory
|
||||
// at a random memory location and store it back into another
|
||||
// location of the linear memory.
|
||||
let c0: i32 = rng.gen_range(0..bytes_per_memory as i32);
|
||||
let c1: i32 = rng.gen_range(0..bytes_per_memory as i32);
|
||||
[
|
||||
Instruction::I32Const(c0), // address for `i64.load_8s`
|
||||
Instruction::I64Load8S(0, 0),
|
||||
Instruction::SetLocal(0), // temporarily store value loaded in `i64.load_8s`
|
||||
Instruction::I32Const(c1), // address for `i64.store8`
|
||||
Instruction::GetLocal(0), // value to be stored in `i64.store8`
|
||||
Instruction::I64Store8(0, 0),
|
||||
]
|
||||
}
|
||||
)),
|
||||
.. Default::default()
|
||||
}));
|
||||
}: {
|
||||
|
||||
@@ -358,25 +358,12 @@ macro_rules! cost_args {
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! cost_instr_no_params {
|
||||
($name:ident) => {
|
||||
cost_args!($name, 1).ref_time() as u32
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! cost {
|
||||
($name:ident) => {
|
||||
cost_args!($name, 1)
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! cost_instr {
|
||||
($name:ident, $num_params:expr) => {
|
||||
cost_instr_no_params!($name)
|
||||
.saturating_sub((cost_instr_no_params!(instr_i64const) / 2).saturating_mul($num_params))
|
||||
};
|
||||
}
|
||||
|
||||
impl Default for Limits {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
@@ -396,10 +383,13 @@ impl Default for Limits {
|
||||
}
|
||||
|
||||
impl<T: Config> Default for InstructionWeights<T> {
|
||||
/// We price both `i64.const` and `drop` as `instr_i64const / 2`. The reason
|
||||
/// for that is that we cannot benchmark either of them on its own.
|
||||
/// We execute 6 different instructions therefore we have to divide the actual
|
||||
/// computed gas costs by 6 to have a rough estimate as to how expensive each
|
||||
/// single executed instruction is going to be.
|
||||
fn default() -> Self {
|
||||
Self { base: cost_instr!(instr_i64const, 1), _phantom: PhantomData }
|
||||
let instr_cost = cost!(instr_i64_load_store).ref_time() as u32;
|
||||
let base = instr_cost / 6;
|
||||
Self { base, _phantom: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Generated
+624
-616
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user