Refactor Benchmarks for Less Wasm Memory Usage (#9373)

* extract repeat out of benchmark

* remove r

* unused

* cargo run --quiet --release --features=runtime-benchmarks --manifest-path=bin/node/cli/Cargo.toml -- benchmark --chain=dev --steps=50 --repeat=20 --pallet=pallet_balances --extrinsic=* --execution=wasm --wasm-execution=compiled --heap-pages=4096 --output=./frame/balances/src/weights.rs --template=./.maintain/frame-weight-template.hbs

* cargo run --quiet --release --features=runtime-benchmarks --manifest-path=bin/node/cli/Cargo.toml -- benchmark --chain=dev --steps=50 --repeat=20 --pallet=pallet_balances --extrinsic=* --execution=wasm --wasm-execution=compiled --heap-pages=4096 --output=./frame/balances/src/weights.rs --template=./.maintain/frame-weight-template.hbs

* cargo run --quiet --release --features=runtime-benchmarks --manifest-path=bin/node/cli/Cargo.toml -- benchmark --chain=dev --steps=50 --repeat=20 --pallet=pallet_staking --extrinsic=* --execution=wasm --wasm-execution=compiled --heap-pages=4096 --output=./frame/staking/src/weights.rs --template=./.maintain/frame-weight-template.hbs

* use linked map to keep order

* cargo run --quiet --release --features=runtime-benchmarks --manifest-path=bin/node/cli/Cargo.toml -- benchmark --chain=dev --steps=50 --repeat=20 --pallet=pallet_balances --extrinsic=* --execution=wasm --wasm-execution=compiled --heap-pages=4096 --output=./frame/balances/src/weights.rs --template=./.maintain/frame-weight-template.hbs

* Delete pallet_balances.rs

* Delete out

* cargo run --quiet --release --features=runtime-benchmarks --manifest-path=bin/node/cli/Cargo.toml -- benchmark --chain=dev --steps=50 --repeat=20 --pallet=pallet_staking --extrinsic=* --execution=wasm --wasm-execution=compiled --heap-pages=4096 --output=./frame/staking/src/weights.rs --template=./.maintain/frame-weight-template.hbs

* steps and repeat to tuple (current_*, total_*)

* idea for list command

* fmt

* use benchmark list in cli

* handle steps in cli

* move log update to cli

* fmt

* remove old todo

* line width

* cargo run --quiet --release --features=runtime-benchmarks --manifest-path=bin/node/cli/Cargo.toml -- benchmark --chain=dev --steps=50 --repeat=20 --pallet=pallet_balances --extrinsic=* --execution=wasm --wasm-execution=compiled --heap-pages=4096 --output=./frame/balances/src/weights.rs --template=./.maintain/frame-weight-template.hbs

* benchmark metadata function

* don't need this warm up

* cargo run --quiet --release --features=runtime-benchmarks --manifest-path=bin/node/cli/Cargo.toml -- benchmark --chain=dev --steps=50 --repeat=20 --pallet=pallet_balances --extrinsic=* --execution=wasm --wasm-execution=compiled --heap-pages=4096 --output=./frame/balances/src/weights.rs --template=./.maintain/frame-weight-template.hbs

* fix warnings

* fix node-template

* fix

* fmt

* line width

* cargo run --quiet --release --features=runtime-benchmarks --manifest-path=bin/node/cli/Cargo.toml -- benchmark --chain=dev --steps=50 --repeat=20 --pallet=pallet_staking --extrinsic=* --execution=wasm --wasm-execution=compiled --heap-pages=4096 --output=./frame/staking/src/weights.rs --template=./.maintain/frame-weight-template.hbs

* improve docs

* improve cli

* fix format

* fix bug?

* Revert "fix bug?"

This reverts commit 8051bf1bf9bae862ff28dfff386e7045cd3f045e.

* skip frame-metadata

* extract repeat out of benchmark

* remove r

* cargo run --quiet --release --features=runtime-benchmarks --manifest-path=bin/node/cli/Cargo.toml -- benchmark --chain=dev --steps=50 --repeat=20 --pallet=pallet_balances --extrinsic=* --execution=wasm --wasm-execution=compiled --heap-pages=4096 --output=./frame/balances/src/weights.rs --template=./.maintain/frame-weight-template.hbs

* cargo run --quiet --release --features=runtime-benchmarks --manifest-path=bin/node/cli/Cargo.toml -- benchmark --chain=dev --steps=50 --repeat=20 --pallet=pallet_balances --extrinsic=* --execution=wasm --wasm-execution=compiled --heap-pages=4096 --output=./frame/balances/src/weights.rs --template=./.maintain/frame-weight-template.hbs

* cargo run --quiet --release --features=runtime-benchmarks --manifest-path=bin/node/cli/Cargo.toml -- benchmark --chain=dev --steps=50 --repeat=20 --pallet=pallet_staking --extrinsic=* --execution=wasm --wasm-execution=compiled --heap-pages=4096 --output=./frame/staking/src/weights.rs --template=./.maintain/frame-weight-template.hbs

* use linked map to keep order

* cargo run --quiet --release --features=runtime-benchmarks --manifest-path=bin/node/cli/Cargo.toml -- benchmark --chain=dev --steps=50 --repeat=20 --pallet=pallet_balances --extrinsic=* --execution=wasm --wasm-execution=compiled --heap-pages=4096 --output=./frame/balances/src/weights.rs --template=./.maintain/frame-weight-template.hbs

* Delete pallet_balances.rs

* Delete out

* cargo run --quiet --release --features=runtime-benchmarks --manifest-path=bin/node/cli/Cargo.toml -- benchmark --chain=dev --steps=50 --repeat=20 --pallet=pallet_staking --extrinsic=* --execution=wasm --wasm-execution=compiled --heap-pages=4096 --output=./frame/staking/src/weights.rs --template=./.maintain/frame-weight-template.hbs

* steps and repeat to tuple (current_*, total_*)

* idea for list command

* fmt

* use benchmark list in cli

* handle steps in cli

* move log update to cli

* remove old todo

* line width

* cargo run --quiet --release --features=runtime-benchmarks --manifest-path=bin/node/cli/Cargo.toml -- benchmark --chain=dev --steps=50 --repeat=20 --pallet=pallet_balances --extrinsic=* --execution=wasm --wasm-execution=compiled --heap-pages=4096 --output=./frame/balances/src/weights.rs --template=./.maintain/frame-weight-template.hbs

* benchmark metadata function

* don't need this warm up

* cargo run --quiet --release --features=runtime-benchmarks --manifest-path=bin/node/cli/Cargo.toml -- benchmark --chain=dev --steps=50 --repeat=20 --pallet=pallet_balances --extrinsic=* --execution=wasm --wasm-execution=compiled --heap-pages=4096 --output=./frame/balances/src/weights.rs --template=./.maintain/frame-weight-template.hbs

* fix warnings

* fix node-template

* fix

* fmt

* line width

* cargo run --quiet --release --features=runtime-benchmarks --manifest-path=bin/node/cli/Cargo.toml -- benchmark --chain=dev --steps=50 --repeat=20 --pallet=pallet_staking --extrinsic=* --execution=wasm --wasm-execution=compiled --heap-pages=4096 --output=./frame/staking/src/weights.rs --template=./.maintain/frame-weight-template.hbs

* improve docs

* improve cli

* fix format

* fix bug?

* Revert "fix bug?"

This reverts commit 8051bf1bf9bae862ff28dfff386e7045cd3f045e.

* skip frame-metadata

* Update .gitlab-ci.yml

* fix import

* Update .gitlab-ci.yml

Co-authored-by: Parity Benchmarking Bot <admin@parity.io>
This commit is contained in:
Shawn Tabrizi
2021-08-01 20:13:58 +02:00
committed by GitHub
parent df59596ec0
commit fa8c6274ac
12 changed files with 1003 additions and 483 deletions
+180 -203
View File
@@ -711,8 +711,8 @@ macro_rules! impl_benchmark {
extrinsic: &[u8],
lowest_range_values: &[u32],
highest_range_values: &[u32],
steps: &[u32],
repeat: u32,
steps: (u32, u32),
_repeat: (u32, u32),
whitelist: &[$crate::TrackedStorageKey],
verify: bool,
) -> Result<$crate::Vec<$crate::BenchmarkResults>, &'static str> {
@@ -724,9 +724,6 @@ macro_rules! impl_benchmark {
_ => return Err("Could not find extrinsic."),
};
let mut results: $crate::Vec<$crate::BenchmarkResults> = $crate::Vec::new();
if repeat == 0 {
return Ok(results);
}
// Add whitelist to DB including whitelisted caller
let mut whitelist = whitelist.to_vec();
@@ -737,141 +734,110 @@ macro_rules! impl_benchmark {
whitelist.push(whitelisted_caller_key.into());
$crate::benchmarking::set_whitelist(whitelist);
// Warm up the DB
$crate::benchmarking::commit_db();
$crate::benchmarking::wipe_db();
let components = <
SelectedBenchmark as $crate::BenchmarkingSetup<T $(, $instance)?>
>::components(&selected_benchmark);
let mut progress = $crate::benchmarking::current_time();
// Default number of steps for a component.
let mut prev_steps = 10;
let mut repeat_benchmark = |
repeat: u32,
let do_benchmark = |
c: &[($crate::BenchmarkParameter, u32)],
results: &mut $crate::Vec<$crate::BenchmarkResults>,
verify: bool,
step: u32,
num_steps: u32,
| -> Result<(), &'static str> {
// Run the benchmark `repeat` times.
for r in 0..repeat {
// Set up the externalities environment for the setup we want to
// benchmark.
let closure_to_benchmark = <
SelectedBenchmark as $crate::BenchmarkingSetup<T $(, $instance)?>
>::instance(&selected_benchmark, c, verify)?;
// Set up the externalities environment for the setup we want to
// benchmark.
let closure_to_benchmark = <
SelectedBenchmark as $crate::BenchmarkingSetup<T $(, $instance)?>
>::instance(&selected_benchmark, c, verify)?;
// Set the block number to at least 1 so events are deposited.
if $crate::Zero::is_zero(&frame_system::Pallet::<T>::block_number()) {
frame_system::Pallet::<T>::set_block_number(1u32.into());
}
// Commit the externalities to the database, flushing the DB cache.
// This will enable worst case scenario for reading from the database.
$crate::benchmarking::commit_db();
// Reset the read/write counter so we don't count operations in the setup process.
$crate::benchmarking::reset_read_write_count();
if verify {
closure_to_benchmark()?;
} else {
// Time the extrinsic logic.
$crate::log::trace!(
target: "benchmark",
"Start Benchmark: {:?}", c
);
let start_pov = $crate::benchmarking::proof_size();
let start_extrinsic = $crate::benchmarking::current_time();
closure_to_benchmark()?;
let finish_extrinsic = $crate::benchmarking::current_time();
let end_pov = $crate::benchmarking::proof_size();
// Calculate the diff caused by the benchmark.
let elapsed_extrinsic = finish_extrinsic.saturating_sub(start_extrinsic);
let diff_pov = match (start_pov, end_pov) {
(Some(start), Some(end)) => end.saturating_sub(start),
_ => Default::default(),
};
// Commit the changes to get proper write count
$crate::benchmarking::commit_db();
$crate::log::trace!(
target: "benchmark",
"End Benchmark: {} ns", elapsed_extrinsic
);
let read_write_count = $crate::benchmarking::read_write_count();
$crate::log::trace!(
target: "benchmark",
"Read/Write Count {:?}", read_write_count
);
let time = $crate::benchmarking::current_time();
if time.saturating_sub(progress) > 5000000000 {
progress = $crate::benchmarking::current_time();
$crate::log::info!(
target: "benchmark",
"Benchmarking {} {}/{}, run {}/{}",
extrinsic,
step,
num_steps,
r,
repeat,
);
}
// Time the storage root recalculation.
let start_storage_root = $crate::benchmarking::current_time();
$crate::storage_root();
let finish_storage_root = $crate::benchmarking::current_time();
let elapsed_storage_root = finish_storage_root - start_storage_root;
// TODO: Fix memory allocation issue then re-enable
// let read_and_written_keys = $crate::benchmarking::get_read_and_written_keys();
let read_and_written_keys = Default::default();
results.push($crate::BenchmarkResults {
components: c.to_vec(),
extrinsic_time: elapsed_extrinsic,
storage_root_time: elapsed_storage_root,
reads: read_write_count.0,
repeat_reads: read_write_count.1,
writes: read_write_count.2,
repeat_writes: read_write_count.3,
proof_size: diff_pov,
keys: read_and_written_keys,
});
}
// Wipe the DB back to the genesis state.
$crate::benchmarking::wipe_db();
// Set the block number to at least 1 so events are deposited.
if $crate::Zero::is_zero(&frame_system::Pallet::<T>::block_number()) {
frame_system::Pallet::<T>::set_block_number(1u32.into());
}
// Commit the externalities to the database, flushing the DB cache.
// This will enable worst case scenario for reading from the database.
$crate::benchmarking::commit_db();
// Reset the read/write counter so we don't count operations in the setup process.
$crate::benchmarking::reset_read_write_count();
if verify {
closure_to_benchmark()?;
} else {
// Time the extrinsic logic.
$crate::log::trace!(
target: "benchmark",
"Start Benchmark: {:?}", c
);
let start_pov = $crate::benchmarking::proof_size();
let start_extrinsic = $crate::benchmarking::current_time();
closure_to_benchmark()?;
let finish_extrinsic = $crate::benchmarking::current_time();
let end_pov = $crate::benchmarking::proof_size();
// Calculate the diff caused by the benchmark.
let elapsed_extrinsic = finish_extrinsic.saturating_sub(start_extrinsic);
let diff_pov = match (start_pov, end_pov) {
(Some(start), Some(end)) => end.saturating_sub(start),
_ => Default::default(),
};
// Commit the changes to get proper write count
$crate::benchmarking::commit_db();
$crate::log::trace!(
target: "benchmark",
"End Benchmark: {} ns", elapsed_extrinsic
);
let read_write_count = $crate::benchmarking::read_write_count();
$crate::log::trace!(
target: "benchmark",
"Read/Write Count {:?}", read_write_count
);
// Time the storage root recalculation.
let start_storage_root = $crate::benchmarking::current_time();
$crate::storage_root();
let finish_storage_root = $crate::benchmarking::current_time();
let elapsed_storage_root = finish_storage_root - start_storage_root;
let read_and_written_keys = $crate::benchmarking::get_read_and_written_keys();
results.push($crate::BenchmarkResults {
components: c.to_vec(),
extrinsic_time: elapsed_extrinsic,
storage_root_time: elapsed_storage_root,
reads: read_write_count.0,
repeat_reads: read_write_count.1,
writes: read_write_count.2,
repeat_writes: read_write_count.3,
proof_size: diff_pov,
keys: read_and_written_keys,
});
}
// Wipe the DB back to the genesis state.
$crate::benchmarking::wipe_db();
Ok(())
};
let (current_step, total_steps) = steps;
if components.is_empty() {
if verify {
// If `--verify` is used, run the benchmark once to verify it would complete.
repeat_benchmark(1, Default::default(), &mut $crate::Vec::new(), true, 1, 1)?;
// The CLI could ask to do more steps than is sensible, so we skip those.
if current_step == 0 {
if verify {
// If `--verify` is used, run the benchmark once to verify it would complete.
do_benchmark(Default::default(), &mut $crate::Vec::new(), true)?;
}
do_benchmark(Default::default(), &mut results, false)?;
}
repeat_benchmark(repeat, Default::default(), &mut results, false, 1, 1)?;
} else {
// Select the component we will be benchmarking. Each component will be benchmarked.
for (idx, (name, low, high)) in components.iter().enumerate() {
// Get the number of steps for this component.
let steps = steps.get(idx).cloned().unwrap_or(prev_steps);
prev_steps = steps;
// Skip this loop if steps is zero
if steps == 0 { continue }
let lowest = lowest_range_values.get(idx).cloned().unwrap_or(*low);
let highest = highest_range_values.get(idx).cloned().unwrap_or(*high);
@@ -879,31 +845,34 @@ macro_rules! impl_benchmark {
let diff = highest - lowest;
// Create up to `STEPS` steps for that component between high and low.
let step_size = (diff / steps).max(1);
let step_size = (diff / total_steps).max(1);
let num_of_steps = diff / step_size + 1;
for s in 0..num_of_steps {
// This is the value we will be testing for component `name`
let component_value = lowest + step_size * s;
// Select the max value for all the other components.
let c: $crate::Vec<($crate::BenchmarkParameter, u32)> = components.iter()
.enumerate()
.map(|(idx, (n, _, h))|
if n == name {
(*n, component_value)
} else {
(*n, *highest_range_values.get(idx).unwrap_or(h))
}
)
.collect();
if verify {
// If `--verify` is used, run the benchmark once to verify it would complete.
repeat_benchmark(1, &c, &mut $crate::Vec::new(), true, s, num_of_steps)?;
}
repeat_benchmark(repeat, &c, &mut results, false, s, num_of_steps)?;
// The CLI could ask to do more steps than is sensible, so we just skip those.
if current_step >= num_of_steps {
continue;
}
// This is the value we will be testing for component `name`
let component_value = lowest + step_size * current_step;
// Select the max value for all the other components.
let c: $crate::Vec<($crate::BenchmarkParameter, u32)> = components.iter()
.enumerate()
.map(|(idx, (n, _, h))|
if n == name {
(*n, component_value)
} else {
(*n, *highest_range_values.get(idx).unwrap_or(h))
}
)
.collect();
if verify {
// If `--verify` is used, run the benchmark once to verify it would complete.
do_benchmark(&c, &mut $crate::Vec::new(), true)?;
}
do_benchmark(&c, &mut results, false)?;
}
}
return Ok(results);
@@ -1253,8 +1222,8 @@ pub fn show_benchmark_debug_info(
benchmark: &[u8],
lowest_range_values: &sp_std::prelude::Vec<u32>,
highest_range_values: &sp_std::prelude::Vec<u32>,
steps: &sp_std::prelude::Vec<u32>,
repeat: &u32,
steps: &(u32, u32),
repeat: &(u32, u32),
verify: &bool,
error_message: &str,
) -> sp_runtime::RuntimeString {
@@ -1273,8 +1242,8 @@ pub fn show_benchmark_debug_info(
.expect("it's all just strings ran through the wasm interface. qed"),
lowest_range_values,
highest_range_values,
steps,
repeat,
steps.1,
repeat.1,
verify,
error_message,
)
@@ -1359,62 +1328,70 @@ macro_rules! add_benchmark {
verify,
extra,
} = config;
if &pallet[..] == &name_string[..] || &pallet[..] == &b"*"[..] {
if &pallet[..] == &b"*"[..] || &benchmark[..] == &b"*"[..] {
for benchmark in $( $location )*::benchmarks(*extra).into_iter() {
$batches.push($crate::BenchmarkBatch {
pallet: name_string.to_vec(),
instance: instance_string.to_vec(),
benchmark: benchmark.to_vec(),
results: $( $location )*::run_benchmark(
benchmark,
&lowest_range_values[..],
&highest_range_values[..],
&steps[..],
*repeat,
whitelist,
*verify,
).map_err(|e| {
$crate::show_benchmark_debug_info(
instance_string,
benchmark,
lowest_range_values,
highest_range_values,
steps,
repeat,
verify,
e,
)
})?,
});
}
} else {
$batches.push($crate::BenchmarkBatch {
pallet: name_string.to_vec(),
instance: instance_string.to_vec(),
benchmark: benchmark.clone(),
results: $( $location )*::run_benchmark(
&benchmark[..],
&lowest_range_values[..],
&highest_range_values[..],
&steps[..],
*repeat,
whitelist,
*verify,
).map_err(|e| {
$crate::show_benchmark_debug_info(
instance_string,
benchmark,
lowest_range_values,
highest_range_values,
steps,
repeat,
verify,
e,
)
})?,
});
}
if &pallet[..] == &name_string[..] {
$batches.push($crate::BenchmarkBatch {
pallet: name_string.to_vec(),
instance: instance_string.to_vec(),
benchmark: benchmark.clone(),
results: $( $location )*::run_benchmark(
&benchmark[..],
&lowest_range_values[..],
&highest_range_values[..],
*steps,
*repeat,
whitelist,
*verify,
).map_err(|e| {
$crate::show_benchmark_debug_info(
instance_string,
benchmark,
lowest_range_values,
highest_range_values,
steps,
repeat,
verify,
e,
)
})?
});
}
)
}
/// This macro allows users to easily generate a list of benchmarks for the pallets configured
/// in the runtime.
///
/// To use this macro, first create an object to store the list:
///
/// ```ignore
/// let mut list = Vec::<BenchmarkList>::new();
/// ```
///
/// Then pass this `list` to the macro, along with the `extra` boolean, the pallet crate, and
/// pallet struct:
///
/// ```ignore
/// list_benchmark!(list, extra, pallet_balances, Balances);
/// list_benchmark!(list, extra, pallet_session, SessionBench::<Runtime>);
/// list_benchmark!(list, extra, frame_system, SystemBench::<Runtime>);
/// ```
///
/// Each invocation pushes exactly one `BenchmarkList` onto `list`:
/// - `pallet` is the stringified pallet crate path (`$name`),
/// - `instance` is the stringified pallet struct tokens (`$location`),
/// - `benchmarks` is an owned copy of every name yielded by `<pallet struct>::benchmarks(extra)`.
///
/// The expansion refers to `BenchmarkList` and `Vec` by their bare names, so both must be in
/// scope at the call site.
///
/// This should match what exists with the `add_benchmark!` macro.
#[macro_export]
macro_rules! list_benchmark {
    ( $list:ident, $extra:ident, $name:path, $( $location:tt )* ) => (
        let pallet_string = stringify!($name).as_bytes();
        let instance_string = stringify!( $( $location )* ).as_bytes();
        // Collect directly into the owned `Vec<Vec<u8>>` stored in `BenchmarkList`; the vector
        // is moved into the struct below, so no second deep copy is made.
        let benchmarks = $( $location )*::benchmarks($extra)
            .iter()
            .map(|b| b.to_vec())
            .collect::<Vec<_>>();
        let pallet_benchmarks = BenchmarkList {
            pallet: pallet_string.to_vec(),
            instance: instance_string.to_vec(),
            benchmarks,
        };
        $list.push(pallet_benchmarks)
    )
}
+32 -11
View File
@@ -103,22 +103,41 @@ pub struct BenchmarkConfig {
pub lowest_range_values: Vec<u32>,
/// An optional manual override to the highest values used in the `steps` range.
pub highest_range_values: Vec<u32>,
/// The number of samples to take across the range of values for components.
pub steps: Vec<u32>,
/// The number of times to repeat a benchmark.
pub repeat: u32,
/// The number of samples to take across the range of values for components. (current_step,
/// total_steps)
pub steps: (u32, u32),
/// The number of times to repeat each benchmark to increase accuracy of results.
/// (current_repeat, total_repeat)
pub repeat: (u32, u32),
/// Enable an extra benchmark iteration which runs the verification logic for a benchmark.
pub verify: bool,
/// Enable benchmarking of "extra" extrinsics, i.e. those that are not directly used in a pallet.
/// Enable benchmarking of "extra" extrinsics, i.e. those that are not directly used in a
/// pallet.
pub extra: bool,
}
/// A list of benchmarks available for a particular pallet and instance.
///
/// All `Vec<u8>` must be valid utf8 strings.
#[derive(Encode, Decode, Default, Clone, PartialEq, Debug)]
pub struct BenchmarkList {
/// The pallet these benchmarks belong to. `list_benchmark!` fills this with the stringified
/// pallet crate path.
pub pallet: Vec<u8>,
/// The pallet instance these benchmarks belong to. `list_benchmark!` fills this with the
/// stringified pallet struct tokens.
pub instance: Vec<u8>,
/// The name of each available benchmark, one entry per benchmark.
pub benchmarks: Vec<Vec<u8>>,
}
sp_api::decl_runtime_apis! {
/// Runtime api for benchmarking a FRAME runtime.
pub trait Benchmark {
/// Get the benchmark metadata available for this runtime.
///
/// Parameters
/// - `extra`: Also list benchmarks marked "extra" which would otherwise not be
/// needed for weight calculation.
fn benchmark_metadata(extra: bool) -> (Vec<BenchmarkList>, Vec<StorageInfo>);
/// Dispatch the given benchmark.
fn dispatch_benchmark(config: BenchmarkConfig)
-> Result<(Vec<BenchmarkBatch>, Vec<StorageInfo>), sp_runtime::RuntimeString>;
fn dispatch_benchmark(config: BenchmarkConfig) -> Result<Vec<BenchmarkBatch>, sp_runtime::RuntimeString>;
}
}
@@ -216,16 +235,18 @@ pub trait Benchmarking<T> {
/// Parameters
/// - `name`: The name of extrinsic function or benchmark you want to benchmark encoded as
/// bytes.
/// - `steps`: The number of sample points you want to take across the range of parameters.
/// - `lowest_range_values`: The lowest number for each range of parameters.
/// - `highest_range_values`: The highest number for each range of parameters.
/// - `repeat`: The number of times you want to repeat a benchmark.
/// - `steps`: The number of sample points you want to take across the range of parameters.
/// (current_step, total_steps)
/// - `repeat`: The total number of times to repeat each benchmark to increase accuracy of
/// results. (current_repeat, total_repeats)
fn run_benchmark(
name: &[u8],
lowest_range_values: &[u32],
highest_range_values: &[u32],
steps: &[u32],
repeat: u32,
steps: (u32, u32),
repeat: (u32, u32),
whitelist: &[TrackedStorageKey],
verify: bool,
) -> Result<Vec<T>, &'static str>;