Follow ups for benchmark machine (#11270)

* Follow ups for the MachineCmd

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Fix CI

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Review fixes

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Add to node-template

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Fix test with feature flag

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Review fixes

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Lower disk requirements

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Add ExecutionLimit to the disk benchmarks

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* fmt

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Add doc

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Review fixes

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Rename DISK_WRITE_LIMIT -> DEFAULT_DISK_EXECUTION_LIMIT

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Rename POLKADOT_REFERENCE_HARDWARE -> SUBSTRATE_REFERENCE_HARDWARE

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Fix build profile + add license

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Remove deps

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Set tolerance to 10%

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Fix tests

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Ignore test

I cannot reproduce the CI error, even with the full command:
cargo test --workspace --locked --release --verbose --features runtime-benchmarks --manifest-path ./bin/node/cli/Cargo.toml

I will put an 'ignore' on that test for now, since it works for me and is worth having.

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

* Remove test

Still cannot reproduce the error and it fails in the CI.
Removing it now.

Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io>

Co-authored-by: Shawn Tabrizi <shawntabrizi@gmail.com>
This commit is contained in:
Oliver Tale-Yazdi
2022-04-26 16:31:26 +02:00
committed by GitHub
parent 9a3201ef3d
commit 9980d314b1
14 changed files with 512 additions and 42 deletions
@@ -0,0 +1,191 @@
// This file is part of Substrate.
// Copyright (C) 2022 Parity Technologies (UK) Ltd.
// SPDX-License-Identifier: Apache-2.0
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Contains types to define hardware requirements.
use lazy_static::lazy_static;
use serde::{Deserialize, Serialize};
use std::fmt;
lazy_static! {
/// The hardware requirements as measured on reference hardware.
///
/// These values are provided by Parity, however it is possible
/// to use your own requirements if you are running a custom chain.
///
/// The reference hardware is described here:
/// <https://wiki.polkadot.network/docs/maintain-guides-how-to-validate-polkadot>
pub static ref SUBSTRATE_REFERENCE_HARDWARE: Requirements = {
// The JSON file is embedded into the binary at compile time and decoded into
// `Requirements`; a decoding failure panics with the `expect` message below.
let raw = include_bytes!("reference_hardware.json").as_slice();
serde_json::from_slice(raw).expect("Hardcoded data is known good; qed")
};
}
/// Multiple requirements for the hardware.
///
/// Thin wrapper around the list of individual [`Requirement`]s.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Requirements(pub Vec<Requirement>);
/// A single requirement for the hardware.
///
/// Pairs a [`Metric`] with the lowest [`Throughput`] that is still acceptable for it.
#[derive(Deserialize, Serialize, Debug, Clone, Copy, PartialEq)]
pub struct Requirement {
/// The metric to measure.
pub metric: Metric,
/// The minimal throughput that needs to be achieved for this requirement.
pub minimum: Throughput,
}
/// A single hardware metric.
///
/// The implementation of these is in `sc-sysinfo`.
///
/// Every metric belongs to a category (see `Metric::category`) and has a
/// display name (see `Metric::name`).
#[derive(Deserialize, Serialize, Debug, Clone, Copy, PartialEq)]
pub enum Metric {
/// SR25519 signature verification.
Sr25519Verify,
/// Blake2-256 hashing algorithm.
Blake2256,
/// Copying data in RAM.
MemCopy,
/// Disk sequential write.
DiskSeqWrite,
/// Disk random write.
DiskRndWrite,
}
/// A data throughput, expressed in one of three binary units per second.
///
/// Each variant carries the value in its own unit; neighbouring units differ by a
/// factor of 1024.
#[derive(Deserialize, Serialize, Debug, Clone, Copy, PartialEq)]
pub enum Throughput {
/// Kibibytes per second (KiB/s).
KiBs(f64),
/// Mebibytes per second (MiB/s).
MiBs(f64),
/// Gibibytes per second (GiB/s).
GiBs(f64),
}
impl Metric {
/// The category of the metric.
pub fn category(&self) -> &'static str {
match self {
Self::Sr25519Verify | Self::Blake2256 => "CPU",
Self::MemCopy => "Memory",
Self::DiskSeqWrite | Self::DiskRndWrite => "Disk",
}
}
/// The name of the metric. It is always prefixed by the [`self::category()`].
pub fn name(&self) -> &'static str {
match self {
Self::Sr25519Verify => "SR25519-Verify",
Self::Blake2256 => "BLAKE2-256",
Self::MemCopy => "Copy",
Self::DiskSeqWrite => "Seq Write",
Self::DiskRndWrite => "Rnd Write",
}
}
}
const KIBIBYTE: f64 = 1024.0;
impl Throughput {
/// The unit of the metric.
pub fn unit(&self) -> &'static str {
match self {
Self::KiBs(_) => "KiB/s",
Self::MiBs(_) => "MiB/s",
Self::GiBs(_) => "GiB/s",
}
}
/// [`Self`] as number of byte/s.
pub fn to_bs(&self) -> f64 {
self.to_kibs() * KIBIBYTE
}
/// [`Self`] as number of kibibyte/s.
pub fn to_kibs(&self) -> f64 {
self.to_mibs() * KIBIBYTE
}
/// [`Self`] as number of mebibyte/s.
pub fn to_mibs(&self) -> f64 {
self.to_gibs() * KIBIBYTE
}
/// [`Self`] as number of gibibyte/s.
pub fn to_gibs(&self) -> f64 {
match self {
Self::KiBs(k) => *k / (KIBIBYTE * KIBIBYTE),
Self::MiBs(m) => *m / KIBIBYTE,
Self::GiBs(g) => *g,
}
}
/// Normalizes [`Self`] to use the larges unit possible.
pub fn normalize(&self) -> Self {
let bs = self.to_bs();
if bs >= KIBIBYTE * KIBIBYTE * KIBIBYTE {
Self::GiBs(self.to_gibs())
} else if bs >= KIBIBYTE * KIBIBYTE {
Self::MiBs(self.to_mibs())
} else {
Self::KiBs(self.to_kibs())
}
}
}
impl fmt::Display for Throughput {
    /// Formats the normalized value with two decimals followed by its unit,
    /// e.g. `14.32 GiB/s`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let scaled = self.normalize();
        // Every variant wraps a single `f64`, so the pattern is irrefutable.
        let (Self::KiBs(value) | Self::MiBs(value) | Self::GiBs(value)) = scaled;
        write!(f, "{:.2?} {}", value, scaled.unit())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use sp_runtime::assert_eq_error_rate;

    /// Encoding `SUBSTRATE_REFERENCE_HARDWARE` to JSON and decoding it again yields the
    /// same requirements.
    #[test]
    fn json_static_data() {
        let encoded = serde_json::to_string(&*SUBSTRATE_REFERENCE_HARDWARE).unwrap();
        let round_tripped = serde_json::from_str::<Requirements>(&encoded).unwrap();

        assert_eq!(round_tripped, SUBSTRATE_REFERENCE_HARDWARE.clone());
    }

    /// Checks the unit conversions and the string formatting of [`Throughput`].
    #[test]
    fn throughput_works() {
        /// Float precision.
        const EPS: f64 = 0.1;

        let gib = Throughput::GiBs(14.324);
        // Unit conversions.
        assert_eq_error_rate!(14.324, gib.to_gibs(), EPS);
        assert_eq_error_rate!(14667.776, gib.to_mibs(), EPS);
        assert_eq_error_rate!(14667.776 * 1024.0, gib.to_kibs(), EPS);
        // Formatting normalizes implicitly, so normalizing first changes nothing.
        assert_eq!("14.32 GiB/s", gib.to_string());
        assert_eq!("14.32 GiB/s", gib.normalize().to_string());

        // A value above 1024 MiB/s is printed in GiB/s.
        let mib = Throughput::MiBs(1029.0);
        assert_eq!("1.00 GiB/s", mib.to_string());
    }
}
@@ -18,6 +18,8 @@
//! Contains the [`MachineCmd`] as entry point for the node
//! and the core benchmarking logic.
pub mod hardware;
use sc_cli::{CliConfiguration, Result, SharedParams};
use sc_service::Configuration;
use sc_sysinfo::{
@@ -26,9 +28,12 @@ use sc_sysinfo::{
};
use clap::Parser;
use log::info;
use log::{error, info, warn};
use prettytable::{cell, row, table};
use std::{fmt::Debug, fs, time::Duration};
use std::{boxed::Box, fmt::Debug, fs, path::Path};
use crate::shared::check_build_profile;
pub use hardware::{Metric, Requirement, Requirements, Throughput, SUBSTRATE_REFERENCE_HARDWARE};
/// Command to benchmark the hardware.
///
@@ -44,38 +49,174 @@ pub struct MachineCmd {
#[clap(flatten)]
pub shared_params: SharedParams,
/// Do not return an error if any check fails.
///
/// Should only be used for debugging.
#[clap(long)]
pub allow_fail: bool,
/// Set a fault tolerance for passing a requirement.
///
/// 10% means that the test would pass even when only 90% score was achieved.
/// Can be used to mitigate outliers of the benchmarks.
#[clap(long, default_value = "10.0", value_name = "PERCENT")]
pub tolerance: f64,
/// Time limit for the verification benchmark.
#[clap(long, default_value = "2.0", value_name = "SECONDS")]
pub verify_duration: f32,
/// Time limit for each disk benchmark.
#[clap(long, default_value = "5.0", value_name = "SECONDS")]
pub disk_duration: f32,
}
/// Helper for the result of a concrete benchmark.
struct BenchResult {
/// Did the hardware pass the benchmark?
passed: bool,
/// The absolute score that was achieved.
score: Throughput,
/// The score relative to the minimal required score.
///
/// Computed as `score / minimum`: `1.0` means the minimum was met exactly and
/// larger values mean the hardware exceeded it, so the value is not capped at `1.0`.
rel_score: f64,
}
/// Errors that can be returned by this command.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
// Reported by `print_summary` when at least one benchmark did not pass.
#[error("One of the benchmarks had a score that was lower than its requirement")]
UnmetRequirement,
// Reported by `print_summary` when `check_build_profile` returns an error; carries its message.
#[error("The build profile is unfit for benchmarking: {0}")]
BadBuildProfile(String),
// Reported by `run_benchmark` when the relative score is >= 100 or <= 0.01.
#[error("Benchmark results are off by at least factor 100")]
BadResults,
}
impl MachineCmd {
/// Execute the benchmark and print the results.
///
/// Validates the CLI arguments, makes sure the database directory exists and then
/// runs one benchmark per entry of `requirements` before printing the summary.
///
/// # Errors
///
/// Fails if the arguments are invalid, no database directory is configured, the
/// directory cannot be created, or a benchmark or the summary reports an error.
pub fn run(&self, cfg: &Configuration) -> Result<()> {
pub fn run(&self, cfg: &Configuration, requirements: Requirements) -> Result<()> {
self.validate_args()?;
// Ensure that the dir exists since the node is not started to take care of it.
let dir = cfg.database.path().ok_or("No DB directory provided")?;
fs::create_dir_all(dir)?;
info!("Running machine benchmarks...");
let write = benchmark_disk_sequential_writes(dir)?;
let read = benchmark_disk_random_writes(dir)?;
let verify_limit =
ExecutionLimit::MaxDuration(Duration::from_secs_f32(self.verify_duration));
let verify = benchmark_sr25519_verify(verify_limit) * 1024.0;
let mut results = Vec::new();
for requirement in &requirements.0 {
let result = self.run_benchmark(requirement, &dir)?;
results.push(result);
}
self.print_summary(requirements, results)
}
/// Measures the metric of one requirement and judges the score against its minimum.
///
/// The benchmark counts as passed when the score reaches at least
/// `100% - tolerance` of the required minimum.
///
/// # Errors
///
/// Propagates errors from the measurement itself. A score that is off by a factor of
/// 100 or more in either direction is reported as [`Error::BadResults`], which is only
/// fatal when `--allow-fail` is not set.
fn run_benchmark(&self, requirement: &Requirement, dir: &Path) -> Result<BenchResult> {
    // Dispatch the concrete function from `sc-sysinfo`.
    let score = self.measure(&requirement.metric, dir)?;
    let minimum = requirement.minimum;
    let rel_score = score.to_bs() / minimum.to_bs();

    // Sanity check if the result is off by factor >100x.
    let implausible = rel_score >= 100.0 || rel_score <= 0.01;
    if implausible {
        self.check_failed(Error::BadResults)?;
    }

    // Lowest relative score that still counts as a pass.
    let pass_threshold = 1.0 - (self.tolerance / 100.0);
    Ok(BenchResult { passed: rel_score >= pass_threshold, score, rel_score })
}
/// Runs the `sc-sysinfo` benchmark that belongs to `metric`.
///
/// The raw result of every benchmark is wrapped as [`Throughput::MiBs`]. `dir` is only
/// handed to the disk benchmarks.
///
/// # Errors
///
/// Propagates the error of a failing disk benchmark.
fn measure(&self, metric: &Metric, dir: &Path) -> Result<Throughput> {
    // Both limits are built up front, independent of which metric is requested.
    let verify_limit = ExecutionLimit::from_secs_f32(self.verify_duration);
    let disk_limit = ExecutionLimit::from_secs_f32(self.disk_duration);

    let mibs: f64 = match metric {
        Metric::Sr25519Verify => benchmark_sr25519_verify(verify_limit),
        Metric::Blake2256 => benchmark_cpu() as f64,
        Metric::MemCopy => benchmark_memory() as f64,
        Metric::DiskSeqWrite => benchmark_disk_sequential_writes(disk_limit, dir)? as f64,
        Metric::DiskRndWrite => benchmark_disk_random_writes(disk_limit, dir)? as f64,
    };

    Ok(Throughput::MiBs(mibs))
}
/// Prints a human-readable summary.
///
/// Adds one table row per requirement/result pair, logs the table together with the
/// pass/fail counts and the configured tolerance, and then logs the overall verdict.
///
/// # Errors
///
/// A failed benchmark and a rejected build profile are both routed through
/// `check_failed`, so they only become an `Err` when `--allow-fail` is not set.
fn print_summary(&self, requirements: Requirements, results: Vec<BenchResult>) -> Result<()> {
// Use a table for nicer console output.
let table = table!(
["Category", "Function", "Score", "Unit"],
["CPU", "BLAKE2-256", benchmark_cpu(), "MB/s"],
["CPU", "SR25519 Verify", format!("{:.1}", verify), "KB/s"],
["Memory", "Copy", benchmark_memory(), "MB/s"],
["Disk", "Seq Write", write, "MB/s"],
["Disk", "Rnd Write", read, "MB/s"]
);
let mut table = table!(["Category", "Function", "Score", "Minimum", "Result"]);
// Count how many passed and how many failed.
let (mut passed, mut failed) = (0, 0);
for (requirement, result) in requirements.0.iter().zip(results.iter()) {
if result.passed {
passed += 1
} else {
failed += 1
}
info!("\n{}", table);
table.add_row(result.to_row(requirement));
}
// Print the table and a summary.
info!(
"\n{}\nFrom {} benchmarks in total, {} passed and {} failed ({:.0?}% fault tolerance).",
table,
passed + failed,
passed,
failed,
self.tolerance
);
// Print the final result.
if failed != 0 {
info!("The hardware fails to meet the requirements");
self.check_failed(Error::UnmetRequirement)?;
} else {
info!("The hardware meets the requirements ");
}
// Check that the results were not created by a bad build profile.
if let Err(err) = check_build_profile() {
self.check_failed(Error::BadBuildProfile(err))?;
}
Ok(())
}
/// Decides whether a failed check is fatal.
///
/// With `--allow-fail` set the error is only logged as a warning and `Ok` is returned;
/// otherwise the error is wrapped into an application error and returned.
fn check_failed(&self, e: Error) -> Result<()> {
    if self.allow_fail {
        warn!("Ignoring error since --allow-fail is set: {:?}", e);
        return Ok(())
    }

    error!("Failing since --allow-fail is not set");
    Err(sc_cli::Error::Application(Box::new(e)))
}
/// Validates the CLI arguments.
///
/// # Errors
///
/// Fails if `--tolerance` is not a number within `0..=100`. This includes `NaN`, which
/// would otherwise slip through a pair of `<` / `>` comparisons and make every
/// benchmark fail, since no score compares `>=` to a `NaN` threshold.
fn validate_args(&self) -> Result<()> {
    // `contains` is false for NaN, so non-numeric input is rejected as well.
    if !(0.0..=100.0).contains(&self.tolerance) {
        return Err("The --tolerance argument is out of range".into())
    }
    Ok(())
}
}
impl BenchResult {
    /// Renders the result as a table row.
    ///
    /// The columns are: category, metric name, achieved score, required minimum and the
    /// verdict together with the relative score in percent.
    fn to_row(&self, req: &Requirement) -> prettytable::Row {
        let verdict = if self.passed { "✅ Pass" } else { "❌ Fail" };
        let percent = self.rel_score * 100.0;
        let metric = &req.metric;

        row![
            metric.category(),
            metric.name(),
            self.score.to_string(),
            req.minimum.to_string(),
            format!("{} ({: >5.1?} %)", verdict, percent)
        ]
    }
}
// Boilerplate
@@ -0,0 +1,32 @@
[
{
"metric": "Blake2256",
"minimum": {
"MiBs": 1029.0
}
},
{
"metric": "Sr25519Verify",
"minimum": {
"KiBs": 666.0
}
},
{
"metric": "MemCopy",
"minimum": {
"GiBs": 14.323
}
},
{
"metric": "DiskSeqWrite",
"minimum": {
"MiBs": 450.0
}
},
{
"metric": "DiskRndWrite",
"minimum": {
"MiBs": 200.0
}
}
]