Follow ups for benchmark machine (#11270)

* Follow ups for the MachineCmd Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Fix CI Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Review fixes Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Add to node-template Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Fix test with feature flag Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Review fixes Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Lower disk requirements Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Add ExecutionLimit to the disk benchmarks Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * fmt Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Add doc Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Review fixes Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Rename DISK_WRITE_LIMIT -> DEFAULT_DISK_EXECUTION_LIMIT Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Rename POLKADOT_REFERENCE_HARDWARE -> SUBSTRATE_REFERENCE_HARDWARE Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Fix build profile + add license Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Remove deps Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Set tolerance to 10% Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Fix tests Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Ignore test I cannot reproduce the CI error, even with the full command: cargo test --workspace --locked --release --verbose --features runtime-benchmarks --manifest-path ./bin/node/cli/Cargo.toml I will put an 'ignore' on that test for now, since it works for me and is worth having. Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Remove test Still cannot reproduce the error and it fails in the CI. Removing it now. 
Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> Co-authored-by: Shawn Tabrizi <shawntabrizi@gmail.com>
2026-04-26 11:07:56 +00:00 · 2022-04-26 16:31:26 +02:00
parent 9a3201ef3d
commit 9980d314b1
14 changed files with 512 additions and 42 deletions
@@ -0,0 +1,191 @@
+// This file is part of Substrate.
+
+// Copyright (C) 2022 Parity Technologies (UK) Ltd.
+// SPDX-License-Identifier: Apache-2.0
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Contains types to define hardware requirements.
+
+use lazy_static::lazy_static;
+use serde::{Deserialize, Serialize};
+use std::fmt;
+
+lazy_static! {
+	/// The hardware requirements as measured on reference hardware.
+	///
+	/// These values are provided by Parity, however it is possible
+	/// to use your own requirements if you are running a custom chain.
+	///
+	/// The reference hardware is described here:
+	/// <https://wiki.polkadot.network/docs/maintain-guides-how-to-validate-polkadot>
+	pub static ref SUBSTRATE_REFERENCE_HARDWARE: Requirements = {
+		let raw = include_bytes!("reference_hardware.json").as_slice();
+		serde_json::from_slice(raw).expect("Hardcoded data is known good; qed")
+	};
+}
+
+/// Multiple requirements for the hardware.
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
+pub struct Requirements(pub Vec<Requirement>);
+
+/// A single requirement for the hardware.
+#[derive(Deserialize, Serialize, Debug, Clone, Copy, PartialEq)]
+pub struct Requirement {
+	/// The metric to measure.
+	pub metric: Metric,
+	/// The minimal throughput that needs to be achieved for this requirement.
+	pub minimum: Throughput,
+}
+
+/// A single hardware metric.
+///
+/// The implementation of these is in `sc-sysinfo`.
+#[derive(Deserialize, Serialize, Debug, Clone, Copy, PartialEq)]
+pub enum Metric {
+	/// SR25519 signature verification.
+	Sr25519Verify,
+	/// Blake2-256 hashing algorithm.
+	Blake2256,
+	/// Copying data in RAM.
+	MemCopy,
+	/// Disk sequential write.
+	DiskSeqWrite,
+	/// Disk random write.
+	DiskRndWrite,
+}
+
+/// Throughput as measured in bytes per second.
+#[derive(Deserialize, Serialize, Debug, Clone, Copy, PartialEq)]
+pub enum Throughput {
+	/// KiB/s
+	KiBs(f64),
+	/// MiB/s
+	MiBs(f64),
+	/// GiB/s
+	GiBs(f64),
+}
+
+impl Metric {
+	/// The category of the metric.
+	pub fn category(&self) -> &'static str {
+		match self {
+			Self::Sr25519Verify | Self::Blake2256 => "CPU",
+			Self::MemCopy => "Memory",
+			Self::DiskSeqWrite | Self::DiskRndWrite => "Disk",
+		}
+	}
+
+	/// The name of the metric, without its category. Display it together with [`Self::category()`].
+	pub fn name(&self) -> &'static str {
+		match self {
+			Self::Sr25519Verify => "SR25519-Verify",
+			Self::Blake2256 => "BLAKE2-256",
+			Self::MemCopy => "Copy",
+			Self::DiskSeqWrite => "Seq Write",
+			Self::DiskRndWrite => "Rnd Write",
+		}
+	}
+}
+
+const KIBIBYTE: f64 = 1024.0;
+
+impl Throughput {
+	/// The unit of the metric.
+	pub fn unit(&self) -> &'static str {
+		match self {
+			Self::KiBs(_) => "KiB/s",
+			Self::MiBs(_) => "MiB/s",
+			Self::GiBs(_) => "GiB/s",
+		}
+	}
+
+	/// [`Self`] as number of byte/s.
+	pub fn to_bs(&self) -> f64 {
+		self.to_kibs() * KIBIBYTE
+	}
+
+	/// [`Self`] as number of kibibyte/s.
+	pub fn to_kibs(&self) -> f64 {
+		self.to_mibs() * KIBIBYTE
+	}
+
+	/// [`Self`] as number of mebibyte/s.
+	pub fn to_mibs(&self) -> f64 {
+		self.to_gibs() * KIBIBYTE
+	}
+
+	/// [`Self`] as number of gibibyte/s.
+	pub fn to_gibs(&self) -> f64 {
+		match self {
+			Self::KiBs(k) => *k / (KIBIBYTE * KIBIBYTE),
+			Self::MiBs(m) => *m / KIBIBYTE,
+			Self::GiBs(g) => *g,
+		}
+	}
+
+	/// Normalizes [`Self`] to use the largest unit possible.
+	pub fn normalize(&self) -> Self {
+		let bs = self.to_bs();
+
+		if bs >= KIBIBYTE * KIBIBYTE * KIBIBYTE {
+			Self::GiBs(self.to_gibs())
+		} else if bs >= KIBIBYTE * KIBIBYTE {
+			Self::MiBs(self.to_mibs())
+		} else {
+			Self::KiBs(self.to_kibs())
+		}
+	}
+}
+
+impl fmt::Display for Throughput {
+	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+		let normalized = self.normalize();
+		match normalized {
+			Self::KiBs(s) | Self::MiBs(s) | Self::GiBs(s) =>
+				write!(f, "{:.2?} {}", s, normalized.unit()),
+		}
+	}
+}
+
+#[cfg(test)]
+mod tests {
+	use super::*;
+	use sp_runtime::assert_eq_error_rate;
+
+	/// `SUBSTRATE_REFERENCE_HARDWARE` can be en- and decoded.
+	#[test]
+	fn json_static_data() {
+		let raw = serde_json::to_string(&*SUBSTRATE_REFERENCE_HARDWARE).unwrap();
+		let decoded: Requirements = serde_json::from_str(&raw).unwrap();
+
+		assert_eq!(decoded, SUBSTRATE_REFERENCE_HARDWARE.clone());
+	}
+
+	/// Test the [`Throughput`].
+	#[test]
+	fn throughput_works() {
+		/// Float precision.
+		const EPS: f64 = 0.1;
+		let gib = Throughput::GiBs(14.324);
+
+		assert_eq_error_rate!(14.324, gib.to_gibs(), EPS);
+		assert_eq_error_rate!(14667.776, gib.to_mibs(), EPS);
+		assert_eq_error_rate!(14667.776 * 1024.0, gib.to_kibs(), EPS);
+		assert_eq!("14.32 GiB/s", gib.to_string());
+		assert_eq!("14.32 GiB/s", gib.normalize().to_string());
+
+		let mib = Throughput::MiBs(1029.0);
+		assert_eq!("1.00 GiB/s", mib.to_string());
+	}
+}
@@ -18,6 +18,8 @@
 //! Contains the [`MachineCmd`] as entry point for the node
 //! and the core benchmarking logic.

+pub mod hardware;
+
 use sc_cli::{CliConfiguration, Result, SharedParams};
 use sc_service::Configuration;
 use sc_sysinfo::{
@@ -26,9 +28,12 @@ use sc_sysinfo::{
 };

 use clap::Parser;
-use log::info;
+use log::{error, info, warn};
 use prettytable::{cell, row, table};
-use std::{fmt::Debug, fs, time::Duration};
+use std::{boxed::Box, fmt::Debug, fs, path::Path};
+
+use crate::shared::check_build_profile;
+pub use hardware::{Metric, Requirement, Requirements, Throughput, SUBSTRATE_REFERENCE_HARDWARE};

 /// Command to benchmark the hardware.
 ///
@@ -44,38 +49,174 @@ pub struct MachineCmd {
 	#[clap(flatten)]
 	pub shared_params: SharedParams,

+	/// Do not return an error if any check fails.
+	///
+	/// Should only be used for debugging.
+	#[clap(long)]
+	pub allow_fail: bool,
+
+	/// Set a fault tolerance for passing a requirement.
+	///
+	/// 10% means that the test would pass even when only 90% of the required score was achieved.
+	/// Can be used to mitigate outliers of the benchmarks.
+	#[clap(long, default_value = "10.0", value_name = "PERCENT")]
+	pub tolerance: f64,
+
 	/// Time limit for the verification benchmark.
 	#[clap(long, default_value = "2.0", value_name = "SECONDS")]
 	pub verify_duration: f32,
+
+	/// Time limit for each disk benchmark.
+	#[clap(long, default_value = "5.0", value_name = "SECONDS")]
+	pub disk_duration: f32,
+}
+
+/// Helper for the result of a concrete benchmark.
+struct BenchResult {
+	/// Did the hardware pass the benchmark?
+	passed: bool,
+
+	/// The absolute score that was achieved.
+	score: Throughput,
+
+	/// The score relative to the minimal required score.
+	///
+	/// A value of 1 means exactly the minimum; it exceeds 1 when the hardware is faster.
+	rel_score: f64,
+}
+
+/// Errors that can be returned by this command.
+#[derive(Debug, thiserror::Error)]
+#[allow(missing_docs)]
+pub enum Error {
+	#[error("One of the benchmarks had a score that was lower than its requirement")]
+	UnmetRequirement,
+
+	#[error("The build profile is unfit for benchmarking: {0}")]
+	BadBuildProfile(String),
+
+	#[error("Benchmark results are off by at least factor 100")]
+	BadResults,
 }

 impl MachineCmd {
 	/// Execute the benchmark and print the results.
-	pub fn run(&self, cfg: &Configuration) -> Result<()> {
+	pub fn run(&self, cfg: &Configuration, requirements: Requirements) -> Result<()> {
+		self.validate_args()?;
 		// Ensure that the dir exists since the node is not started to take care of it.
 		let dir = cfg.database.path().ok_or("No DB directory provided")?;
 		fs::create_dir_all(dir)?;

 		info!("Running machine benchmarks...");
-		let write = benchmark_disk_sequential_writes(dir)?;
-		let read = benchmark_disk_random_writes(dir)?;
-		let verify_limit =
-			ExecutionLimit::MaxDuration(Duration::from_secs_f32(self.verify_duration));
-		let verify = benchmark_sr25519_verify(verify_limit) * 1024.0;
+		let mut results = Vec::new();
+		for requirement in &requirements.0 {
+			let result = self.run_benchmark(requirement, &dir)?;
+			results.push(result);
+		}
+		self.print_summary(requirements, results)
+	}

+	/// Benchmarks a specific metric of the hardware and judges the resulting score.
+	fn run_benchmark(&self, requirement: &Requirement, dir: &Path) -> Result<BenchResult> {
+		// Dispatch the concrete function from `sc-sysinfo`.
+		let score = self.measure(&requirement.metric, dir)?;
+		let rel_score = score.to_bs() / requirement.minimum.to_bs();
+
+		// Sanity check if the result is off by factor >100x.
+		if rel_score >= 100.0 || rel_score <= 0.01 {
+			self.check_failed(Error::BadResults)?;
+		}
+		let passed = rel_score >= (1.0 - (self.tolerance / 100.0));
+		Ok(BenchResult { passed, score, rel_score })
+	}
+
+	/// Measures a metric of the hardware.
+	fn measure(&self, metric: &Metric, dir: &Path) -> Result<Throughput> {
+		let verify_limit = ExecutionLimit::from_secs_f32(self.verify_duration);
+		let disk_limit = ExecutionLimit::from_secs_f32(self.disk_duration);
+
+		let score = match metric {
+			Metric::Blake2256 => Throughput::MiBs(benchmark_cpu() as f64),
+			Metric::Sr25519Verify => Throughput::MiBs(benchmark_sr25519_verify(verify_limit)),
+			Metric::MemCopy => Throughput::MiBs(benchmark_memory() as f64),
+			Metric::DiskSeqWrite =>
+				Throughput::MiBs(benchmark_disk_sequential_writes(disk_limit, dir)? as f64),
+			Metric::DiskRndWrite =>
+				Throughput::MiBs(benchmark_disk_random_writes(disk_limit, dir)? as f64),
+		};
+		Ok(score)
+	}
+
+	/// Prints a human-readable summary.
+	fn print_summary(&self, requirements: Requirements, results: Vec<BenchResult>) -> Result<()> {
 		// Use a table for nicer console output.
-		let table = table!(
-			["Category", "Function", "Score", "Unit"],
-			["CPU", "BLAKE2-256", benchmark_cpu(), "MB/s"],
-			["CPU", "SR25519 Verify", format!("{:.1}", verify), "KB/s"],
-			["Memory", "Copy", benchmark_memory(), "MB/s"],
-			["Disk", "Seq Write", write, "MB/s"],
-			["Disk", "Rnd Write", read, "MB/s"]
-		);
+		let mut table = table!(["Category", "Function", "Score", "Minimum", "Result"]);
+		// Count how many passed and how many failed.
+		let (mut passed, mut failed) = (0, 0);
+		for (requirement, result) in requirements.0.iter().zip(results.iter()) {
+			if result.passed {
+				passed += 1
+			} else {
+				failed += 1
+			}

-		info!("\n{}", table);
+			table.add_row(result.to_row(requirement));
+		}
+		// Print the table and a summary.
+		info!(
+			"\n{}\nFrom {} benchmarks in total, {} passed and {} failed ({:.0?}% fault tolerance).",
+			table,
+			passed + failed,
+			passed,
+			failed,
+			self.tolerance
+		);
+		// Print the final result.
+		if failed != 0 {
+			info!("The hardware fails to meet the requirements");
+			self.check_failed(Error::UnmetRequirement)?;
+		} else {
+			info!("The hardware meets the requirements ");
+		}
+		// Check that the results were not created by a bad build profile.
+		if let Err(err) = check_build_profile() {
+			self.check_failed(Error::BadBuildProfile(err))?;
+		}
 		Ok(())
 	}
+
+	/// Returns `Ok` if [`self.allow_fail`] is set and otherwise the error argument.
+	fn check_failed(&self, e: Error) -> Result<()> {
+		if !self.allow_fail {
+			error!("Failing since --allow-fail is not set");
+			Err(sc_cli::Error::Application(Box::new(e)))
+		} else {
+			warn!("Ignoring error since --allow-fail is set: {:?}", e);
+			Ok(())
+		}
+	}
+
+	/// Validates the CLI arguments.
+	fn validate_args(&self) -> Result<()> {
+		if self.tolerance > 100.0 || self.tolerance < 0.0 {
+			return Err("The --tolerance argument is out of range".into())
+		}
+		Ok(())
+	}
+}
+
+impl BenchResult {
+	/// Format [`Self`] as row that can be printed in a table.
+	fn to_row(&self, req: &Requirement) -> prettytable::Row {
+		let passed = if self.passed { "✅ Pass" } else { "❌ Fail" };
+		row![
+			req.metric.category(),
+			req.metric.name(),
+			format!("{}", self.score),
+			format!("{}", req.minimum),
+			format!("{} ({: >5.1?} %)", passed, self.rel_score * 100.0)
+		]
+	}
 }

 // Boilerplate
@@ -0,0 +1,32 @@
+[
+	{
+		"metric": "Blake2256",
+		"minimum": {
+			"MiBs": 1029.0
+		}
+	},
+	{
+		"metric": "Sr25519Verify",
+		"minimum": {
+			"KiBs": 666.0
+		}
+	},
+	{
+		"metric": "MemCopy",
+		"minimum": {
+			"GiBs": 14.323
+		}
+	},
+	{
+		"metric": "DiskSeqWrite",
+		"minimum": {
+			"MiBs": 450.0
+		}
+	},
+	{
+		"metric": "DiskRndWrite",
+		"minimum": {
+			"MiBs": 200.0
+		}
+	}
+]