From dca142398e00786429d69eb8b9e4454499962b2a Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe <49718502+alexggh@users.noreply.github.com> Date: Fri, 3 Nov 2023 15:26:40 +0200 Subject: [PATCH] substrate: sysinfo: Expose failed hardware requirements (#2144) The check_hardware functions does not give us too much information as to what is failing, so let's return the list of failed metrics, so that callers can print it. This would make debugging easier, rather than try to guess which dimension is actually failing. Signed-off-by: Alexandru Gheorghe --- Cargo.lock | 1 + .../parachain-template/node/src/service.rs | 10 ++- cumulus/polkadot-parachain/src/service.rs | 7 +- polkadot/node/service/src/lib.rs | 12 ++-- substrate/bin/node/cli/src/service.rs | 12 ++-- substrate/client/sysinfo/Cargo.toml | 1 + substrate/client/sysinfo/src/sysinfo.rs | 66 +++++++++++++++++-- 7 files changed, 89 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bfe54bf8ee..7d9e4bf725 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -15980,6 +15980,7 @@ dependencies = [ name = "sc-sysinfo" version = "6.0.0-dev" dependencies = [ + "derive_more", "futures", "libc", "log", diff --git a/cumulus/parachain-template/node/src/service.rs b/cumulus/parachain-template/node/src/service.rs index 84dcd6dd1b..43d16ee0d5 100644 --- a/cumulus/parachain-template/node/src/service.rs +++ b/cumulus/parachain-template/node/src/service.rs @@ -255,10 +255,14 @@ async fn start_node_impl( // Here you can check whether the hardware meets your chains' requirements. Putting a link // in there and swapping out the requirements for your own are probably a good idea. The // requirements for a para-chain are dictated by its relay-chain. - if !SUBSTRATE_REFERENCE_HARDWARE.check_hardware(&hwbench) && validator { - log::warn!( - "⚠️ The hardware does not meet the minimal requirements for role 'Authority'." + match SUBSTRATE_REFERENCE_HARDWARE.check_hardware(&hwbench) { + Err(err) if validator => { + log::warn!( + "⚠️ The hardware does not meet the minimal requirements {} for role 'Authority'.", + err ); + }, + _ => {}, } if let Some(ref mut telemetry) = telemetry { diff --git a/cumulus/polkadot-parachain/src/service.rs b/cumulus/polkadot-parachain/src/service.rs index 438d09a4c7..3bcc9b7f60 100644 --- a/cumulus/polkadot-parachain/src/service.rs +++ b/cumulus/polkadot-parachain/src/service.rs @@ -1955,10 +1955,11 @@ pub async fn start_contracts_rococo_node( fn warn_if_slow_hardware(hwbench: &sc_sysinfo::HwBench) { // Polkadot para-chains should generally use these requirements to ensure that the relay-chain // will not take longer than expected to import its blocks. - if !frame_benchmarking_cli::SUBSTRATE_REFERENCE_HARDWARE.check_hardware(hwbench) { + if let Err(err) = frame_benchmarking_cli::SUBSTRATE_REFERENCE_HARDWARE.check_hardware(hwbench) { log::warn!( - "⚠️ The hardware does not meet the minimal requirements for role 'Authority' find out more at:\n\ - https://wiki.polkadot.network/docs/maintain-guides-how-to-validate-polkadot#reference-hardware" + "⚠️ The hardware does not meet the minimal requirements {} for role 'Authority' find out more at:\n\ + https://wiki.polkadot.network/docs/maintain-guides-how-to-validate-polkadot#reference-hardware", + err ); } } diff --git a/polkadot/node/service/src/lib.rs b/polkadot/node/service/src/lib.rs index ced89c3843..0ed7940b3e 100644 --- a/polkadot/node/service/src/lib.rs +++ b/polkadot/node/service/src/lib.rs @@ -965,11 +965,15 @@ pub fn new_full( if let Some(hwbench) = hwbench { sc_sysinfo::print_hwbench(&hwbench); - if !SUBSTRATE_REFERENCE_HARDWARE.check_hardware(&hwbench) && role.is_authority() { - log::warn!( - "⚠️ The hardware does not meet the minimal requirements for role 'Authority' find out more at:\n\ - https://wiki.polkadot.network/docs/maintain-guides-how-to-validate-polkadot#reference-hardware" + match SUBSTRATE_REFERENCE_HARDWARE.check_hardware(&hwbench) { + Err(err) if role.is_authority() => { + log::warn!( + "⚠️ The hardware does not meet the minimal requirements {} for role 'Authority' find out more at:\n\ + https://wiki.polkadot.network/docs/maintain-guides-how-to-validate-polkadot#reference-hardware", + err ); + }, + _ => {}, } if let Some(ref mut telemetry) = telemetry { diff --git a/substrate/bin/node/cli/src/service.rs b/substrate/bin/node/cli/src/service.rs index 5a85f4cde0..153dda5c0a 100644 --- a/substrate/bin/node/cli/src/service.rs +++ b/substrate/bin/node/cli/src/service.rs @@ -449,10 +449,14 @@ pub fn new_full_base( if let Some(hwbench) = hwbench { sc_sysinfo::print_hwbench(&hwbench); - if !SUBSTRATE_REFERENCE_HARDWARE.check_hardware(&hwbench) && role.is_authority() { - log::warn!( - "⚠️ The hardware does not meet the minimal requirements for role 'Authority'." - ); + match SUBSTRATE_REFERENCE_HARDWARE.check_hardware(&hwbench) { + Err(err) if role.is_authority() => { + log::warn!( + "⚠️ The hardware does not meet the minimal requirements {} for role 'Authority'.", + err + ); + }, + _ => {}, } if let Some(ref mut telemetry) = telemetry { diff --git a/substrate/client/sysinfo/Cargo.toml b/substrate/client/sysinfo/Cargo.toml index fdf987ed45..86f4d217dd 100644 --- a/substrate/client/sysinfo/Cargo.toml +++ b/substrate/client/sysinfo/Cargo.toml @@ -19,6 +19,7 @@ libc = "0.2" log = "0.4.17" rand = "0.8.5" rand_pcg = "0.3.1" +derive_more = "0.99" regex = "1" serde = { version = "1.0.188", features = ["derive"] } serde_json = "1.0.107" diff --git a/substrate/client/sysinfo/src/sysinfo.rs b/substrate/client/sysinfo/src/sysinfo.rs index 41161fc685..bef87a83e4 100644 --- a/substrate/client/sysinfo/src/sysinfo.rs +++ b/substrate/client/sysinfo/src/sysinfo.rs @@ -23,9 +23,11 @@ use sp_core::{sr25519, Pair}; use sp_io::crypto::sr25519_verify; use sp_std::{fmt, fmt::Formatter, prelude::*}; +use derive_more::From; use rand::{seq::SliceRandom, Rng, RngCore}; use serde::{de::Visitor, Deserialize, Deserializer, Serialize, Serializer}; use std::{ + fmt::Display, fs::File, io::{Seek, SeekFrom, Write}, ops::{Deref, DerefMut}, @@ -48,6 +50,37 @@ pub enum Metric { DiskRndWrite, } +/// Describes a checking failure for the hardware requirements. +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct CheckFailure { + /// The metric that failed the check. + pub metric: Metric, + /// The expected minimum value. + pub expected: Throughput, + /// The measured value. + pub found: Throughput, +} + +/// A list of metrics that failed to meet the minimum hardware requirements. +#[derive(Debug, Clone, PartialEq, From)] +pub struct CheckFailures(pub Vec); + +impl Display for CheckFailures { + fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { + write!(formatter, "Failed checks: ")?; + for failure in &self.0 { + write!( + formatter, + "{}(expected: {}, found: {}), ", + failure.metric.name(), + failure.expected, + failure.found + )? + } + Ok(()) + } +} + impl Metric { /// The category of the metric. pub fn category(&self) -> &'static str { @@ -626,33 +659,54 @@ pub fn gather_hwbench(scratch_directory: Option<&Path>) -> HwBench { impl Requirements { /// Whether the hardware requirements are met by the provided benchmark results. - pub fn check_hardware(&self, hwbench: &HwBench) -> bool { + pub fn check_hardware(&self, hwbench: &HwBench) -> Result<(), CheckFailures> { + let mut failures = Vec::new(); for requirement in self.0.iter() { match requirement.metric { Metric::Blake2256 => if requirement.minimum > hwbench.cpu_hashrate_score { - return false + failures.push(CheckFailure { + metric: requirement.metric, + expected: requirement.minimum, + found: hwbench.cpu_hashrate_score, + }); }, Metric::MemCopy => if requirement.minimum > hwbench.memory_memcpy_score { - return false + failures.push(CheckFailure { + metric: requirement.metric, + expected: requirement.minimum, + found: hwbench.memory_memcpy_score, + }); }, Metric::DiskSeqWrite => if let Some(score) = hwbench.disk_sequential_write_score { if requirement.minimum > score { - return false + failures.push(CheckFailure { + metric: requirement.metric, + expected: requirement.minimum, + found: score, + }); } }, Metric::DiskRndWrite => if let Some(score) = hwbench.disk_random_write_score { if requirement.minimum > score { - return false + failures.push(CheckFailure { + metric: requirement.metric, + expected: requirement.minimum, + found: score, + }); } }, Metric::Sr25519Verify => {}, } } - true + if failures.is_empty() { + Ok(()) + } else { + Err(failures.into()) + } } }