mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-04-26 02:57:57 +00:00
ec7bfae00a
## Why we need it To provide another level of understanding of why polkadot's subsystems may perform slower than expected. Cache misses occur when processing large amounts of data, such as during availability recovery. ## Why Cachegrind Cachegrind has many drawbacks: it is slow, it uses its own cache simulation, which is very basic. But unlike `perf`, which is a great tool, Cachegrind can run in a virtual machine. This means we can easily run it in remote installations and even use it in CI/CD to catch possible regressions. Why Cachegrind and not Callgrind, another part of Valgrind? It is simply empirically proven that profiling runs faster with Cachegrind. ## First results First results were obtained while testing the approach. Here is an example. ``` $ target/testnet/subsystem-bench --n-cores 10 --cache-misses data-availability-read $ cat cachegrind_report.txt I refs: 64,622,081,485 I1 misses: 3,018,168 LLi misses: 437,654 I1 miss rate: 0.00% LLi miss rate: 0.00% D refs: 12,161,833,115 (9,868,356,364 rd + 2,293,476,751 wr) D1 misses: 167,940,701 ( 71,060,073 rd + 96,880,628 wr) LLd misses: 33,550,018 ( 16,685,853 rd + 16,864,165 wr) D1 miss rate: 1.4% ( 0.7% + 4.2% ) LLd miss rate: 0.3% ( 0.2% + 0.7% ) LL refs: 170,958,869 ( 74,078,241 rd + 96,880,628 wr) LL misses: 33,987,672 ( 17,123,507 rd + 16,864,165 wr) LL miss rate: 0.0% ( 0.0% + 0.7% ) ``` The CLI output shows that 1.4% of the L1 data cache accesses missed, which is not so bad, given that the last-level cache had that data most of the time, missing only 0.3%. The L1 instruction cache misses only 0.00% of the time. Looking at an output file with `cg_annotate` shows that most of the misses occur during reed-solomon, which is expected.
227 lines
6.6 KiB
Rust
227 lines
6.6 KiB
Rust
// Copyright (C) Parity Technologies (UK) Ltd.
|
|
// This file is part of Polkadot.
|
|
|
|
// Polkadot is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
|
|
// Polkadot is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
//! A tool for running subsystem benchmark tests designed for development and
|
|
//! CI regression testing.
|
|
|
|
use clap::Parser;
|
|
use color_eyre::eyre;
|
|
use pyroscope::PyroscopeAgent;
|
|
use pyroscope_pprofrs::{pprof_backend, PprofConfig};
|
|
|
|
use colored::Colorize;
|
|
use std::{path::Path, time::Duration};
|
|
|
|
pub(crate) mod availability;
|
|
pub(crate) mod cli;
|
|
pub(crate) mod core;
|
|
mod valgrind;
|
|
|
|
use availability::{prepare_test, NetworkEmulation, TestState};
|
|
use cli::TestObjective;
|
|
|
|
use core::{
|
|
configuration::TestConfiguration,
|
|
environment::{TestEnvironment, GENESIS_HASH},
|
|
};
|
|
|
|
use clap_num::number_range;
|
|
|
|
use crate::core::display::display_configuration;
|
|
|
|
fn le_100(s: &str) -> Result<usize, String> {
|
|
number_range(s, 0, 100)
|
|
}
|
|
|
|
fn le_5000(s: &str) -> Result<usize, String> {
|
|
number_range(s, 0, 5000)
|
|
}
|
|
|
|
#[derive(Debug, Parser)]
|
|
#[allow(missing_docs)]
|
|
struct BenchCli {
|
|
#[arg(long, value_enum, ignore_case = true, default_value_t = NetworkEmulation::Ideal)]
|
|
/// The type of network to be emulated
|
|
pub network: NetworkEmulation,
|
|
|
|
#[clap(flatten)]
|
|
pub standard_configuration: cli::StandardTestOptions,
|
|
|
|
#[clap(short, long)]
|
|
/// The bandwidth of simulated remote peers in KiB
|
|
pub peer_bandwidth: Option<usize>,
|
|
|
|
#[clap(short, long)]
|
|
/// The bandwidth of our simulated node in KiB
|
|
pub bandwidth: Option<usize>,
|
|
|
|
#[clap(long, value_parser=le_100)]
|
|
/// Simulated conection error ratio [0-100].
|
|
pub peer_error: Option<usize>,
|
|
|
|
#[clap(long, value_parser=le_5000)]
|
|
/// Minimum remote peer latency in milliseconds [0-5000].
|
|
pub peer_min_latency: Option<u64>,
|
|
|
|
#[clap(long, value_parser=le_5000)]
|
|
/// Maximum remote peer latency in milliseconds [0-5000].
|
|
pub peer_max_latency: Option<u64>,
|
|
|
|
#[clap(long, default_value_t = false)]
|
|
/// Enable CPU Profiling with Pyroscope
|
|
pub profile: bool,
|
|
|
|
#[clap(long, requires = "profile", default_value_t = String::from("http://localhost:4040"))]
|
|
/// Pyroscope Server URL
|
|
pub pyroscope_url: String,
|
|
|
|
#[clap(long, requires = "profile", default_value_t = 113)]
|
|
/// Pyroscope Sample Rate
|
|
pub pyroscope_sample_rate: u32,
|
|
|
|
#[clap(long, default_value_t = false)]
|
|
/// Enable Cache Misses Profiling with Valgrind. Linux only, Valgrind must be in the PATH
|
|
pub cache_misses: bool,
|
|
|
|
#[command(subcommand)]
|
|
pub objective: cli::TestObjective,
|
|
}
|
|
|
|
impl BenchCli {
|
|
fn launch(self) -> eyre::Result<()> {
|
|
let is_valgrind_running = valgrind::is_valgrind_running();
|
|
if !is_valgrind_running && self.cache_misses {
|
|
return valgrind::relaunch_in_valgrind_mode()
|
|
}
|
|
|
|
let agent_running = if self.profile {
|
|
let agent = PyroscopeAgent::builder(self.pyroscope_url.as_str(), "subsystem-bench")
|
|
.backend(pprof_backend(PprofConfig::new().sample_rate(self.pyroscope_sample_rate)))
|
|
.build()?;
|
|
|
|
Some(agent.start()?)
|
|
} else {
|
|
None
|
|
};
|
|
|
|
let configuration = self.standard_configuration;
|
|
let mut test_config = match self.objective {
|
|
TestObjective::TestSequence(options) => {
|
|
let test_sequence =
|
|
core::configuration::TestSequence::new_from_file(Path::new(&options.path))
|
|
.expect("File exists")
|
|
.into_vec();
|
|
let num_steps = test_sequence.len();
|
|
gum::info!(
|
|
"{}",
|
|
format!("Sequence contains {} step(s)", num_steps).bright_purple()
|
|
);
|
|
for (index, test_config) in test_sequence.into_iter().enumerate() {
|
|
gum::info!("{}", format!("Step {}/{}", index + 1, num_steps).bright_purple(),);
|
|
display_configuration(&test_config);
|
|
|
|
let mut state = TestState::new(&test_config);
|
|
let (mut env, _protocol_config) = prepare_test(test_config, &mut state);
|
|
env.runtime()
|
|
.block_on(availability::benchmark_availability_read(&mut env, state));
|
|
}
|
|
return Ok(())
|
|
},
|
|
TestObjective::DataAvailabilityRead(ref _options) => match self.network {
|
|
NetworkEmulation::Healthy => TestConfiguration::healthy_network(
|
|
self.objective,
|
|
configuration.num_blocks,
|
|
configuration.n_validators,
|
|
configuration.n_cores,
|
|
configuration.min_pov_size,
|
|
configuration.max_pov_size,
|
|
),
|
|
NetworkEmulation::Degraded => TestConfiguration::degraded_network(
|
|
self.objective,
|
|
configuration.num_blocks,
|
|
configuration.n_validators,
|
|
configuration.n_cores,
|
|
configuration.min_pov_size,
|
|
configuration.max_pov_size,
|
|
),
|
|
NetworkEmulation::Ideal => TestConfiguration::ideal_network(
|
|
self.objective,
|
|
configuration.num_blocks,
|
|
configuration.n_validators,
|
|
configuration.n_cores,
|
|
configuration.min_pov_size,
|
|
configuration.max_pov_size,
|
|
),
|
|
},
|
|
};
|
|
|
|
let mut latency_config = test_config.latency.clone().unwrap_or_default();
|
|
|
|
if let Some(latency) = self.peer_min_latency {
|
|
latency_config.min_latency = Duration::from_millis(latency);
|
|
}
|
|
|
|
if let Some(latency) = self.peer_max_latency {
|
|
latency_config.max_latency = Duration::from_millis(latency);
|
|
}
|
|
|
|
if let Some(error) = self.peer_error {
|
|
test_config.error = error;
|
|
}
|
|
|
|
if let Some(bandwidth) = self.peer_bandwidth {
|
|
// CLI expects bw in KiB
|
|
test_config.peer_bandwidth = bandwidth * 1024;
|
|
}
|
|
|
|
if let Some(bandwidth) = self.bandwidth {
|
|
// CLI expects bw in KiB
|
|
test_config.bandwidth = bandwidth * 1024;
|
|
}
|
|
|
|
display_configuration(&test_config);
|
|
|
|
let mut state = TestState::new(&test_config);
|
|
let (mut env, _protocol_config) = prepare_test(test_config, &mut state);
|
|
|
|
env.runtime()
|
|
.block_on(availability::benchmark_availability_read(&mut env, state));
|
|
|
|
if let Some(agent_running) = agent_running {
|
|
let agent_ready = agent_running.stop()?;
|
|
agent_ready.shutdown();
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
fn main() -> eyre::Result<()> {
|
|
color_eyre::install()?;
|
|
env_logger::builder()
|
|
.filter(Some("hyper"), log::LevelFilter::Info)
|
|
// Avoid `Terminating due to subsystem exit subsystem` warnings
|
|
.filter(Some("polkadot_overseer"), log::LevelFilter::Error)
|
|
.filter(None, log::LevelFilter::Info)
|
|
// .filter(None, log::LevelFilter::Trace)
|
|
.try_init()
|
|
.unwrap();
|
|
|
|
let cli: BenchCli = BenchCli::parse();
|
|
cli.launch()?;
|
|
Ok(())
|
|
}
|