Compare commits

...

6 Commits

Author SHA1 Message Date
Omar Abdulla 2e58cdfa7f Cleanup benchmarks 2025-10-30 03:53:14 +03:00
Omar Abdulla d2af7f6c2b Fix tests 2025-10-27 03:07:45 +03:00
Omar Abdulla 3dd4299bf1 Fix tests 2025-10-27 03:00:21 +03:00
Omar Abdulla 07e7a62fd3 Add default arguments for tests 2025-10-27 02:25:17 +03:00
Omar Abdulla 65e129654d Increase tx timeout and channel limits 2025-10-26 04:00:00 +03:00
Omar Abdulla b3c8b0368c Require test argument 2025-10-26 02:46:47 +03:00
13 changed files with 307 additions and 23 deletions
Generated
+11
View File
@@ -1920,6 +1920,7 @@ dependencies = [
"anstyle",
"clap_lex",
"strsim",
"terminal_size",
]
[[package]]
@@ -7838,6 +7839,16 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "terminal_size"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45c6481c4829e4cc63825e62c49186a34538b7b2750b73b266581ffb612fb5ed"
dependencies = [
"rustix",
"windows-sys 0.59.0",
]
[[package]]
name = "thiserror"
version = "1.0.69"
+1 -1
View File
@@ -26,7 +26,7 @@ ansi_term = "0.12.1"
anyhow = "1.0"
bson = { version = "2.15.0" }
cacache = { version = "13.1.0" }
clap = { version = "4", features = ["derive"] }
clap = { version = "4", features = ["derive", "wrap_help"] }
dashmap = { version = "6.1.0" }
foundry-compilers-artifacts = { version = "0.18.0" }
futures = { version = "0.3.31" }
+4 -4
View File
@@ -24,7 +24,7 @@ use strum::{AsRefStr, Display, EnumString, IntoStaticStr};
use temp_dir::TempDir;
#[derive(Clone, Debug, Parser, Serialize, Deserialize)]
#[command(name = "retester")]
#[command(name = "retester", term_width = 100)]
pub enum Context {
/// Executes tests in the MatterLabs format differentially on multiple targets concurrently.
Test(Box<TestExecutionContext>),
@@ -510,7 +510,7 @@ pub struct ExportGenesisContext {
impl Default for TestExecutionContext {
fn default() -> Self {
Self::parse_from(["execution-context"])
Self::parse_from(["execution-context", "--test", "."])
}
}
@@ -612,7 +612,7 @@ impl AsRef<IgnoreSuccessConfiguration> for TestExecutionContext {
impl Default for BenchmarkingContext {
fn default() -> Self {
Self::parse_from(["benchmarking-context"])
Self::parse_from(["benchmarking-context", "--test", "."])
}
}
@@ -759,7 +759,7 @@ pub struct CorpusConfiguration {
/// - `{metadata_file_path}::{case_idx}::{mode}`: This is very similar to the above specifier
/// with the exception that in this case the mode is specified and will be used in the test.
#[serde_as(as = "Vec<serde_with::DisplayFromStr>")]
#[arg(short = 't', long = "test")]
#[arg(short = 't', long = "test", required = true)]
pub test_specifiers: Vec<ParsedTestSpecifier>,
}
@@ -112,12 +112,23 @@ impl Watcher {
let all_transactions_submitted = all_transactions_submitted.clone();
let mut blocks_information_stream = self.blocks_stream;
async move {
while let Some(block) = blocks_information_stream.next().await {
while let Some(mut block) = blocks_information_stream.next().await {
// If the block number is equal to or less than the last block before the
// repetition then we ignore it and continue on to the next block.
if block.ethereum_block_information.block_number <= ignore_block_before {
continue;
}
{
let watch_for_transaction_hashes =
watch_for_transaction_hashes.read().await;
for tx_hash in block.ethereum_block_information.transaction_hashes.iter() {
let Some((step_path, _)) = watch_for_transaction_hashes.get(tx_hash)
else {
continue;
};
*block.tx_counts.entry(step_path.clone()).or_default() += 1
}
}
reporter
.report_block_mined_event(block.clone())
.expect("Can't fail");
@@ -189,7 +200,6 @@ pub enum WatcherEvent {
/// streaming the blocks.
ignore_block_before: BlockNumber,
},
/// Informs the watcher that a transaction was submitted and that the watcher should watch for a
/// transaction with this hash in the blocks that it watches.
SubmittedTransaction {
@@ -198,7 +208,6 @@ pub enum WatcherEvent {
/// The step path of the step that the transaction belongs to.
step_path: StepPath,
},
/// Informs the watcher that all of the transactions of this benchmark have been submitted and
/// that it can expect to receive no further transaction hashes and not even watch the channel
/// any longer.
@@ -330,15 +330,18 @@ async fn start_cli_reporting_task(output_format: OutputFormat, reporter: Reporte
.unwrap();
writeln!(buf).unwrap();
buf = tokio::task::spawn_blocking(move || {
buf.flush().unwrap();
buf
})
.await
.unwrap();
if aggregator_events_rx.is_empty() {
buf = tokio::task::spawn_blocking(move || {
buf.flush().unwrap();
buf
})
.await
.unwrap();
}
}
}
}
info!("Aggregator Broadcast Channel Closed");
// Summary at the end.
match output_format {
@@ -540,6 +540,7 @@ impl EthereumNode for GethNode {
.to_vec(),
},
substrate_block_information: None,
tx_counts: Default::default(),
})
});
@@ -771,6 +771,7 @@ impl EthereumNode for LighthouseGethNode {
.to_vec(),
},
substrate_block_information: None,
tx_counts: Default::default(),
})
});
@@ -578,6 +578,7 @@ impl EthereumNode for SubstrateNode {
proof_size: block_proof_size,
max_proof_size,
}),
tx_counts: Default::default(),
})
}
});
@@ -210,6 +210,7 @@ impl ZombienetNode {
.with_args(vec![
("--pool-limit", u32::MAX.to_string().as_str()).into(),
("--pool-kbytes", u32::MAX.to_string().as_str()).into(),
("--dev-block-time", 12000u16.to_string().as_str()).into(),
])
})
})
@@ -599,6 +600,7 @@ impl EthereumNode for ZombienetNode {
proof_size: block_proof_size,
max_proof_size,
}),
tx_counts: Default::default(),
})
}
});
+1 -1
View File
@@ -104,7 +104,7 @@ where
};
debug!(%tx_hash, "Submitted Transaction");
pending_transaction.set_timeout(Some(Duration::from_secs(120)));
pending_transaction.set_timeout(Some(Duration::from_secs(240)));
let tx_hash = pending_transaction.watch().await.context(format!(
"Transaction inclusion watching timeout for {tx_hash}"
))?;
+8 -8
View File
@@ -41,7 +41,7 @@ pub struct ReportAggregator {
impl ReportAggregator {
pub fn new(context: Context) -> Self {
let (runner_tx, runner_rx) = unbounded_channel::<RunnerEvent>();
let (listener_tx, _) = channel::<ReporterEvent>(1024);
let (listener_tx, _) = channel::<ReporterEvent>(0xFFFF);
Self {
report: Report::new(context),
remaining_cases: Default::default(),
@@ -64,7 +64,7 @@ impl ReportAggregator {
debug!("Starting to aggregate report");
while let Some(event) = self.runner_rx.recv().await {
debug!(?event, "Received Event");
debug!(event = event.variant_name(), "Received Event");
match event {
RunnerEvent::SubscribeToEvents(event) => {
self.handle_subscribe_to_events_event(*event);
@@ -412,8 +412,8 @@ impl ReportAggregator {
{
block_information.sort_by(|a, b| {
a.ethereum_block_information
.block_timestamp
.cmp(&b.ethereum_block_information.block_timestamp)
.block_number
.cmp(&b.ethereum_block_information.block_number)
});
// Computing the TPS.
@@ -466,7 +466,6 @@ impl ReportAggregator {
.filter_map(|block| block.ref_time_block_fullness_percentage())
.map(|v| v as u64)
.collect::<Vec<_>>();
dbg!(&reftime_block_fullness);
if !reftime_block_fullness.is_empty() {
report
.metrics
@@ -482,7 +481,6 @@ impl ReportAggregator {
.filter_map(|block| block.proof_size_block_fullness_percentage())
.map(|v| v as u64)
.collect::<Vec<_>>();
dbg!(&proof_size_block_fullness);
if !proof_size_block_fullness.is_empty() {
report
.metrics
@@ -803,8 +801,9 @@ where
pub fn with_list(
&mut self,
platform_identifier: PlatformIdentifier,
mut list: Vec<T>,
original_list: Vec<T>,
) -> &mut Self {
let mut list = original_list.clone();
list.sort();
let Some(min) = list.first().copied() else {
return self;
@@ -842,7 +841,7 @@ where
.insert(platform_identifier, median);
self.raw
.get_or_insert_default()
.insert(platform_identifier, list);
.insert(platform_identifier, original_list);
self
}
@@ -883,6 +882,7 @@ pub struct ContractInformation {
pub struct MinedBlockInformation {
pub ethereum_block_information: EthereumMinedBlockInformation,
pub substrate_block_information: Option<SubstrateMinedBlockInformation>,
pub tx_counts: BTreeMap<StepPath, usize>,
}
impl MinedBlockInformation {
+10
View File
@@ -347,6 +347,16 @@ macro_rules! define_event {
),*
}
impl $ident {
pub fn variant_name(&self) -> &'static str {
match self {
$(
Self::$variant_ident { .. } => stringify!($variant_ident)
),*
}
}
}
$(
#[derive(Debug)]
$(#[$variant_meta])*
+246
View File
@@ -0,0 +1,246 @@
"""
Utilities to print benchmark metrics from a report JSON into CSV.
Usage:
python scripts/print_benchmark_metrics_csv.py /absolute/path/to/report.json
The script prints, for each metadata path, case index, and mode combination,
CSV rows aligned to mined blocks with the following columns:
- block_number
- number_of_txs
- tps (transaction_per_second)
- gps (gas_per_second)
- gas_block_fullness
- ref_time (if available)
- max_ref_time (if available)
- proof_size (if available)
- max_proof_size (if available)
- ref_time_block_fullness (if available)
- proof_size_block_fullness (if available)
Important nuance: TPS and GPS arrays have (number_of_blocks - 1) items. The
first block row has no TPS/GPS; the CSV leaves those cells empty for the first
row and aligns subsequent values to their corresponding next block.
"""
from __future__ import annotations
import json
import sys
import csv
from typing import List, Mapping, TypedDict
class EthereumMinedBlockInformation(TypedDict):
"""EVM block information extracted from the report.
Attributes:
block_number: The block height.
block_timestamp: The UNIX timestamp of the block.
mined_gas: Total gas used (mined) in the block.
block_gas_limit: The gas limit of the block.
transaction_hashes: List of transaction hashes included in the block.
"""
block_number: int
block_timestamp: int
mined_gas: int
block_gas_limit: int
transaction_hashes: List[str]
class SubstrateMinedBlockInformation(TypedDict):
"""Substrate-specific block resource usage fields.
Attributes:
ref_time: The consumed ref time in the block.
max_ref_time: The maximum ref time allowed for the block.
proof_size: The consumed proof size in the block.
max_proof_size: The maximum proof size allowed for the block.
"""
ref_time: int
max_ref_time: int
proof_size: int
max_proof_size: int
class MinedBlockInformation(TypedDict):
"""Block-level information for a mined block with both EVM and optional Substrate fields."""
ethereum_block_information: EthereumMinedBlockInformation
substrate_block_information: SubstrateMinedBlockInformation
class Metric(TypedDict):
"""Metric data of integer values keyed by platform identifier.
Attributes:
minimum: Single scalar minimum per platform.
maximum: Single scalar maximum per platform.
mean: Single scalar mean per platform.
median: Single scalar median per platform.
raw: Time-series (or list) of values per platform.
"""
minimum: Mapping[str, int]
maximum: Mapping[str, int]
mean: Mapping[str, int]
median: Mapping[str, int]
raw: Mapping[str, List[int]]
class Metrics(TypedDict):
"""All metrics that may be present for a given execution report.
Note that some metrics are optional and present only for specific platforms
or execution modes.
"""
transaction_per_second: Metric
gas_per_second: Metric
gas_block_fullness: Metric
ref_time_block_fullness: Metric
proof_size_block_fullness: Metric
class ExecutionReport(TypedDict):
"""Execution report for a mode containing mined blocks and metrics.
Attributes:
mined_block_information: Mapping from platform identifier to the list of
mined blocks observed for that platform.
metrics: The computed metrics for the execution.
"""
mined_block_information: Mapping[str, List[MinedBlockInformation]]
metrics: Metrics
class CaseReport(TypedDict):
"""Report for a single case, keyed by mode string."""
mode_execution_reports: Mapping[str, ExecutionReport]
class MetadataFileReport(TypedDict):
"""Report subtree keyed by case indices for a metadata file path."""
case_reports: Mapping[str, CaseReport]
class ReportRoot(TypedDict):
"""Top-level report schema with execution information keyed by metadata path."""
execution_information: Mapping[str, MetadataFileReport]
BlockInformation = TypedDict(
"BlockInformation",
{
"Block Number": int,
"Timestamp": int,
"Datetime": None,
"Transaction Count": int,
"TPS": int | None,
"GPS": int | None,
"Ref Time": int,
"Max Ref Time": int,
"Block Fullness Ref Time": int,
"Proof Size": int,
"Max Proof Size": int,
"Block Fullness Proof Size": int,
},
)
"""A typed dictionary used to hold all of the block information"""
def load_report(path: str) -> ReportRoot:
"""Load the report JSON from disk.
Args:
path: Absolute or relative filesystem path to the JSON report file.
Returns:
The parsed report as a typed dictionary structure.
"""
with open(path, "r", encoding="utf-8") as f:
data: ReportRoot = json.load(f)
return data
def main() -> None:
report_path: str = sys.argv[1]
report: ReportRoot = load_report(report_path)
# TODO: Remove this in the future, but for now, the target is fixed.
target: str = "revive-dev-node-revm-solc"
csv_writer = csv.writer(sys.stdout)
for _, metadata_file_report in report["execution_information"].items():
for _, case_report in metadata_file_report["case_reports"].items():
for _, execution_report in case_report["mode_execution_reports"].items():
blocks_information: list[MinedBlockInformation] = execution_report[
"mined_block_information"
][target]
resolved_blocks: list[BlockInformation] = []
for i, block_information in enumerate(blocks_information):
resolved_blocks.append(
{
"Block Number": block_information[
"ethereum_block_information"
]["block_number"],
"Timestamp": block_information[
"ethereum_block_information"
]["block_timestamp"],
"Datetime": None,
"Transaction Count": len(
block_information["ethereum_block_information"][
"transaction_hashes"
]
),
"TPS": (
None
if i == 0
else execution_report["metrics"][
"transaction_per_second"
]["raw"][target][i - 1]
),
"GPS": (
None
if i == 0
else execution_report["metrics"]["gas_per_second"][
"raw"
][target][i - 1]
),
"Ref Time": block_information[
"substrate_block_information"
]["ref_time"],
"Max Ref Time": block_information[
"substrate_block_information"
]["max_ref_time"],
"Block Fullness Ref Time": execution_report["metrics"][
"ref_time_block_fullness"
]["raw"][target][i],
"Proof Size": block_information[
"substrate_block_information"
]["proof_size"],
"Max Proof Size": block_information[
"substrate_block_information"
]["max_proof_size"],
"Block Fullness Proof Size": execution_report["metrics"][
"proof_size_block_fullness"
]["raw"][target][i],
}
)
csv_writer = csv.DictWriter(sys.stdout, resolved_blocks[0].keys())
csv_writer.writeheader()
csv_writer.writerows(resolved_blocks)
if __name__ == "__main__":
main()