ml-runner init

This commit is contained in:
pgherveou
2025-10-07 16:10:43 +00:00
parent 6da3172581
commit 6e64f678ee
12 changed files with 762 additions and 3 deletions
Generated
+21
View File
@@ -4526,6 +4526,27 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "ml-test-runner"
version = "0.1.0"
dependencies = [
"alloy",
"anyhow",
"clap",
"revive-dt-common",
"revive-dt-compiler",
"revive-dt-config",
"revive-dt-core",
"revive-dt-format",
"revive-dt-node",
"revive-dt-node-interaction",
"revive-dt-report",
"temp-dir",
"tokio",
"tracing",
"tracing-subscriber",
]
[[package]]
name = "moka"
version = "0.12.10"
@@ -10,7 +10,7 @@ use std::{
use anyhow::Context as _;
use futures::{FutureExt, StreamExt};
use revive_dt_common::types::PrivateKeyAllocator;
use revive_dt_core::Platform;
use crate::Platform;
use tokio::sync::{Mutex, RwLock, Semaphore};
use tracing::{Instrument, error, info, info_span, instrument};
+1 -1
View File
@@ -11,7 +11,7 @@ use std::{
use futures::FutureExt;
use revive_dt_common::{iterators::FilesWithExtensionIterator, types::CompilerIdentifier};
use revive_dt_compiler::{Compiler, CompilerOutput, Mode, SolidityCompiler};
use revive_dt_core::Platform;
use crate::Platform;
use revive_dt_format::metadata::{ContractIdent, ContractInstance, Metadata};
use alloy::{hex::ToHexExt, json_abi::JsonAbi, primitives::Address};
+1 -1
View File
@@ -4,7 +4,7 @@ use std::sync::atomic::{AtomicUsize, Ordering};
use anyhow::Context as _;
use revive_dt_config::*;
use revive_dt_core::Platform;
use crate::Platform;
use revive_dt_node_interaction::EthereumNode;
/// The node pool starts one or more [Node] which then can be accessed
+6
View File
@@ -3,6 +3,9 @@
//! This crate defines the testing configuration and
//! provides a helper utility to execute tests.
pub mod helpers;
pub mod differential_tests;
use std::{
pin::Pin,
thread::{self, JoinHandle},
@@ -21,6 +24,9 @@ use revive_dt_node::{
use revive_dt_node_interaction::EthereumNode;
use tracing::info;
// Re-export helper types
pub use helpers::CachedCompiler;
/// A trait that describes the interface for the platforms that are supported by the tool.
#[allow(clippy::type_complexity)]
pub trait Platform {
+34
View File
@@ -0,0 +1,34 @@
[package]
name = "ml-test-runner"
description = "ML-based test runner for executing differential tests file by file"
version.workspace = true
authors.workspace = true
license.workspace = true
edition.workspace = true
repository.workspace = true
rust-version.workspace = true
[[bin]]
name = "ml-test-runner"
path = "src/main.rs"
[dependencies]
revive-dt-common = { workspace = true }
revive-dt-compiler = { workspace = true }
revive-dt-config = { workspace = true }
revive-dt-core = { workspace = true }
revive-dt-format = { workspace = true }
revive-dt-node = { workspace = true }
revive-dt-node-interaction = { workspace = true }
revive-dt-report = { workspace = true }
alloy = { workspace = true }
anyhow = { workspace = true }
clap = { workspace = true }
tokio = { workspace = true }
temp-dir = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }
[lints]
workspace = true
+124
View File
@@ -0,0 +1,124 @@
# ML Test Runner
A test runner for executing Revive differential tests file-by-file with cargo-test-style output.
This is similar to the `retester` binary but designed for ML-based test execution with a focus on:
- Running tests file-by-file (rather than in bulk)
- Caching passed tests to skip them in future runs
- Providing cargo-test-style output for easy integration with ML pipelines
- Single platform testing (rather than differential testing)
## Features
- **File-by-file execution**: Run tests on individual `.sol` files, corpus files (`.json`), or recursively walk directories
- **Cached results**: Skip tests that have already passed using `--cached-passed`
- **Fail fast**: Stop on first failure with `--bail`
- **Cargo-like output**: Familiar test output format with colored pass/fail indicators
- **Platform support**: Test against the `geth`, `kitchensink`, or `zombienet` platforms
## Usage
```bash
# Run a single .sol file against an already-running node (default; no node is spawned)
./ml-test-runner path/to/test.sol --platform geth
# Run all tests in a corpus file
./ml-test-runner path/to/corpus.json --platform kitchensink
# Walk a directory recursively for .sol files
./ml-test-runner path/to/tests/ --platform geth
# Use cached results and bail on first failure
./ml-test-runner path/to/tests/ --cached-passed ./cache.txt --bail
# Start the platform and execute tests (full mode)
./ml-test-runner path/to/tests/ --platform geth --start-platform
# Enable verbose logging (info, debug, or trace level)
RUST_LOG=info ./ml-test-runner path/to/tests/
RUST_LOG=debug ./ml-test-runner path/to/tests/ --start-platform
RUST_LOG=trace ./ml-test-runner path/to/tests/ --start-platform
```
## Arguments
- `<PATH>` - Path to test file (`.sol`), corpus file (`.json`), or folder of `.sol` files
- `--cached-passed <FILE>` - File to track tests that have already passed
- `--bail` - Stop after the first file failure
- `--platform <PLATFORM>` - Platform to test against (`geth`, `kitchensink`, or `zombienet`, default: `geth`)
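- `--start-platform` - Spawn a fresh node for the selected platform before executing the tests (default: `false`; when omitted, the runner does not spawn anything and expects a node to already be reachable at `http://localhost:8545`)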
## Logging
The ml-test-runner uses the `tracing` crate for logging. Set the `RUST_LOG` environment variable to control log output:
- `RUST_LOG=info` - Shows high-level progress (file discovery, node startup, test execution)
- `RUST_LOG=debug` - Shows detailed execution flow (compilation, driver creation, step execution)
- `RUST_LOG=trace` - Shows very detailed tracing (mostly from dependencies)
Logs are written to stderr, while test results are written to stdout for easy filtering.
## Output Format
The runner produces cargo-test-style output:
```
test path/to/test1.sol ... ok
test path/to/test2.sol ... FAILED
test path/to/test3.sol ... cached
failures:
---- path/to/test2.sol ----
Error: ...
test result: FAILED. 1 passed; 1 failed; 1 cached; finished in 2.34s
```
## Implementation Status
### ✅ Completed
- CLI argument parsing with full configuration options
- File discovery (single file, corpus, recursive directory walk)
- Cached-passed tracking system
- Cargo-test-style output formatting
- Contract compilation with caching
- Platform node management and spawning
- Library deployment and linking
- Full case execution using Driver pattern
- Test file discovery and metadata loading
- Pass/fail tracking and caching
- Output formatting and summary generation
- Error handling and bail behavior
- Optional node startup with `--start-platform` flag
- Existing-node mode (default): runs the tests against an already-running node without spawning one
- Full execution mode (with `--start-platform`) for actual testing
- Tracing/logging support via `RUST_LOG`
### 🚧 TODO
- Additional optimizations and performance tuning
- Support for custom working directories
## Implementation Details
The ml-test-runner is a **simplified, single-platform test runner** that shares core components with the main differential testing tool:
- **Compilation**: Uses the shared `CachedCompiler` from `revive-dt-core` that stores compilation artifacts to avoid recompiling
- **Library Deployment**: Automatically deploys library contracts when needed and links them
- **Test Execution**: Uses the shared `Driver` from `revive-dt-core::differential_tests` to execute test cases on the configured platform
- **Node Management**: Optionally spawns and manages blockchain nodes (when `--start-platform` is used)
- **Single Platform**: Unlike the main tool which does differential testing (comparing multiple platforms), `ml-test-runner` executes against a single platform
- **Two Modes**:
- **Existing-node mode** (default): Compiles the contracts and executes the test steps against a node that is already running at `http://localhost:8545`; the runner does not spawn a node itself
- **Full execution mode** (`--start-platform`): Spawns a node and executes all test steps with assertions
- **Tracing**: Full logging support via `tracing` and `tracing-subscriber` crates
The implementation is clean, focused code that reuses battle-tested components from `revive-dt-core`. This ensures consistency while maintaining a lean codebase optimized for ML pipeline integration.
## Building
```bash
cargo build --release -p ml-test-runner
```
The binary will be available at `target/release/ml-test-runner`.
@@ -0,0 +1,2 @@
// Re-export the cached compiler from core to avoid code duplication
pub use revive_dt_core::CachedCompiler;
+531
View File
@@ -0,0 +1,531 @@
use anyhow::Context;
use clap::Parser;
use revive_dt_common::{iterators::FilesWithExtensionIterator, types::PrivateKeyAllocator};
use revive_dt_config::{TestExecutionContext, TestingPlatform};
use revive_dt_core::{
CachedCompiler, Platform,
helpers::{TestDefinition, TestPlatformInformation},
};
use revive_dt_format::{
case::CaseIdx,
corpus::Corpus,
metadata::{Metadata, MetadataFile},
};
use revive_dt_node_interaction::EthereumNode;
use std::{
borrow::Cow,
collections::{BTreeMap, HashSet},
fs::File,
io::{BufRead, BufReader, BufWriter, Write},
path::{Path, PathBuf},
sync::Arc,
time::Instant,
};
use temp_dir::TempDir;
use tokio::sync::Mutex;
use tracing::info;
use tracing_subscriber::{EnvFilter, FmtSubscriber};
// NOTE: clap turns the `///` comments below into the `--help` output, so they
// are user-facing text and must stay in sync with what the code accepts.
/// ML-based test runner for executing differential tests file by file
#[derive(Debug, Parser)]
#[command(name = "ml-test-runner")]
struct MlTestRunnerArgs {
    /// Path to test file (.sol), corpus file (.json), or folder containing .sol files
    #[arg(value_name = "PATH")]
    path: PathBuf,

    /// File to cache tests that have already passed
    #[arg(long = "cached-passed")]
    cached_passed: Option<PathBuf>,

    /// Stop after the first file failure
    #[arg(long = "bail")]
    bail: bool,

    // All three variants are handled when the platform is selected for a test
    // file, so the help text lists all three.
    /// Platform to test against (geth, kitchensink, or zombienet)
    #[arg(long = "platform", default_value = "geth")]
    platform: TestingPlatform,

    /// Start the platform and wait for RPC readiness
    #[arg(long = "start-platform", default_value = "false")]
    start_platform: bool,
}
/// Process entry point.
///
/// Installs a `tracing` subscriber that writes to stderr (filtered through the
/// environment via `EnvFilter::from_default_env`), parses the command line,
/// then builds a multi-threaded Tokio runtime and drives [`run`] to completion
/// on it.
///
/// # Errors
///
/// Returns an error if the global tracing subscriber cannot be installed, if
/// the Tokio runtime cannot be built, or if [`run`] itself fails. Setup
/// failures are reported as errors rather than panics because this function
/// already returns `anyhow::Result`.
fn main() -> anyhow::Result<()> {
    // Logs go to stderr so that stdout carries only the test report.
    let subscriber = FmtSubscriber::builder()
        .with_env_filter(EnvFilter::from_default_env())
        .with_writer(std::io::stderr)
        .finish();
    tracing::subscriber::set_global_default(subscriber)
        .context("Failed to set tracing subscriber")?;

    let args = MlTestRunnerArgs::parse();

    info!("ML test runner starting");
    info!("Platform: {:?}", args.platform);
    info!("Start platform: {}", args.start_platform);

    // Run the async body
    tokio::runtime::Builder::new_multi_thread()
        .enable_all()
        .build()
        .context("Failed building the Runtime")?
        .block_on(run(args))
}
/// Runs every discovered test file, one after another, and prints a
/// cargo-test-style report to stdout.
///
/// For each file the function:
/// 1. skips it (printing `cached`) if its displayed path is already in the
///    passed-set loaded from `--cached-passed`;
/// 2. loads its metadata, recording a failure if that is not possible;
/// 3. executes it via [`execute_test_file`], recording `ok` or `FAILED`.
///
/// With `--bail` the loop stops after the first failure. The passed-set is
/// written back to the `--cached-passed` file (when given) after the loop,
/// including when the loop stopped early.
///
/// # Errors
///
/// Returns an error if test discovery fails or if the cache file cannot be
/// loaded or saved. Test failures are not returned as errors: they are printed
/// in the summary and the process exits with status 1.
async fn run(args: MlTestRunnerArgs) -> anyhow::Result<()> {
    // ANSI escape sequences used for the coloured report.
    const GREEN: &str = "\x1B[32m";
    const RED: &str = "\x1B[31m";
    const YELLOW: &str = "\x1B[33m";
    const COLOUR_RESET: &str = "\x1B[0m";
    const BOLD: &str = "\x1B[1m";
    const BOLD_RESET: &str = "\x1B[22m";

    let start_time = Instant::now();

    // Discover test files
    info!("Discovering test files from: {}", args.path.display());
    let test_files = discover_test_files(&args.path)?;
    info!("Found {} test file(s)", test_files.len());

    // Load cached passed tests if provided. Files are processed sequentially
    // by this task alone, so the set is a plain owned value; no lock or shared
    // ownership is needed.
    let mut cached_passed = match &args.cached_passed {
        Some(cache_file) => {
            let cached = load_cached_passed(cache_file)?;
            info!("Loaded {} cached passed test(s)", cached.len());
            cached
        }
        None => HashSet::new(),
    };

    // Statistics
    let mut passed_files = 0;
    let mut failed_files = 0;
    let mut skipped_files = 0;
    let mut failures = Vec::new();

    // Process each file
    for test_file in test_files {
        // The displayed path doubles as the cache key.
        let file_display = test_file.display().to_string();

        // Check if already passed
        if cached_passed.contains(&file_display) {
            println!("test {} ... {YELLOW}cached{COLOUR_RESET}", file_display);
            skipped_files += 1;
            continue;
        }

        // Load metadata from file
        info!("Loading metadata from: {}", test_file.display());
        let metadata_file = match load_metadata_file(&test_file) {
            Ok(mf) => {
                info!("Loaded metadata with {} case(s)", mf.cases.len());
                mf
            }
            Err(e) => {
                println!("test {} ... {RED}FAILED{COLOUR_RESET}", file_display);
                println!(" Error loading metadata: {}", e);
                failed_files += 1;
                failures.push((
                    file_display.clone(),
                    format!("Error loading metadata: {}", e),
                ));
                if args.bail {
                    break;
                }
                continue;
            }
        };

        // Execute test cases for this file
        info!("Executing test file: {}", file_display);
        match execute_test_file(&args, &metadata_file).await {
            Ok(_) => {
                println!("test {} ... {GREEN}ok{COLOUR_RESET}", file_display);
                info!("Test file passed: {}", file_display);
                passed_files += 1;
                // Remember the pass so later runs can skip this file.
                cached_passed.insert(file_display);
            }
            Err(e) => {
                println!("test {} ... {RED}FAILED{COLOUR_RESET}", file_display);
                info!("Test file failed: {}", file_display);
                failed_files += 1;
                // `{:?}` keeps the full anyhow context chain in the summary.
                failures.push((file_display, format!("{:?}", e)));
                if args.bail {
                    info!("Bailing after first failure");
                    break;
                }
            }
        }
    }

    // Save cached passed tests
    if let Some(cache_file) = &args.cached_passed {
        info!("Saving {} cached passed test(s)", cached_passed.len());
        save_cached_passed(cache_file, &cached_passed)?;
    }

    // Print summary
    println!();
    if !failures.is_empty() {
        println!("{BOLD}failures:{BOLD_RESET}");
        println!();
        for (file, error) in &failures {
            println!("---- {} ----", file);
            println!("{}", error);
            println!();
        }
    }

    let elapsed = start_time.elapsed();
    println!(
        "test result: {}. {} passed; {} failed; {} cached; finished in {:.2}s",
        if failed_files == 0 {
            format!("{GREEN}ok{COLOUR_RESET}")
        } else {
            format!("{RED}FAILED{COLOUR_RESET}")
        },
        passed_files,
        failed_files,
        skipped_files,
        elapsed.as_secs_f64()
    );

    // Signal failure through the exit status without printing an extra
    // `Error: ...` line after the summary.
    if failed_files > 0 {
        std::process::exit(1);
    }

    Ok(())
}
/// Resolves `path` into the list of test files to run.
///
/// * A `.sol` file yields just that file.
/// * A `.json` file is parsed as a corpus and yields the metadata file path of
///   every test the corpus enumerates.
/// * A directory yields every `.sol` file produced by
///   `FilesWithExtensionIterator`.
///
/// # Errors
///
/// Fails when `path` does not exist, is a file with any other extension, is
/// neither a file nor a directory, or when the corpus cannot be loaded.
fn discover_test_files(path: &Path) -> anyhow::Result<Vec<PathBuf>> {
    if !path.exists() {
        anyhow::bail!("Path does not exist: {}", path.display());
    }

    let mut discovered = Vec::new();

    if path.is_file() {
        // A missing or non-UTF-8 extension is treated as the empty string and
        // therefore lands in the "unsupported" arm.
        match path.extension().and_then(|ext| ext.to_str()).unwrap_or("") {
            "sol" => discovered.push(path.to_path_buf()),
            "json" => {
                let corpus = Corpus::try_from_path(path)?;
                discovered.extend(
                    corpus
                        .enumerate_tests()
                        .into_iter()
                        .map(|test| test.metadata_file_path),
                );
            }
            other => anyhow::bail!(
                "Unsupported file extension: {}. Expected .sol or .json",
                other
            ),
        }
    } else if path.is_dir() {
        discovered.extend(
            FilesWithExtensionIterator::new(path)
                .with_allowed_extension("sol")
                .with_use_cached_fs(true),
        );
    } else {
        anyhow::bail!("Path is neither a file nor a directory: {}", path.display());
    }

    Ok(discovered)
}
/// Load metadata from a test file
fn load_metadata_file(path: &Path) -> anyhow::Result<MetadataFile> {
let metadata = Metadata::try_from_file(path)
.ok_or_else(|| anyhow::anyhow!("Failed to load metadata from {}", path.display()))?;
Ok(MetadataFile {
metadata_file_path: path.to_path_buf(),
corpus_file_path: path.to_path_buf(),
content: metadata,
})
}
/// Execute all test cases in a metadata file
async fn execute_test_file(
args: &MlTestRunnerArgs,
metadata_file: &MetadataFile,
) -> anyhow::Result<()> {
if metadata_file.cases.is_empty() {
anyhow::bail!("No test cases found in file");
}
info!("Processing {} test case(s)", metadata_file.cases.len());
// Get the platform based on CLI args
let platform: &dyn Platform = match args.platform {
TestingPlatform::Geth => &revive_dt_core::GethEvmSolcPlatform,
TestingPlatform::Kitchensink => &revive_dt_core::KitchensinkPolkavmResolcPlatform,
TestingPlatform::Zombienet => &revive_dt_core::ZombienetPolkavmResolcPlatform,
};
// Create temporary working directory
let temp_dir = TempDir::new()?;
info!("Created temporary directory: {}", temp_dir.path().display());
// Create a test execution context (with defaults)
let test_context = TestExecutionContext::default();
let context = revive_dt_config::Context::Test(Box::new(test_context));
// Optionally start a node based on the --start-platform flag
let node: &'static dyn revive_dt_node_interaction::EthereumNode = if args.start_platform {
info!("Starting blockchain node...");
let node_handle = platform
.new_node(context.clone())
.context("Failed to spawn node thread")?;
info!("Waiting for node to start...");
let node = node_handle
.join()
.map_err(|e| anyhow::anyhow!("Node thread panicked: {:?}", e))?
.context("Failed to start node")?;
info!(
"Node started with ID: {}, connection: {}",
node.id(),
node.connection_string()
);
// Run pre-transactions on the node
let node = Box::leak(node); // Leak to get 'static lifetime for simplicity
info!("Running pre-transactions...");
node.pre_transactions()
.await
.context("Failed to run pre-transactions")?;
info!("Pre-transactions completed");
node
} else {
info!("Using existing node");
let existing_node: Box<dyn revive_dt_node_interaction::EthereumNode> = match args.platform {
TestingPlatform::Geth => {
Box::new(revive_dt_node::node_implementations::geth::GethNode::new_existing())
}
TestingPlatform::Kitchensink | TestingPlatform::Zombienet => Box::new(
revive_dt_node::node_implementations::substrate::SubstrateNode::new_existing(),
),
};
Box::leak(existing_node)
};
// Create a cached compiler for this file (wrapped in Arc like the main code does)
info!("Initializing cached compiler");
let cached_compiler = CachedCompiler::new(temp_dir.path().join("compilation_cache"), false)
.await
.map(Arc::new)
.context("Failed to create cached compiler")?;
// Create a private key allocator
let private_key_allocator = Arc::new(Mutex::new(PrivateKeyAllocator::new(
alloy::primitives::U256::from(100),
)));
// Create reporter infrastructure (minimal, just for the Driver API)
// Note: We need to keep the report_task alive, otherwise the reporter channel closes
let (reporter, report_task) =
revive_dt_report::ReportAggregator::new(context.clone()).into_task();
// Spawn the report task in the background to keep the channel open
tokio::spawn(report_task);
info!(
"Building test definitions for {} case(s)",
metadata_file.cases.len()
);
// Build all test definitions upfront
let mut test_definitions = Vec::new();
for (case_idx, case) in metadata_file.cases.iter().enumerate() {
info!("Building test definition for case {}", case_idx);
let test_def = build_test_definition(
metadata_file,
case,
case_idx,
platform,
node,
&context,
&reporter,
)
.await?;
if let Some(test_def) = test_def {
info!("Test definition for case {} created successfully", case_idx);
test_definitions.push(test_def);
}
}
// Execute each test case
info!("Executing {} test definition(s)", test_definitions.len());
for (idx, test_definition) in test_definitions.iter().enumerate() {
info!("─────────────────────────────────────────────────────────────────");
info!(
"Executing case {}/{}: case_idx={}, mode={}, steps={}",
idx + 1,
test_definitions.len(),
test_definition.case_idx,
test_definition.mode,
test_definition.case.steps.len()
);
info!("Creating driver for case {}", test_definition.case_idx);
let driver = revive_dt_core::differential_tests::Driver::new_root(
test_definition,
private_key_allocator.clone(),
&cached_compiler,
)
.await
.context("Failed to create driver")?;
info!(
"Running {} step(s) for case {}",
test_definition.case.steps.len(),
test_definition.case_idx
);
let steps_executed = driver.execute_all().await.context(format!(
"Failed to execute case {}",
test_definition.case_idx
))?;
info!(
"✓ Case {} completed successfully, executed {} step(s)",
test_definition.case_idx, steps_executed
);
}
info!("─────────────────────────────────────────────────────────────────");
info!(
"All {} test case(s) executed successfully",
test_definitions.len()
);
Ok(())
}
/// Builds the [`TestDefinition`] for one case of a metadata file.
///
/// The compiler mode is the first mode listed on the case, falling back to the
/// first mode listed on the metadata file, and finally to the first entry of
/// `Mode::all()`. A compiler for that mode is created through `platform`, and
/// test- and execution-specific reporters are derived from `reporter`.
///
/// Returns `Ok(None)`, after printing a "Skipping case" line to stdout, when
/// `check_compatibility` rejects the definition.
///
/// # Errors
///
/// Fails when no compiler mode can be determined or when the platform cannot
/// create a compiler.
async fn build_test_definition<'a>(
    metadata_file: &'a MetadataFile,
    case: &'a revive_dt_format::case::Case,
    case_idx: usize,
    platform: &'a dyn Platform,
    node: &'a dyn revive_dt_node_interaction::EthereumNode,
    context: &revive_dt_config::Context,
    reporter: &revive_dt_report::Reporter,
) -> anyhow::Result<Option<TestDefinition<'a>>> {
    // Determine mode - use case mode if specified, otherwise use default.
    // An empty result is reported as an error instead of panicking, since this
    // function already returns a `Result`.
    let mode = case
        .modes
        .as_ref()
        .or(metadata_file.modes.as_ref())
        .and_then(|modes| modes.first())
        .and_then(|parsed_mode| parsed_mode.to_modes().next())
        .map(Cow::Owned)
        .or_else(|| revive_dt_compiler::Mode::all().next().map(Cow::Borrowed))
        .context("No compiler mode available for test case")?;

    // Create a compiler for this mode
    let compiler = platform
        .new_compiler(context.clone(), mode.version.clone().map(Into::into))
        .await
        .context("Failed to create compiler")?;

    // Create test-specific reporter
    let test_reporter =
        reporter.test_specific_reporter(Arc::new(revive_dt_report::TestSpecifier {
            solc_mode: mode.as_ref().clone(),
            metadata_file_path: metadata_file.metadata_file_path.clone(),
            case_idx: CaseIdx::new(case_idx),
        }));

    // Create execution-specific reporter
    let execution_reporter =
        test_reporter.execution_specific_reporter(node.id(), platform.platform_identifier());

    // Build platform information: this runner registers exactly one platform.
    let mut platforms = BTreeMap::new();
    platforms.insert(
        platform.platform_identifier(),
        TestPlatformInformation {
            platform,
            node,
            compiler,
            reporter: execution_reporter,
        },
    );

    // Build test definition
    let test_definition = TestDefinition {
        metadata: metadata_file,
        metadata_file_path: &metadata_file.metadata_file_path,
        mode,
        case_idx: CaseIdx::new(case_idx),
        case,
        platforms,
        reporter: test_reporter,
    };

    // Check compatibility
    if let Err((reason, _)) = test_definition.check_compatibility() {
        println!(" Skipping case {}: {}", case_idx, reason);
        return Ok(None);
    }

    Ok(Some(test_definition))
}
/// Load cached passed tests from file
fn load_cached_passed(path: &Path) -> anyhow::Result<HashSet<String>> {
if !path.exists() {
return Ok(HashSet::new());
}
let file = File::open(path).context("Failed to open cached-passed file")?;
let reader = BufReader::new(file);
let mut cache = HashSet::new();
for line in reader.lines() {
let line = line?;
let trimmed = line.trim();
if !trimmed.is_empty() {
cache.insert(trimmed.to_string());
}
}
Ok(cache)
}
/// Writes the passed-test set to `path`, one entry per line in sorted order.
///
/// Any existing file at `path` is replaced. The buffered writer is flushed
/// explicitly so that write errors surface here.
///
/// # Errors
///
/// Fails when the file cannot be created or written.
fn save_cached_passed(path: &Path, cache: &HashSet<String>) -> anyhow::Result<()> {
    let handle = File::create(path).context("Failed to create cached-passed file")?;
    let mut out = BufWriter::new(handle);

    // Sort so the file content does not depend on hash iteration order.
    let mut sorted = Vec::from_iter(cache);
    sorted.sort();

    sorted
        .into_iter()
        .try_for_each(|name| writeln!(out, "{}", name))?;
    out.flush()?;

    Ok(())
}
+8
View File
@@ -74,6 +74,14 @@ pub trait EthereumNode {
+ '_,
>,
>;
/// Creates a node instance from an existing running node.
///
/// This default body constructs nothing; node types that support this
/// constructor are expected to override it.
///
/// # Panics
///
/// The default implementation always panics with
/// "new_existing is not implemented for this node type".
fn new_existing() -> Self
where
// `Self: Sized` makes this constructor unavailable on trait objects.
Self: Sized,
{
panic!("new_existing is not implemented for this node type")
}
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -542,6 +542,22 @@ impl EthereumNode for GethNode {
as Pin<Box<dyn Stream<Item = MinedBlockInformation>>>)
})
}
/// Builds a placeholder [`GethNode`] that points at `http://localhost:8545`.
///
/// No process handle is held (`handle` is `None`), every path is empty, the
/// id is `0`, the start timeout is zero, and the wallet, nonce manager and
/// provider are their default values.
fn new_existing() -> Self {
    let connection_string = String::from("http://localhost:8545");
    let wallet = Arc::new(EthereumWallet::default());

    Self {
        connection_string,
        base_directory: PathBuf::default(),
        data_directory: PathBuf::default(),
        logs_directory: PathBuf::default(),
        geth: PathBuf::default(),
        id: 0,
        handle: None,
        start_timeout: Duration::ZERO,
        wallet,
        nonce_manager: Default::default(),
        provider: Default::default(),
    }
}
}
pub struct GethNodeResolver {
@@ -541,6 +541,23 @@ impl EthereumNode for SubstrateNode {
as Pin<Box<dyn Stream<Item = MinedBlockInformation>>>)
})
}
/// Builds a placeholder [`SubstrateNode`] whose RPC URL is
/// `http://localhost:8545`.
///
/// No process handles are held (`substrate_process` and `eth_proxy_process`
/// are `None`), binary paths, directories and the chainspec command are
/// empty, the id is `0`, and the wallet, nonce manager and provider are their
/// default values.
fn new_existing() -> Self {
    let rpc_url = String::from("http://localhost:8545");
    let wallet = Arc::new(EthereumWallet::default());

    Self {
        id: 0,
        node_binary: PathBuf::default(),
        eth_proxy_binary: PathBuf::default(),
        export_chainspec_command: String::default(),
        rpc_url,
        base_directory: PathBuf::default(),
        logs_directory: PathBuf::default(),
        substrate_process: None,
        eth_proxy_process: None,
        wallet,
        nonce_manager: Default::default(),
        provider: Default::default(),
    }
}
}
pub struct SubstrateNodeResolver {