ml-runner init

2026-04-29 14:48:00 +00:00 · 2025-10-07 16:10:43 +00:00
parent 6da3172581
commit 6e64f678ee
12 changed files with 762 additions and 3 deletions
@@ -4526,6 +4526,27 @@ dependencies = [
 "windows-sys 0.59.0",
 ]

+[[package]]
+name = "ml-test-runner"
+version = "0.1.0"
+dependencies = [
+ "alloy",
+ "anyhow",
+ "clap",
+ "revive-dt-common",
+ "revive-dt-compiler",
+ "revive-dt-config",
+ "revive-dt-core",
+ "revive-dt-format",
+ "revive-dt-node",
+ "revive-dt-node-interaction",
+ "revive-dt-report",
+ "temp-dir",
+ "tokio",
+ "tracing",
+ "tracing-subscriber",
+]
+
 [[package]]
 name = "moka"
 version = "0.12.10"
@@ -10,7 +10,7 @@ use std::{
 use anyhow::Context as _;
 use futures::{FutureExt, StreamExt};
 use revive_dt_common::types::PrivateKeyAllocator;
-use revive_dt_core::Platform;
+use crate::Platform;
 use tokio::sync::{Mutex, RwLock, Semaphore};
 use tracing::{Instrument, error, info, info_span, instrument};

@@ -11,7 +11,7 @@ use std::{
 use futures::FutureExt;
 use revive_dt_common::{iterators::FilesWithExtensionIterator, types::CompilerIdentifier};
 use revive_dt_compiler::{Compiler, CompilerOutput, Mode, SolidityCompiler};
-use revive_dt_core::Platform;
+use crate::Platform;
 use revive_dt_format::metadata::{ContractIdent, ContractInstance, Metadata};

 use alloy::{hex::ToHexExt, json_abi::JsonAbi, primitives::Address};
@@ -4,7 +4,7 @@ use std::sync::atomic::{AtomicUsize, Ordering};

 use anyhow::Context as _;
 use revive_dt_config::*;
-use revive_dt_core::Platform;
+use crate::Platform;
 use revive_dt_node_interaction::EthereumNode;

 /// The node pool starts one or more [Node] which then can be accessed
@@ -3,6 +3,9 @@
 //! This crate defines the testing configuration and
 //! provides a helper utility to execute tests.

+pub mod helpers;
+pub mod differential_tests;
+
 use std::{
    pin::Pin,
    thread::{self, JoinHandle},
@@ -21,6 +24,9 @@ use revive_dt_node::{
 use revive_dt_node_interaction::EthereumNode;
 use tracing::info;

+// Re-export helper types
+pub use helpers::CachedCompiler;
+
 /// A trait that describes the interface for the platforms that are supported by the tool.
 #[allow(clippy::type_complexity)]
 pub trait Platform {
@@ -0,0 +1,34 @@
+[package]
+name = "ml-test-runner"
+description = "ML-based test runner for executing differential tests file by file"
+version.workspace = true
+authors.workspace = true
+license.workspace = true
+edition.workspace = true
+repository.workspace = true
+rust-version.workspace = true
+
+[[bin]]
+name = "ml-test-runner"
+path = "src/main.rs"
+
+[dependencies]
+revive-dt-common = { workspace = true }
+revive-dt-compiler = { workspace = true }
+revive-dt-config = { workspace = true }
+revive-dt-core = { workspace = true }
+revive-dt-format = { workspace = true }
+revive-dt-node = { workspace = true }
+revive-dt-node-interaction = { workspace = true }
+revive-dt-report = { workspace = true }
+
+alloy = { workspace = true }
+anyhow = { workspace = true }
+clap = { workspace = true }
+tokio = { workspace = true }
+temp-dir = { workspace = true }
+tracing = { workspace = true }
+tracing-subscriber = { workspace = true }
+
+[lints]
+workspace = true
@@ -0,0 +1,124 @@
+# ML Test Runner
+
+A test runner for executing Revive differential tests file-by-file with cargo-test-style output.
+
+This is similar to the `retester` binary but designed for ML-based test execution with a focus on:
+- Running tests file-by-file (rather than in bulk)
+- Caching passed tests to skip them in future runs
+- Providing cargo-test-style output for easy integration with ML pipelines
+- Single platform testing (rather than differential testing)
+
+## Features
+
+- **File-by-file execution**: Run tests on individual `.sol` files, corpus files (`.json`), or recursively walk directories
+- **Cached results**: Skip tests that have already passed using `--cached-passed`
+- **Fail fast**: Stop on first failure with `--bail`
+- **Cargo-like output**: Familiar test output format with colored pass/fail indicators
+- **Platform support**: Test against the `geth`, `kitchensink`, or `zombienet` platforms
+
+## Usage
+
+```bash
+# Run a single .sol file against an already-running node (default mode)
+./ml-test-runner path/to/test.sol --platform geth
+
+# Run all tests in a corpus file
+./ml-test-runner path/to/corpus.json --platform kitchensink
+
+# Walk a directory recursively for .sol files
+./ml-test-runner path/to/tests/ --platform geth
+
+# Use cached results and bail on first failure
+./ml-test-runner path/to/tests/ --cached-passed ./cache.txt --bail
+
+# Start the platform and execute tests (full mode)
+./ml-test-runner path/to/tests/ --platform geth --start-platform
+
+# Enable verbose logging (info, debug, or trace level)
+RUST_LOG=info ./ml-test-runner path/to/tests/
+RUST_LOG=debug ./ml-test-runner path/to/tests/ --start-platform
+RUST_LOG=trace ./ml-test-runner path/to/tests/ --start-platform
+```
+
+## Arguments
+
+- `<PATH>` - Path to test file (`.sol`), corpus file (`.json`), or folder of `.sol` files
+- `--cached-passed <FILE>` - File to track tests that have already passed
+- `--bail` - Stop after the first file failure
+- `--platform <PLATFORM>` - Platform to test against (`geth`, `kitchensink`, or `zombienet`, default: `geth`)
+- `--start-platform` - Spawn a fresh node for the selected platform before executing tests (default: `false`, in which case tests run against an already-running node at `http://localhost:8545`)
+
+## Logging
+
+The ml-test-runner uses the `tracing` crate for logging. Set the `RUST_LOG` environment variable to control log output:
+
+- `RUST_LOG=info` - Shows high-level progress (file discovery, node startup, test execution)
+- `RUST_LOG=debug` - Shows detailed execution flow (compilation, driver creation, step execution)
+- `RUST_LOG=trace` - Shows very detailed tracing (mostly from dependencies)
+
+Logs are written to stderr, while test results are written to stdout for easy filtering.
+
+## Output Format
+
+The runner produces cargo-test-style output:
+
+```
+test path/to/test1.sol ... ok
+test path/to/test2.sol ... FAILED
+test path/to/test3.sol ... cached
+
+failures:
+
+---- path/to/test2.sol ----
+Error: ...
+
+test result: FAILED. 1 passed; 1 failed; 1 cached; finished in 2.34s
+```
+
+## Implementation Status
+
+### ✅ Completed
+- CLI argument parsing with full configuration options
+- File discovery (single file, corpus, recursive directory walk)
+- Cached-passed tracking system
+- Cargo-test-style output formatting
+- Contract compilation with caching
+- Platform node management and spawning
+- Library deployment and linking
+- Full case execution using Driver pattern
+- Test file discovery and metadata loading
+- Pass/fail tracking and caching
+- Output formatting and summary generation
+- Error handling and bail behavior
+- Optional node startup with `--start-platform` flag
+- Existing-node mode (default): tests run against a node that is already listening on `http://localhost:8545`
+- Spawned-node mode (with `--start-platform`): a fresh node is started for each test file
+- Tracing/logging support via `RUST_LOG`
+
+### 🚧 TODO
+- Additional optimizations and performance tuning
+- Support for custom working directories
+
+## Implementation Details
+
+The ml-test-runner is a **simplified, single-platform test runner** that shares core components with the main differential testing tool:
+
+- **Compilation**: Uses the shared `CachedCompiler` from `revive-dt-core` that stores compilation artifacts to avoid recompiling
+- **Library Deployment**: Automatically deploys library contracts when needed and links them
+- **Test Execution**: Uses the shared `Driver` from `revive-dt-core::differential_tests` to execute test cases on the configured platform
+- **Node Management**: Optionally spawns and manages blockchain nodes (when `--start-platform` is used)
+- **Single Platform**: Unlike the main tool which does differential testing (comparing multiple platforms), `ml-test-runner` executes against a single platform
+- **Two Modes**:
+  - **Existing-node mode** (default): Connects to an already-running node at `http://localhost:8545` and executes all test steps against it; no node is spawned
+  - **Spawned-node mode** (`--start-platform`): Spawns a node for each test file, runs its pre-transactions, and then executes all test steps with assertions
+- **Tracing**: Full logging support via `tracing` and `tracing-subscriber` crates
+
+The implementation is clean, focused code that reuses battle-tested components from `revive-dt-core`. This ensures consistency while maintaining a lean codebase optimized for ML pipeline integration.
+
+## Building
+
+```bash
+cargo build --release -p ml-test-runner
+```
+
+The binary will be available at `target/release/ml-test-runner`.
@@ -0,0 +1,2 @@
+// Re-export the cached compiler from core to avoid code duplication
+pub use revive_dt_core::CachedCompiler;
@@ -0,0 +1,531 @@
+use anyhow::Context;
+use clap::Parser;
+use revive_dt_common::{iterators::FilesWithExtensionIterator, types::PrivateKeyAllocator};
+use revive_dt_config::{TestExecutionContext, TestingPlatform};
+use revive_dt_core::{
+    CachedCompiler, Platform,
+    helpers::{TestDefinition, TestPlatformInformation},
+};
+use revive_dt_format::{
+    case::CaseIdx,
+    corpus::Corpus,
+    metadata::{Metadata, MetadataFile},
+};
+use revive_dt_node_interaction::EthereumNode;
+use std::{
+    borrow::Cow,
+    collections::{BTreeMap, HashSet},
+    fs::File,
+    io::{BufRead, BufReader, BufWriter, Write},
+    path::{Path, PathBuf},
+    sync::Arc,
+    time::Instant,
+};
+use temp_dir::TempDir;
+use tokio::sync::Mutex;
+use tracing::info;
+use tracing_subscriber::{EnvFilter, FmtSubscriber};
+
+/// ML-based test runner for executing differential tests file by file
+#[derive(Debug, Parser)]
+#[command(name = "ml-test-runner")]
+struct MlTestRunnerArgs {
+    /// Path to test file (.sol), corpus file (.json), or folder containing .sol files
+    #[arg(value_name = "PATH")]
+    path: PathBuf,
+
+    /// File listing tests that have already passed; listed tests are skipped and the file is
+    /// rewritten with the updated set when the run finishes
+    #[arg(long = "cached-passed")]
+    cached_passed: Option<PathBuf>,
+
+    /// Stop after the first file failure
+    #[arg(long = "bail")]
+    bail: bool,
+
+    /// Platform to test against (geth, kitchensink, or zombienet)
+    #[arg(long = "platform", default_value = "geth")]
+    platform: TestingPlatform,
+
+    /// Spawn a fresh node for the selected platform instead of using an already-running one
+    // A `bool` field is a presence flag that is `false` when absent, so no explicit default
+    // value is needed.
+    #[arg(long = "start-platform")]
+    start_platform: bool,
+}
+
+/// Entry point: installs the tracing subscriber, parses the CLI arguments, and drives [`run`]
+/// to completion on a multi-threaded Tokio runtime.
+///
+/// # Errors
+///
+/// Returns an error if the tracing subscriber cannot be installed, if the runtime cannot be
+/// built, or if [`run`] itself fails.
+fn main() -> anyhow::Result<()> {
+    // Logs go to stderr so that stdout only carries the test results.
+    let subscriber = FmtSubscriber::builder()
+        .with_env_filter(EnvFilter::from_default_env())
+        .with_writer(std::io::stderr)
+        .finish();
+    tracing::subscriber::set_global_default(subscriber)
+        .context("Failed to set tracing subscriber")?;
+
+    let args = MlTestRunnerArgs::parse();
+
+    info!("ML test runner starting");
+    info!("Platform: {:?}", args.platform);
+    info!("Start platform: {}", args.start_platform);
+
+    // Run the async body; setup failures are reported as errors instead of panics.
+    tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()
+        .context("Failed building the Runtime")?
+        .block_on(run(args))
+}
+
+/// Discovers the test files under `args.path`, executes them one by one, and prints
+/// cargo-test-style results to stdout.
+///
+/// Files already listed in the `--cached-passed` file are reported as `cached` and skipped.
+/// Files that pass are added to that set, which is written back once the loop ends (this also
+/// happens after a `--bail` break).
+///
+/// # Errors
+///
+/// Returns an error if test discovery fails or if the cache file cannot be read or written.
+/// Individual test failures are not returned as errors: they are collected, printed in the
+/// summary, and make the process exit with status 1.
+async fn run(args: MlTestRunnerArgs) -> anyhow::Result<()> {
+    let start_time = Instant::now();
+
+    // Discover test files
+    info!("Discovering test files from: {}", args.path.display());
+    let test_files = discover_test_files(&args.path)?;
+    info!("Found {} test file(s)", test_files.len());
+
+    // Load cached passed tests if provided. The set is only ever touched from this task, so it
+    // is kept as a plain local rather than behind shared synchronisation.
+    let mut cached_passed = if let Some(cache_file) = &args.cached_passed {
+        let cached = load_cached_passed(cache_file)?;
+        info!("Loaded {} cached passed test(s)", cached.len());
+        cached
+    } else {
+        HashSet::new()
+    };
+
+    // Statistics
+    let mut passed_files = 0;
+    let mut failed_files = 0;
+    let mut skipped_files = 0;
+    let mut failures = Vec::new();
+
+    const GREEN: &str = "\x1B[32m";
+    const RED: &str = "\x1B[31m";
+    const YELLOW: &str = "\x1B[33m";
+    const COLOUR_RESET: &str = "\x1B[0m";
+    const BOLD: &str = "\x1B[1m";
+    const BOLD_RESET: &str = "\x1B[22m";
+
+    // Process each file
+    for test_file in test_files {
+        let file_display = test_file.display().to_string();
+
+        // Check if already passed
+        if cached_passed.contains(&file_display) {
+            println!("test {} ... {YELLOW}cached{COLOUR_RESET}", file_display);
+            skipped_files += 1;
+            continue;
+        }
+
+        // Load metadata from file
+        info!("Loading metadata from: {}", test_file.display());
+        let metadata_file = match load_metadata_file(&test_file) {
+            Ok(mf) => {
+                info!("Loaded metadata with {} case(s)", mf.cases.len());
+                mf
+            }
+            Err(e) => {
+                // Format the message once; it is printed inline and repeated in the summary.
+                let message = format!("Error loading metadata: {}", e);
+                println!("test {} ... {RED}FAILED{COLOUR_RESET}", file_display);
+                println!("    {}", message);
+                failed_files += 1;
+                failures.push((file_display, message));
+                if args.bail {
+                    break;
+                }
+                continue;
+            }
+        };
+
+        // Execute test cases for this file
+        info!("Executing test file: {}", file_display);
+        match execute_test_file(&args, &metadata_file).await {
+            Ok(_) => {
+                println!("test {} ... {GREEN}ok{COLOUR_RESET}", file_display);
+                info!("Test file passed: {}", file_display);
+                passed_files += 1;
+
+                // Add to cache
+                cached_passed.insert(file_display);
+            }
+            Err(e) => {
+                println!("test {} ... {RED}FAILED{COLOUR_RESET}", file_display);
+                info!("Test file failed: {}", file_display);
+                failed_files += 1;
+                // `{:?}` on an anyhow error prints the whole context chain.
+                failures.push((file_display, format!("{:?}", e)));
+
+                if args.bail {
+                    info!("Bailing after first failure");
+                    break;
+                }
+            }
+        }
+    }
+
+    // Save cached passed tests
+    if let Some(cache_file) = &args.cached_passed {
+        info!("Saving {} cached passed test(s)", cached_passed.len());
+        save_cached_passed(cache_file, &cached_passed)?;
+    }
+
+    // Print summary
+    println!();
+    if !failures.is_empty() {
+        println!("{BOLD}failures:{BOLD_RESET}");
+        println!();
+        for (file, error) in &failures {
+            println!("---- {} ----", file);
+            println!("{}", error);
+            println!();
+        }
+    }
+
+    let elapsed = start_time.elapsed();
+    println!(
+        "test result: {}. {} passed; {} failed; {} cached; finished in {:.2}s",
+        if failed_files == 0 {
+            format!("{GREEN}ok{COLOUR_RESET}")
+        } else {
+            format!("{RED}FAILED{COLOUR_RESET}")
+        },
+        passed_files,
+        failed_files,
+        skipped_files,
+        elapsed.as_secs_f64()
+    );
+
+    // The cache has already been written above, so exiting here does not lose results.
+    if failed_files > 0 {
+        std::process::exit(1);
+    }
+
+    Ok(())
+}
+
+/// Resolves `path` into the list of test files to run.
+///
+/// * A directory is walked for `.sol` files.
+/// * A `.sol` file is returned as the only entry.
+/// * A `.json` file is parsed as a corpus and the metadata file paths of its tests are
+///   returned.
+///
+/// # Errors
+///
+/// Fails if the path does not exist, is neither a file nor a directory, has an unsupported
+/// extension, or if the corpus file cannot be loaded.
+fn discover_test_files(path: &Path) -> anyhow::Result<Vec<PathBuf>> {
+    if !path.exists() {
+        anyhow::bail!("Path does not exist: {}", path.display());
+    }
+
+    if path.is_dir() {
+        // Walk directory recursively for .sol files
+        let found = FilesWithExtensionIterator::new(path)
+            .with_allowed_extension("sol")
+            .with_use_cached_fs(true)
+            .collect();
+        return Ok(found);
+    }
+
+    if !path.is_file() {
+        anyhow::bail!("Path is neither a file nor a directory: {}", path.display());
+    }
+
+    // A missing or non-UTF-8 extension is treated as the empty string and rejected below.
+    let extension = path.extension().and_then(|ext| ext.to_str()).unwrap_or("");
+    match extension {
+        // Single .sol file
+        "sol" => Ok(vec![path.to_path_buf()]),
+        // Corpus file - enumerate its tests
+        "json" => {
+            let corpus = Corpus::try_from_path(path)?;
+            Ok(corpus
+                .enumerate_tests()
+                .into_iter()
+                .map(|test| test.metadata_file_path)
+                .collect())
+        }
+        other => anyhow::bail!(
+            "Unsupported file extension: {}. Expected .sol or .json",
+            other
+        ),
+    }
+}
+
+/// Parses the metadata of the test file at `path` and wraps it in a [`MetadataFile`].
+///
+/// Both the metadata file path and the corpus file path of the result are set to `path`.
+///
+/// # Errors
+///
+/// Fails if `Metadata::try_from_file` yields no metadata for `path`.
+fn load_metadata_file(path: &Path) -> anyhow::Result<MetadataFile> {
+    let content = match Metadata::try_from_file(path) {
+        Some(metadata) => metadata,
+        None => anyhow::bail!("Failed to load metadata from {}", path.display()),
+    };
+
+    let location = path.to_path_buf();
+    Ok(MetadataFile {
+        metadata_file_path: location.clone(),
+        corpus_file_path: location,
+        content,
+    })
+}
+
+/// Executes every compatible test case of a single metadata file on the selected platform.
+///
+/// The node is either spawned through the platform (`--start-platform`) or created with
+/// `new_existing()`. Each case is then turned into a test definition and run through a
+/// `Driver`. The first case that fails aborts the remaining cases of the file.
+///
+/// # Errors
+///
+/// Returns an error if the file has no cases, if the node, compiler, or driver cannot be set
+/// up, or if any case fails to execute.
+///
+/// NOTE(review): the node is leaked with `Box::leak` on every call, so it is never dropped. If
+/// dropping a node is what shuts down a spawned process, each file run with `--start-platform`
+/// would leave a node behind — confirm, and consider owning the node in the caller instead.
+async fn execute_test_file(
+    args: &MlTestRunnerArgs,
+    metadata_file: &MetadataFile,
+) -> anyhow::Result<()> {
+    if metadata_file.cases.is_empty() {
+        anyhow::bail!("No test cases found in file");
+    }
+
+    info!("Processing {} test case(s)", metadata_file.cases.len());
+
+    // Get the platform based on CLI args
+    let platform: &dyn Platform = match args.platform {
+        TestingPlatform::Geth => &revive_dt_core::GethEvmSolcPlatform,
+        TestingPlatform::Kitchensink => &revive_dt_core::KitchensinkPolkavmResolcPlatform,
+        TestingPlatform::Zombienet => &revive_dt_core::ZombienetPolkavmResolcPlatform,
+    };
+
+    // Create temporary working directory
+    let temp_dir = TempDir::new()?;
+    info!("Created temporary directory: {}", temp_dir.path().display());
+
+    // Create a test execution context (with defaults)
+    let test_context = TestExecutionContext::default();
+    let context = revive_dt_config::Context::Test(Box::new(test_context));
+
+    // Optionally start a node based on the --start-platform flag
+    let node: &'static dyn revive_dt_node_interaction::EthereumNode = if args.start_platform {
+        info!("Starting blockchain node...");
+        let node_handle = platform
+            .new_node(context.clone())
+            .context("Failed to spawn node thread")?;
+
+        info!("Waiting for node to start...");
+        let node = node_handle
+            .join()
+            .map_err(|e| anyhow::anyhow!("Node thread panicked: {:?}", e))?
+            .context("Failed to start node")?;
+
+        info!(
+            "Node started with ID: {}, connection: {}",
+            node.id(),
+            node.connection_string()
+        );
+
+        // Run pre-transactions on the node
+        let node = Box::leak(node); // Leak to get 'static lifetime for simplicity
+        info!("Running pre-transactions...");
+        node.pre_transactions()
+            .await
+            .context("Failed to run pre-transactions")?;
+        info!("Pre-transactions completed");
+
+        node
+    } else {
+        // No node is spawned: the test cases run against whatever `new_existing()` points at.
+        info!("Using existing node");
+        let existing_node: Box<dyn revive_dt_node_interaction::EthereumNode> = match args.platform {
+            TestingPlatform::Geth => {
+                Box::new(revive_dt_node::node_implementations::geth::GethNode::new_existing())
+            }
+            TestingPlatform::Kitchensink | TestingPlatform::Zombienet => Box::new(
+                revive_dt_node::node_implementations::substrate::SubstrateNode::new_existing(),
+            ),
+        };
+        Box::leak(existing_node)
+    };
+
+    // Create a cached compiler for this file (wrapped in Arc like the main code does)
+    info!("Initializing cached compiler");
+    let cached_compiler = CachedCompiler::new(temp_dir.path().join("compilation_cache"), false)
+        .await
+        .map(Arc::new)
+        .context("Failed to create cached compiler")?;
+
+    // Create a private key allocator
+    let private_key_allocator = Arc::new(Mutex::new(PrivateKeyAllocator::new(
+        alloy::primitives::U256::from(100),
+    )));
+
+    // Create reporter infrastructure (minimal, just for the Driver API)
+    // Note: We need to keep the report_task alive, otherwise the reporter channel closes
+    let (reporter, report_task) =
+        revive_dt_report::ReportAggregator::new(context.clone()).into_task();
+
+    // Spawn the report task in the background to keep the channel open
+    tokio::spawn(report_task);
+
+    info!(
+        "Building test definitions for {} case(s)",
+        metadata_file.cases.len()
+    );
+    // Build all test definitions upfront
+    let mut test_definitions = Vec::new();
+    for (case_idx, case) in metadata_file.cases.iter().enumerate() {
+        info!("Building test definition for case {}", case_idx);
+        let test_def = build_test_definition(
+            metadata_file,
+            case,
+            case_idx,
+            platform,
+            node,
+            &context,
+            &reporter,
+        )
+        .await?;
+
+        // `None` means the case was skipped as incompatible; it is simply left out.
+        if let Some(test_def) = test_def {
+            info!("Test definition for case {} created successfully", case_idx);
+            test_definitions.push(test_def);
+        }
+    }
+
+    // Execute each test case
+    info!("Executing {} test definition(s)", test_definitions.len());
+    for (idx, test_definition) in test_definitions.iter().enumerate() {
+        info!("─────────────────────────────────────────────────────────────────");
+        info!(
+            "Executing case {}/{}: case_idx={}, mode={}, steps={}",
+            idx + 1,
+            test_definitions.len(),
+            test_definition.case_idx,
+            test_definition.mode,
+            test_definition.case.steps.len()
+        );
+
+        info!("Creating driver for case {}", test_definition.case_idx);
+        let driver = revive_dt_core::differential_tests::Driver::new_root(
+            test_definition,
+            private_key_allocator.clone(),
+            &cached_compiler,
+        )
+        .await
+        .context("Failed to create driver")?;
+
+        info!(
+            "Running {} step(s) for case {}",
+            test_definition.case.steps.len(),
+            test_definition.case_idx
+        );
+        // Build the context message lazily so it is only formatted when the case fails.
+        let steps_executed = driver
+            .execute_all()
+            .await
+            .with_context(|| format!("Failed to execute case {}", test_definition.case_idx))?;
+        info!(
+            "✓ Case {} completed successfully, executed {} step(s)",
+            test_definition.case_idx, steps_executed
+        );
+    }
+    info!("─────────────────────────────────────────────────────────────────");
+    info!(
+        "All {} test case(s) executed successfully",
+        test_definitions.len()
+    );
+
+    Ok(())
+}
+
+/// Builds the [`TestDefinition`] for one case of a metadata file on a single platform/node.
+///
+/// The compilation mode is the first mode produced by the first entry of the case's `modes`,
+/// or of the metadata file's `modes` when the case has none. If that yields nothing, the first
+/// mode of `Mode::all()` is used.
+///
+/// Returns `Ok(None)` (after printing a "Skipping case" line) when the compatibility check of
+/// the definition fails.
+///
+/// # Errors
+///
+/// Fails if no compilation mode can be determined or if the platform cannot create a compiler.
+async fn build_test_definition<'a>(
+    metadata_file: &'a MetadataFile,
+    case: &'a revive_dt_format::case::Case,
+    case_idx: usize,
+    platform: &'a dyn Platform,
+    node: &'a dyn revive_dt_node_interaction::EthereumNode,
+    context: &revive_dt_config::Context,
+    reporter: &revive_dt_report::Reporter,
+) -> anyhow::Result<Option<TestDefinition<'a>>> {
+    // Determine mode - use case mode if specified, otherwise fall back to the file's modes and
+    // finally to the first default mode. A missing mode is reported as an error, not a panic.
+    let mode = case
+        .modes
+        .as_ref()
+        .or(metadata_file.modes.as_ref())
+        .and_then(|modes| modes.first())
+        .and_then(|parsed_mode| parsed_mode.to_modes().next())
+        .map(Cow::Owned)
+        .or_else(|| revive_dt_compiler::Mode::all().next().map(Cow::Borrowed))
+        .context("No compilation mode available for test case")?;
+
+    // Create a compiler for this mode
+    let compiler = platform
+        .new_compiler(context.clone(), mode.version.clone().map(Into::into))
+        .await
+        .context("Failed to create compiler")?;
+
+    // Create test-specific reporter
+    let test_reporter =
+        reporter.test_specific_reporter(Arc::new(revive_dt_report::TestSpecifier {
+            solc_mode: mode.as_ref().clone(),
+            metadata_file_path: metadata_file.metadata_file_path.clone(),
+            case_idx: CaseIdx::new(case_idx),
+        }));
+
+    // Create execution-specific reporter
+    let execution_reporter =
+        test_reporter.execution_specific_reporter(node.id(), platform.platform_identifier());
+
+    // Build platform information (a single entry: this runner targets one platform)
+    let mut platforms = BTreeMap::new();
+    platforms.insert(
+        platform.platform_identifier(),
+        TestPlatformInformation {
+            platform,
+            node,
+            compiler,
+            reporter: execution_reporter,
+        },
+    );
+
+    // Build test definition
+    let test_definition = TestDefinition {
+        metadata: metadata_file,
+        metadata_file_path: &metadata_file.metadata_file_path,
+        mode,
+        case_idx: CaseIdx::new(case_idx),
+        case,
+        platforms,
+        reporter: test_reporter,
+    };
+
+    // Check compatibility
+    if let Err((reason, _)) = test_definition.check_compatibility() {
+        println!("    Skipping case {}: {}", case_idx, reason);
+        return Ok(None);
+    }
+
+    Ok(Some(test_definition))
+}
+
+/// Reads the set of already-passed test names from `path`, one name per line.
+///
+/// Lines are trimmed and blank lines are ignored. A path that does not exist yields an empty
+/// set.
+///
+/// # Errors
+///
+/// Fails if the file exists but cannot be opened, or if reading a line fails.
+fn load_cached_passed(path: &Path) -> anyhow::Result<HashSet<String>> {
+    if !path.exists() {
+        return Ok(HashSet::new());
+    }
+
+    let handle = File::open(path).context("Failed to open cached-passed file")?;
+
+    // Collecting into a `Result` stops at the first read error, like an early `?` would.
+    BufReader::new(handle)
+        .lines()
+        .filter_map(|line| match line {
+            Ok(text) => {
+                let entry = text.trim();
+                (!entry.is_empty()).then(|| Ok(entry.to_string()))
+            }
+            Err(err) => Some(Err(anyhow::Error::from(err))),
+        })
+        .collect()
+}
+
+/// Writes the passed-test names in `cache` to `path`, one per line in sorted order.
+///
+/// The file is created, or truncated if it already exists.
+///
+/// # Errors
+///
+/// Fails if the file cannot be created or if writing or flushing fails.
+fn save_cached_passed(path: &Path, cache: &HashSet<String>) -> anyhow::Result<()> {
+    let handle = File::create(path).context("Failed to create cached-passed file")?;
+    let mut out = BufWriter::new(handle);
+
+    // Set entries are unique, so an unstable sort gives the same order as a stable one.
+    let mut names: Vec<&String> = cache.iter().collect();
+    names.sort_unstable();
+
+    names
+        .into_iter()
+        .try_for_each(|name| writeln!(out, "{}", name))?;
+
+    // Flush explicitly so that write errors are reported rather than lost on drop.
+    out.flush()?;
+    Ok(())
+}
@@ -74,6 +74,14 @@ pub trait EthereumNode {
                + '_,
        >,
    >;
+
+    /// Creates a node instance from an existing running node.
+    ///
+    /// # Panics
+    ///
+    /// This default implementation always panics; node types that support being created from
+    /// an existing node must override it.
+    fn new_existing() -> Self
+    where
+        // A constructor returning `Self` needs a sized type; the bound also keeps this method
+        // out of the `dyn EthereumNode` vtable.
+        Self: Sized,
+    {
+        panic!("new_existing is not implemented for this node type")
+    }
 }

 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -542,6 +542,22 @@ impl EthereumNode for GethNode {
                as Pin<Box<dyn Stream<Item = MinedBlockInformation>>>)
        })
    }
+
+    /// Builds a `GethNode` value without spawning a process.
+    ///
+    /// The connection string is fixed to `http://localhost:8545`, no process handle is held,
+    /// and every path field is empty.
+    fn new_existing() -> Self {
+        Self {
+            id: 0,
+            connection_string: String::from("http://localhost:8545"),
+            geth: PathBuf::default(),
+            base_directory: PathBuf::default(),
+            data_directory: PathBuf::default(),
+            logs_directory: PathBuf::default(),
+            handle: None,
+            start_timeout: Duration::ZERO,
+            wallet: Arc::new(EthereumWallet::default()),
+            nonce_manager: Default::default(),
+            provider: Default::default(),
+        }
+    }
 }

 pub struct GethNodeResolver {
@@ -541,6 +541,23 @@ impl EthereumNode for SubstrateNode {
                as Pin<Box<dyn Stream<Item = MinedBlockInformation>>>)
        })
    }
+
+    /// Builds a `SubstrateNode` value without spawning any process.
+    ///
+    /// The RPC URL is fixed to `http://localhost:8545`, no substrate or eth-proxy process is
+    /// held, and every binary/directory path and the chainspec command are empty.
+    fn new_existing() -> Self {
+        Self {
+            id: 0,
+            rpc_url: String::from("http://localhost:8545"),
+            node_binary: PathBuf::default(),
+            eth_proxy_binary: PathBuf::default(),
+            export_chainspec_command: String::default(),
+            base_directory: PathBuf::default(),
+            logs_directory: PathBuf::default(),
+            substrate_process: None,
+            eth_proxy_process: None,
+            wallet: Arc::new(EthereumWallet::default()),
+            nonce_manager: Default::default(),
+            provider: Default::default(),
+        }
+    }
 }

 pub struct SubstrateNodeResolver {