ml-runner init

2026-06-09 20:21:04 +00:00 · 2025-10-07 16:10:43 +00:00
parent 6da3172581
commit 6e64f678ee
12 changed files with 762 additions and 3 deletions
@@ -0,0 +1,2 @@
+// Re-export the cached compiler from core to avoid code duplication
+pub use revive_dt_core::CachedCompiler;
@@ -0,0 +1,531 @@
+use anyhow::Context;
+use clap::Parser;
+use revive_dt_common::{iterators::FilesWithExtensionIterator, types::PrivateKeyAllocator};
+use revive_dt_config::{TestExecutionContext, TestingPlatform};
+use revive_dt_core::{
+    CachedCompiler, Platform,
+    helpers::{TestDefinition, TestPlatformInformation},
+};
+use revive_dt_format::{
+    case::CaseIdx,
+    corpus::Corpus,
+    metadata::{Metadata, MetadataFile},
+};
+use revive_dt_node_interaction::EthereumNode;
+use std::{
+    borrow::Cow,
+    collections::{BTreeMap, HashSet},
+    fs::File,
+    io::{BufRead, BufReader, BufWriter, Write},
+    path::{Path, PathBuf},
+    sync::Arc,
+    time::Instant,
+};
+use temp_dir::TempDir;
+use tokio::sync::Mutex;
+use tracing::info;
+use tracing_subscriber::{EnvFilter, FmtSubscriber};
+
+/// ML-based test runner for executing differential tests file by file
+#[derive(Debug, Parser)]
+#[command(name = "ml-test-runner")]
+struct MlTestRunnerArgs {
+    /// Path to test file (.sol), corpus file (.json), or folder containing .sol files
+    #[arg(value_name = "PATH")]
+    path: PathBuf,
+
+    /// File to cache tests that have already passed
+    #[arg(long = "cached-passed")]
+    cached_passed: Option<PathBuf>,
+
+    /// Stop after the first file failure
+    #[arg(long = "bail")]
+    bail: bool,
+
+    /// Platform to test against (geth or kitchensink)
+    #[arg(long = "platform", default_value = "geth")]
+    platform: TestingPlatform,
+
+    /// Start the platform and wait for RPC readiness
+    #[arg(long = "start-platform", default_value = "false")]
+    start_platform: bool,
+}
+
+fn main() -> anyhow::Result<()> {
+    // Initialize tracing subscriber
+    let subscriber = FmtSubscriber::builder()
+        .with_env_filter(EnvFilter::from_default_env())
+        .with_writer(std::io::stderr)
+        .finish();
+    tracing::subscriber::set_global_default(subscriber).expect("Failed to set tracing subscriber");
+
+    let args = MlTestRunnerArgs::parse();
+
+    info!("ML test runner starting");
+    info!("Platform: {:?}", args.platform);
+    info!("Start platform: {}", args.start_platform);
+
+    // Run the async body
+    tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()
+        .expect("Failed building the Runtime")
+        .block_on(run(args))
+}
+
+async fn run(args: MlTestRunnerArgs) -> anyhow::Result<()> {
+    let start_time = Instant::now();
+
+    // Discover test files
+    info!("Discovering test files from: {}", args.path.display());
+    let test_files = discover_test_files(&args.path)?;
+    info!("Found {} test file(s)", test_files.len());
+
+    // Load cached passed tests if provided
+    let cached_passed = if let Some(cache_file) = &args.cached_passed {
+        let cached = load_cached_passed(cache_file)?;
+        info!("Loaded {} cached passed test(s)", cached.len());
+        cached
+    } else {
+        HashSet::new()
+    };
+
+    let cached_passed = Arc::new(Mutex::new(cached_passed));
+
+    // Statistics
+    let mut passed_files = 0;
+    let mut failed_files = 0;
+    let mut skipped_files = 0;
+    let mut failures = Vec::new();
+
+    const GREEN: &str = "\x1B[32m";
+    const RED: &str = "\x1B[31m";
+    const YELLOW: &str = "\x1B[33m";
+    const COLOUR_RESET: &str = "\x1B[0m";
+    const BOLD: &str = "\x1B[1m";
+    const BOLD_RESET: &str = "\x1B[22m";
+
+    // Process each file
+    for test_file in test_files {
+        let file_display = test_file.display().to_string();
+
+        // Check if already passed
+        {
+            let cache = cached_passed.lock().await;
+            if cache.contains(&file_display) {
+                println!("test {} ... {YELLOW}cached{COLOUR_RESET}", file_display);
+                skipped_files += 1;
+                continue;
+            }
+        }
+
+        // Load metadata from file
+        info!("Loading metadata from: {}", test_file.display());
+        let metadata_file = match load_metadata_file(&test_file) {
+            Ok(mf) => {
+                info!("Loaded metadata with {} case(s)", mf.cases.len());
+                mf
+            }
+            Err(e) => {
+                println!("test {} ... {RED}FAILED{COLOUR_RESET}", file_display);
+                println!("    Error loading metadata: {}", e);
+                failed_files += 1;
+                failures.push((
+                    file_display.clone(),
+                    format!("Error loading metadata: {}", e),
+                ));
+                if args.bail {
+                    break;
+                }
+                continue;
+            }
+        };
+
+        // Execute test cases for this file
+        info!("Executing test file: {}", file_display);
+        match execute_test_file(&args, &metadata_file).await {
+            Ok(_) => {
+                println!("test {} ... {GREEN}ok{COLOUR_RESET}", file_display);
+                info!("Test file passed: {}", file_display);
+                passed_files += 1;
+
+                // Add to cache
+                {
+                    let mut cache = cached_passed.lock().await;
+                    cache.insert(file_display);
+                }
+            }
+            Err(e) => {
+                println!("test {} ... {RED}FAILED{COLOUR_RESET}", file_display);
+                info!("Test file failed: {}", file_display);
+                failed_files += 1;
+                failures.push((file_display, format!("{:?}", e)));
+
+                if args.bail {
+                    info!("Bailing after first failure");
+                    break;
+                }
+            }
+        }
+    }
+
+    // Save cached passed tests
+    if let Some(cache_file) = &args.cached_passed {
+        let cache = cached_passed.lock().await;
+        info!("Saving {} cached passed test(s)", cache.len());
+        save_cached_passed(cache_file, &cache)?;
+    }
+
+    // Print summary
+    println!();
+    if !failures.is_empty() {
+        println!("{BOLD}failures:{BOLD_RESET}");
+        println!();
+        for (file, error) in &failures {
+            println!("---- {} ----", file);
+            println!("{}", error);
+            println!();
+        }
+    }
+
+    let elapsed = start_time.elapsed();
+    println!(
+        "test result: {}. {} passed; {} failed; {} cached; finished in {:.2}s",
+        if failed_files == 0 {
+            format!("{GREEN}ok{COLOUR_RESET}")
+        } else {
+            format!("{RED}FAILED{COLOUR_RESET}")
+        },
+        passed_files,
+        failed_files,
+        skipped_files,
+        elapsed.as_secs_f64()
+    );
+
+    if failed_files > 0 {
+        std::process::exit(1);
+    }
+
+    Ok(())
+}
+
+/// Discover test files from the given path
+fn discover_test_files(path: &Path) -> anyhow::Result<Vec<PathBuf>> {
+    if !path.exists() {
+        anyhow::bail!("Path does not exist: {}", path.display());
+    }
+
+    let mut files = Vec::new();
+
+    if path.is_file() {
+        let extension = path.extension().and_then(|s| s.to_str()).unwrap_or("");
+
+        match extension {
+            "sol" => {
+                // Single .sol file
+                files.push(path.to_path_buf());
+            }
+            "json" => {
+                // Corpus file - enumerate its tests
+                let corpus = Corpus::try_from_path(path)?;
+                let metadata_files = corpus.enumerate_tests();
+                for metadata in metadata_files {
+                    files.push(metadata.metadata_file_path);
+                }
+            }
+            _ => anyhow::bail!(
+                "Unsupported file extension: {}. Expected .sol or .json",
+                extension
+            ),
+        }
+    } else if path.is_dir() {
+        // Walk directory recursively for .sol files
+        for entry in FilesWithExtensionIterator::new(path)
+            .with_allowed_extension("sol")
+            .with_use_cached_fs(true)
+        {
+            files.push(entry);
+        }
+    } else {
+        anyhow::bail!("Path is neither a file nor a directory: {}", path.display());
+    }
+
+    Ok(files)
+}
+
+/// Load metadata from a test file
+fn load_metadata_file(path: &Path) -> anyhow::Result<MetadataFile> {
+    let metadata = Metadata::try_from_file(path)
+        .ok_or_else(|| anyhow::anyhow!("Failed to load metadata from {}", path.display()))?;
+
+    Ok(MetadataFile {
+        metadata_file_path: path.to_path_buf(),
+        corpus_file_path: path.to_path_buf(),
+        content: metadata,
+    })
+}
+
+/// Execute all test cases in a metadata file
+async fn execute_test_file(
+    args: &MlTestRunnerArgs,
+    metadata_file: &MetadataFile,
+) -> anyhow::Result<()> {
+    if metadata_file.cases.is_empty() {
+        anyhow::bail!("No test cases found in file");
+    }
+
+    info!("Processing {} test case(s)", metadata_file.cases.len());
+
+    // Get the platform based on CLI args
+    let platform: &dyn Platform = match args.platform {
+        TestingPlatform::Geth => &revive_dt_core::GethEvmSolcPlatform,
+        TestingPlatform::Kitchensink => &revive_dt_core::KitchensinkPolkavmResolcPlatform,
+        TestingPlatform::Zombienet => &revive_dt_core::ZombienetPolkavmResolcPlatform,
+    };
+
+    // Create temporary working directory
+    let temp_dir = TempDir::new()?;
+    info!("Created temporary directory: {}", temp_dir.path().display());
+
+    // Create a test execution context (with defaults)
+    let test_context = TestExecutionContext::default();
+    let context = revive_dt_config::Context::Test(Box::new(test_context));
+
+    // Optionally start a node based on the --start-platform flag
+    let node: &'static dyn revive_dt_node_interaction::EthereumNode = if args.start_platform {
+        info!("Starting blockchain node...");
+        let node_handle = platform
+            .new_node(context.clone())
+            .context("Failed to spawn node thread")?;
+
+        info!("Waiting for node to start...");
+        let node = node_handle
+            .join()
+            .map_err(|e| anyhow::anyhow!("Node thread panicked: {:?}", e))?
+            .context("Failed to start node")?;
+
+        info!(
+            "Node started with ID: {}, connection: {}",
+            node.id(),
+            node.connection_string()
+        );
+
+        // Run pre-transactions on the node
+        let node = Box::leak(node); // Leak to get 'static lifetime for simplicity
+        info!("Running pre-transactions...");
+        node.pre_transactions()
+            .await
+            .context("Failed to run pre-transactions")?;
+        info!("Pre-transactions completed");
+
+        node
+    } else {
+        info!("Using existing node");
+        let existing_node: Box<dyn revive_dt_node_interaction::EthereumNode> = match args.platform {
+            TestingPlatform::Geth => {
+                Box::new(revive_dt_node::node_implementations::geth::GethNode::new_existing())
+            }
+            TestingPlatform::Kitchensink | TestingPlatform::Zombienet => Box::new(
+                revive_dt_node::node_implementations::substrate::SubstrateNode::new_existing(),
+            ),
+        };
+        Box::leak(existing_node)
+    };
+
+    // Create a cached compiler for this file (wrapped in Arc like the main code does)
+    info!("Initializing cached compiler");
+    let cached_compiler = CachedCompiler::new(temp_dir.path().join("compilation_cache"), false)
+        .await
+        .map(Arc::new)
+        .context("Failed to create cached compiler")?;
+
+    // Create a private key allocator
+    let private_key_allocator = Arc::new(Mutex::new(PrivateKeyAllocator::new(
+        alloy::primitives::U256::from(100),
+    )));
+
+    // Create reporter infrastructure (minimal, just for the Driver API)
+    // Note: We need to keep the report_task alive, otherwise the reporter channel closes
+    let (reporter, report_task) =
+        revive_dt_report::ReportAggregator::new(context.clone()).into_task();
+
+    // Spawn the report task in the background to keep the channel open
+    tokio::spawn(report_task);
+
+    info!(
+        "Building test definitions for {} case(s)",
+        metadata_file.cases.len()
+    );
+    // Build all test definitions upfront
+    let mut test_definitions = Vec::new();
+    for (case_idx, case) in metadata_file.cases.iter().enumerate() {
+        info!("Building test definition for case {}", case_idx);
+        let test_def = build_test_definition(
+            metadata_file,
+            case,
+            case_idx,
+            platform,
+            node,
+            &context,
+            &reporter,
+        )
+        .await?;
+
+        if let Some(test_def) = test_def {
+            info!("Test definition for case {} created successfully", case_idx);
+            test_definitions.push(test_def);
+        }
+    }
+
+    // Execute each test case
+    info!("Executing {} test definition(s)", test_definitions.len());
+    for (idx, test_definition) in test_definitions.iter().enumerate() {
+        info!("─────────────────────────────────────────────────────────────────");
+        info!(
+            "Executing case {}/{}: case_idx={}, mode={}, steps={}",
+            idx + 1,
+            test_definitions.len(),
+            test_definition.case_idx,
+            test_definition.mode,
+            test_definition.case.steps.len()
+        );
+
+        info!("Creating driver for case {}", test_definition.case_idx);
+        let driver = revive_dt_core::differential_tests::Driver::new_root(
+            test_definition,
+            private_key_allocator.clone(),
+            &cached_compiler,
+        )
+        .await
+        .context("Failed to create driver")?;
+
+        info!(
+            "Running {} step(s) for case {}",
+            test_definition.case.steps.len(),
+            test_definition.case_idx
+        );
+        let steps_executed = driver.execute_all().await.context(format!(
+            "Failed to execute case {}",
+            test_definition.case_idx
+        ))?;
+        info!(
+            "✓ Case {} completed successfully, executed {} step(s)",
+            test_definition.case_idx, steps_executed
+        );
+    }
+    info!("─────────────────────────────────────────────────────────────────");
+    info!(
+        "All {} test case(s) executed successfully",
+        test_definitions.len()
+    );
+
+    Ok(())
+}
+
+/// Build a test definition for a single test case
+async fn build_test_definition<'a>(
+    metadata_file: &'a MetadataFile,
+    case: &'a revive_dt_format::case::Case,
+    case_idx: usize,
+    platform: &'a dyn Platform,
+    node: &'a dyn revive_dt_node_interaction::EthereumNode,
+    context: &revive_dt_config::Context,
+    reporter: &revive_dt_report::Reporter,
+) -> anyhow::Result<Option<TestDefinition<'a>>> {
+    // Determine mode - use case mode if specified, otherwise use default
+    let mode = case
+        .modes
+        .as_ref()
+        .or(metadata_file.modes.as_ref())
+        .and_then(|modes| modes.first())
+        .and_then(|parsed_mode| parsed_mode.to_modes().next())
+        .map(Cow::Owned)
+        .or_else(|| revive_dt_compiler::Mode::all().next().map(Cow::Borrowed))
+        .unwrap();
+
+    // Create a compiler for this mode
+    let compiler = platform
+        .new_compiler(context.clone(), mode.version.clone().map(Into::into))
+        .await
+        .context("Failed to create compiler")?;
+
+    // Create test-specific reporter
+    let test_reporter =
+        reporter.test_specific_reporter(Arc::new(revive_dt_report::TestSpecifier {
+            solc_mode: mode.as_ref().clone(),
+            metadata_file_path: metadata_file.metadata_file_path.clone(),
+            case_idx: CaseIdx::new(case_idx),
+        }));
+
+    // Create execution-specific reporter
+    let execution_reporter =
+        test_reporter.execution_specific_reporter(node.id(), platform.platform_identifier());
+
+    // Build platform information
+    let mut platforms = BTreeMap::new();
+    platforms.insert(
+        platform.platform_identifier(),
+        TestPlatformInformation {
+            platform,
+            node,
+            compiler,
+            reporter: execution_reporter,
+        },
+    );
+
+    // Build test definition
+    let test_definition = TestDefinition {
+        metadata: metadata_file,
+        metadata_file_path: &metadata_file.metadata_file_path,
+        mode,
+        case_idx: CaseIdx::new(case_idx),
+        case,
+        platforms,
+        reporter: test_reporter,
+    };
+
+    // Check compatibility
+    if let Err((reason, _)) = test_definition.check_compatibility() {
+        println!("    Skipping case {}: {}", case_idx, reason);
+        return Ok(None);
+    }
+
+    Ok(Some(test_definition))
+}
+
+/// Load cached passed tests from file
+fn load_cached_passed(path: &Path) -> anyhow::Result<HashSet<String>> {
+    if !path.exists() {
+        return Ok(HashSet::new());
+    }
+
+    let file = File::open(path).context("Failed to open cached-passed file")?;
+    let reader = BufReader::new(file);
+
+    let mut cache = HashSet::new();
+    for line in reader.lines() {
+        let line = line?;
+        let trimmed = line.trim();
+        if !trimmed.is_empty() {
+            cache.insert(trimmed.to_string());
+        }
+    }
+
+    Ok(cache)
+}
+
+/// Save cached passed tests to file
+fn save_cached_passed(path: &Path, cache: &HashSet<String>) -> anyhow::Result<()> {
+    let file = File::create(path).context("Failed to create cached-passed file")?;
+    let mut writer = BufWriter::new(file);
+
+    let mut entries: Vec<_> = cache.iter().collect();
+    entries.sort();
+
+    for entry in entries {
+        writeln!(writer, "{}", entry)?;
+    }
+
+    writer.flush()?;
+    Ok(())
+}