the revive-explorer utility (#364)

A maintainable and more precise version of what was a hacky but useful
script, exploring the compilers YUL lowering unit.

It analyzes a given shared objects from the debug dump and outputs:
- The count of each YUL statement translated.
- A per YUL statement break-down of bytecode size contributed per.
- Estimated `yul-phaser` cost parameters.

Signed-off-by: Cyrill Leutwiler <bigcyrill@hotmail.com>
This commit is contained in:
xermicus
2025-07-22 09:17:55 +02:00
committed by GitHub
parent c285a6ec3d
commit 9751481f6b
15 changed files with 709 additions and 1 deletions
+59
View File
@@ -0,0 +1,59 @@
//! The `llvm-dwarfdump` utility helper library.
use std::{
path::{Path, PathBuf},
process::{Command, Stdio},
};
pub static EXECUTABLE: &str = "llvm-dwarfdump";
pub static DEBUG_LINES_ARGUMENTS: [&str; 1] = ["--debug-line"];
pub static SOURCE_FILE_ARGUMENTS: [&str; 1] = ["--show-sources"];
/// Calls the `llvm-dwarfdump` tool to extract debug line information
/// from the shared object at `path`. Returns the output.
///
/// Provide `Some(dwarfdump_exectuable)` to override the default executable.
pub fn debug_lines(
shared_object: &Path,
dwarfdump_executable: &Option<PathBuf>,
) -> anyhow::Result<String> {
dwarfdump(shared_object, dwarfdump_executable, &DEBUG_LINES_ARGUMENTS)
}
/// Calls the `llvm-dwarfdump` tool to extract the source file name.
/// Returns the source file path.
///
/// Provide `Some(dwarfdump_exectuable)` to override the default executable.
pub fn source_file(
shared_object: &Path,
dwarfdump_executable: &Option<PathBuf>,
) -> anyhow::Result<PathBuf> {
let output = dwarfdump(shared_object, dwarfdump_executable, &SOURCE_FILE_ARGUMENTS)?;
Ok(output.trim().into())
}
/// The internal `llvm-dwarfdump` helper function.
fn dwarfdump(
shared_object: &Path,
dwarfdump_executable: &Option<PathBuf>,
arguments: &[&str],
) -> anyhow::Result<String> {
let executable = dwarfdump_executable
.to_owned()
.unwrap_or_else(|| PathBuf::from(EXECUTABLE));
let output = Command::new(executable)
.args(arguments)
.arg(shared_object)
.stdin(Stdio::null())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()?
.wait_with_output()?;
if !output.status.success() {
anyhow::bail!(String::from_utf8_lossy(&output.stderr).to_string());
}
Ok(String::from_utf8_lossy(&output.stdout).to_string())
}
+250
View File
@@ -0,0 +1,250 @@
//! The core dwarf dump analyzer library.
use std::{
collections::HashMap,
path::{Path, PathBuf},
};
use revive_yul::lexer::token::location::Location;
use crate::location_mapper::{self, map_locations, LocationMap};
/// Unknwon code.
pub const OTHER: &str = "other";
/// Compiler internal code.
pub const INTERNAL: &str = "internal";
/// YUL block code.
pub const BLOCK: &str = "block";
/// YUL function call code.
pub const FUNCTION_CALL: &str = "function_call";
/// YUL conditional code.
pub const IF: &str = "if";
/// YUL loop code.
pub const FOR: &str = "for";
/// YUL loop continue code.
pub const CONTINUE: &str = "continue";
/// YUL loop break code.
pub const BREAK: &str = "break";
/// YUL switch code.
pub const SWITCH: &str = "switch";
/// YUL variable declaration code.
pub const DECLARATION: &str = "let";
/// YUL variable assignment code.
pub const ASSIGNMENT: &str = "assignment";
/// YUL function definition code.
pub const FUNCTION_DEFINITION: &str = "function_definition";
/// YUL function leave code.
pub const LEAVE: &str = "leave";
/// The dwarf dump analyzer.
///
/// Loads debug information from `llvm-dwarfdump` and calculates statistics
/// about the compiled YUL statements:
/// - Statements count
/// - Per-statement
#[derive(Debug, Default)]
pub struct DwarfdumpAnalyzer {
/// The YUL source file path.
source: PathBuf,
/// The YUL location to statements map.
location_map: LocationMap,
/// The `llvm-dwarfdump --debug-lines` output.
debug_lines: String,
/// The observed statements.
statements_count: HashMap<String, usize>,
/// The observed statement to instructions size.
statements_size: HashMap<String, u64>,
}
impl DwarfdumpAnalyzer {
/// The debug info analyzer constructor.
///
/// `source` is the path to the YUL source file.
/// `debug_lines` is the `llvm-dwarfdump --debug-lines` output.
pub fn new(source: &Path, debug_lines: String) -> Self {
Self {
source: source.to_path_buf(),
debug_lines,
..Default::default()
}
}
/// Run the analysis.
pub fn analyze(&mut self) -> anyhow::Result<()> {
self.map_locations()?;
self.analyze_statements()?;
Ok(())
}
/// Populate the maps so that we can always unwrap later.
fn map_locations(&mut self) -> anyhow::Result<()> {
self.location_map = map_locations(&self.source)?;
self.statements_count = HashMap::with_capacity(self.location_map.len());
self.statements_size = HashMap::with_capacity(self.location_map.len());
for statement in self.location_map.values() {
if !self.statements_size.contains_key(statement) {
self.statements_size.insert(statement.clone(), 0);
}
*self.statements_count.entry(statement.clone()).or_insert(0) += 1;
}
Ok(())
}
/// Analyze how much bytes of insturctions each statement contributes.
fn analyze_statements(&mut self) -> anyhow::Result<()> {
let mut previous_offset = 0;
let mut previous_location = Location::new(0, 0);
for line in self
.debug_lines
.lines()
.skip_while(|line| !line.starts_with("Address"))
.skip(2)
{
let mut parts = line.split_whitespace();
let (Some(offset), Some(line), Some(column)) =
(parts.next(), parts.next(), parts.next())
else {
continue;
};
let current_offset = u64::from_str_radix(offset.trim_start_matches("0x"), 16)?;
let mut current_location = Location::new(line.parse()?, column.parse()?);
// TODO: A bug? Needs further investigation.
if current_location.line == 0 && current_location.column != 0 {
current_location.line = previous_location.line;
}
if let Some(statement) = self.location_map.get(&previous_location) {
let contribution = current_offset - previous_offset;
*self.statements_size.get_mut(statement).unwrap() += contribution;
}
previous_offset = current_offset;
previous_location = current_location;
}
Ok(())
}
/// Print the per-statement count break-down.
pub fn display_statement_count(&self) {
println!("statements count:");
for (statement, count) in self.statements_count.iter() {
println!("\t{statement} {count}");
}
}
/// Print the per-statement byte size contribution break-down.
pub fn display_statement_size(&self) {
println!("bytes per statement:");
for (statement, size) in self.statements_size.iter() {
println!("\t{statement} {size}");
}
}
/// Print the estimated `yul-phaser` cost parameters.
pub fn display_phaser_costs(&self, yul_phaser_scale: u64) {
println!("yul-phaser parameters:");
for (parameter, cost) in self.phaser_costs(yul_phaser_scale) {
println!("\t{parameter} {cost}");
}
}
/// Estimate the `yul-phaser` costs using the simplified weight function:
/// `Total size / toal count = cost`
pub fn phaser_costs(&self, yul_phaser_scale: u64) -> Vec<(String, u64)> {
let mut costs: HashMap<String, (usize, u64)> = HashMap::with_capacity(16);
for (statement, count) in self
.statements_count
.iter()
.filter(|(_, count)| **count > 0)
{
let size = self.statements_size.get(statement).unwrap();
let cost = match statement.as_str() {
location_mapper::FOR => "--for-loop-cost",
location_mapper::OTHER => continue,
location_mapper::INTERNAL => continue,
location_mapper::BLOCK => "--block-cost",
location_mapper::FUNCTION_CALL => "--function-call-cost",
location_mapper::IF => "--if-cost",
location_mapper::CONTINUE => "--continue-cost",
location_mapper::BREAK => "--break-cost",
location_mapper::LEAVE => "--leave-cost",
location_mapper::SWITCH => "--switch-cost",
location_mapper::DECLARATION => "--variable-declaration-cost",
location_mapper::ASSIGNMENT => "--assignment-cost",
location_mapper::FUNCTION_DEFINITION => "--function-definition-cost",
_ => "--expression-statement-cost",
};
let entry = costs.entry(cost.to_string()).or_default();
entry.0 += count;
entry.1 += size;
}
let costs = costs
.iter()
.map(|(cost, (count, size))| {
let ratio = *size / *count as u64;
(cost.to_string(), ratio.min(100))
})
.collect::<Vec<_>>();
let scaled_costs = scale_to(
costs
.iter()
.map(|(_, ratio)| *ratio)
.collect::<Vec<_>>()
.as_slice(),
yul_phaser_scale,
);
costs
.iter()
.zip(scaled_costs)
.map(|((cost, _), scaled_ratio)| (cost.to_string(), scaled_ratio))
.collect()
}
}
/// Given a slice of u64 values, returns a Vec<u64> where each element
/// is linearly scaled into the closed interval [1, 10].
fn scale_to(data: &[u64], scale_max: u64) -> Vec<u64> {
if data.is_empty() {
return Vec::new();
}
let mut min = data[0];
let mut max = data[0];
for &x in &data[1..] {
if x < min {
min = x;
}
if x > max {
max = x;
}
}
if max < scale_max {
return data.to_vec();
}
let range = max - min;
data.iter()
.map(|&x| {
if range == 0 {
1
} else {
1 + (x - min) * scale_max / range
}
})
.collect()
}
+6
View File
@@ -0,0 +1,6 @@
//! The revive explorer leverages debug info to get insights into emitted code.
pub mod dwarfdump;
pub mod dwarfdump_analyzer;
pub mod location_mapper;
pub mod yul_phaser;
+158
View File
@@ -0,0 +1,158 @@
//! The location mapper utility maps YUL source locations to AST statements.
//!
//! TODO: Refactor when the AST visitor is implemented.
use std::{collections::HashMap, path::Path};
use revive_yul::{
lexer::{token::location::Location, Lexer},
parser::statement::{
block::Block,
expression::{function_call::name::Name, Expression},
object::Object,
Statement,
},
};
/// Code attributed to an unknown location.
pub const OTHER: &str = "other";
/// Code attributed to a compiler internal location.
pub const INTERNAL: &str = "internal";
/// Code attributed to a
pub const BLOCK: &str = "block";
pub const FUNCTION_CALL: &str = "function_call";
pub const FOR: &str = "for";
pub const IF: &str = "if";
pub const CONTINUE: &str = "continue";
pub const BREAK: &str = "break";
pub const LEAVE: &str = "leave";
pub const SWITCH: &str = "switch";
pub const DECLARATION: &str = "let";
pub const ASSIGNMENT: &str = "assignment";
pub const FUNCTION_DEFINITION: &str = "function_definition";
/// The location to statements map type alias.
pub type LocationMap = HashMap<Location, String>;
/// Construct a [LocationMap] from the given YUL `source` file.
pub fn map_locations(source: &Path) -> anyhow::Result<LocationMap> {
let mut lexer = Lexer::new(std::fs::read_to_string(source)?);
let ast = Object::parse(&mut lexer, None).map_err(|error| {
anyhow::anyhow!("Contract `{}` parsing error: {:?}", source.display(), error)
})?;
let mut location_map = HashMap::with_capacity(1024);
crate::location_mapper::object_mapper(&mut location_map, &ast);
location_map.insert(Location::new(0, 0), OTHER.to_string());
location_map.insert(Location::new(1, 0), INTERNAL.to_string());
Ok(location_map)
}
/// Map the [Block].
fn block_mapper(map: &mut LocationMap, block: &Block) {
map.insert(block.location, BLOCK.to_string());
for statement in &block.statements {
statement_mapper(map, statement);
}
}
/// Map the [Expression].
fn expression_mapper(map: &mut LocationMap, expression: &Expression) {
if let Expression::FunctionCall(call) = expression {
let id = match call.name {
Name::UserDefined(_) => FUNCTION_CALL.to_string(),
_ => format!("{:?}", call.name),
};
map.insert(expression.location(), id);
for expression in &call.arguments {
expression_mapper(map, expression);
}
}
}
/// Map the [Statement].
fn statement_mapper(map: &mut LocationMap, statement: &Statement) {
match statement {
Statement::Object(object) => object_mapper(map, object),
Statement::Code(code) => block_mapper(map, &code.block),
Statement::Block(block) => block_mapper(map, block),
Statement::ForLoop(for_loop) => {
map.insert(for_loop.location, FOR.to_string());
expression_mapper(map, &for_loop.condition);
block_mapper(map, &for_loop.body);
block_mapper(map, &for_loop.initializer);
block_mapper(map, &for_loop.finalizer);
}
Statement::IfConditional(if_conditional) => {
map.insert(if_conditional.location, IF.to_string());
expression_mapper(map, &if_conditional.condition);
block_mapper(map, &if_conditional.block);
}
Statement::Expression(expression) => expression_mapper(map, expression),
Statement::Continue(location) => {
map.insert(*location, CONTINUE.to_string());
}
Statement::Leave(location) => {
map.insert(*location, LEAVE.to_string());
}
Statement::Break(location) => {
map.insert(*location, BREAK.to_string());
}
Statement::Switch(switch) => {
map.insert(switch.expression.location(), SWITCH.to_string());
expression_mapper(map, &switch.expression);
for case in &switch.cases {
block_mapper(map, &case.block);
}
if let Some(block) = switch.default.as_ref() {
block_mapper(map, block);
}
}
Statement::Assignment(assignment) => {
map.insert(assignment.location, ASSIGNMENT.to_string());
expression_mapper(map, &assignment.initializer);
}
Statement::VariableDeclaration(declaration) => {
map.insert(declaration.location, DECLARATION.to_string());
if let Some(expression) = declaration.expression.as_ref() {
expression_mapper(map, expression);
}
}
Statement::FunctionDefinition(definition) => {
map.insert(definition.location, FUNCTION_DEFINITION.to_string());
block_mapper(map, &definition.body);
}
}
}
/// Map the [Object].
fn object_mapper(map: &mut LocationMap, object: &Object) {
map.insert(object.location, object.identifier.clone());
block_mapper(map, &object.code.block);
if let Some(object) = object.inner_object.as_ref() {
object_mapper(map, object);
}
}
+56
View File
@@ -0,0 +1,56 @@
use std::path::PathBuf;
use clap::Parser;
use revive_explorer::{dwarfdump, dwarfdump_analyzer::DwarfdumpAnalyzer, yul_phaser};
/// The `revive-explorer` is a helper utility for exploring the compilers YUL lowering unit.
///
/// It analyzes a given shared objects from the debug dump and outputs:
/// - The count of each YUL statement translated.
/// - A per YUL statement break-down of bytecode size contributed per.
/// - Estimated `yul-phaser` cost parameters.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Args {
/// Path of the dwarfdump executable.
#[arg(short, long)]
dwarfdump: Option<PathBuf>,
/// The YUL phaser cost scale maximum value.
#[arg(short, long, default_value_t = 10)]
cost_scale: u64,
/// Run the provided yul-phaser executable using the estimated costs.
#[arg(short, long)]
yul_phaser: Option<PathBuf>,
/// Path of the shared object to analyze.
file: PathBuf,
}
fn main() -> anyhow::Result<()> {
let args = Args::parse();
let source_file = dwarfdump::source_file(&args.file, &args.dwarfdump)?;
let debug_lines = dwarfdump::debug_lines(&args.file, &args.dwarfdump)?;
let mut analyzer = DwarfdumpAnalyzer::new(source_file.as_path(), debug_lines);
analyzer.analyze()?;
if let Some(path) = args.yul_phaser.as_ref() {
yul_phaser::run(
path,
source_file.as_path(),
analyzer.phaser_costs(args.cost_scale).as_slice(),
num_cpus::get() / 2, // TODO: should be configurable.
)?;
return Ok(());
}
analyzer.display_statement_count();
analyzer.display_statement_size();
analyzer.display_phaser_costs(args.cost_scale);
Ok(())
}
+79
View File
@@ -0,0 +1,79 @@
//! The revive explorer YUL phaser utility library.
//!
//! This can be used to invoke the `yul-phaser` utility,
//! used to find better YUL optimizer sequences.
use std::{
path::{Path, PathBuf},
process::{Command, Stdio},
thread,
time::{SystemTime, UNIX_EPOCH},
};
/// The `yul-phaser` sane default arguments:
/// - Less verbose output.
/// - Sufficient rounds.
/// - Sufficient random population start.
const ARGUMENTS: [&str; 6] = [
"--hide-round",
"--rounds",
"1000",
"--random-population",
"100",
"--show-only-top-chromosome",
];
/// Run multiple YUL phaser executables in parallel.
pub fn run(
executable: &Path,
source: &Path,
costs: &[(String, u64)],
n_threads: usize,
) -> anyhow::Result<()> {
let mut handles = Vec::with_capacity(n_threads);
for n in 0..n_threads {
let executable = executable.to_path_buf();
let source = source.to_path_buf();
let costs = costs.to_vec();
handles.push(thread::spawn(move || {
spawn_process(executable, source, costs, n)
}));
}
for handle in handles {
let _ = handle.join();
}
Ok(())
}
/// The `yul-phaser` process spawning helper function.
fn spawn_process(
executable: PathBuf,
source: PathBuf,
costs: Vec<(String, u64)>,
seed: usize,
) -> anyhow::Result<()> {
let cost_parameters = costs
.iter()
.flat_map(|(parameter, cost)| vec![parameter.clone(), cost.to_string()]);
let secs = SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect("Time went backwards")
.as_secs();
Command::new(executable)
.args(cost_parameters)
.args(ARGUMENTS)
.arg("--seed")
.arg((seed + secs as usize).to_string())
.arg(source)
.stdin(Stdio::null())
.spawn()?
.wait()?;
Ok(())
}