the revive-explorer utility (#364)

A maintainable and more precise version of what was a hacky but useful
script, exploring the compiler's YUL lowering unit.

It analyzes a given shared object from the debug dump and outputs:
- The count of each YUL statement translated.
- A per YUL statement break-down of the bytecode size contributed.
- Estimated `yul-phaser` cost parameters.

Signed-off-by: Cyrill Leutwiler <bigcyrill@hotmail.com>
This commit is contained in:
xermicus
2025-07-22 09:17:55 +02:00
committed by GitHub
parent c285a6ec3d
commit 9751481f6b
15 changed files with 709 additions and 1 deletions
+1
View File
@@ -16,6 +16,7 @@ Supported `polkadot-sdk` rev: `2503.0.1`
- Line debug information per YUL builtin and for `if` statements.
- Column numbers in debug information.
- Support for the YUL optimizer details in the standard json input definition.
- The `revive-explorer` compiler utility.
### Fixed
- The debug info source file matches the YUL path in `--debug-output-dir`, allowing tools to display the source line.
Generated
+10
View File
@@ -8615,6 +8615,16 @@ dependencies = [
"tempfile",
]
[[package]]
name = "revive-explorer"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"num_cpus",
"revive-yul",
]
[[package]]
name = "revive-integration"
version = "0.1.1"
+1
View File
@@ -19,6 +19,7 @@ revive-benchmarks = { version = "0.1.0", path = "crates/benchmarks" }
revive-builtins = { version = "0.1.0", path = "crates/builtins" }
revive-common = { version = "0.1.0", path = "crates/common" }
revive-differential = { version = "0.1.0", path = "crates/differential" }
revive-explorer = { version = "0.1.0", path = "crates/explorer" }
revive-integration = { version = "0.1.1", path = "crates/integration" }
revive-linker = { version = "0.1.0", path = "crates/linker" }
lld-sys = { version = "0.1.0", path = "crates/lld-sys" }
+5 -1
View File
@@ -6,6 +6,7 @@
install-llvm-builder \
install-llvm \
install-revive-runner \
install-revive-explorer \
format \
clippy \
machete \
@@ -43,6 +44,9 @@ install-llvm: install-llvm-builder
install-revive-runner:
cargo install --locked --force --path crates/runner --no-default-features
install-revive-explorer:
cargo install --locked --force --path crates/explorer --no-default-features
format:
cargo fmt --all --check
@@ -53,7 +57,7 @@ machete:
cargo install cargo-machete
cargo machete
test: format clippy machete test-cli test-workspace install-revive-runner
test: format clippy machete test-cli test-workspace install-revive-runner install-revive-explorer
test-integration: install-bin
cargo test --package revive-integration
+20
View File
@@ -0,0 +1,20 @@
[package]
name = "revive-explorer"
version.workspace = true
license.workspace = true
edition.workspace = true
repository.workspace = true
authors.workspace = true
description = "Helper utility to inspect debug builds"
[[bin]]
name = "revive-explorer"
path = "src/main.rs"
[dependencies]
anyhow = { workspace = true }
clap = { workspace = true, features = ["help", "std", "derive"] }
num_cpus = { workspace = true }
revive-yul = { workspace = true }
+49
View File
@@ -0,0 +1,49 @@
# revive-explorer
The `revive-explorer` is a helper utility for exploring the compiler's YUL lowering unit.
It analyzes a given shared object from the debug dump and outputs:
- The count of each YUL statement translated.
- A per YUL statement break-down of the bytecode size contributed.
- Estimated `yul-phaser` cost parameters.
Example:
```
statements count:
block 532
Caller 20
Not 73
Gas 24
Shr 2
...
Shl 259
SetImmutable 2
CodeSize 1
CallDataLoad 87
Return 56
bytes per statement:
Or 756
CodeCopy 158
Log3 620
Return 1562
MStore 36128
...
ReturnDataCopy 2854
DataOffset 28
assignment 1194
Number 540
CallValue 4258
yul-phaser parameters:
--break-cost 1
--variable-declaration-cost 3
--function-call-cost 8
--if-cost 4
--expression-statement-cost 6
--function-definition-cost 11
--switch-cost 3
--block-cost 1
--leave-cost 1
--assignment-cost 1
```
+59
View File
@@ -0,0 +1,59 @@
//! The `llvm-dwarfdump` utility helper library.
use std::{
path::{Path, PathBuf},
process::{Command, Stdio},
};
/// The default `llvm-dwarfdump` executable name, used when no override is provided.
pub static EXECUTABLE: &str = "llvm-dwarfdump";
/// The `llvm-dwarfdump` arguments used to dump the debug line information.
pub static DEBUG_LINES_ARGUMENTS: [&str; 1] = ["--debug-line"];
/// The `llvm-dwarfdump` arguments used to print the source file name.
pub static SOURCE_FILE_ARGUMENTS: [&str; 1] = ["--show-sources"];
/// Calls the `llvm-dwarfdump` tool to extract debug line information
/// from the shared object at `shared_object`. Returns the captured output.
///
/// Provide `Some(dwarfdump_executable)` to override the default executable.
///
/// # Errors
///
/// Returns an error if the tool can not be run or exits unsuccessfully.
pub fn debug_lines(
shared_object: &Path,
dwarfdump_executable: &Option<PathBuf>,
) -> anyhow::Result<String> {
dwarfdump(shared_object, dwarfdump_executable, &DEBUG_LINES_ARGUMENTS)
}
/// Calls the `llvm-dwarfdump` tool to extract the source file name
/// from the shared object at `shared_object`.
/// Returns the source file path.
///
/// Provide `Some(dwarfdump_executable)` to override the default executable.
///
/// # Errors
///
/// Returns an error if the tool can not be run or exits unsuccessfully.
pub fn source_file(
shared_object: &Path,
dwarfdump_executable: &Option<PathBuf>,
) -> anyhow::Result<PathBuf> {
let output = dwarfdump(shared_object, dwarfdump_executable, &SOURCE_FILE_ARGUMENTS)?;
// The whole trimmed output is taken as the path.
// NOTE(review): presumably the tool prints exactly one source file here — confirm.
Ok(output.trim().into())
}
/// The internal `llvm-dwarfdump` helper function.
fn dwarfdump(
shared_object: &Path,
dwarfdump_executable: &Option<PathBuf>,
arguments: &[&str],
) -> anyhow::Result<String> {
let executable = dwarfdump_executable
.to_owned()
.unwrap_or_else(|| PathBuf::from(EXECUTABLE));
let output = Command::new(executable)
.args(arguments)
.arg(shared_object)
.stdin(Stdio::null())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()?
.wait_with_output()?;
if !output.status.success() {
anyhow::bail!(String::from_utf8_lossy(&output.stderr).to_string());
}
Ok(String::from_utf8_lossy(&output.stdout).to_string())
}
+250
View File
@@ -0,0 +1,250 @@
//! The core dwarf dump analyzer library.
use std::{
collections::HashMap,
path::{Path, PathBuf},
};
use revive_yul::lexer::token::location::Location;
use crate::location_mapper::{self, map_locations, LocationMap};
/// Unknown code.
pub const OTHER: &str = "other";
/// Compiler internal code.
pub const INTERNAL: &str = "internal";
/// YUL block code.
pub const BLOCK: &str = "block";
/// YUL function call code.
pub const FUNCTION_CALL: &str = "function_call";
/// YUL conditional code.
pub const IF: &str = "if";
/// YUL loop code.
pub const FOR: &str = "for";
/// YUL loop continue code.
pub const CONTINUE: &str = "continue";
/// YUL loop break code.
pub const BREAK: &str = "break";
/// YUL switch code.
pub const SWITCH: &str = "switch";
/// YUL variable declaration code.
pub const DECLARATION: &str = "let";
/// YUL variable assignment code.
pub const ASSIGNMENT: &str = "assignment";
/// YUL function definition code.
pub const FUNCTION_DEFINITION: &str = "function_definition";
/// YUL function leave code.
pub const LEAVE: &str = "leave";
/// The dwarf dump analyzer.
///
/// Loads debug information from `llvm-dwarfdump` and calculates statistics
/// about the compiled YUL statements:
/// - Statements count
/// - Per-statement instructions size
#[derive(Debug, Default)]
pub struct DwarfdumpAnalyzer {
/// The YUL source file path.
source: PathBuf,
/// The YUL location to statements map.
location_map: LocationMap,
/// The `llvm-dwarfdump --debug-line` output.
debug_lines: String,
/// The observed statements.
statements_count: HashMap<String, usize>,
/// The observed statement to instructions size.
statements_size: HashMap<String, u64>,
}
impl DwarfdumpAnalyzer {
    /// The debug info analyzer constructor.
    ///
    /// `source` is the path to the YUL source file.
    /// `debug_lines` is the `llvm-dwarfdump --debug-line` output.
    pub fn new(source: &Path, debug_lines: String) -> Self {
        Self {
            source: source.to_path_buf(),
            debug_lines,
            ..Default::default()
        }
    }

    /// Run the analysis.
    ///
    /// Maps the YUL source locations to statements first and then attributes
    /// the emitted instruction bytes to those statements.
    pub fn analyze(&mut self) -> anyhow::Result<()> {
        self.map_locations()?;
        self.analyze_statements()?;
        Ok(())
    }

    /// Populate the maps so that every mapped statement has
    /// a count entry and a (zero initialized) size entry.
    fn map_locations(&mut self) -> anyhow::Result<()> {
        self.location_map = map_locations(&self.source)?;

        self.statements_count = HashMap::with_capacity(self.location_map.len());
        self.statements_size = HashMap::with_capacity(self.location_map.len());

        for statement in self.location_map.values() {
            self.statements_size.entry(statement.clone()).or_insert(0);
            *self.statements_count.entry(statement.clone()).or_insert(0) += 1;
        }

        Ok(())
    }

    /// Analyze how many bytes of instructions each statement contributes.
    ///
    /// The bytes between two consecutive rows of the line table are
    /// attributed to the statement found at the location of the earlier row.
    fn analyze_statements(&mut self) -> anyhow::Result<()> {
        let mut previous_offset = 0u64;
        let mut previous_location = Location::new(0, 0);

        // The rows start after the `Address ...` header and the separator line below it.
        for line in self
            .debug_lines
            .lines()
            .skip_while(|line| !line.starts_with("Address"))
            .skip(2)
        {
            let mut parts = line.split_whitespace();
            let (Some(offset), Some(line), Some(column)) =
                (parts.next(), parts.next(), parts.next())
            else {
                continue;
            };

            let current_offset = u64::from_str_radix(offset.trim_start_matches("0x"), 16)?;
            let mut current_location = Location::new(line.parse()?, column.parse()?);

            // TODO: A bug? Needs further investigation.
            if current_location.line == 0 && current_location.column != 0 {
                current_location.line = previous_location.line;
            }

            if let Some(statement) = self.location_map.get(&previous_location) {
                // Addresses are not guaranteed to grow monotonically; a row
                // at a lower address must not underflow the subtraction.
                let contribution = current_offset.saturating_sub(previous_offset);
                if let Some(size) = self.statements_size.get_mut(statement) {
                    *size += contribution;
                }
            }

            previous_offset = current_offset;
            previous_location = current_location;
        }

        Ok(())
    }

    /// Print the per-statement count break-down.
    pub fn display_statement_count(&self) {
        println!("statements count:");
        for (statement, count) in self.statements_count.iter() {
            println!("\t{statement} {count}");
        }
    }

    /// Print the per-statement byte size contribution break-down.
    pub fn display_statement_size(&self) {
        println!("bytes per statement:");
        for (statement, size) in self.statements_size.iter() {
            println!("\t{statement} {size}");
        }
    }

    /// Print the estimated `yul-phaser` cost parameters.
    pub fn display_phaser_costs(&self, yul_phaser_scale: u64) {
        println!("yul-phaser parameters:");
        for (parameter, cost) in self.phaser_costs(yul_phaser_scale) {
            println!("\t{parameter} {cost}");
        }
    }

    /// Estimate the `yul-phaser` costs using the simplified weight function:
    /// `Total size / total count = cost`
    ///
    /// Statements are grouped by the `yul-phaser` parameter they belong to;
    /// every statement without a dedicated parameter counts towards
    /// `--expression-statement-cost`. The per-parameter ratio is capped
    /// at 100 before it is scaled using `yul_phaser_scale`.
    ///
    /// Returns the `(parameter, cost)` pairs in no particular order.
    pub fn phaser_costs(&self, yul_phaser_scale: u64) -> Vec<(String, u64)> {
        let mut costs: HashMap<&'static str, (usize, u64)> = HashMap::with_capacity(16);

        for (statement, count) in self
            .statements_count
            .iter()
            .filter(|(_, count)| **count > 0)
        {
            let size = self
                .statements_size
                .get(statement)
                .copied()
                .unwrap_or_default();
            let cost = match statement.as_str() {
                location_mapper::FOR => "--for-loop-cost",
                location_mapper::OTHER => continue,
                location_mapper::INTERNAL => continue,
                location_mapper::BLOCK => "--block-cost",
                location_mapper::FUNCTION_CALL => "--function-call-cost",
                location_mapper::IF => "--if-cost",
                location_mapper::CONTINUE => "--continue-cost",
                location_mapper::BREAK => "--break-cost",
                location_mapper::LEAVE => "--leave-cost",
                location_mapper::SWITCH => "--switch-cost",
                location_mapper::DECLARATION => "--variable-declaration-cost",
                location_mapper::ASSIGNMENT => "--assignment-cost",
                location_mapper::FUNCTION_DEFINITION => "--function-definition-cost",
                _ => "--expression-statement-cost",
            };

            let entry = costs.entry(cost).or_default();
            entry.0 += count;
            entry.1 += size;
        }

        // Only statements with a non-zero count were aggregated,
        // so the division below can not divide by zero.
        let (parameters, ratios): (Vec<String>, Vec<u64>) = costs
            .into_iter()
            .map(|(cost, (count, size))| (cost.to_string(), (size / count as u64).min(100)))
            .unzip();

        parameters
            .into_iter()
            .zip(scale_to(&ratios, yul_phaser_scale))
            .collect()
    }
}
/// Given a slice of u64 values, returns a Vec<u64> where each element
/// is linearly scaled into the closed interval `[1, scale_max]`.
///
/// - Empty input yields an empty vector.
/// - If the largest value already fits within `scale_max`,
///   the values are returned unchanged.
/// - If all values are equal (but exceed `scale_max`), every value maps to 1.
/// - A `scale_max` of zero is treated like a `scale_max` of one.
fn scale_to(data: &[u64], scale_max: u64) -> Vec<u64> {
    let (Some(&min), Some(&max)) = (data.iter().min(), data.iter().max()) else {
        return Vec::new();
    };

    if max <= scale_max {
        return data.to_vec();
    }

    let range = u128::from(max - min);
    if range == 0 {
        return vec![1; data.len()];
    }

    // The smallest value maps to 1, so only `scale_max - 1` steps remain
    // for the largest value to land exactly on `scale_max`.
    let span = u128::from(scale_max.saturating_sub(1));

    data.iter()
        .map(|&x| {
            // The product is calculated using 128 bits to rule out overflows.
            let scaled = u128::from(x - min) * span / range;
            1 + u64::try_from(scaled).expect("the scaled value never exceeds the span")
        })
        .collect()
}
+6
View File
@@ -0,0 +1,6 @@
//! The revive explorer leverages debug info to get insights into emitted code.
// The `llvm-dwarfdump` invocation helpers.
pub mod dwarfdump;
// The analyzer calculating statistics from the dwarf dump.
pub mod dwarfdump_analyzer;
// The YUL source location to statement mapping.
pub mod location_mapper;
// The `yul-phaser` invocation helpers.
pub mod yul_phaser;
+158
View File
@@ -0,0 +1,158 @@
//! The location mapper utility maps YUL source locations to AST statements.
//!
//! TODO: Refactor when the AST visitor is implemented.
use std::{collections::HashMap, path::Path};
use revive_yul::{
lexer::{token::location::Location, Lexer},
parser::statement::{
block::Block,
expression::{function_call::name::Name, Expression},
object::Object,
Statement,
},
};
/// Code attributed to an unknown location.
pub const OTHER: &str = "other";
/// Code attributed to a compiler internal location.
pub const INTERNAL: &str = "internal";
/// Code attributed to a YUL block.
pub const BLOCK: &str = "block";
/// Code attributed to a call of a user defined YUL function.
pub const FUNCTION_CALL: &str = "function_call";
/// Code attributed to a YUL `for` loop.
pub const FOR: &str = "for";
/// Code attributed to a YUL `if` conditional.
pub const IF: &str = "if";
/// Code attributed to a YUL `continue` statement.
pub const CONTINUE: &str = "continue";
/// Code attributed to a YUL `break` statement.
pub const BREAK: &str = "break";
/// Code attributed to a YUL `leave` statement.
pub const LEAVE: &str = "leave";
/// Code attributed to a YUL `switch` statement.
pub const SWITCH: &str = "switch";
/// Code attributed to a YUL variable declaration.
pub const DECLARATION: &str = "let";
/// Code attributed to a YUL variable assignment.
pub const ASSIGNMENT: &str = "assignment";
/// Code attributed to a YUL function definition.
pub const FUNCTION_DEFINITION: &str = "function_definition";
/// The location to statements map type alias.
pub type LocationMap = HashMap<Location, String>;
/// Construct a [LocationMap] from the given YUL `source` file.
pub fn map_locations(source: &Path) -> anyhow::Result<LocationMap> {
let mut lexer = Lexer::new(std::fs::read_to_string(source)?);
let ast = Object::parse(&mut lexer, None).map_err(|error| {
anyhow::anyhow!("Contract `{}` parsing error: {:?}", source.display(), error)
})?;
let mut location_map = HashMap::with_capacity(1024);
crate::location_mapper::object_mapper(&mut location_map, &ast);
location_map.insert(Location::new(0, 0), OTHER.to_string());
location_map.insert(Location::new(1, 0), INTERNAL.to_string());
Ok(location_map)
}
/// Map the [Block].
///
/// Registers the block location as [BLOCK] before
/// mapping the statements it contains, in order.
fn block_mapper(map: &mut LocationMap, block: &Block) {
    map.insert(block.location, BLOCK.to_string());

    block
        .statements
        .iter()
        .for_each(|inner| statement_mapper(map, inner));
}
/// Map the [Expression].
///
/// Only function calls are mapped: Calls of user defined functions
/// are registered as [FUNCTION_CALL], any other call is registered
/// using the debug representation of its name. The call arguments
/// are mapped recursively.
fn expression_mapper(map: &mut LocationMap, expression: &Expression) {
    let Expression::FunctionCall(call) = expression else {
        return;
    };

    let id = if matches!(call.name, Name::UserDefined(_)) {
        FUNCTION_CALL.to_string()
    } else {
        format!("{:?}", call.name)
    };
    map.insert(expression.location(), id);

    call.arguments
        .iter()
        .for_each(|argument| expression_mapper(map, argument));
}
/// Map the [Statement].
///
/// Registers the statement under its own location and
/// recurses into any nested expressions and blocks.
fn statement_mapper(map: &mut LocationMap, statement: &Statement) {
    match statement {
        Statement::Object(object) => object_mapper(map, object),
        Statement::Code(code) => block_mapper(map, &code.block),
        Statement::Block(block) => block_mapper(map, block),
        Statement::ForLoop(for_loop) => {
            map.insert(for_loop.location, FOR.to_string());
            expression_mapper(map, &for_loop.condition);
            block_mapper(map, &for_loop.body);
            block_mapper(map, &for_loop.initializer);
            block_mapper(map, &for_loop.finalizer);
        }
        Statement::IfConditional(if_conditional) => {
            map.insert(if_conditional.location, IF.to_string());
            expression_mapper(map, &if_conditional.condition);
            block_mapper(map, &if_conditional.block);
        }
        Statement::Expression(expression) => expression_mapper(map, expression),
        Statement::Continue(location) => {
            map.insert(*location, CONTINUE.to_string());
        }
        Statement::Leave(location) => {
            map.insert(*location, LEAVE.to_string());
        }
        Statement::Break(location) => {
            map.insert(*location, BREAK.to_string());
        }
        Statement::Switch(switch) => {
            // Key the switch by its own location, like every other statement.
            // Keying it by the location of the switch expression lets the
            // expression mapper below overwrite the entry whenever the
            // switch expression is a function call.
            map.insert(switch.location, SWITCH.to_string());
            expression_mapper(map, &switch.expression);
            for case in &switch.cases {
                block_mapper(map, &case.block);
            }
            if let Some(block) = switch.default.as_ref() {
                block_mapper(map, block);
            }
        }
        Statement::Assignment(assignment) => {
            map.insert(assignment.location, ASSIGNMENT.to_string());
            expression_mapper(map, &assignment.initializer);
        }
        Statement::VariableDeclaration(declaration) => {
            map.insert(declaration.location, DECLARATION.to_string());
            if let Some(expression) = declaration.expression.as_ref() {
                expression_mapper(map, expression);
            }
        }
        Statement::FunctionDefinition(definition) => {
            map.insert(definition.location, FUNCTION_DEFINITION.to_string());
            block_mapper(map, &definition.body);
        }
    }
}
/// Map the [Object].
///
/// Registers the object location using the object identifier, maps
/// its code block and then continues with the inner object, if any.
fn object_mapper(map: &mut LocationMap, object: &Object) {
    let mut current = Some(object);

    while let Some(node) = current {
        map.insert(node.location, node.identifier.clone());
        block_mapper(map, &node.code.block);
        current = node.inner_object.as_deref();
    }
}
+56
View File
@@ -0,0 +1,56 @@
use std::path::PathBuf;
use clap::Parser;
use revive_explorer::{dwarfdump, dwarfdump_analyzer::DwarfdumpAnalyzer, yul_phaser};
/// The `revive-explorer` is a helper utility for exploring the compiler's YUL lowering unit.
///
/// It analyzes a given shared object from the debug dump and outputs:
/// - The count of each YUL statement translated.
/// - A per YUL statement break-down of the bytecode size contributed.
/// - Estimated `yul-phaser` cost parameters.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Args {
    /// Path of the dwarfdump executable.
    #[arg(short, long)]
    dwarfdump: Option<PathBuf>,

    /// The YUL phaser cost scale maximum value.
    #[arg(short, long, default_value_t = 10)]
    cost_scale: u64,

    /// Run the provided yul-phaser executable using the estimated costs.
    #[arg(short, long)]
    yul_phaser: Option<PathBuf>,

    /// Path of the shared object to analyze.
    file: PathBuf,
}
/// The `revive-explorer` entry point.
///
/// Analyzes the shared object provided on the command line. If a
/// `yul-phaser` executable was provided, it is run using the estimated
/// costs; otherwise the statistics are printed.
fn main() -> anyhow::Result<()> {
    let args = Args::parse();

    let source_file = dwarfdump::source_file(&args.file, &args.dwarfdump)?;
    let debug_lines = dwarfdump::debug_lines(&args.file, &args.dwarfdump)?;
    let mut analyzer = DwarfdumpAnalyzer::new(source_file.as_path(), debug_lines);

    analyzer.analyze()?;

    if let Some(path) = args.yul_phaser.as_ref() {
        // Half of the available cores but never zero: On a single core
        // machine the division would otherwise silently run nothing.
        // TODO: should be configurable.
        let n_threads = (num_cpus::get() / 2).max(1);

        yul_phaser::run(
            path,
            source_file.as_path(),
            analyzer.phaser_costs(args.cost_scale).as_slice(),
            n_threads,
        )?;
        return Ok(());
    }

    analyzer.display_statement_count();
    analyzer.display_statement_size();
    analyzer.display_phaser_costs(args.cost_scale);

    Ok(())
}
+79
View File
@@ -0,0 +1,79 @@
//! The revive explorer YUL phaser utility library.
//!
//! This can be used to invoke the `yul-phaser` utility,
//! used to find better YUL optimizer sequences.
use std::{
path::{Path, PathBuf},
process::{Command, Stdio},
thread,
time::{SystemTime, UNIX_EPOCH},
};
/// The `yul-phaser` sane default arguments:
/// - Less verbose output.
/// - Sufficient rounds.
/// - Sufficient random population start.
///
/// Passed to every spawned `yul-phaser` process after the cost parameters.
const ARGUMENTS: [&str; 6] = [
"--hide-round",
"--rounds",
"1000",
"--random-population",
"100",
"--show-only-top-chromosome",
];
/// Run multiple YUL phaser executables in parallel.
pub fn run(
executable: &Path,
source: &Path,
costs: &[(String, u64)],
n_threads: usize,
) -> anyhow::Result<()> {
let mut handles = Vec::with_capacity(n_threads);
for n in 0..n_threads {
let executable = executable.to_path_buf();
let source = source.to_path_buf();
let costs = costs.to_vec();
handles.push(thread::spawn(move || {
spawn_process(executable, source, costs, n)
}));
}
for handle in handles {
let _ = handle.join();
}
Ok(())
}
/// The `yul-phaser` process spawning helper function.
///
/// Passes the cost parameters, the default [ARGUMENTS], a seed derived
/// from `seed` plus the current UNIX time in seconds and finally the
/// `source` path to the `executable`, then waits for it to exit.
fn spawn_process(
    executable: PathBuf,
    source: PathBuf,
    costs: Vec<(String, u64)>,
    seed: usize,
) -> anyhow::Result<()> {
    let now = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("Time went backwards")
        .as_secs();
    let seed_value = seed + now as usize;

    let mut command = Command::new(executable);
    for (parameter, cost) in &costs {
        command.arg(parameter).arg(cost.to_string());
    }
    command
        .args(ARGUMENTS)
        .arg("--seed")
        .arg(seed_value.to_string())
        .arg(source)
        .stdin(Stdio::null());

    let mut child = command.spawn()?;
    child.wait()?;

    Ok(())
}
+10
View File
@@ -1,5 +1,8 @@
//! The lexical token location.
use std::hash::Hash;
use std::hash::Hasher;
use serde::Deserialize;
use serde::Serialize;
@@ -48,6 +51,13 @@ impl PartialEq for Location {
}
}
impl Hash for Location {
    /// Feeds the line followed by the column into the hasher.
    fn hash<H: Hasher>(&self, state: &mut H) {
        let Self { line, column, .. } = self;
        line.hash(state);
        column.hash(state);
    }
}
impl std::fmt::Display for Location {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}:{}", self.line, self.column)
@@ -997,6 +997,8 @@ impl FunctionCall {
}
arguments.reverse();
context.set_debug_location(self.location.line, self.location.column, None)?;
Ok(arguments.try_into().expect("Always successful"))
}
@@ -1014,6 +1016,8 @@ impl FunctionCall {
}
arguments.reverse();
context.set_debug_location(self.location.line, self.location.column, None)?;
Ok(arguments.try_into().expect("Always successful"))
}
}
@@ -196,6 +196,7 @@ where
&mut self,
context: &mut revive_llvm_context::PolkaVMContext<D>,
) -> anyhow::Result<()> {
context.set_debug_location(self.location.line, self.location.column, None)?;
let argument_types: Vec<_> = self
.arguments
.iter()