YUL tree visitor interface (#369)

- Implement the visitor interface. This simplifies working with the AST,
for example transformations into other IRs or collecting and analyzing
various statistics.
- Switch the explorer to use the visitor interface.
- Add the reciprocal function name conversion for function names.
- Some drive-by cosmetic fixes.

---------

Signed-off-by: xermicus <bigcyrill@hotmail.com>
This commit is contained in:
xermicus
2025-08-10 00:08:25 +02:00
committed by GitHub
parent 72f9e4f66e
commit 903cbd7159
24 changed files with 1303 additions and 162 deletions
+10 -1
View File
@@ -29,7 +29,16 @@ pub fn source_file(
dwarfdump_executable: &Option<PathBuf>,
) -> anyhow::Result<PathBuf> {
let output = dwarfdump(shared_object, dwarfdump_executable, &SOURCE_FILE_ARGUMENTS)?;
Ok(output.trim().into())
let output = output.trim();
if output.is_empty() {
anyhow::bail!(
"the shared object at path `{}` doesn't contain the source file name. Hint: compile with debug information (-g)?",
shared_object.display()
);
}
Ok(output.into())
}
/// The internal `llvm-dwarfdump` helper function.
+5 -33
View File
@@ -7,34 +7,7 @@ use std::{
use revive_yul::lexer::token::location::Location;
use crate::location_mapper::{self, map_locations, LocationMap};
/// Unknown code.
pub const OTHER: &str = "other";
/// Compiler internal code.
pub const INTERNAL: &str = "internal";
/// YUL block code.
pub const BLOCK: &str = "block";
/// YUL function call code.
pub const FUNCTION_CALL: &str = "function_call";
/// YUL conditional code.
pub const IF: &str = "if";
/// YUL loop code.
pub const FOR: &str = "for";
/// YUL loop continue code.
pub const CONTINUE: &str = "continue";
/// YUL loop break code.
pub const BREAK: &str = "break";
/// YUL switch code.
pub const SWITCH: &str = "switch";
/// YUL variable declaration code.
pub const DECLARATION: &str = "let";
/// YUL variable assignment code.
pub const ASSIGNMENT: &str = "assignment";
/// YUL function definition code.
pub const FUNCTION_DEFINITION: &str = "function_definition";
/// YUL function leave code.
pub const LEAVE: &str = "leave";
use crate::location_mapper::{self, LocationMapper};
/// The dwarf dump analyzer.
///
@@ -48,7 +21,7 @@ pub struct DwarfdumpAnalyzer {
source: PathBuf,
/// The YUL location to statements map.
location_map: LocationMap,
location_map: HashMap<Location, String>,
/// The `llvm-dwarfdump --debug-lines` output.
debug_lines: String,
@@ -81,7 +54,7 @@ impl DwarfdumpAnalyzer {
/// Populate the maps so that we can always unwrap later.
fn map_locations(&mut self) -> anyhow::Result<()> {
self.location_map = map_locations(&self.source)?;
self.location_map = LocationMapper::map_locations(&self.source)?;
self.statements_count = HashMap::with_capacity(self.location_map.len());
self.statements_size = HashMap::with_capacity(self.location_map.len());
@@ -176,13 +149,12 @@ impl DwarfdumpAnalyzer {
location_mapper::BLOCK => "--block-cost",
location_mapper::FUNCTION_CALL => "--function-call-cost",
location_mapper::IF => "--if-cost",
location_mapper::CONTINUE => "--continue-cost",
location_mapper::BREAK => "--break-cost",
location_mapper::LEAVE => "--leave-cost",
location_mapper::SWITCH => "--switch-cost",
location_mapper::DECLARATION => "--variable-declaration-cost",
location_mapper::ASSIGNMENT => "--assignment-cost",
location_mapper::FUNCTION_DEFINITION => "--function-definition-cost",
location_mapper::IDENTIFIER => "--identifier-cost",
location_mapper::LITERAL => "--literal-cost",
_ => "--expression-statement-cost",
};
+92 -127
View File
@@ -1,158 +1,123 @@
//! The location mapper utility maps YUL source locations to AST statements.
//!
//! TODO: Refactor when the AST visitor is implemented.
use std::{collections::HashMap, path::Path};
use revive_yul::{
lexer::{token::location::Location, Lexer},
parser::statement::{
block::Block,
expression::{function_call::name::Name, Expression},
object::Object,
Statement,
parser::{
identifier::Identifier,
statement::{
assignment::Assignment,
block::Block,
expression::{function_call::FunctionCall, literal::Literal},
for_loop::ForLoop,
function_definition::FunctionDefinition,
if_conditional::IfConditional,
object::Object,
switch::Switch,
variable_declaration::VariableDeclaration,
},
},
visitor::{AstNode, AstVisitor},
};
/// Code attributed to an unknown location.
pub const OTHER: &str = "other";
/// Code attributed to a compiler internal location.
pub const INTERNAL: &str = "internal";
/// Code attributed to a block.
pub const BLOCK: &str = "block";
/// Code attributed to a function call.
pub const FUNCTION_CALL: &str = "function_call";
/// Code attributed to a for loop.
pub const FOR: &str = "for";
/// Code attributed to an if statement.
pub const IF: &str = "if";
/// Code attributed to a loop continue statement.
pub const CONTINUE: &str = "continue";
/// Code attributed to a loop break statement.
pub const BREAK: &str = "break";
/// Code attributed to a function leave statement.
pub const LEAVE: &str = "leave";
/// Code attributed to a switch statement.
pub const SWITCH: &str = "switch";
/// Code attributed to a variable declaration.
pub const DECLARATION: &str = "let";
/// Code attributed to a variable assignment.
pub const ASSIGNMENT: &str = "assignment";
/// Code attributed to a function definition.
pub const FUNCTION_DEFINITION: &str = "function_definition";
/// Code attributed to an identifier.
pub const IDENTIFIER: &str = "identifier";
/// Code attributed to a literal.
pub const LITERAL: &str = "literal";
/// The location to statements map type alias.
pub type LocationMap = HashMap<Location, String>;
/// The location to statements mapper.
///
/// A newtype around the map from YUL source [Location]s to statement labels.
pub struct LocationMapper(HashMap<Location, String>);
/// Construct a [LocationMap] from the given YUL `source` file.
pub fn map_locations(source: &Path) -> anyhow::Result<LocationMap> {
let mut lexer = Lexer::new(std::fs::read_to_string(source)?);
let ast = Object::parse(&mut lexer, None).map_err(|error| {
anyhow::anyhow!("Contract `{}` parsing error: {:?}", source.display(), error)
})?;
impl LocationMapper {
/// Construct a node location map from the given YUL `source` file.
pub fn map_locations(source: &Path) -> anyhow::Result<HashMap<Location, String>> {
let mut lexer = Lexer::new(std::fs::read_to_string(source)?);
let ast = Object::parse(&mut lexer, None).map_err(|error| {
anyhow::anyhow!("Contract `{}` parsing error: {:?}", source.display(), error)
})?;
let mut location_map = HashMap::with_capacity(1024);
crate::location_mapper::object_mapper(&mut location_map, &ast);
location_map.insert(Location::new(0, 0), OTHER.to_string());
location_map.insert(Location::new(1, 0), INTERNAL.to_string());
let mut location_map = Self(Default::default());
ast.accept(&mut location_map);
location_map.0.insert(Location::new(0, 0), OTHER.into());
location_map.0.insert(Location::new(1, 0), INTERNAL.into());
Ok(location_map)
}
/// Map the [Block].
fn block_mapper(map: &mut LocationMap, block: &Block) {
map.insert(block.location, BLOCK.to_string());
for statement in &block.statements {
statement_mapper(map, statement);
Ok(location_map.0)
}
}
/// Map the [Expression].
fn expression_mapper(map: &mut LocationMap, expression: &Expression) {
if let Expression::FunctionCall(call) = expression {
let id = match call.name {
Name::UserDefined(_) => FUNCTION_CALL.to_string(),
_ => format!("{:?}", call.name),
};
map.insert(expression.location(), id);
impl AstVisitor for LocationMapper {
/// Generic visit hook: records nothing for `node` itself and only descends
/// into its children.
///
/// NOTE(review): presumably the fallback for node kinds without a dedicated
/// `visit_*` override; confirm against the `AstVisitor` trait.
fn visit(&mut self, node: &impl AstNode) {
node.visit_children(self);
}
for expression in &call.arguments {
expression_mapper(map, expression);
}
}
}
/// Map the [Statement].
///
/// Dispatches on the statement variant: where a variant has a label constant,
/// it is inserted into `map` under a source location, and nested expressions
/// and blocks are then handed to `expression_mapper`, `block_mapper` or
/// `object_mapper`. Objects, code sections, plain blocks and expression
/// statements insert nothing here and are only forwarded.
fn statement_mapper(map: &mut LocationMap, statement: &Statement) {
match statement {
// Pure forwarding arms: the callee inserts the labels.
Statement::Object(object) => object_mapper(map, object),
Statement::Code(code) => block_mapper(map, &code.block),
Statement::Block(block) => block_mapper(map, block),
Statement::ForLoop(for_loop) => {
map.insert(for_loop.location, FOR.to_string());
expression_mapper(map, &for_loop.condition);
// The body is mapped before the initializer and finalizer blocks.
block_mapper(map, &for_loop.body);
block_mapper(map, &for_loop.initializer);
block_mapper(map, &for_loop.finalizer);
}
Statement::IfConditional(if_conditional) => {
map.insert(if_conditional.location, IF.to_string());
expression_mapper(map, &if_conditional.condition);
block_mapper(map, &if_conditional.block);
}
Statement::Expression(expression) => expression_mapper(map, expression),
// These three variants carry only their location.
Statement::Continue(location) => {
map.insert(*location, CONTINUE.to_string());
}
Statement::Leave(location) => {
map.insert(*location, LEAVE.to_string());
}
Statement::Break(location) => {
map.insert(*location, BREAK.to_string());
}
Statement::Switch(switch) => {
// NOTE(review): the label is keyed by the location of the switch
// expression, not of the statement. `expression_mapper` below inserts
// under that same key when the expression is a function call, which
// replaces the SWITCH entry. Confirm this is intended.
map.insert(switch.expression.location(), SWITCH.to_string());
expression_mapper(map, &switch.expression);
for case in &switch.cases {
block_mapper(map, &case.block);
}
// The default branch is optional.
if let Some(block) = switch.default.as_ref() {
block_mapper(map, block);
}
}
Statement::Assignment(assignment) => {
map.insert(assignment.location, ASSIGNMENT.to_string());
expression_mapper(map, &assignment.initializer);
}
Statement::VariableDeclaration(declaration) => {
map.insert(declaration.location, DECLARATION.to_string());
// A declaration may come without an initializer expression.
if let Some(expression) = declaration.expression.as_ref() {
expression_mapper(map, expression);
}
}
Statement::FunctionDefinition(definition) => {
map.insert(definition.location, FUNCTION_DEFINITION.to_string());
block_mapper(map, &definition.body);
}
}
}
/// Map the [Object].
fn object_mapper(map: &mut LocationMap, object: &Object) {
map.insert(object.location, object.identifier.clone());
block_mapper(map, &object.code.block);
if let Some(object) = object.inner_object.as_ref() {
object_mapper(map, object);
/// Visits the children of the [Block] first, then records the block's own
/// location under the [BLOCK] label.
fn visit_block(&mut self, node: &Block) {
    node.visit_children(self);
    // Inserted last: an entry already stored under the same location is replaced.
    let label = String::from(BLOCK);
    self.0.insert(node.location, label);
}
/// Visits the children of the [Assignment] first, then records the
/// assignment's own location under the [ASSIGNMENT] label.
fn visit_assignment(&mut self, node: &Assignment) {
    node.visit_children(self);
    // Inserted last: an entry already stored under the same location is replaced.
    let label = String::from(ASSIGNMENT);
    self.0.insert(node.location, label);
}
/// Visits the children of the [IfConditional] first, then records the
/// conditional's own location under the [IF] label.
fn visit_if_conditional(&mut self, node: &IfConditional) {
    node.visit_children(self);
    // Inserted last: an entry already stored under the same location is replaced.
    let label = String::from(IF);
    self.0.insert(node.location, label);
}
/// Visits the children of the [VariableDeclaration] first, then records the
/// declaration's own location under the [DECLARATION] label.
fn visit_variable_declaration(&mut self, node: &VariableDeclaration) {
    node.visit_children(self);
    // Inserted last: an entry already stored under the same location is replaced.
    let label = String::from(DECLARATION);
    self.0.insert(node.location, label);
}
/// Visits the children of the [FunctionCall] first, then records the call's
/// own location under the textual form of the called function's name.
///
/// NOTE(review): the [FUNCTION_CALL] label is not inserted here; confirm how
/// user-defined function names are rendered by `to_string`.
fn visit_function_call(&mut self, node: &FunctionCall) {
    node.visit_children(self);
    // Inserted last: an entry already stored under the same location is replaced.
    let callee = node.name.to_string();
    self.0.insert(node.location, callee);
}
/// Visits the children of the [FunctionDefinition] first, then records the
/// definition's own location under the [FUNCTION_DEFINITION] label.
fn visit_function_definition(&mut self, node: &FunctionDefinition) {
    node.visit_children(self);
    // Inserted last: an entry already stored under the same location is replaced.
    let label = String::from(FUNCTION_DEFINITION);
    self.0.insert(node.location, label);
}
/// Visits the children of the [Identifier] first, then records the
/// identifier's own location under the [IDENTIFIER] label.
fn visit_identifier(&mut self, node: &Identifier) {
    node.visit_children(self);
    // Inserted last: an entry already stored under the same location is replaced.
    let label = String::from(IDENTIFIER);
    self.0.insert(node.location, label);
}
/// Visits the children of the [Literal] first, then records the literal's
/// own location under the [LITERAL] label.
fn visit_literal(&mut self, node: &Literal) {
    node.visit_children(self);
    // Inserted last: an entry already stored under the same location is replaced.
    let label = String::from(LITERAL);
    self.0.insert(node.location, label);
}
/// Visits the children of the [ForLoop] first, then records the loop's own
/// location under the [FOR] label.
fn visit_for_loop(&mut self, node: &ForLoop) {
    node.visit_children(self);
    // Inserted last: an entry already stored under the same location is replaced.
    let label = String::from(FOR);
    self.0.insert(node.location, label);
}
/// Visits the children of the [Switch] first, then records the switch's own
/// location under the [SWITCH] label.
fn visit_switch(&mut self, node: &Switch) {
    node.visit_children(self);
    // Inserted last: an entry already stored under the same location is replaced.
    let label = String::from(SWITCH);
    self.0.insert(node.location, label);
}
}
+3
View File
@@ -10,6 +10,8 @@ use revive_explorer::{dwarfdump, dwarfdump_analyzer::DwarfdumpAnalyzer, yul_phas
/// - The count of each YUL statement translated.
/// - A per YUL statement break-down of bytecode size contributed per.
/// - Estimated `yul-phaser` cost parameters.
///
/// Note: This tool might not be fully accurate, especially when the code was optimized.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Args {
@@ -26,6 +28,7 @@ struct Args {
yul_phaser: Option<PathBuf>,
/// Path of the shared object to analyze.
/// It must have been compiled with debug info (-g).
file: PathBuf,
}