mirror of
https://github.com/pezkuwichain/revive.git
synced 2026-04-28 03:57:55 +00:00
the revive-explorer utility (#364)
A maintainable and more precise version of what was a hacky but useful script, exploring the compiler's YUL lowering unit. It analyzes a given shared object from the debug dump and outputs: - The count of each YUL statement translated. - A per-YUL-statement breakdown of the bytecode size contributed. - Estimated `yul-phaser` cost parameters. Signed-off-by: Cyrill Leutwiler <bigcyrill@hotmail.com>
This commit is contained in:
@@ -0,0 +1,250 @@
|
||||
//! The core dwarf dump analyzer library.
|
||||
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use revive_yul::lexer::token::location::Location;
|
||||
|
||||
use crate::location_mapper::{self, map_locations, LocationMap};
|
||||
|
||||
/// Unknown code.
pub const OTHER: &str = "other";
/// Compiler internal code.
pub const INTERNAL: &str = "internal";
/// YUL block code.
pub const BLOCK: &str = "block";
/// YUL function call code.
pub const FUNCTION_CALL: &str = "function_call";
/// YUL conditional code.
pub const IF: &str = "if";
/// YUL loop code.
pub const FOR: &str = "for";
/// YUL loop continue code.
pub const CONTINUE: &str = "continue";
/// YUL loop break code.
pub const BREAK: &str = "break";
/// YUL switch code.
pub const SWITCH: &str = "switch";
/// YUL variable declaration code.
pub const DECLARATION: &str = "let";
/// YUL variable assignment code.
pub const ASSIGNMENT: &str = "assignment";
/// YUL function definition code.
pub const FUNCTION_DEFINITION: &str = "function_definition";
/// YUL function leave code.
pub const LEAVE: &str = "leave";
|
||||
|
||||
/// The dwarf dump analyzer.
///
/// Loads debug information from `llvm-dwarfdump` and calculates statistics
/// about the compiled YUL statements:
/// - Statements count
/// - Per-statement instruction size in bytes
#[derive(Debug, Default)]
pub struct DwarfdumpAnalyzer {
    /// The YUL source file path.
    source: PathBuf,

    /// The YUL location to statements map.
    location_map: LocationMap,

    /// The `llvm-dwarfdump --debug-lines` output.
    debug_lines: String,

    /// The observed statements: how often each statement name occurs in
    /// the location map.
    statements_count: HashMap<String, usize>,
    /// The observed statement to instructions size: the accumulated offset
    /// deltas attributed to each statement name.
    statements_size: HashMap<String, u64>,
}
|
||||
|
||||
impl DwarfdumpAnalyzer {
|
||||
/// The debug info analyzer constructor.
|
||||
///
|
||||
/// `source` is the path to the YUL source file.
|
||||
/// `debug_lines` is the `llvm-dwarfdump --debug-lines` output.
|
||||
pub fn new(source: &Path, debug_lines: String) -> Self {
|
||||
Self {
|
||||
source: source.to_path_buf(),
|
||||
debug_lines,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Run the analysis.
|
||||
pub fn analyze(&mut self) -> anyhow::Result<()> {
|
||||
self.map_locations()?;
|
||||
self.analyze_statements()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Populate the maps so that we can always unwrap later.
|
||||
fn map_locations(&mut self) -> anyhow::Result<()> {
|
||||
self.location_map = map_locations(&self.source)?;
|
||||
|
||||
self.statements_count = HashMap::with_capacity(self.location_map.len());
|
||||
self.statements_size = HashMap::with_capacity(self.location_map.len());
|
||||
|
||||
for statement in self.location_map.values() {
|
||||
if !self.statements_size.contains_key(statement) {
|
||||
self.statements_size.insert(statement.clone(), 0);
|
||||
}
|
||||
|
||||
*self.statements_count.entry(statement.clone()).or_insert(0) += 1;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Analyze how much bytes of insturctions each statement contributes.
|
||||
fn analyze_statements(&mut self) -> anyhow::Result<()> {
|
||||
let mut previous_offset = 0;
|
||||
let mut previous_location = Location::new(0, 0);
|
||||
|
||||
for line in self
|
||||
.debug_lines
|
||||
.lines()
|
||||
.skip_while(|line| !line.starts_with("Address"))
|
||||
.skip(2)
|
||||
{
|
||||
let mut parts = line.split_whitespace();
|
||||
let (Some(offset), Some(line), Some(column)) =
|
||||
(parts.next(), parts.next(), parts.next())
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let current_offset = u64::from_str_radix(offset.trim_start_matches("0x"), 16)?;
|
||||
let mut current_location = Location::new(line.parse()?, column.parse()?);
|
||||
|
||||
// TODO: A bug? Needs further investigation.
|
||||
if current_location.line == 0 && current_location.column != 0 {
|
||||
current_location.line = previous_location.line;
|
||||
}
|
||||
|
||||
if let Some(statement) = self.location_map.get(&previous_location) {
|
||||
let contribution = current_offset - previous_offset;
|
||||
*self.statements_size.get_mut(statement).unwrap() += contribution;
|
||||
}
|
||||
|
||||
previous_offset = current_offset;
|
||||
previous_location = current_location;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Print the per-statement count break-down.
|
||||
pub fn display_statement_count(&self) {
|
||||
println!("statements count:");
|
||||
for (statement, count) in self.statements_count.iter() {
|
||||
println!("\t{statement} {count}");
|
||||
}
|
||||
}
|
||||
|
||||
/// Print the per-statement byte size contribution break-down.
|
||||
pub fn display_statement_size(&self) {
|
||||
println!("bytes per statement:");
|
||||
for (statement, size) in self.statements_size.iter() {
|
||||
println!("\t{statement} {size}");
|
||||
}
|
||||
}
|
||||
|
||||
/// Print the estimated `yul-phaser` cost parameters.
|
||||
pub fn display_phaser_costs(&self, yul_phaser_scale: u64) {
|
||||
println!("yul-phaser parameters:");
|
||||
for (parameter, cost) in self.phaser_costs(yul_phaser_scale) {
|
||||
println!("\t{parameter} {cost}");
|
||||
}
|
||||
}
|
||||
|
||||
/// Estimate the `yul-phaser` costs using the simplified weight function:
|
||||
/// `Total size / toal count = cost`
|
||||
pub fn phaser_costs(&self, yul_phaser_scale: u64) -> Vec<(String, u64)> {
|
||||
let mut costs: HashMap<String, (usize, u64)> = HashMap::with_capacity(16);
|
||||
for (statement, count) in self
|
||||
.statements_count
|
||||
.iter()
|
||||
.filter(|(_, count)| **count > 0)
|
||||
{
|
||||
let size = self.statements_size.get(statement).unwrap();
|
||||
let cost = match statement.as_str() {
|
||||
location_mapper::FOR => "--for-loop-cost",
|
||||
location_mapper::OTHER => continue,
|
||||
location_mapper::INTERNAL => continue,
|
||||
location_mapper::BLOCK => "--block-cost",
|
||||
location_mapper::FUNCTION_CALL => "--function-call-cost",
|
||||
location_mapper::IF => "--if-cost",
|
||||
location_mapper::CONTINUE => "--continue-cost",
|
||||
location_mapper::BREAK => "--break-cost",
|
||||
location_mapper::LEAVE => "--leave-cost",
|
||||
location_mapper::SWITCH => "--switch-cost",
|
||||
location_mapper::DECLARATION => "--variable-declaration-cost",
|
||||
location_mapper::ASSIGNMENT => "--assignment-cost",
|
||||
location_mapper::FUNCTION_DEFINITION => "--function-definition-cost",
|
||||
_ => "--expression-statement-cost",
|
||||
};
|
||||
|
||||
let entry = costs.entry(cost.to_string()).or_default();
|
||||
entry.0 += count;
|
||||
entry.1 += size;
|
||||
}
|
||||
|
||||
let costs = costs
|
||||
.iter()
|
||||
.map(|(cost, (count, size))| {
|
||||
let ratio = *size / *count as u64;
|
||||
(cost.to_string(), ratio.min(100))
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let scaled_costs = scale_to(
|
||||
costs
|
||||
.iter()
|
||||
.map(|(_, ratio)| *ratio)
|
||||
.collect::<Vec<_>>()
|
||||
.as_slice(),
|
||||
yul_phaser_scale,
|
||||
);
|
||||
|
||||
costs
|
||||
.iter()
|
||||
.zip(scaled_costs)
|
||||
.map(|((cost, _), scaled_ratio)| (cost.to_string(), scaled_ratio))
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// Given a slice of u64 values, returns a Vec<u64> where each element
/// is linearly scaled into the closed interval `[1, scale_max]`.
///
/// Special cases:
/// - Empty input yields an empty vector.
/// - If the largest value is already below `scale_max`, the data is
///   returned unchanged.
/// - If all values are equal (and not below `scale_max`), every element
///   becomes `1`.
/// - A `scale_max` of `0` is treated like `1`: every element becomes `1`.
fn scale_to(data: &[u64], scale_max: u64) -> Vec<u64> {
    let (Some(&min), Some(&max)) = (data.iter().min(), data.iter().max()) else {
        return Vec::new();
    };

    if max < scale_max {
        return data.to_vec();
    }

    let range = max - min;
    if range == 0 {
        return vec![1; data.len()];
    }

    // Distribute the values over `scale_max - 1` steps above the lower
    // bound of 1, so that the maximum lands exactly on `scale_max`.
    let steps = u128::from(scale_max.saturating_sub(1));
    let range = u128::from(range);

    data.iter()
        .map(|&x| {
            // The product is computed in 128 bits so it cannot overflow.
            // The quotient is at most `steps`, which fits into a u64.
            let offset = u128::from(x - min) * steps / range;
            1 + offset as u64
        })
        .collect()
}
|
||||
Reference in New Issue
Block a user