custom ir

Signed-off-by: Cyrill Leutwiler <bigcyrill@hotmail.com>
This commit is contained in:
Cyrill Leutwiler
2024-02-02 09:10:03 +01:00
parent 7a094f17c0
commit d238d8f39e
48 changed files with 4399 additions and 603 deletions
+1 -1
View File
@@ -1,5 +1,5 @@
[package]
name = "compiler-builtins"
name = "revive-builtins"
version = "0.1.0"
edition = "2021"
build = "build.rs"
+1 -1
View File
@@ -16,7 +16,7 @@ fn main() {
let lib_path = std::path::PathBuf::from(llvm_lib_dir.trim())
.join("linux")
.join(lib);
let archive = fs::read(lib_path).expect("clang builtins for riscv32 not fonud");
let archive = fs::read(lib_path).expect("clang builtins for riscv32 not found");
let out_dir = env::var_os("OUT_DIR").expect("has OUT_DIR");
let archive_path = Path::new(&out_dir).join(lib);
+4 -1
View File
@@ -8,4 +8,7 @@ edition = "2021"
[dependencies]
hex = { workspace = true }
evmil = { workspace = true }
revive-ir = { path = "../ir" }
revive-ir = { path = "../ir" }
revive-codegen = { path = "../codegen" }
revive-target-polkavm = { path = "../target-polkavm" }
+13 -2
View File
@@ -1,10 +1,21 @@
use evmil::bytecode::Disassemble;
use revive_ir::cfg::{BasicBlockFormatOption, Program};
use revive_ir::cfg::BasicBlockFormatOption;
use revive_target_polkavm::PolkaVm;
fn main() {
let hexcode = std::fs::read_to_string(std::env::args().nth(1).unwrap()).unwrap();
let bytecode = hex::decode(hexcode.trim()).unwrap();
let instructions = bytecode.disassemble();
Program::new(instructions).dot(BasicBlockFormatOption::ByteCode);
let mut ir = revive_ir::cfg::Program::new(&instructions);
ir.optimize();
ir.dot(BasicBlockFormatOption::Ir);
let target = PolkaVm::default();
let program = revive_codegen::program::Program::new(&target).unwrap();
program.emit(ir);
let artifact = program.compile_and_link();
std::fs::write("/tmp/out.pvm", artifact).unwrap();
}
@@ -1,8 +1,12 @@
[package]
name = "revive-polkavm"
name = "revive-codegen"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
inkwell = { workspace = true }
revive-compilation-target = { path = "../compilation-target" }
revive-ir = { path = "../ir" }
+2
View File
@@ -0,0 +1,2 @@
mod module;
pub mod program;
+37
View File
@@ -0,0 +1,37 @@
use inkwell::{
module::Module,
support::LLVMString,
targets::{RelocMode, TargetTriple},
};
use revive_compilation_target::target::Target;
/// Create the LLVM module that holds the contract code.
///
/// The module is named `contract`, is assigned the target triple of `T` and
/// the source file name `contract.bin`, receives the target specific module
/// flags and finally gets every library provided by `target` linked in.
///
/// # Errors
///
/// Returns the LLVM error message if linking one of the target libraries
/// into the module fails.
pub(crate) fn create<'ctx, T>(target: &'ctx T) -> Result<Module<'ctx>, LLVMString>
where
    T: Target<'ctx>,
{
    let contract = target.context().create_module("contract");

    let triple = TargetTriple::create(<T as Target>::TARGET_TRIPLE);
    contract.set_triple(&triple);
    contract.set_source_file_name("contract.bin");
    set_flags(target, &contract);

    // Stops at the first library that fails to link.
    target
        .libraries()
        .into_iter()
        .try_for_each(|library| contract.link_in_module(library))?;

    Ok(contract)
}
/// Attach the module flags required by the target configuration.
///
/// If the relocation mode of `T` is [`RelocMode::PIC`], the `PIE Level`
/// flag is set to `2` with override behavior. Any other relocation mode
/// leaves the module untouched.
fn set_flags<'ctx, T>(target: &'ctx T, module: &Module<'ctx>)
where
    T: Target<'ctx>,
{
    if !matches!(<T as Target>::RELOC_MODE, RelocMode::PIC) {
        return;
    }

    let pie_level = target.context().i32_type().const_int(2, false);
    module.add_basic_value_flag(
        "PIE Level",
        inkwell::module::FlagBehavior::Override,
        pie_level,
    );
}
+93
View File
@@ -0,0 +1,93 @@
use inkwell::{
builder::Builder,
module::{Linkage, Module},
support::LLVMString,
targets::{FileType, TargetTriple},
values::{FunctionValue, GlobalValue},
AddressSpace,
};
use revive_compilation_target::environment::Environment;
use revive_compilation_target::target::Target;
use crate::module;
/// The LLVM program under construction for a compilation target `T`.
///
/// Bundles the LLVM module with the builder and the values created for it
/// by `Program::new`.
pub struct Program<'ctx, T> {
/// The LLVM module all code is emitted into.
pub module: Module<'ctx>,
/// The instruction builder created from the target context.
pub builder: Builder<'ctx>,
/// The `calldata` global: an `i8` array of `T::CALLDATA_SIZE` elements.
pub calldata: GlobalValue<'ctx>,
/// The `returndata` global: an `i8` array of `T::RETURNDATA_SIZE` elements.
pub returndata: GlobalValue<'ctx>,
/// The compilation target the program is built for.
pub target: &'ctx T,
/// The internal `start` function (no arguments, returns void).
pub start: FunctionValue<'ctx>,
}
impl<'ctx, T> Program<'ctx, T>
where
    T: Target<'ctx> + Environment<'ctx>,
{
    /// Set up a new program for the given `target`.
    ///
    /// Initializes LLVM for the target, creates the contract module and
    /// declares the `calldata` and `returndata` globals as well as the
    /// internal `start` function.
    ///
    /// # Errors
    ///
    /// Returns the LLVM error message if the contract module can not be
    /// created (i.e. linking in the target libraries failed).
    pub fn new(target: &'ctx T) -> Result<Self, LLVMString> {
        T::initialize_llvm();

        let context = target.context();
        let module = module::create(target)?;
        let builder = context.create_builder();

        let address_space = Some(AddressSpace::default());

        let calldata_type = context.i8_type().array_type(T::CALLDATA_SIZE);
        let calldata = module.add_global(calldata_type, address_space, "calldata");

        let returndata_type = context.i8_type().array_type(T::RETURNDATA_SIZE);
        let returndata = module.add_global(returndata_type, address_space, "returndata");

        let start_fn_type = context.void_type().fn_type(&[], false);
        let start = module.add_function("start", start_fn_type, Some(Linkage::Internal));

        Ok(Self {
            module,
            builder,
            calldata,
            returndata,
            target,
            start,
        })
    }

    /// Emit the program into the LLVM module.
    ///
    /// Only the `start` function and the environment entrypoint are emitted
    /// so far; the IR program itself is not lowered yet, hence the argument
    /// is currently unused.
    pub fn emit(&self, _program: revive_ir::cfg::Program) {
        self.emit_start();
    }

    /// Compile the module to an object file and link it using the target.
    ///
    /// Returns the blob produced by the linker of the target.
    ///
    /// # Panics
    ///
    /// Panics if the target name or the target machine configuration is
    /// invalid or if LLVM fails to emit the object code.
    pub fn compile_and_link(&self) -> Vec<u8> {
        let machine = inkwell::targets::Target::from_name(T::TARGET_NAME)
            .expect("target name should be valid")
            .create_target_machine(
                &TargetTriple::create(T::TARGET_TRIPLE),
                T::CPU,
                T::TARGET_FEATURES,
                self.target.optimization_level(),
                T::RELOC_MODE,
                T::CODE_MODEL,
            )
            .expect("target configuration should be valid");

        // The fallible step is emitting the object code; linking returns the
        // final blob directly, so there is no need to copy it again.
        let object = machine
            .write_to_memory_buffer(&self.module, FileType::Object)
            .expect("object code emission should succeed");

        self.target.link(object.as_slice())
    }

    /// Emit the body of `start` and link in the environment entrypoint.
    ///
    /// The builder is positioned at the end of the last basic block of
    /// `start` (an `entry` block is appended if there is none) where a
    /// `return` is built. Afterwards the module provided by the environment,
    /// which is expected to call `start`, is linked into the contract module.
    fn emit_start(&self) {
        let start = self.start;
        let block = start
            .get_last_basic_block()
            .unwrap_or_else(|| self.target.context().append_basic_block(start, "entry"));

        self.builder.position_at_end(block);
        self.builder.build_return(None);

        let env_start = self.target.call_start(&self.builder, start);
        self.module
            .link_in_module(env_start)
            .expect("entrypoint module should be linkable");
    }
}
@@ -1,8 +1,9 @@
[package]
name = "revive-environment"
name = "revive-compilation-target"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
inkwell = { workspace = true }
@@ -0,0 +1,15 @@
use inkwell::{builder::Builder, module::Module, values::FunctionValue};
/// [Environment] describes EVM runtime functionality.
pub trait Environment<'ctx> {
/// Default stack size.
// NOTE(review): the unit (bytes vs. stack slots) is not visible here - confirm.
const STACK_SIZE: u32 = 1024 * 32;
/// Default size of the calldata buffer.
// The codegen uses this as the number of `i8` elements of the `calldata` global.
const CALLDATA_SIZE: u32 = 0x10000;
/// Default size of the returndata buffer.
// The codegen uses this as the number of `i8` elements of the `returndata` global.
const RETURNDATA_SIZE: u32 = 0x10000;
/// Default memory size.
// NOTE(review): presumably the size of the EVM linear memory in bytes - confirm.
const MEMORY_SIZE: u32 = 0x100000;
/// Build a module containing all required runtime exports and imports.
///
/// The `start` function is the entrypoint to the contract logic.
/// The returned `Module` is expected to call `start` somewhere.
fn call_start(&'ctx self, builder: &Builder<'ctx>, start: FunctionValue<'ctx>) -> Module<'ctx>;
}
+2
View File
@@ -0,0 +1,2 @@
pub mod environment;
pub mod target;
+27
View File
@@ -0,0 +1,27 @@
use inkwell::{
context::Context,
module::Module,
targets::{CodeModel, RelocMode},
OptimizationLevel,
};
/// [Target] describes the LLVM configuration of a compilation target and
/// how object code is linked for it.
pub trait Target<'ctx> {
/// The name used to look up the LLVM target.
const TARGET_NAME: &'ctx str;
/// The target triple set on the module and the target machine.
const TARGET_TRIPLE: &'ctx str;
/// The feature string passed to the target machine.
const TARGET_FEATURES: &'ctx str;
/// The CPU name passed to the target machine.
const CPU: &'ctx str;
/// The relocation mode passed to the target machine.
const RELOC_MODE: RelocMode = RelocMode::Default;
/// The code model passed to the target machine.
const CODE_MODEL: CodeModel = CodeModel::Default;
/// Initialize the LLVM backend; the default initializes the RISC-V target
/// using the default initialization config.
fn initialize_llvm() {
inkwell::targets::Target::initialize_riscv(&Default::default());
}
/// The LLVM context used to create modules, builders and types.
fn context(&self) -> &Context;
/// Modules that get linked into the contract module on creation.
fn libraries(&'ctx self) -> Vec<Module<'ctx>>;
/// Link the object code in `blob` and return the resulting artifact.
fn link(&self, blob: &[u8]) -> Vec<u8>;
/// The optimization level passed to the target machine.
fn optimization_level(&self) -> OptimizationLevel;
}
-1
View File
@@ -1 +0,0 @@
+3 -1
View File
@@ -9,4 +9,6 @@ edition = "2021"
evmil = { workspace = true }
petgraph = { workspace = true }
primitive-types = { workspace = true }
indexmap = { workspace = true }
indexmap = { workspace = true }
revive-compilation-target = { path = "../compilation-target" }
+3 -3
View File
@@ -30,13 +30,13 @@ impl Address {
#[derive(Clone, Copy)]
pub enum Type {
Int { size: u8 },
Int { size: u16 },
Bytes { size: u8 },
Bool,
}
impl Type {
pub fn int(size: u8) -> Self {
pub fn int(size: u16) -> Self {
Self::Int { size }
}
@@ -47,7 +47,7 @@ impl Type {
impl Default for Type {
fn default() -> Self {
Type::Int { size: 32 }
Type::Bytes { size: 32 }
}
}
+60
View File
@@ -0,0 +1,60 @@
use indexmap::{IndexMap, IndexSet};
use petgraph::prelude::*;
use crate::{
analysis::BlockAnalysis,
cfg::{Branch, Program},
instruction::Instruction,
symbol::Kind,
};
/// Remove basic blocks not reachable from the start node.
///
/// Every node handed to the pass is recorded; applying the results keeps
/// only the recorded nodes in the graph.
#[derive(Default)]
pub struct ReachableCode(pub IndexSet<NodeIndex>);

impl BlockAnalysis for ReachableCode {
    /// Record `node` as visited.
    fn analyze_block(&mut self, node: NodeIndex, _program: &mut Program) {
        self.0.insert(node);
    }

    /// Drop every node from the graph that was never visited.
    fn apply_results(&mut self, program: &mut Program) {
        let visited = &self.0;
        program
            .cfg
            .graph
            .retain_nodes(|_graph, index| visited.contains(&index));
    }
}
/// Remove edges to the jump table if the jump target is statically known.
///
/// Maps each edge to replace to the `(source, destination)` node pair of
/// the static edge replacing it.
#[derive(Default)]
pub struct StaticJumps(IndexMap<EdgeIndex, (NodeIndex, NodeIndex)>);

impl BlockAnalysis for StaticJumps {
    /// Collect the non-static edges leaving `node` whose branch target is a
    /// constant.
    ///
    /// The destination is the node registered for the constant bytecode
    /// offset; offsets without a registered jump target lead to the invalid
    /// jump node.
    fn analyze_block(&mut self, node: NodeIndex, program: &mut Program) {
        let dynamic_edges = program
            .cfg
            .graph
            .edges(node)
            .filter(|edge| *edge.weight() != Branch::Static);

        for edge in dynamic_edges {
            // Only blocks ending in a branch are of interest.
            let (Some(Instruction::ConditionalBranch { target, .. })
            | Some(Instruction::UncoditionalBranch { target })) =
                program.cfg.graph[node].instructions.last()
            else {
                continue;
            };
            let Kind::Constant(bytecode_offset) = target.symbol().kind else {
                continue;
            };

            let destination = match program.jump_targets.get(&bytecode_offset.as_usize()) {
                Some(jump_target) => *jump_target,
                None => program.cfg.invalid_jump,
            };
            self.0.insert(edge.id(), (node, destination));
        }
    }

    /// Replace every collected edge with a static edge to its destination.
    fn apply_results(&mut self, program: &mut Program) {
        let graph = &mut program.cfg.graph;
        for (&edge, &(source, destination)) in &self.0 {
            graph.remove_edge(edge);
            graph.add_edge(source, destination, Branch::Static);
        }
    }
}
+19
View File
@@ -0,0 +1,19 @@
use crate::{cfg::Program, instruction::Instruction, symbol::Type, POINTER_SIZE};
use petgraph::prelude::*;
use super::BlockAnalysis;
/// Placeholder pass: it walks the instructions of each block but does not
/// act on any of them yet.
#[derive(Default)]
pub struct Unstack;

impl BlockAnalysis for Unstack {
    /// Visit every instruction of `node` without doing anything.
    fn analyze_block(&mut self, node: NodeIndex, program: &mut Program) {
        program.cfg.graph[node]
            .instructions
            .iter()
            .for_each(|_instruction| {});
    }

    /// There are no results to apply.
    fn apply_results(&mut self, _program: &mut Program) {}
}
+373
View File
@@ -0,0 +1,373 @@
use indexmap::{IndexMap, IndexSet};
use petgraph::prelude::*;
use crate::{
analysis::BlockAnalysis,
cfg::{Program, StackInfo},
instruction::{Instruction, Operator},
symbol::{Global, Kind, Symbol, SymbolBuilder, SymbolRef, SymbolTable},
};
/// Translates the EVM opcodes of each basic block into IR instructions.
#[derive(Default)]
pub struct IrBuilder;

impl BlockAnalysis for IrBuilder {
    /// Translate the opcode range of `node` and store the resulting
    /// instructions together with the stack information in the block.
    fn analyze_block(&mut self, node: NodeIndex, program: &mut Program) {
        let opcode_range = program.cfg.graph[node].opcodes.clone();

        let mut block = BlockBuilder::new(node, &mut program.symbol_table);
        program.evm_instructions[opcode_range]
            .iter()
            .for_each(|opcode| block.translate(&opcode.instruction));
        let (instructions, stack_info) = block.done();

        let basic_block = &mut program.cfg.graph[node];
        basic_block.instructions = instructions;
        basic_block.stack_info = stack_info;
    }

    /// The blocks are updated in place; there is nothing left to apply.
    fn apply_results(&mut self, _program: &mut Program) {}
}
pub struct BlockBuilder<'tbl> {
state: State<'tbl>,
instructions: Vec<Instruction>,
}
impl<'tbl> BlockBuilder<'tbl> {
fn new(node: NodeIndex, symbol_table: &'tbl mut SymbolTable) -> Self {
Self {
state: State::new(node, symbol_table),
instructions: Default::default(),
}
}
fn done(self) -> (Vec<Instruction>, StackInfo) {
let stack_info = StackInfo {
arguments: self.state.borrows,
generates: self.state.stack,
height: self.state.height,
};
assert_eq!(
stack_info.arguments as i32 + stack_info.height,
stack_info.generates.len() as i32,
"local stack elements must equal stack arguments taken + local height"
);
(self.instructions, stack_info)
}
fn translate(&mut self, opcode: &evmil::bytecode::Instruction) {
use evmil::bytecode::Instruction::*;
self.instructions.extend(match opcode {
JUMPDEST => Vec::new(),
PUSH(bytes) => {
self.state.push(Symbol::builder().constant(bytes));
Vec::new()
}
POP => {
self.state.pop();
Vec::new()
}
SWAP(n) => self.state.swap(*n as usize),
DUP(n) => vec![Instruction::Copy {
y: self.state.nth(*n as usize),
x: self.state.push(Symbol::builder().variable()),
}],
ADD => vec![Instruction::BinaryAssign {
y: self.state.pop(),
z: self.state.pop(),
x: self.state.push(Symbol::builder().variable()),
operator: Operator::Add,
}],
SUB => vec![Instruction::BinaryAssign {
y: self.state.pop(),
z: self.state.pop(),
x: self.state.push(Symbol::builder().variable()),
operator: Operator::Sub,
}],
MSTORE => vec![Instruction::IndexedAssign {
x: self.state.symbol_table.global(Global::Memory),
index: self.state.pop(),
y: self.state.pop(),
}],
MLOAD => vec![Instruction::IndexedCopy {
index: self.state.pop(),
x: self.state.push(Symbol::builder().variable()),
y: self.state.symbol_table.global(Global::Memory),
}],
JUMP => vec![Instruction::UncoditionalBranch {
target: self.state.pop(),
}],
JUMPI => vec![Instruction::ConditionalBranch {
target: self.state.pop(),
condition: self.state.pop(),
}],
CALLDATACOPY => vec![Instruction::Procedure {
symbol: Global::CallDataCopy,
parameters: vec![self.state.pop(), self.state.pop(), self.state.pop()],
}],
CALLDATALOAD => vec![Instruction::IndexedCopy {
index: self.state.pop(),
x: self.state.push(Symbol::builder().variable()),
y: self.state.symbol_table.global(Global::CallData),
}],
RETURN => vec![Instruction::Procedure {
symbol: Global::Return,
parameters: vec![self.state.pop(), self.state.pop()],
}],
GT => vec![Instruction::BinaryAssign {
y: self.state.pop(),
z: self.state.pop(),
x: self.state.push(Symbol::builder().variable()),
operator: Operator::GreaterThan,
}],
LT => vec![Instruction::BinaryAssign {
y: self.state.pop(),
z: self.state.pop(),
x: self.state.push(Symbol::builder().variable()),
operator: Operator::LessThan,
}],
EQ => vec![Instruction::BinaryAssign {
y: self.state.pop(),
z: self.state.pop(),
x: self.state.push(Symbol::builder().variable()),
operator: Operator::Equal,
}],
ISZERO => vec![Instruction::UnaryAssign {
y: self.state.pop(),
x: self.state.push(Symbol::builder().variable()),
operator: Operator::IsZero,
}],
_ => {
eprintln!("unimplement instruction: {opcode}");
Vec::new()
}
})
}
}
/// The stack state of a basic block during translation.
struct State<'tbl> {
    /// The basic block the state belongs to.
    node: NodeIndex,
    /// The symbol table new symbols are inserted into.
    symbol_table: &'tbl mut SymbolTable,
    /// The elements pushed by the block itself; the top is the last element.
    stack: Vec<SymbolRef>,
    /// Every pop on an empty stack counts as an additional argument.
    borrows: usize,
    /// Caches the arguments the block borrows from the stack.
    arguments: IndexMap<usize, SymbolRef>,
    /// Tracks the relative stack height:
    /// - Pushes increase the height by one
    /// - Pops decrease the height by one
    height: i32,
}

impl<'tbl> State<'tbl> {
    /// Create an empty state for the block `node`.
    fn new(node: NodeIndex, symbol_table: &'tbl mut SymbolTable) -> Self {
        Self {
            node,
            symbol_table,
            stack: Default::default(),
            borrows: Default::default(),
            arguments: Default::default(),
            height: Default::default(),
        }
    }

    /// Pop the top element.
    ///
    /// If the local stack is empty, the element is borrowed from the stack
    /// arguments of the block instead.
    fn pop(&mut self) -> SymbolRef {
        self.height -= 1;
        self.stack.pop().unwrap_or_else(|| {
            // Resolve the argument *before* accounting for the borrow:
            // `slot()` depends on `borrows`, so incrementing first would
            // return the slot below the one that is actually consumed (and
            // that symbol would then alias the new top of the stack).
            let argument = self.nth(0);
            self.borrows += 1;
            argument
        })
    }

    /// Push a new temporary symbol built from `builder` and return it.
    fn push(&mut self, builder: SymbolBuilder<(), Kind>) -> SymbolRef {
        let symbol = builder.temporary().done();
        let symbol = self.symbol_table.insert(self.node, symbol);
        self.stack.push(symbol.clone());
        self.height += 1;
        symbol
    }

    /// Swap the top element with the element `n` positions below it.
    ///
    /// Returns the instructions required to perform the swap, if any.
    fn swap(&mut self, n: usize) -> Vec<Instruction> {
        // For free if both elements are local to the basic block
        let top = self.stack.len().saturating_sub(1);
        if n <= top {
            self.stack.swap(top - n, top);
            return Vec::new();
        }

        // Otherwise exchange the values using a temporary.
        let tmp = self.symbol_table.temporary(self.node);
        let a = self.nth(0);
        let b = self.nth(n);

        vec![
            Instruction::Copy {
                x: tmp.clone(),
                y: a.clone(),
            },
            Instruction::Copy { x: a, y: b.clone() },
            Instruction::Copy { x: b, y: tmp },
        ]
    }

    /// The element `n` positions below the top, where `0` is the top itself.
    ///
    /// Elements not local to the block are stack arguments; their symbol is
    /// created on first access and cached afterwards.
    fn nth(&mut self, n: usize) -> SymbolRef {
        self.stack
            .iter()
            .rev()
            .nth(n)
            .or_else(|| self.arguments.get(&(self.slot(n) as usize)))
            .cloned()
            .unwrap_or_else(|| {
                let builder = Symbol::builder().stack(self.slot(n)).variable();
                let symbol = self.symbol_table.insert(self.node, builder.done());
                self.arguments.insert(self.slot(n) as usize, symbol.clone());
                symbol
            })
    }

    /// The stack slot of the element `n` positions below the top.
    ///
    /// Slots of elements local to the block are negative; the slots of the
    /// stack arguments start at `0`.
    fn slot(&self, n: usize) -> i32 {
        n as i32 - (self.stack.len() as i32 - self.borrows as i32)
    }
}
// Unit tests for the stack bookkeeping of the block translation.
// In the assertions below `arguments` is the number of borrowed stack
// elements, `generates` the local stack left behind and `height` the
// relative stack height of the translated block.
#[cfg(test)]
mod tests {
use super::{BlockBuilder, State};
use crate::{
cfg::StackInfo,
instruction::Instruction,
symbol::{Symbol, SymbolTable},
};
use evmil::bytecode::Instruction::*;
/// Translate `code` as a single basic block using a fresh symbol table.
fn translate<'tbl>(code: &[evmil::bytecode::Instruction]) -> (Vec<Instruction>, StackInfo) {
code.iter()
.fold(
BlockBuilder::new(Default::default(), &mut SymbolTable::default()),
|mut builder, instruction| {
builder.translate(instruction);
builder
},
)
.done()
}
/// Local elements have negative slots; popping from the empty local stack
/// shifts the slots of the remaining arguments.
#[test]
fn stack_slot_works() {
let mut symbol_table = SymbolTable::default();
let mut state = State::new(Default::default(), &mut symbol_table);
state.push(Symbol::builder().variable());
assert_eq!(state.slot(0), -1);
assert_eq!(state.slot(1), 0);
assert_eq!(state.slot(2), 1);
// The second pop hits the empty local stack and borrows an argument.
state.pop();
state.pop();
assert_eq!(state.slot(0), 1);
assert_eq!(state.slot(1), 2);
assert_eq!(state.slot(2), 3);
state.push(Symbol::builder().variable());
state.push(Symbol::builder().variable());
assert_eq!(state.slot(0), -1);
assert_eq!(state.slot(1), 0);
assert_eq!(state.slot(2), 1);
}
/// A push only grows the local stack.
#[test]
fn push_works() {
let state = translate(&[PUSH(vec![1])]).1;
assert_eq!(state.height, 1);
assert_eq!(state.arguments, 0);
assert_eq!(state.generates.len(), 1);
}
/// On an empty local stack both operands are borrowed; the result is local.
#[test]
fn add_works() {
let state = translate(&[ADD]).1;
assert_eq!(state.height, -1);
assert_eq!(state.arguments, 2);
assert_eq!(state.generates.len(), 1);
}
/// Duplicating reads an element without popping: nothing is borrowed.
#[test]
fn dup_works() {
let state = translate(&[DUP(4)]).1;
assert_eq!(state.height, 1);
assert_eq!(state.arguments, 0);
assert_eq!(state.generates.len(), 1);
}
/// Swapping neither pops nor pushes.
#[test]
fn swap_works() {
let state = translate(&[SWAP(4)]).1;
assert_eq!(state.height, 0);
assert_eq!(state.arguments, 0);
assert_eq!(state.generates.len(), 0);
}
/// The jump target is popped; here it is borrowed from the arguments.
#[test]
fn jump() {
let state = translate(&[JUMP]).1;
assert_eq!(state.height, -1);
assert_eq!(state.arguments, 1);
assert_eq!(state.generates.len(), 0);
}
/// Five borrowed pops followed by two local pushes.
#[test]
fn pop5_push2() {
let state = translate(&[POP, POP, POP, POP, POP, PUSH(vec![1]), PUSH(vec![1])]).1;
assert_eq!(state.height, -3);
assert_eq!(state.arguments, 5);
assert_eq!(state.generates.len(), 2);
}
/// A longer opcode sequence mixing pushes, swaps, a dup and a jump.
#[test]
fn fibonacci_loop_body() {
let state = translate(&[
PUSH(vec![1]),
ADD,
SWAP(2),
DUP(1),
SWAP(4),
ADD,
SWAP(2),
PUSH(vec![10]),
JUMP,
])
.1;
assert_eq!(state.height, 0);
assert_eq!(state.arguments, 1);
assert_eq!(state.generates.len(), 1);
}
}
+31
View File
@@ -0,0 +1,31 @@
use petgraph::prelude::*;
use crate::cfg::Program;
pub mod control_flow;
pub mod dominance;
pub mod evm_bytecode;
pub mod types;
/// The analyzer visits each basic block using DFS.
///
/// A pass is created via its `Default` implementation, is handed every
/// visited block once and finally gets to apply its results to the program.
pub trait BlockAnalysis: Default {
/// Analyze the basic block `node` of `program`.
fn analyze_block(&mut self, node: NodeIndex, program: &mut Program);
/// Apply the collected results once all blocks have been visited.
fn apply_results(&mut self, program: &mut Program);
}
/// Run the analysis `Pass` on `program`.
///
/// Starting at the start node of the control flow graph, every block found
/// by a depth first search is handed to the pass. Afterwards the pass
/// applies its results to the program and is returned to the caller.
pub fn analyze<Pass>(program: &mut Program) -> Pass
where
    Pass: BlockAnalysis,
{
    let mut pass = Pass::default();
    let mut walker = Dfs::new(&program.cfg.graph, program.cfg.start);

    // The walker borrows the graph only while advancing, which leaves the
    // pass free to mutate the program in between.
    loop {
        let Some(node) = walker.next(&program.cfg.graph) else {
            break;
        };
        pass.analyze_block(node, program);
    }

    pass.apply_results(program);
    pass
}
+10
View File
@@ -0,0 +1,10 @@
use indexmap::IndexMap;
use petgraph::prelude::*;
use crate::{
cfg::{Branch, Program},
instruction::Instruction,
symbol::Kind,
};
use super::BlockAnalysis;
+42
View File
@@ -0,0 +1,42 @@
use crate::{cfg::Program, instruction::Instruction, symbol::Type, POINTER_SIZE};
use petgraph::prelude::*;
use super::BlockAnalysis;
/// Propagates type hints between the symbols used by the instructions of
/// each basic block.
#[derive(Default)]
pub struct TypePropagation;

impl BlockAnalysis for TypePropagation {
    /// Update the type hints of the symbols used by the block `node`:
    ///
    /// - Branch conditions are booleans
    /// - Branch targets and indices are pointer sized integers
    /// - The operands of a binary assignment take the type of the result
    /// - The result of a copy or unary assignment takes the type of its
    ///   operand
    fn analyze_block(&mut self, node: NodeIndex, program: &mut Program) {
        for instruction in &program.cfg.graph[node].instructions {
            match instruction {
                Instruction::ConditionalBranch { condition, target } => {
                    condition.replace_type(Type::Bool);
                    // `Type::Int` is a struct variant; build it using the
                    // dedicated constructor.
                    target.replace_type(Type::int(POINTER_SIZE));
                }

                Instruction::UncoditionalBranch { target } => {
                    target.replace_type(Type::int(POINTER_SIZE));
                }

                Instruction::BinaryAssign { x, y, z, .. } => {
                    y.replace_type(x.symbol().type_hint);
                    z.replace_type(x.symbol().type_hint);
                }

                Instruction::Copy { x, y } | Instruction::UnaryAssign { x, y, .. } => {
                    x.replace_type(y.symbol().type_hint);
                }

                Instruction::IndexedCopy { index, .. }
                | Instruction::IndexedAssign { index, .. } => {
                    index.replace_type(Type::int(POINTER_SIZE));
                }

                _ => {}
            }
        }
    }

    /// The type hints are updated in place; there is nothing left to apply.
    fn apply_results(&mut self, _program: &mut Program) {}
}
+154 -137
View File
@@ -1,48 +1,74 @@
use std::fmt::Write;
use std::ops::Range;
use evmil::bytecode;
use petgraph::{
dot::{Config, Dot},
graph::DiGraph,
stable_graph::NodeIndex,
};
use indexmap::IndexMap;
use petgraph::dot::{Config, Dot};
use petgraph::prelude::*;
use crate::{
instruction::{self, Instruction},
symbol::SymbolTable,
};
use crate::pass::dead_code::DeadCodeElimination;
use crate::pass::lift::BytecodeLifter;
use crate::pass::Pass;
use crate::symbol::SymbolRef;
use crate::{instruction::Instruction, symbol::SymbolTable};
/// The control flow graph of a program together with its special nodes.
pub struct Cfg {
/// The graph of basic blocks connected by branches.
pub graph: StableDiGraph<BasicBlock, Branch>,
/// The start node; analysis passes begin their traversal here.
pub start: NodeIndex,
/// The dynamic jump table node, reached by dynamic jumps.
pub jump_table: NodeIndex,
/// The terminator node, reached by instructions not returning.
pub terminator: NodeIndex,
/// The node representing an invalid jump target.
pub invalid_jump: NodeIndex,
}
/// The kind of an edge in the control flow graph.
#[derive(Debug, PartialEq)]
pub enum Branch {
/// An edge with a statically known destination.
Static,
/// An edge involving the dynamic jump table.
Dynamic,
}
impl Default for Cfg {
    /// Create a graph containing only the four special nodes.
    ///
    /// The nodes are added in the order start, jump table, terminator and
    /// invalid jump; no edges are added.
    fn default() -> Self {
        let mut graph = StableDiGraph::new();

        let start = graph.add_node(Default::default());
        let jump_table = graph.add_node(Default::default());
        let terminator = graph.add_node(Default::default());
        let invalid_jump = graph.add_node(Default::default());

        Self {
            graph,
            start,
            jump_table,
            terminator,
            invalid_jump,
        }
    }
}
#[derive(Clone, Debug)]
pub struct EvmInstruction {
pub bytecode_offset: usize,
pub instruction: bytecode::Instruction,
pub instruction: evmil::bytecode::Instruction,
}
#[derive(Debug, Default)]
pub struct BasicBlock {
pub entry: Option<Entry>,
pub opcodes: Range<usize>,
pub instructions: Vec<Instruction>,
pub stack_info: StackInfo,
}
#[derive(Clone, Copy, Default)]
pub enum BasicBlockFormatOption {
ByteCode,
Ir,
#[default]
None,
#[derive(Debug, Default)]
pub struct StackInfo {
pub arguments: usize,
pub generates: Vec<SymbolRef>,
pub height: i32,
}
impl BasicBlock {
fn linear_at(start: usize) -> Self {
Self {
opcodes: start..start + 1,
..Default::default()
}
}
fn format(&self, evm_bytecode: &[EvmInstruction], options: BasicBlockFormatOption) -> String {
let offset = evm_bytecode[self.opcodes.start].bytecode_offset;
let start = if let Some(Entry::Start) = self.entry {
"Start\n".to_string()
} else {
String::new()
};
let instructions = match options {
match options {
BasicBlockFormatOption::ByteCode => evm_bytecode[self.opcodes.start..self.opcodes.end]
.iter()
.fold(String::new(), |mut acc, opcode| {
@@ -58,164 +84,155 @@ impl BasicBlock {
})
}
_ => String::new(),
};
format!("{start}Offset: 0x{offset:02x}\n---\n{instructions}")
}
}
}
#[derive(Clone, Debug)]
pub enum Entry {
Start,
Jumpdest(NodeIndex),
Else(NodeIndex),
}
#[derive(Debug)]
pub enum Jump {
Direct,
Indirect,
#[derive(Clone, Copy, Default)]
pub enum BasicBlockFormatOption {
ByteCode,
Ir,
#[default]
None,
}
pub struct Program {
pub evm_instructions: Vec<EvmInstruction>,
pub cfg: DiGraph<BasicBlock, Jump>,
pub cfg: Cfg,
pub symbol_table: SymbolTable,
pub jump_targets: IndexMap<usize, NodeIndex>,
}
impl Program {
pub fn new(bytecode: Vec<bytecode::Instruction>) -> Self {
let mut cfg = DiGraph::new();
let mut symbol_table = SymbolTable::default();
/// Create a new [Program] from EVM bytecode.
///
/// - Dynamic jumps reach the dynamic jump table
/// - `JUMPDEST` and `JUMPI` split up the node
/// - Instructions not returning reach the terminator node
pub fn new(bytecode: &[evmil::bytecode::Instruction]) -> Self {
let mut evm_instructions = Vec::with_capacity(bytecode.len());
let mut current_block = Some(BasicBlock {
entry: Some(Entry::Start),
..Default::default()
});
let mut cfg = Cfg::default();
let mut jump_targets = IndexMap::default();
let mut bytecode_offset = 0;
let mut node = cfg.graph.add_node(Default::default());
cfg.graph.add_edge(cfg.start, node, Branch::Static);
cfg.graph
.add_edge(cfg.invalid_jump, cfg.terminator, Branch::Static);
cfg.graph
.add_edge(cfg.jump_table, cfg.invalid_jump, Branch::Dynamic);
for (index, opcode) in bytecode.iter().enumerate() {
evm_instructions.push(EvmInstruction {
bytecode_offset,
instruction: opcode.clone(),
});
bytecode_offset += opcode.length();
cfg.graph[node].opcodes.end = index + 1;
let instructions = instruction::translate(opcode, &mut symbol_table);
use bytecode::Instruction::*;
use evmil::bytecode::Instruction::*;
match opcode {
JUMPDEST => {
// If we are already in a bb, conclude it
let entry = current_block.take().map(|mut node| {
node.opcodes.end = index + 1;
let entry = node.entry.clone();
let node_index = cfg.add_node(node);
// The preceding instruction did already split up control flow
JUMPDEST
if matches!(
evm_instructions[index.saturating_sub(1)].instruction,
JUMP | JUMPI | RETURN | REVERT | INVALID | STOP | SELFDESTRUCT
) =>
{
cfg.graph.add_edge(cfg.jump_table, node, Branch::Dynamic);
// If the block had an entry, add an edge from the previous block to it
if let Some(Entry::Else(incoming)) | Some(Entry::Jumpdest(incoming)) = entry
{
cfg.add_edge(incoming, node_index, Jump::Direct);
}
node_index
});
// JUMPDEST implicitly starts a new bb
current_block = Some(BasicBlock {
entry: entry.map(Entry::Jumpdest),
opcodes: Range {
start: index + 1,
end: index + 1,
},
..Default::default()
});
jump_targets.insert(bytecode_offset, node);
}
JUMP | STOP | RETURN | REVERT | INVALID => {
// Conclude this bb; if we are not already in a bb we must create a new one
let mut node = current_block.take().unwrap_or_else(|| BasicBlock {
opcodes: Range {
start: index,
end: index + 1,
},
..Default::default()
});
node.instructions.extend(instructions);
node.opcodes.end = index + 1;
JUMPDEST => {
cfg.graph[node].opcodes.end = index;
let previous_node = node;
node = cfg.graph.add_node(BasicBlock::linear_at(index));
let entry = node.entry.clone();
let node_index = cfg.add_node(node);
cfg.graph.add_edge(cfg.jump_table, node, Branch::Dynamic);
cfg.graph.add_edge(previous_node, node, Branch::Static);
// If the block had an entry, add an edge from the previous block to it
if let Some(Entry::Else(incoming)) | Some(Entry::Jumpdest(incoming)) = entry {
cfg.add_edge(incoming, node_index, Jump::Direct);
}
jump_targets.insert(bytecode_offset, node);
}
JUMP => {
cfg.graph.add_edge(node, cfg.jump_table, Branch::Dynamic);
node = cfg.graph.add_node(BasicBlock::linear_at(index + 1));
}
JUMPI => {
// Conclude this bb; if we are not already in a bb we must create a new one
let mut node = current_block.take().unwrap_or_else(|| BasicBlock {
opcodes: Range {
start: index,
end: index + 1,
},
..Default::default()
});
node.instructions.extend(instructions);
node.opcodes.end = index + 1;
cfg.graph.add_edge(node, cfg.jump_table, Branch::Dynamic);
let entry = node.entry.clone();
let node_index = cfg.add_node(node);
// If the block had an entry, add an edge from the previous block to it
if let Some(Entry::Else(incoming)) | Some(Entry::Jumpdest(incoming)) = entry {
cfg.add_edge(incoming, node_index, Jump::Direct);
}
// JUMPI implicitly starts a new bb for the else branch
current_block = Some(BasicBlock {
entry: Some(Entry::Else(node_index)),
opcodes: Range {
start: index + 1,
end: index + 1,
},
..Default::default()
});
let previous_node = node;
node = cfg.graph.add_node(BasicBlock::linear_at(index + 1));
cfg.graph.add_edge(previous_node, node, Branch::Static);
}
_ => current_block
.get_or_insert(BasicBlock {
opcodes: Range {
start: index,
end: index + 1,
},
..Default::default()
})
.instructions
.extend(instructions),
STOP | RETURN | REVERT | INVALID | SELFDESTRUCT => {
cfg.graph.add_edge(node, cfg.terminator, Branch::Static);
node = cfg.graph.add_node(BasicBlock::linear_at(index + 1));
}
_ => {}
}
bytecode_offset += opcode.length();
}
Self {
evm_instructions,
cfg,
symbol_table,
symbol_table: Default::default(),
jump_targets,
}
}
pub fn optimize(&mut self) {
DeadCodeElimination::run(&mut Default::default(), self);
BytecodeLifter::run(&mut Default::default(), self);
DeadCodeElimination::run(&mut Default::default(), self)
}
pub fn dot(&self, format_options: BasicBlockFormatOption) {
let get_node_attrs = move |_, (_, node): (_, &BasicBlock)| {
format!(
"label = \"{}\"",
node.format(&self.evm_instructions, format_options)
)
let get_node_attrs = move |_, (index, node): (_, &BasicBlock)| {
let (color, shape, label) = if index == self.cfg.terminator {
("red", "oval", "Terminator".to_string())
} else if index == self.cfg.start {
("red", "oval", "Start".to_string())
} else if index == self.cfg.invalid_jump {
("blue", "hexagon", "Invalid jump target".to_string())
} else if index == self.cfg.jump_table {
("blue", "diamond", "Dynamic jump table".to_string())
} else {
let instructions = node.format(&self.evm_instructions, format_options);
let start = &self.evm_instructions[node.opcodes.start].bytecode_offset;
let end = &self
.evm_instructions
.get(node.opcodes.end)
.unwrap_or_else(|| &self.evm_instructions[node.opcodes.end - 1])
.bytecode_offset;
(
"black",
"rectangle",
format!("Bytecode (0x{start:02x}, 0x{end:02x}]\n---\n{instructions}",),
)
};
format!("color={color} shape={shape} label=\"{label}\"",)
};
let get_edge_attrs = |_, edge: petgraph::stable_graph::EdgeReference<'_, Branch>| {
let style = match edge.weight() {
Branch::Static => "solid",
Branch::Dynamic => "dashed",
};
format!("style={style}")
};
let dot = Dot::with_attr_getters(
&self.cfg,
&self.cfg.graph,
&[Config::EdgeNoLabel, Config::NodeNoLabel],
&|_, edge| format!("label = \"{:?}\"", edge.weight()),
&get_edge_attrs,
&get_node_attrs,
);
+36 -303
View File
@@ -1,67 +1,68 @@
use evmil::bytecode::Instruction as EvmInstruction;
use primitive_types::U256;
use crate::symbol::{Global, SymbolRef};
use std::fmt::Write;
use crate::{
symbol::{Global, Symbol, SymbolTable, Type},
POINTER_SIZE,
};
#[derive(PartialEq, Debug)]
pub enum Instruction {
Nop,
/// `x = y op z`
BinaryAssign {
x: Symbol,
y: Symbol,
x: SymbolRef,
y: SymbolRef,
operator: Operator,
z: Symbol,
z: SymbolRef,
},
/// `x = op y`
UnaryAssign {
x: Symbol,
x: SymbolRef,
operator: Operator,
y: Symbol,
y: SymbolRef,
},
/// `branch target`
UncoditionalBranch { target: Symbol },
UncoditionalBranch {
target: SymbolRef,
},
/// `branch target if condition`
ConditionalBranch { condition: Symbol, target: Symbol },
ConditionalBranch {
condition: SymbolRef,
target: SymbolRef,
},
/// `call(label, n)`
Procedure {
symbol: Global,
parameters: Vec<Symbol>,
parameters: Vec<SymbolRef>,
},
/// `x = call(label, n)`
Function {
symbol: Global,
x: Symbol,
parameters: Vec<Symbol>,
x: SymbolRef,
parameters: Vec<SymbolRef>,
},
/// `x = y`
Copy { x: Symbol, y: Symbol },
Copy {
x: SymbolRef,
y: SymbolRef,
},
/// `x[index] = y`
IndexedAssign { x: Symbol, index: Symbol, y: Symbol },
IndexedAssign {
x: SymbolRef,
index: SymbolRef,
y: SymbolRef,
},
/// `x = y[index]`
IndexedCopy { x: Symbol, y: Symbol, index: Symbol },
}
impl Instruction {
fn target_address(&self) -> Symbol {
match self {
Instruction::Copy { x, .. } => *x,
Instruction::IndexedAssign { x, .. } => *x,
Instruction::IndexedCopy { x, .. } => *x,
_ => unreachable!(),
}
}
IndexedCopy {
x: SymbolRef,
y: SymbolRef,
index: SymbolRef,
},
}
impl std::fmt::Display for Instruction {
@@ -104,6 +105,8 @@ impl std::fmt::Display for Instruction {
Self::IndexedAssign { x, index, y } => write!(f, "{x}[{index}] = {y}"),
Self::IndexedCopy { x, y, index } => write!(f, "{x} = {y}[{index}]"),
Self::Nop => write!(f, "no-op"),
}
}
}
@@ -122,11 +125,11 @@ pub enum Operator {
Exp,
SignExtend,
LessThat,
LessThan,
GreaterThan,
SignedLessThan,
SignedGreaterThan,
Eq,
Equal,
IsZero,
And,
@@ -138,273 +141,3 @@ pub enum Operator {
ShiftRight,
ShiftArithmeticRight,
}
struct StackPop {
decrement: Instruction,
load: Instruction,
}
/// Pop a value from the stack.
///
/// Returns 2 `Instruction`: Decrementing the stack pointer and the value copy.
fn stack_pop(symbol_table: &mut SymbolTable) -> StackPop {
let decrement = decrement_stack_height(symbol_table);
let load = Instruction::IndexedCopy {
x: symbol_table.temporary(None),
y: symbol_table.global(Global::Stack),
index: symbol_table.global(Global::StackHeight),
};
StackPop { decrement, load }
}
/// Decrease the stack height by one.
fn decrement_stack_height(symbol_table: &mut SymbolTable) -> Instruction {
Instruction::BinaryAssign {
x: symbol_table.global(Global::StackHeight),
y: symbol_table.global(Global::StackHeight),
operator: Operator::Sub,
z: symbol_table.constant(U256::one(), Some(Global::StackHeight.typ())),
}
}
struct StackPush {
assign: Instruction,
increment: Instruction,
}
/// Push `value` onto the stack.
///
/// Produces two instructions: the store `Stack[StackHeight] = value` and the
/// stack height increment.
fn stack_push(symbol_table: &mut SymbolTable, value: Symbol) -> StackPush {
    let stack = symbol_table.global(Global::Stack);
    let slot = symbol_table.global(Global::StackHeight);
    let assign = Instruction::IndexedAssign {
        x: stack,
        index: slot,
        y: value,
    };

    StackPush {
        assign,
        increment: increment_stack_height(symbol_table),
    }
}
/// Build the instruction `StackHeight = StackHeight + 1`.
fn increment_stack_height(symbol_table: &mut SymbolTable) -> Instruction {
    let target = symbol_table.global(Global::StackHeight);
    let current = symbol_table.global(Global::StackHeight);
    // The constant `1` carries the same type hint as the stack height itself.
    let one = symbol_table.constant(U256::one(), Some(Global::StackHeight.typ()));

    Instruction::BinaryAssign {
        x: target,
        y: current,
        operator: Operator::Add,
        z: one,
    }
}
/// Lower a single EVM instruction into the corresponding 3AC instructions.
///
/// Stack operands are materialized through `stack_pop` / `stack_push`; every
/// pop contributes a stack height decrement followed by a load, every push a
/// store followed by a stack height increment. Any symbols required are
/// created in (or fetched from) `symbol_table`.
///
/// Opcodes without a dedicated arm lower to an empty instruction list.
pub fn translate(opcode: &EvmInstruction, symbol_table: &mut SymbolTable) -> Vec<Instruction> {
use EvmInstruction::*;
match opcode {
// Emits no instructions.
JUMPDEST => Vec::new(),
// Push a constant whose type hint records the byte width of the immediate.
PUSH(bytes) => {
let type_hint = Some(Type::Bytes(bytes.len()));
let value = symbol_table.constant(U256::from_big_endian(bytes), type_hint);
let push = stack_push(symbol_table, value);
vec![push.assign, push.increment]
}
// Discarding a value only requires lowering the stack height.
POP => vec![decrement_stack_height(symbol_table)],
// Pops the offset first, then the value, and stores `Memory[offset] = value`.
MSTORE => {
let offset = stack_pop(symbol_table);
let value = stack_pop(symbol_table);
let store = Instruction::IndexedAssign {
x: symbol_table.global(Global::Memory),
index: offset.load.target_address(),
y: value.load.target_address(),
};
vec![
offset.decrement,
offset.load,
value.decrement,
value.load,
store,
]
}
// Pops the jump target and branches to it unconditionally.
JUMP => {
let target = stack_pop(symbol_table);
let jump = Instruction::UncoditionalBranch {
target: target.load.target_address(),
};
vec![target.decrement, target.load, jump]
}
// Pops offset and size and passes them to the `Return` procedure.
RETURN => {
let offset = stack_pop(symbol_table);
let size = stack_pop(symbol_table);
let procedure = Instruction::Procedure {
symbol: Global::Return,
parameters: vec![offset.load.target_address(), size.load.target_address()],
};
vec![
offset.decrement,
offset.load,
size.decrement,
size.load,
procedure,
]
}
// Pops destination offset, source offset and size (in that order) and
// passes them to the `MemoryCopy` procedure.
CALLDATACOPY => {
let destination_offset = stack_pop(symbol_table);
let offset = stack_pop(symbol_table);
let size = stack_pop(symbol_table);
let parameters = vec![
destination_offset.load.target_address(),
offset.load.target_address(),
size.load.target_address(),
];
let procedure = Instruction::Procedure {
symbol: Global::MemoryCopy,
parameters,
};
vec![
destination_offset.decrement,
destination_offset.load,
offset.decrement,
offset.load,
size.decrement,
size.load,
procedure,
]
}
// Pops the index, loads `CallData[index]` into a temporary and pushes it.
CALLDATALOAD => {
let index = stack_pop(symbol_table);
let value = Instruction::IndexedCopy {
x: symbol_table.temporary(None),
y: symbol_table.global(Global::CallData),
index: index.load.target_address(),
};
let push = stack_push(symbol_table, value.target_address());
vec![
index.decrement,
index.load,
value,
push.assign,
push.increment,
]
}
// Calls the `Stop` procedure without parameters.
STOP => {
vec![Instruction::Procedure {
symbol: Global::Stop,
parameters: Default::default(),
}]
}
// Lowered as a call to `Revert` with constant zero offset and zero size.
INVALID => {
let offset = symbol_table.constant(U256::zero(), Some(Type::Int(POINTER_SIZE)));
let size = symbol_table.constant(U256::zero(), Some(Type::Int(POINTER_SIZE)));
vec![Instruction::Procedure {
symbol: Global::Revert,
parameters: vec![offset, size],
}]
}
// Pops offset and size and passes them to the `Revert` procedure.
REVERT => {
let offset = stack_pop(symbol_table);
let size = stack_pop(symbol_table);
let procedure = Instruction::Procedure {
symbol: Global::Revert,
parameters: vec![offset.load.target_address(), size.load.target_address()],
};
vec![
offset.decrement,
offset.load,
size.decrement,
size.load,
procedure,
]
}
// NOTE(review): every remaining opcode is silently lowered to no
// instructions; a `todo!("{opcode}")` fallback used to be here but is disabled.
_ => Vec::new(),
}
}
// Unit tests for lowering EVM instructions into 3AC instructions.
#[cfg(test)]
mod tests {
use evmil::bytecode;
use primitive_types::U256;
use crate::{
instruction::Operator,
symbol::{Address, Global, Kind, Symbol, Type},
};
use super::Instruction;
// `PUSH 0x01` must lower to the store `Stack[StackHeight] = 0x01` followed by
// `StackHeight = StackHeight + 1`.
#[test]
fn lower_push_works() {
let mut symbol_table = Default::default();
let opcode = bytecode::Instruction::PUSH(vec![0x01]);
let result = super::translate(&opcode, &mut symbol_table);
let expected = vec![
Instruction::IndexedAssign {
x: Symbol {
address: Address::Label(Global::Stack),
type_hint: Global::Stack.typ(),
kind: Global::Stack.kind(),
},
index: Symbol {
address: Address::Label(Global::StackHeight),
type_hint: Global::StackHeight.typ(),
kind: Global::StackHeight.kind(),
},
// The pushed constant is typed by the width of the immediate (one byte).
y: Symbol {
address: Address::Constant(U256::one()),
type_hint: Type::Bytes(1),
kind: Kind::Value,
},
},
Instruction::BinaryAssign {
x: Symbol {
address: Address::Label(Global::StackHeight),
type_hint: Global::StackHeight.typ(),
kind: Global::StackHeight.kind(),
},
y: Symbol {
address: Address::Label(Global::StackHeight),
type_hint: Global::StackHeight.typ(),
kind: Global::StackHeight.kind(),
},
operator: Operator::Add,
// The increment constant is typed like the stack height.
z: Symbol {
address: Address::Constant(U256::one()),
type_hint: Global::StackHeight.typ(),
kind: Kind::Value,
},
},
];
assert_eq!(expected, result);
}
}
+2
View File
@@ -1,5 +1,7 @@
pub mod analysis;
pub mod cfg;
pub mod instruction;
pub mod pass;
pub mod symbol;
pub static POINTER_SIZE: usize = 32;
+15
View File
@@ -0,0 +1,15 @@
use crate::{
analysis::{analyze, control_flow::ReachableCode},
cfg::Program,
};
use super::Pass;
/// Pass that runs the `ReachableCode` control flow analysis over a program.
///
/// NOTE(review): whether unreachable code is actually removed depends on what
/// `ReachableCode` does during `analyze`; that is not visible from this file.
#[derive(Default)]
pub struct DeadCodeElimination;
impl Pass for DeadCodeElimination {
// Delegates the whole pass to a single `ReachableCode` analysis run.
fn run(&mut self, program: &mut Program) {
analyze::<ReachableCode>(program);
}
}
+19
View File
@@ -0,0 +1,19 @@
use crate::{
analysis::{
analyze, control_flow::StaticJumps, evm_bytecode::IrBuilder, types::TypePropagation,
},
cfg::Program,
};
use super::Pass;
/// Pass that runs the `IrBuilder`, `StaticJumps` and `TypePropagation`
/// analyses over a program, in that order.
#[derive(Default)]
pub struct BytecodeLifter;
impl Pass for BytecodeLifter {
// The three analyses run sequentially on the same program.
// NOTE(review): presumably the later analyses rely on the IR produced by
// `IrBuilder`, so the order matters; confirm before reordering.
fn run(&mut self, program: &mut Program) {
analyze::<IrBuilder>(program);
analyze::<StaticJumps>(program);
analyze::<TypePropagation>(program);
}
}
+8
View File
@@ -0,0 +1,8 @@
use crate::cfg::Program;
pub mod dead_code;
pub mod lift;
/// A transformation or analysis step applied to a whole [`Program`].
///
/// Implementors must be constructible through `Default`.
pub trait Pass: Default {
/// Run this pass on `program`, mutating it in place.
fn run(&mut self, program: &mut Program);
}
+169 -48
View File
@@ -1,53 +1,136 @@
use indexmap::IndexSet;
use indexmap::IndexMap;
use petgraph::prelude::NodeIndex;
use primitive_types::U256;
use std::{cell::RefCell, rc::Rc};
use crate::POINTER_SIZE;
#[derive(Debug, Default)]
pub struct SymbolTable {
symbols: IndexSet<Symbol>,
nonce: usize,
table: IndexMap<NodeIndex, IndexMap<usize, Rc<RefCell<Symbol>>>>,
symbols: IndexMap<usize, Rc<RefCell<Symbol>>>,
global_scope: NodeIndex,
id_nonce: usize,
}
impl SymbolTable {
/// NOTE(review): this looks like an unfinished stub. Neither `node` nor
/// `target` is used, and the bindings `sym` and `new` are never read.
///
/// As written, it removes the symbol with ID `0` from `symbols` and looks up
/// ID `0` in the scope keyed by `NodeIndex::default()`.
///
/// # Panics
///
/// Panics if `symbols` has no entry for ID `0`, if there is no scope for
/// `NodeIndex::default()`, or if that scope has no entry for ID `0`.
pub fn merge_scopes(&mut self, node: NodeIndex, target: NodeIndex) {
// Side effect: the symbol with ID 0 is dropped from the flat `symbols` map.
let sym = self.symbols.remove(&0).unwrap();
let new = self
.table
.get(&NodeIndex::default())
.unwrap()
.get(&0)
.unwrap();
// The intended replacement of `sym` by `new` is disabled:
//RefCell::replace(&sym, Rc::clone(new));
}
pub fn get_symbol(&self, id: usize) -> SymbolRef {
SymbolRef {
inner: self.symbols.get(&id).unwrap().clone(),
id,
}
}
/// Register `symbol` under a fresh ID in `scope` and return a handle to it.
///
/// The symbol is shared between the per-scope table and the flat `symbols`
/// map, so both resolve the ID to the same underlying cell.
pub fn insert(&mut self, scope: NodeIndex, symbol: Symbol) -> SymbolRef {
    let id = self.next();
    let handle = SymbolRef {
        inner: Rc::new(RefCell::new(symbol)),
        id,
    };

    let scoped = self.table.entry(scope).or_default();
    scoped.insert(id, Rc::clone(&handle.inner));
    self.symbols.insert(id, Rc::clone(&handle.inner));

    handle
}
/// Return the symbol for the global `label`, creating it on first use.
///
/// Globals live in the global scope; an existing entry is found by comparing
/// its address against `Address::Label(label)`.
pub fn global(&mut self, label: Global) -> SymbolRef {
    let wanted = Address::Label(label);
    let existing = self
        .table
        .entry(self.global_scope)
        .or_default()
        .iter()
        .find(|(_, candidate)| candidate.borrow().address == wanted)
        .map(|(id, _)| *id);

    match existing {
        Some(id) => self.get_symbol(id),
        None => self.insert(self.global_scope, Symbol::builder().global(label)),
    }
}
/// Create a fresh temporary variable symbol in the scope of `node`.
pub fn temporary(&mut self, node: NodeIndex) -> SymbolRef {
    let symbol = Symbol::builder().temporary().variable().done();
    self.insert(node, symbol)
}
fn next(&mut self) -> usize {
let current = self.nonce;
self.nonce += 1;
let current = self.id_nonce;
self.id_nonce += 1;
current
}
}
pub fn temporary(&mut self, type_hint: Option<Type>) -> Symbol {
let id = self.next();
let symbol = Symbol {
address: Address::Temporary(id),
type_hint: type_hint.unwrap_or_default(),
kind: Kind::Value,
};
assert!(self.symbols.insert(symbol));
/// Builder for `Symbol` values.
///
/// The type parameters track which parts have been provided so far: `A` is
/// `()` until an address has been chosen and `K` is `()` until a kind has been
/// chosen. `done` is only available once both are set.
#[derive(Default)]
pub struct SymbolBuilder<A = (), K = ()> {
// `()` or the chosen `Address`.
address: A,
// Starts out as `Type::default()`.
type_hint: Type,
// `()` or the chosen `Kind`.
kind: K,
}
symbol
impl<K> SymbolBuilder<(), K> {
pub fn temporary(self) -> SymbolBuilder<Address, K> {
SymbolBuilder {
address: Address::Temporary,
type_hint: self.type_hint,
kind: self.kind,
}
}
pub fn constant(&mut self, value: U256, type_hint: Option<Type>) -> Symbol {
let symbol = Symbol {
address: Address::Constant(value),
type_hint: type_hint.unwrap_or_default(),
kind: Kind::Value,
};
self.symbols.insert(symbol);
symbol
pub fn stack(self, slot: i32) -> SymbolBuilder<Address, K> {
SymbolBuilder {
address: Address::Stack(slot),
type_hint: self.type_hint,
kind: self.kind,
}
}
pub fn global(&mut self, label: Global) -> Symbol {
let symbol = Symbol {
pub fn global(self, label: Global) -> Symbol {
Symbol {
address: Address::Label(label),
type_hint: label.typ(),
kind: label.kind(),
};
self.symbols.insert(symbol);
}
}
}
symbol
impl<A> SymbolBuilder<A, ()> {
    /// Make the symbol a constant holding the big-endian value of `bytes`.
    ///
    /// This also overwrites the type hint with `Type::Bytes(bytes.len())`.
    pub fn constant(self, bytes: &[u8]) -> SymbolBuilder<A, Kind> {
        let Self { address, .. } = self;
        let value = U256::from_big_endian(bytes);

        SymbolBuilder {
            address,
            type_hint: Type::Bytes(bytes.len()),
            kind: Kind::Constant(value),
        }
    }

    /// Make the symbol a variable, keeping the current type hint.
    pub fn variable(self) -> SymbolBuilder<A, Kind> {
        let Self {
            address, type_hint, ..
        } = self;

        SymbolBuilder {
            address,
            type_hint,
            kind: Kind::Variable,
        }
    }
}
impl<A, K> SymbolBuilder<A, K> {
    /// Set the type hint of the symbol under construction.
    pub fn of(mut self, type_hint: Type) -> Self {
        self.type_hint = type_hint;
        self
    }
}
impl SymbolBuilder<Address, Kind> {
    /// Finish building; only available once address and kind are both set.
    pub fn done(self) -> Symbol {
        let Self {
            address,
            type_hint,
            kind,
        } = self;

        Symbol {
            address,
            type_hint,
            kind,
        }
    }
}
@@ -58,40 +141,76 @@ pub struct Symbol {
pub kind: Kind,
}
impl std::fmt::Display for Symbol {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "({} {})", self.type_hint, self.address)?;
impl Symbol {
pub fn builder() -> SymbolBuilder {
Default::default()
}
}
match self.kind {
Kind::Pointer => write!(f, "*"),
_ => Ok(()),
/// A shared handle to a `Symbol` together with its symbol table ID.
///
/// Cloning the handle clones the `Rc`, so all clones observe changes made
/// through any of them.
#[derive(Clone, Debug)]
pub struct SymbolRef {
// Shared, interior-mutable storage of the symbol.
inner: Rc<RefCell<Symbol>>,
// ID under which the symbol is registered.
id: usize,
}
impl std::fmt::Display for SymbolRef {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let symbol = self.symbol();
let address = format!("${}_{}", self.id, symbol.address);
match symbol.kind {
Kind::Pointer => write!(f, "*{address}"),
Kind::Constant(value) => {
write!(f, "{} {address} := {value}", symbol.type_hint)
}
_ => write!(f, "{} {address} ", symbol.type_hint),
}
}
}
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
impl SymbolRef {
pub fn replace_type(&self, type_hint: Type) {
self.inner.replace_with(|old| Symbol {
address: old.address,
kind: old.kind,
type_hint,
});
}
pub fn symbol(&self) -> Symbol {
*self.inner.borrow()
}
pub fn id(&self) -> usize {
self.id
}
}
impl PartialEq for SymbolRef {
    /// Two references are equal when they carry the same ID; the contents of
    /// the referenced symbols are not compared.
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.id, other.id);
        lhs == rhs
    }
}
#[derive(Default, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Address {
Constant(U256),
Temporary(usize),
#[default]
Temporary,
Stack(i32),
Label(Global),
}
impl std::fmt::Display for Address {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Constant(value) => write!(f, "0x{value:02x}"),
Self::Temporary(n) => write!(f, "tmp_{n}"),
Self::Stack(slot) => write!(f, "stack[{slot}]"),
Self::Temporary => write!(f, "tmp"),
Self::Label(label) => write!(f, "{label:?}"),
}
}
}
impl Address {
    /// Build a constant address from the big-endian encoded `bytes`.
    pub fn from_be_bytes(bytes: &[u8]) -> Self {
        let value = U256::from_big_endian(bytes);
        Self::Constant(value)
    }
}
#[derive(Debug, PartialEq, Eq, Hash, Default, Clone, Copy)]
pub enum Type {
#[default]
@@ -120,10 +239,12 @@ impl Type {
}
}
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
#[derive(Default, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Kind {
Pointer,
Value,
#[default]
Variable,
Constant(U256),
Function,
}
@@ -189,7 +310,7 @@ impl Global {
pub fn kind(&self) -> Kind {
match self {
Self::Stack | Self::CallData | Self::Memory | Self::ReturnData => Kind::Pointer,
Self::StackHeight => Kind::Value,
Self::StackHeight => Kind::Variable,
_ => Kind::Function,
}
}
+15 -13
View File
@@ -1,25 +1,27 @@
// SPDX-License-Identifier: Apache-2.0
fn llvm_config(arg: &str) -> String {
let output = std::process::Command::new("llvm-config")
.args([arg])
.output()
.unwrap_or_else(|_| panic!("`llvm-config {arg}` failed"));
use std::{io::Read, process::Command};
String::from_utf8(output.stdout)
.unwrap_or_else(|_| panic!("output of `llvm-config {arg}` should be utf8"))
}
fn main() {
let mut flags = String::new();
Command::new("llvm-config")
.args(["--cxxflags"])
.output()
.expect("llvm-config should be able to provide CXX flags")
.stdout
.as_slice()
.read_to_string(&mut flags)
.expect("llvm-config output should be utf8");
let mut builder = cc::Build::new();
flags
llvm_config("--cxxflags")
.split_whitespace()
.fold(&mut builder, |builder, flag| builder.flag(flag))
.cpp(true)
.file("src/linker.cpp")
.compile("liblinker.a");
println!("cargo:rustc-link-search=native={}", llvm_config("--libdir"));
for lib in ["lldELF", "lldCommon", "lldMachO"] {
println!("cargo:rustc-link-lib=static={lib}");
}
println!("cargo:rerun-if-changed=build.rs");
}
+1 -3
View File
@@ -1,5 +1,3 @@
// SPDX-License-Identifier: Apache-2.0
#include "lld/Common/Driver.h"
#include "lld/Common/CommonLinkerContext.h"
#include "llvm/Support/CrashRecoveryContext.h"
@@ -19,4 +17,4 @@ extern "C" bool LLDELFLink(const char *argv[], size_t length)
llvm::CrashRecoveryContext crc;
return canRunAgain && crc.RunSafely([&]()
{ lld::CommonLinkerContext::destroy(); });
}
}
-14
View File
@@ -1,14 +0,0 @@
/// Returns the sum of `left` and `right`.
pub fn add(left: usize, right: usize) -> usize {
    left + right
}
// Unit tests for `add`.
#[cfg(test)]
mod tests {
use super::*;
// Sanity check: 2 + 2 must equal 4.
#[test]
fn it_works() {
let result = add(2, 2);
assert_eq!(result, 4);
}
}
+18
View File
@@ -0,0 +1,18 @@
[package]
name = "revive-target-polkavm"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
inkwell = { workspace = true }
tempfile = { workspace = true }
polkavm-linker = { workspace = true }
polkavm-common = { workspace = true }
libc = { workspace = true }
revive-codegen = { path = "../codegen" }
revive-compilation-target = { path = "../compilation-target" }
revive-builtins = { path = "../builtins" }
lld-sys = { path = "../lld-sys" }
Binary file not shown.
+77
View File
@@ -0,0 +1,77 @@
use inkwell::{builder::Builder, context::Context, module::Module, values::FunctionValue};
use polkavm_common::elf::FnMetadata;
use revive_compilation_target::environment::Environment;
use revive_compilation_target::target::Target;
use crate::PolkaVm;
impl<'ctx> Environment<'ctx> for PolkaVm {
    /// Build the `entrypoint` module exposing the PolkaVM `call` and `deploy`
    /// exports.
    ///
    /// The export metadata modules produced by `pvm_exports` are linked in
    /// first. `call` then invokes `start` and returns, while `deploy` consists
    /// of a single `unreachable`. Both functions are placed in the
    /// `.text.polkavm_export` section.
    ///
    /// # Panics
    ///
    /// Panics if linking one of the export metadata modules fails.
    fn call_start(&'ctx self, builder: &Builder<'ctx>, start: FunctionValue<'ctx>) -> Module<'ctx> {
        let module = self.context().create_module("entrypoint");

        let (call_exports, deploy_exports) = pvm_exports(&self.0);
        module.link_in_module(call_exports).unwrap();
        module.link_in_module(deploy_exports).unwrap();

        let function_type = self.context().void_type().fn_type(&[], false);

        let call = module.add_function("call", function_type, None);
        call.set_section(Some(".text.polkavm_export"));
        builder.position_at_end(self.context().append_basic_block(call, "entry"));
        builder.build_call(start, &[], "call_start");
        builder.build_return(None);

        let deploy = module.add_function("deploy", function_type, None);
        deploy.set_section(Some(".text.polkavm_export"));
        builder.position_at_end(self.context().append_basic_block(deploy, "entry"));
        // `unreachable` is a terminator and already ends the block. Emitting a
        // `ret` after it would put a second terminator into the same basic
        // block, which is not well-formed LLVM IR.
        builder.build_unreachable();

        module
    }
}
/// Create the two modules carrying the PolkaVM export metadata for the `call`
/// and `deploy` symbols, returned in that order.
///
/// Each module contains nothing but the module-level inline assembly produced
/// by `generate_export_assembly`.
pub(super) fn pvm_exports(context: &Context) -> (Module, Module) {
    let export_module = |module_name: &str, symbol: &str| {
        let module = context.create_module(module_name);
        module.set_inline_assembly(&generate_export_assembly(symbol));
        module
    };

    let call_exports = export_module("pvm_call", "call");
    let deploy_exports = export_module("pvm_deploy", "deploy");

    (call_exports, deploy_exports)
}
/// Generate the assembly that registers `symbol` in the `.polkavm_exports`
/// section.
///
/// The entry consists of a version byte, the 4 byte address of `symbol` and
/// the serialized `FnMetadata` (named after `symbol`, with default arguments
/// and return type).
fn generate_export_assembly(symbol: &str) -> String {
    // Metadata
    let mut metadata = Vec::new();
    FnMetadata {
        name: symbol.to_string(),
        args: Default::default(),
        return_ty: Default::default(),
    }
    .serialize(|slice| metadata.extend_from_slice(slice));

    [
        ".pushsection .polkavm_exports,\"\",@progbits\n".to_string(),
        ".byte 1\n".to_string(),          // Version.
        format!(".4byte {symbol}\n"),     // Address
        bytes_to_asm(&metadata),
        ".popsection\n".to_string(),
    ]
    .concat()
}
/// Render `bytes` as assembly, one `.byte 0x..` directive per line.
///
/// Every byte is printed as two lowercase hex digits and each line, including
/// the last one, ends with a newline. Empty input yields an empty string.
pub fn bytes_to_asm(bytes: &[u8]) -> String {
    use std::fmt::Write;

    // Length of a single rendered directive, used to size the buffer up front.
    const DIRECTIVE_LEN: usize = ".byte 0x00\n".len();

    bytes.iter().fold(
        String::with_capacity(bytes.len() * DIRECTIVE_LEN),
        |mut assembly, byte| {
            writeln!(assembly, ".byte 0x{byte:02x}").unwrap();
            assembly
        },
    )
}
+13
View File
@@ -0,0 +1,13 @@
use inkwell::context::Context;
pub mod environment;
pub mod linker;
pub mod target;
/// The PolkaVM compilation target; wraps the LLVM [`Context`] it owns.
pub struct PolkaVm(Context);
impl Default for PolkaVm {
fn default() -> Self {
Self(Context::create())
}
}
+86
View File
@@ -0,0 +1,86 @@
use std::{ffi::CString, fs};
use lld_sys::LLDELFLink;
use revive_builtins::COMPILER_RT;
/// Linker script that `link` writes to a temporary file and passes to `ld.lld`.
///
/// Layout as spelled out below: read-only data, `.data.rel.ro` and `.got`
/// start at `0x10000`; `.data` and `.bss` follow after aligning to `0x4000`;
/// `.text` is placed at `0xf0000000` with `.text.polkavm_export` kept
/// unconditionally; `.eh_frame` is discarded.
const LINKER_SCRIPT: &str = r#"
SECTIONS {
. = 0x10000;
.rodata : { *(.rodata) *(.rodata.*) }
.data.rel.ro : { *(.data.rel.ro) *(.data.rel.ro.*) }
.got : { *(.got) *(.got.*) }
. = ALIGN(0x4000);
.data : { *(.sdata) *(.data) }
.bss : { *(.sbss) *(.bss) *(.bss.*) }
. = 0xf0000000;
.text : { KEEP(*(.text.polkavm_export)) *(.text .text.*) }
/DISCARD/ : { *(.eh_frame) }
. = ALIGN(4);
}"#;
/// Invoke the ELF flavour of LLD with `cmd_args` as its command line.
///
/// Returns `true` when `LLDELFLink` returns `0`. Note that the caller in this
/// file asserts on the negated result, i.e. it treats `true` as failure.
///
/// # Panics
///
/// Panics if any argument contains an interior NUL byte.
fn invoke_lld(cmd_args: &[&str]) -> bool {
    let mut owned_args = Vec::with_capacity(cmd_args.len());
    for arg in cmd_args {
        owned_args.push(CString::new(*arg).expect("ld.lld args should not contain null bytes"));
    }

    let argv: Vec<*const libc::c_char> = owned_args.iter().map(|arg| arg.as_ptr()).collect();

    // SAFETY: every pointer in `argv` points into a `CString` owned by
    // `owned_args`, which outlives the call, and the length passed matches the
    // number of pointers. Assumes `LLDELFLink` only reads that many
    // NUL-terminated strings and does not retain them (confirm in `lld-sys`).
    let status = unsafe { LLDELFLink(argv.as_ptr(), argv.len()) };

    status == 0
}
/// Convert the ELF image `code` into a PolkaVM program blob.
///
/// The PolkaVM linker is configured to strip the program.
///
/// # Panics
///
/// Panics if the PolkaVM linker rejects the ELF image.
fn polkavm_linker(code: &[u8]) -> Vec<u8> {
    let mut config = polkavm_linker::Config::default();
    config.set_strip(true);

    polkavm_linker::program_from_elf(config, code)
        .map(|blob| blob.as_bytes().to_vec())
        .unwrap_or_else(|reason| panic!("polkavm linker failed: {}", reason))
}
/// Link the object file `input` into a PolkaVM program blob.
///
/// The object, the linker script and the compiler runtime builtins archive are
/// written to a temporary directory, `ld.lld` is run over them and the
/// resulting ELF image is handed to the PolkaVM linker.
///
/// As a debugging aid, the object file, the linked ELF image and the linker
/// script are also copied to `/tmp`. Failing to do so only produces a warning
/// on stderr.
///
/// # Panics
///
/// Panics if the temporary files can not be created, if a path is not valid
/// UTF-8, if `ld.lld` fails or if the PolkaVM linker fails.
pub(crate) fn link(input: &[u8]) -> Vec<u8> {
    let dir = tempfile::tempdir().expect("failed to create temp directory for linking");
    let output_path = dir.path().join("out.so");
    let object_path = dir.path().join("out.o");
    let linker_script_path = dir.path().join("linker.ld");
    let compiler_rt_path = dir.path().join("libclang_rt.builtins-riscv32.a");

    fs::write(&object_path, input).unwrap_or_else(|msg| panic!("{msg} {object_path:?}"));
    fs::write(&linker_script_path, LINKER_SCRIPT)
        .unwrap_or_else(|msg| panic!("{msg} {linker_script_path:?}"));
    fs::write(&compiler_rt_path, COMPILER_RT)
        .unwrap_or_else(|msg| panic!("{msg} {compiler_rt_path:?}"));

    let ld_args = [
        "ld.lld",
        "--error-limit=0",
        "--relocatable",
        "--emit-relocs",
        "--no-relax",
        "--gc-sections",
        "--library-path",
        dir.path().to_str().expect("should be utf8"),
        "--library",
        "clang_rt.builtins-riscv32",
        linker_script_path.to_str().expect("should be utf8"),
        object_path.to_str().expect("should be utf8"),
        "-o",
        output_path.to_str().expect("should be utf8"),
    ];

    // `invoke_lld` yields `true` when lld reports a failure.
    assert!(!invoke_lld(&ld_args), "ld.lld failed");

    // Debugging aid only: a successful link must not be aborted because the
    // artifacts could not be copied (e.g. `/tmp` missing or not writable).
    // The linker script copy was previously misspelled as `linkder.ld`.
    for (artifact, destination) in [
        (&object_path, "/tmp/out.o"),
        (&output_path, "/tmp/out.so"),
        (&linker_script_path, "/tmp/linker.ld"),
    ] {
        if let Err(error) = fs::copy(artifact, destination) {
            eprintln!("warning: failed to copy {artifact:?} to {destination}: {error}");
        }
    }

    let blob = fs::read(&output_path).expect("ld.lld should produce output");
    polkavm_linker(&blob)
}
+34
View File
@@ -0,0 +1,34 @@
use inkwell::{
context::Context, memory_buffer::MemoryBuffer, module::Module, targets::RelocMode,
OptimizationLevel,
};
use revive_compilation_target::target::Target;
use crate::PolkaVm;
impl<'ctx> Target<'ctx> for PolkaVm {
    const TARGET_NAME: &'static str = "riscv32";
    const TARGET_TRIPLE: &'static str = "riscv32-unknown-unknown-elf";
    const TARGET_FEATURES: &'static str = "+e,+m";
    const CPU: &'static str = "generic-rv32";
    const RELOC_MODE: RelocMode = RelocMode::PIC;

    /// Return the modules to be linked into every program: the PolkaVM guest
    /// bitcode embedded into this crate at compile time.
    ///
    /// # Panics
    ///
    /// Panics if the embedded bitcode can not be parsed.
    fn libraries(&'ctx self) -> Vec<Module<'ctx>> {
        let bitcode = MemoryBuffer::create_from_memory_range(
            include_bytes!("../polkavm_guest.bc"),
            "guest_bc",
        );
        let guest = Module::parse_bitcode_from_buffer(&bitcode, &self.0).unwrap();

        vec![guest]
    }

    /// The LLVM context owned by this target.
    fn context(&self) -> &Context {
        let Self(context) = self;
        context
    }

    /// Link the object file `blob` into a PolkaVM program blob.
    fn link(&self, blob: &[u8]) -> Vec<u8> {
        crate::linker::link(blob)
    }

    /// Code is always generated with aggressive optimizations.
    fn optimization_level(&self) -> OptimizationLevel {
        OptimizationLevel::Aggressive
    }
}