Emerge Yul recompiler (#1)

Provide a modified (and incomplete) version of ZKSync zksolc that can compile the most basic contracts
This commit is contained in:
Cyrill Leutwiler
2024-03-12 12:06:02 +01:00
committed by GitHub
parent d238d8f39e
commit cffa14a4d2
247 changed files with 35357 additions and 4905 deletions
@@ -0,0 +1,58 @@
//!
//! The Ethereal IR entry function link.
//!
use inkwell::values::BasicValue;
use crate::evmla::ethereal_ir::EtherealIR;
///
/// The Ethereal IR entry function link.
///
/// A link stands for the branch into the entry function for one of the two code parts:
/// the deploy code or the runtime code.
///
#[derive(Clone, Debug)]
pub struct EntryLink {
    /// The code part this link enters.
    pub code_type: era_compiler_llvm_context::EraVMCodeType,
}
impl EntryLink {
///
/// A shortcut constructor.
///
pub fn new(code_type: era_compiler_llvm_context::EraVMCodeType) -> Self {
Self { code_type }
}
}
impl<D> era_compiler_llvm_context::EraVMWriteLLVM<D> for EntryLink
where
    D: era_compiler_llvm_context::EraVMDependency + Clone,
{
    ///
    /// Emits an invocation of the default entry function.
    ///
    /// The single argument is a boolean-width integer flag: `1` when the link enters the
    /// deploy code and `0` when it enters the runtime code.
    ///
    /// # Panics
    ///
    /// Panics if the default entry function cannot be looked up in the context.
    ///
    fn into_llvm(
        self,
        context: &mut era_compiler_llvm_context::EraVMContext<D>,
    ) -> anyhow::Result<()> {
        let entry_function = context
            .get_function(EtherealIR::DEFAULT_ENTRY_FUNCTION_NAME)
            .expect("Always exists")
            .borrow()
            .declaration();

        // Pick the flag value first, then materialize the LLVM constant once.
        let deploy_flag: u64 = match self.code_type {
            era_compiler_llvm_context::EraVMCodeType::Deploy => 1,
            era_compiler_llvm_context::EraVMCodeType::Runtime => 0,
        };
        let is_deploy_code = context
            .integer_type(era_compiler_common::BIT_LENGTH_BOOLEAN)
            .const_int(deploy_flag, false);

        let call_name = format!("call_link_{}", EtherealIR::DEFAULT_ENTRY_FUNCTION_NAME);
        context.build_invoke(
            entry_function,
            &[is_deploy_code.as_basic_value_enum()],
            call_name.as_str(),
        );

        Ok(())
    }
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,41 @@
//!
//! The Ethereal IR block element stack element.
//!
///
/// The Ethereal IR block element stack element.
///
/// Describes what is known about a single slot of the tracked stack.
///
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Element {
    /// A runtime value, referred to by its identifier.
    Value(String),
    /// A constant known at compile time.
    Constant(num::BigUint),
    /// A destination tag known at compile time.
    Tag(num::BigUint),
    /// A path known at compile time.
    Path(String),
    /// A chunk of hexadecimal data known at compile time.
    Data(String),
    /// The return address of a recursive function.
    ReturnAddress(usize),
}
impl Element {
pub fn value(identifier: String) -> Self {
Self::Value(identifier)
}
}
impl std::fmt::Display for Element {
    ///
    /// Writes the short textual form of the element:
    /// `V_<identifier>` for values, upper-case hexadecimal for constants, `T_<tag>` for tags,
    /// the raw string for paths and data, and `RETURN_ADDRESS` for return addresses.
    ///
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Paths and data are printed verbatim.
            Self::Path(text) | Self::Data(text) => f.write_str(text),
            Self::ReturnAddress(_) => f.write_str("RETURN_ADDRESS"),
            Self::Value(identifier) => write!(f, "V_{identifier}"),
            Self::Tag(tag) => write!(f, "T_{tag}"),
            Self::Constant(value) => write!(f, "{value:X}"),
        }
    }
}
@@ -0,0 +1,149 @@
//!
//! The Ethereal IR block element stack.
//!
pub mod element;
use self::element::Element;
///
/// The Ethereal IR block element stack.
///
#[derive(Clone, Debug, Default)]
pub struct Stack {
    /// The stack elements; the last element of the vector is the top of the stack.
    pub elements: Vec<Element>,
}
impl Stack {
    /// The default stack size.
    pub const DEFAULT_STACK_SIZE: usize = 16;

    ///
    /// Creates an empty stack with room reserved for `DEFAULT_STACK_SIZE` elements.
    ///
    pub fn new() -> Self {
        Self::with_capacity(Self::DEFAULT_STACK_SIZE)
    }

    ///
    /// Creates an empty stack with room reserved for `capacity` elements.
    ///
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            elements: Vec::with_capacity(capacity),
        }
    }

    ///
    /// Creates a stack that owns the specified elements (the last one is the top).
    ///
    pub fn new_with_elements(elements: Vec<Element>) -> Self {
        Self { elements }
    }

    ///
    /// The stack state hash, which acts as a block identifier.
    ///
    /// Each block clone has its own initial stack state, which uniquely identifies the block.
    ///
    /// Tags contribute their big-endian bytes; every other element contributes a single
    /// zero byte, so only the tag values and the element positions influence the digest.
    ///
    /// NOTE(review): a zero-valued tag presumably also serializes to a single zero byte and
    /// would then be indistinguishable from a non-tag element here — confirm this is fine.
    ///
    pub fn hash(&self) -> md5::Digest {
        let mut hash_context = md5::Context::new();
        for element in self.elements.iter() {
            match element {
                Element::Tag(tag) => hash_context.consume(tag.to_bytes_be()),
                _ => hash_context.consume([0]),
            }
        }
        hash_context.compute()
    }

    ///
    /// Pushes an element onto the stack.
    ///
    pub fn push(&mut self, element: Element) {
        self.elements.push(element);
    }

    ///
    /// Appends another stack on top of this one, leaving `other` empty.
    ///
    pub fn append(&mut self, other: &mut Self) {
        self.elements.append(&mut other.elements);
    }

    ///
    /// Pops the top stack element.
    ///
    /// # Errors
    ///
    /// Returns an error if the stack is empty.
    ///
    pub fn pop(&mut self) -> anyhow::Result<Element> {
        self.elements
            .pop()
            .ok_or_else(|| anyhow::anyhow!("Stack underflow"))
    }

    ///
    /// Pops the tag from the top.
    ///
    /// # Errors
    ///
    /// Returns an error if the stack is empty or the top element is not a tag. The top
    /// element is removed even when it turns out not to be a tag.
    ///
    pub fn pop_tag(&mut self) -> anyhow::Result<num::BigUint> {
        match self.elements.pop() {
            Some(Element::Tag(tag)) => Ok(tag),
            Some(element) => anyhow::bail!("Expected tag, found {}", element),
            None => anyhow::bail!("Stack underflow"),
        }
    }

    ///
    /// Swaps the top element with the one `index` positions below it.
    ///
    /// # Errors
    ///
    /// Returns an error if the stack holds fewer than `index + 1` elements.
    ///
    pub fn swap(&mut self, index: usize) -> anyhow::Result<()> {
        let length = self.elements.len();
        // Equivalent to `length < index + 1`, but cannot overflow for huge indexes.
        if index >= length {
            anyhow::bail!("Stack underflow");
        }
        self.elements.swap(length - 1, length - 1 - index);
        Ok(())
    }

    ///
    /// Returns a copy of the element at the 1-based position `index` counted from the top,
    /// i.e. `1` refers to the top element. The stack itself is left unchanged.
    ///
    /// # Errors
    ///
    /// Returns an error if `index` is zero or exceeds the stack length.
    ///
    pub fn dup(&mut self, index: usize) -> anyhow::Result<Element> {
        // A zero index would address the slot one past the top and panic on indexing.
        if index == 0 {
            anyhow::bail!("Invalid stack duplication index 0");
        }
        let position = self
            .elements
            .len()
            .checked_sub(index)
            .ok_or_else(|| anyhow::anyhow!("Stack underflow"))?;
        Ok(self.elements[position].clone())
    }

    ///
    /// Returns the stack length.
    ///
    pub fn len(&self) -> usize {
        self.elements.len()
    }

    ///
    /// Returns an emptiness flag.
    ///
    pub fn is_empty(&self) -> bool {
        self.elements.is_empty()
    }
}
impl std::fmt::Display for Stack {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"[ {} ]",
self.elements
.iter()
.map(Element::to_string)
.collect::<Vec<String>>()
.join(" | ")
)
}
}
@@ -0,0 +1,167 @@
//!
//! The Ethereal IR block.
//!
pub mod element;
use std::collections::HashSet;
use num::Zero;
use crate::evmla::assembly::instruction::name::Name as InstructionName;
use crate::evmla::assembly::instruction::Instruction;
use self::element::stack::Stack as ElementStack;
use self::element::Element;
///
/// The Ethereal IR block.
///
#[derive(Clone, Debug)]
pub struct Block {
    /// The version of the Solidity compiler.
    pub solc_version: semver::Version,
    /// The key of the block.
    pub key: era_compiler_llvm_context::EraVMFunctionBlockKey,
    /// The instance of the block, if one has been assigned.
    pub instance: Option<usize>,
    /// The elements of the block that are relevant to the stack consistency.
    pub elements: Vec<Element>,
    /// The predecessors of the block, as pairs of block key and instance.
    pub predecessors: HashSet<(era_compiler_llvm_context::EraVMFunctionBlockKey, usize)>,
    /// The stack state at the block entry.
    pub initial_stack: ElementStack,
    /// The stack.
    pub stack: ElementStack,
    /// The extra block hashes for alternative routes.
    pub extra_hashes: Vec<md5::Digest>,
}
impl Block {
/// The elements vector initial capacity.
pub const ELEMENTS_VECTOR_DEFAULT_CAPACITY: usize = 64;
/// The predecessors hashset initial capacity.
pub const PREDECESSORS_HASHSET_DEFAULT_CAPACITY: usize = 4;
///
/// Assembles a block from the sequence of instructions.
///
pub fn try_from_instructions(
solc_version: semver::Version,
code_type: era_compiler_llvm_context::EraVMCodeType,
slice: &[Instruction],
) -> anyhow::Result<(Self, usize)> {
let mut cursor = 0;
let tag: num::BigUint = match slice[cursor].name {
InstructionName::Tag => {
let tag = slice[cursor]
.value
.as_deref()
.expect("Always exists")
.parse()
.expect("Always valid");
cursor += 1;
tag
}
_ => num::BigUint::zero(),
};
let mut block = Self {
solc_version: solc_version.clone(),
key: era_compiler_llvm_context::EraVMFunctionBlockKey::new(code_type, tag),
instance: None,
elements: Vec::with_capacity(Self::ELEMENTS_VECTOR_DEFAULT_CAPACITY),
predecessors: HashSet::with_capacity(Self::PREDECESSORS_HASHSET_DEFAULT_CAPACITY),
initial_stack: ElementStack::new(),
stack: ElementStack::new(),
extra_hashes: vec![],
};
let mut dead_code = false;
while cursor < slice.len() {
if !dead_code {
let element: Element = Element::new(solc_version.clone(), slice[cursor].to_owned());
block.elements.push(element);
}
match slice[cursor].name {
InstructionName::RETURN
| InstructionName::REVERT
| InstructionName::STOP
| InstructionName::INVALID => {
cursor += 1;
dead_code = true;
}
InstructionName::JUMP => {
cursor += 1;
dead_code = true;
}
InstructionName::Tag => {
break;
}
_ => {
cursor += 1;
}
}
}
Ok((block, cursor))
}
///
/// Inserts a predecessor tag.
///
pub fn insert_predecessor(
&mut self,
key: era_compiler_llvm_context::EraVMFunctionBlockKey,
instance: usize,
) {
self.predecessors.insert((key, instance));
}
}
impl<D> era_compiler_llvm_context::EraVMWriteLLVM<D> for Block
where
    D: era_compiler_llvm_context::EraVMDependency + Clone,
{
    ///
    /// Switches the context to the code type of this block and translates the block elements
    /// one by one, stopping at the first error.
    ///
    fn into_llvm(
        self,
        context: &mut era_compiler_llvm_context::EraVMContext<D>,
    ) -> anyhow::Result<()> {
        context.set_code_type(self.key.code_type);

        self.elements
            .into_iter()
            .try_for_each(|element| element.into_llvm(&mut *context))
    }
}
impl std::fmt::Display for Block {
    ///
    /// Writes a header line with the block key, the instance (zero if unset) and the list of
    /// predecessors, if there are any, followed by one line per block element.
    ///
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let predecessors = if self.predecessors.is_empty() {
            String::new()
        } else {
            let list = self
                .predecessors
                .iter()
                .map(|(key, instance)| format!("{}/{}", key, instance))
                .collect::<Vec<String>>()
                .join(", ");
            format!("(predecessors: {})", list)
        };
        let instance = self.instance.unwrap_or_default();

        writeln!(f, "block_{}/{}: {}", self.key, instance, predecessors)?;
        self.elements
            .iter()
            .try_for_each(|element| writeln!(f, " {element}"))
    }
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,35 @@
//!
//! The Ethereal IR block queue element.
//!
use crate::evmla::ethereal_ir::function::block::element::stack::Stack;
///
/// The Ethereal IR block queue element.
///
#[derive(Clone, Debug)]
pub struct QueueElement {
    /// The key of the queued block.
    pub block_key: era_compiler_llvm_context::EraVMFunctionBlockKey,
    /// The predecessor of the block as a pair of block key and instance, if there is one.
    pub predecessor: Option<(era_compiler_llvm_context::EraVMFunctionBlockKey, usize)>,
    /// The last stack state of the predecessor.
    pub stack: Stack,
}
impl QueueElement {
///
/// A shortcut constructor.
///
pub fn new(
block_key: era_compiler_llvm_context::EraVMFunctionBlockKey,
predecessor: Option<(era_compiler_llvm_context::EraVMFunctionBlockKey, usize)>,
stack: Stack,
) -> Self {
Self {
block_key,
predecessor,
stack,
}
}
}
@@ -0,0 +1,49 @@
//!
//! The Ethereal IR function type.
//!
///
/// The Ethereal IR function type.
///
#[derive(Clone, Debug)]
pub enum Type {
    /// The initial function, which combines the deploy and runtime code.
    Initial,
    /// A recursive function, whose recursive context starts at a specific block.
    Recursive {
        /// The name of the function.
        name: String,
        /// The key of the initial block of the function.
        block_key: era_compiler_llvm_context::EraVMFunctionBlockKey,
        /// The stack input size (in cells or 256-bit words).
        input_size: usize,
        /// The stack output size (in cells or 256-bit words).
        output_size: usize,
    },
}
impl Type {
///
/// A shortcut constructor.
///
pub fn new_initial() -> Self {
Self::Initial
}
///
/// A shortcut constructor.
///
pub fn new_recursive(
name: String,
block_key: era_compiler_llvm_context::EraVMFunctionBlockKey,
input_size: usize,
output_size: usize,
) -> Self {
Self::Recursive {
name,
block_key,
input_size,
output_size,
}
}
}
@@ -0,0 +1,71 @@
//!
//! The Ethereal IR block visited element.
//!
use std::cmp::Ordering;
///
/// The Ethereal IR block visited element.
///
/// Identifies a visited block by its key together with the hash of its initial stack state.
///
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct VisitedElement {
    /// The key of the block.
    pub block_key: era_compiler_llvm_context::EraVMFunctionBlockKey,
    /// The hash of the initial stack state.
    pub stack_hash: md5::Digest,
}
impl VisitedElement {
///
/// A shortcut constructor.
///
pub fn new(
block_key: era_compiler_llvm_context::EraVMFunctionBlockKey,
stack_hash: md5::Digest,
) -> Self {
Self {
block_key,
stack_hash,
}
}
}
impl PartialOrd for VisitedElement {
    ///
    /// Delegates to the total ordering, so the result is never `None`.
    ///
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
impl Ord for VisitedElement {
    ///
    /// Orders the elements by code type (deploy before runtime), then by block tag, and
    /// finally by the bytes of the stack hash.
    ///
    /// The last step makes the ordering total: two elements with the same key but different
    /// hashes must not both compare as `Less` to each other, otherwise ordered collections
    /// keyed by this type cannot look their elements up reliably.
    ///
    fn cmp(&self, other: &Self) -> Ordering {
        match (self.block_key.code_type, other.block_key.code_type) {
            (
                era_compiler_llvm_context::EraVMCodeType::Deploy,
                era_compiler_llvm_context::EraVMCodeType::Runtime,
            ) => Ordering::Less,
            (
                era_compiler_llvm_context::EraVMCodeType::Runtime,
                era_compiler_llvm_context::EraVMCodeType::Deploy,
            ) => Ordering::Greater,
            (
                era_compiler_llvm_context::EraVMCodeType::Deploy,
                era_compiler_llvm_context::EraVMCodeType::Deploy,
            )
            | (
                era_compiler_llvm_context::EraVMCodeType::Runtime,
                era_compiler_llvm_context::EraVMCodeType::Runtime,
            ) => self
                .block_key
                .tag
                .cmp(&other.block_key.tag)
                // Break ties deterministically and antisymmetrically by the digest bytes.
                .then_with(|| self.stack_hash.0.cmp(&other.stack_hash.0)),
        }
    }
}
@@ -0,0 +1,150 @@
//!
//! The Ethereal IR of the EVM bytecode.
//!
pub mod entry_link;
pub mod function;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::collections::HashMap;
use crate::evmla::assembly::instruction::Instruction;
use crate::solc::standard_json::output::contract::evm::extra_metadata::ExtraMetadata;
use self::function::block::Block;
use self::function::r#type::Type as FunctionType;
use self::function::Function;
///
/// The Ethereal IR of the EVM bytecode.
///
/// The Ethereal IR (EthIR) is an intermediate representation that sits between the EVM legacy
/// assembly and LLVM IR. It exists to make the translation easier and to offer a place for
/// several transformations: duplicating parts of the call and control flow graphs, tracking
/// the data flow, and a few more static analysis algorithms.
///
/// Its most important feature is flattening the block tags and duplicating blocks for each
/// of the initial states of the stack. LLVM IR supports only static control flow, so the
/// stack state must be known at every point of the program.
///
#[derive(Debug)]
pub struct EtherealIR {
    /// The version of the Solidity compiler.
    pub solc_version: semver::Version,
    /// The extra metadata of the EVM legacy assembly.
    pub extra_metadata: ExtraMetadata,
    /// The all-inlined entry function.
    pub entry_function: Function,
    /// The recursive functions, keyed by block key.
    pub recursive_functions: BTreeMap<era_compiler_llvm_context::EraVMFunctionBlockKey, Function>,
}
impl EtherealIR {
    /// The default entry function name.
    pub const DEFAULT_ENTRY_FUNCTION_NAME: &'static str = "main";

    /// The blocks hashmap initial capacity.
    pub const BLOCKS_HASHMAP_DEFAULT_CAPACITY: usize = 64;

    ///
    /// Assembles a sequence of functions from the blocks.
    ///
    /// Creates the initial entry function and traverses it over `blocks`; the traversal
    /// fills in the map of recursive functions.
    ///
    /// # Errors
    ///
    /// Returns an error if the traversal of the entry function fails.
    ///
    pub fn new(
        solc_version: semver::Version,
        extra_metadata: ExtraMetadata,
        blocks: HashMap<era_compiler_llvm_context::EraVMFunctionBlockKey, Block>,
    ) -> anyhow::Result<Self> {
        let mut recursive_functions = BTreeMap::new();
        let mut visited_functions = BTreeSet::new();

        let initial_type = FunctionType::new_initial();
        let mut entry_function = Function::new(solc_version.clone(), initial_type);
        entry_function.traverse(
            &blocks,
            &mut recursive_functions,
            &extra_metadata,
            &mut visited_functions,
        )?;

        Ok(Self {
            solc_version,
            extra_metadata,
            entry_function,
            recursive_functions,
        })
    }

    ///
    /// Gets blocks for the specified type of the contract code.
    ///
    /// Splits `instructions` into consecutive blocks and stores each one under the key made
    /// of `code_type` and the block tag. A later block replaces an earlier one with the
    /// same key.
    ///
    /// # Errors
    ///
    /// Returns an error if any of the blocks cannot be assembled.
    ///
    pub fn get_blocks(
        solc_version: semver::Version,
        code_type: era_compiler_llvm_context::EraVMCodeType,
        instructions: &[Instruction],
    ) -> anyhow::Result<HashMap<era_compiler_llvm_context::EraVMFunctionBlockKey, Block>> {
        let mut blocks = HashMap::with_capacity(Self::BLOCKS_HASHMAP_DEFAULT_CAPACITY);

        // Repeatedly cut one block off the front of the remaining instructions.
        let mut remaining = instructions;
        while !remaining.is_empty() {
            let (block, consumed) =
                Block::try_from_instructions(solc_version.clone(), code_type, remaining)?;
            let key = era_compiler_llvm_context::EraVMFunctionBlockKey::new(
                code_type,
                block.key.tag.clone(),
            );
            blocks.insert(key, block);
            remaining = &remaining[consumed..];
        }

        Ok(blocks)
    }
}
impl<D> era_compiler_llvm_context::EraVMWriteLLVM<D> for EtherealIR
where
    D: era_compiler_llvm_context::EraVMDependency + Clone,
{
    ///
    /// Declares the entry function and then every recursive function, stopping at the
    /// first error.
    ///
    fn declare(
        &mut self,
        context: &mut era_compiler_llvm_context::EraVMContext<D>,
    ) -> anyhow::Result<()> {
        self.entry_function.declare(context)?;

        for function in self.recursive_functions.values_mut() {
            function.declare(context)?;
        }

        Ok(())
    }

    ///
    /// Resets the EVM legacy assembly stack in the context, then translates the entry
    /// function followed by every recursive function, stopping at the first error.
    ///
    fn into_llvm(
        self,
        context: &mut era_compiler_llvm_context::EraVMContext<D>,
    ) -> anyhow::Result<()> {
        context.evmla_mut().stack = Vec::new();

        self.entry_function.into_llvm(context)?;

        for function in self.recursive_functions.into_values() {
            function.into_llvm(context)?;
        }

        Ok(())
    }
}
impl std::fmt::Display for EtherealIR {
    ///
    /// Writes the entry function followed by every recursive function, each terminated by
    /// a line break.
    ///
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "{}", self.entry_function)?;

        for function in self.recursive_functions.values() {
            writeln!(f, "{function}")?;
        }

        Ok(())
    }
}