resolc crate (#328)

- Factor the YUL crate out of `revive-solidity`.
- `revive-solidity` is in reality not a Solidity implementation but the
revive solidity compiler driver (`resolc`). By renaming we not only get
this straight but also a binary with the same name as the crate which
should be less confusing.

---------

Signed-off-by: Cyrill Leutwiler <bigcyrill@hotmail.com>
This commit is contained in:
xermicus
2025-05-27 09:48:43 +02:00
committed by GitHub
parent 090e3ac13c
commit bd4e108bb0
99 changed files with 599 additions and 624 deletions
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,358 @@
//! The function name.
use serde::Deserialize;
use serde::Serialize;
/// The function name.
///
/// Either a user-defined function or one of the built-in functions, which are
/// recognized by their Yul spelling in the `From<&str>` implementation.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Name {
/// The user-defined function, carrying its identifier as written in the source.
UserDefined(String),
/// `x + y`
Add,
/// `x - y`
Sub,
/// `x * y`
Mul,
/// `x / y` or `0` if `y == 0`
Div,
/// `x % y` or `0` if `y == 0`
Mod,
/// `x / y`, for signed numbers in two's complement, `0` if `y == 0`
Sdiv,
/// `x % y`, for signed numbers in two's complement, `0` if `y == 0`
Smod,
/// `1` if `x < y`, `0` otherwise
Lt,
/// `1` if `x > y`, `0` otherwise
Gt,
/// `1` if `x == y`, `0` otherwise
Eq,
/// `1` if `x == 0`, `0` otherwise
IsZero,
/// `1` if `x < y`, `0` otherwise, for signed numbers in two's complement
Slt,
/// `1` if `x > y`, `0` otherwise, for signed numbers in two's complement
Sgt,
/// bitwise "or" of `x` and `y`
Or,
/// bitwise "xor" of `x` and `y`
Xor,
/// bitwise "not" of `x` (every bit of `x` is negated)
Not,
/// bitwise "and" of `x` and `y`
And,
/// logical shift left of `y` by `x` bits
Shl,
/// logical shift right of `y` by `x` bits
Shr,
/// signed arithmetic shift right of `y` by `x` bits
Sar,
/// `n`th byte of `x`, where the most significant byte is the `0`th byte
Byte,
/// discard the value `x`
Pop,
/// `(x + y) % m` with arbitrary precision arithmetic, `0` if `m == 0`
AddMod,
/// `(x * y) % m` with arbitrary precision arithmetic, `0` if `m == 0`
MulMod,
/// `x` to the power of `y`
Exp,
/// sign extend from the `(i*8+7)`th bit, counting from the least significant one
SignExtend,
/// `keccak(mem[p…(p+n)))`
Keccak256,
/// `mem[p…(p+32))`
MLoad,
/// `mem[p…(p+32)) := v`
MStore,
/// `mem[p] := v & 0xff` (only modifies a single byte)
MStore8,
/// heap memory copy
MCopy,
/// `storage[p]`
SLoad,
/// `storage[p] := v`
SStore,
/// transient `storage[p]`
TLoad,
/// transient `storage[p] := v`
TStore,
/// `loadimmutable` storage read
LoadImmutable,
/// `setimmutable` storage write
SetImmutable,
/// call data starting from position `p` (32 bytes)
CallDataLoad,
/// size of the call data in bytes
CallDataSize,
/// copy `s` bytes from the call data at position `f` to memory at position `t`
CallDataCopy,
/// size of the code of the current contract / execution context
CodeSize,
/// copy `s` bytes from the code at position `f` to memory at position `t`
CodeCopy,
/// size of the code at address `a`
ExtCodeSize,
/// code hash of address `a`
ExtCodeHash,
/// size of the last return data
ReturnDataSize,
/// copy `s` bytes from the return data at position `f` to memory at position `t`
ReturnDataCopy,
/// end execution, return data `mem[p…(p+s))`
Return,
/// end execution, revert state changes, return data `mem[p…(p+s))`
Revert,
/// stop execution, identical to `return(0, 0)`
Stop,
/// end execution with an invalid instruction
Invalid,
/// log without topics and data `mem[p…(p+s))`
Log0,
/// log with topic `t1` and data `mem[p…(p+s))`
Log1,
/// log with topics `t1`, `t2` and data `mem[p…(p+s))`
Log2,
/// log with topics `t1`, `t2`, `t3` and data `mem[p…(p+s))`
Log3,
/// log with topics `t1`, `t2`, `t3`, `t4` and data `mem[p…(p+s))`
Log4,
/// call the contract at address `a` with input `mem[in…(in+insize))` providing `g` gas and
/// `v` wei and output area `mem[out…(out+outsize))`, returning `0` on error (e.g. out of gas)
/// and `1` on success
/// [See more](https://docs.soliditylang.org/en/v0.8.2/yul.html#yul-call-return-area)
Call,
/// identical to `call` but only uses the code from `a` and otherwise stays in the context of
/// the current contract
CallCode,
/// identical to `callcode` but also keeps `caller` and `callvalue`
DelegateCall,
/// identical to `call(g, a, 0, in, insize, out, outsize)` but does not allow state modifications
StaticCall,
/// create a new contract with code `mem[p…(p+n))`, send `v` wei and return the new address
/// Passes bytecode to the system contracts.
Create,
/// create a new contract with code `mem[p…(p+n))` at address
/// `keccak256(0xff . this . s . keccak256(mem[p…(p+n)))`, send `v` wei and return the
/// new address, where `0xff` is a 1-byte value, `this` is the current contract's address as a
/// 20-byte value and `s` is a big-endian 256-bit value
/// Passes bytecode to the system contracts.
Create2,
/// returns the size in the data area
DataSize,
/// is equivalent to `CodeCopy`
DataCopy,
/// returns the offset in the data area
DataOffset,
/// `linkersymbol` is a stub call
LinkerSymbol,
/// `memoryguard` is a stub call
MemoryGuard,
/// address of the current contract / execution context
Address,
/// call sender (excluding `delegatecall`)
Caller,
/// wei sent together with the current call
CallValue,
/// gas still available to execution
Gas,
/// wei balance at address `a`
Balance,
/// equivalent to `balance(address())`, but cheaper
SelfBalance,
/// block gas limit of the current block
GasLimit,
/// gas price of the transaction
GasPrice,
/// transaction sender
Origin,
/// ID of the executing chain (EIP 1344)
ChainId,
/// current block number
Number,
/// timestamp of the current block in seconds since the epoch
Timestamp,
/// hash of block number `b` - only for the last 256 blocks excluding the current one
BlockHash,
/// versioned hash of the transaction's `i`-th blob
BlobHash,
/// difficulty of the current block
Difficulty,
/// `prevrandao`, see https://eips.ethereum.org/EIPS/eip-4399
Prevrandao,
/// current mining beneficiary
CoinBase,
/// size of memory, i.e. the largest accessed memory index
MSize,
/// The `verbatim_<n>i_<m>o` instruction.
/// The verbatim instruction with 0 inputs and 0 outputs only works in the Yul mode,
/// so it is mostly used as a tool for extending Yul for PolkaVM
Verbatim {
/// the number of input arguments
input_size: usize,
/// the number of output arguments
output_size: usize,
},
/// current block's base fee (EIP-3198 and EIP-1559)
BaseFee,
/// current block's blob base fee (EIP-7516 and EIP-4844)
BlobBaseFee,
/// current position in code
Pc,
/// like `codecopy(t, f, s)` but takes the code at address `a`
ExtCodeCopy,
/// end execution, destroy the current contract and send funds to `a`
SelfDestruct,
}
impl Name {
    /// Tries parsing the verbatim instruction.
    ///
    /// Recognizes identifiers of the exact form `verbatim_<n>i_<m>o`, where `<n>` and
    /// `<m>` are the decimal numbers of input and output arguments respectively.
    ///
    /// Returns `None` if `input` does not have this form or if one of the numbers
    /// does not fit into `usize`.
    fn parse_verbatim(input: &str) -> Option<Self> {
        let verbatim = input.strip_prefix("verbatim")?;

        // The pattern is anchored on both ends. Without the anchors any identifier
        // that starts with `verbatim` and merely contains `_<n>i_<m>o` somewhere
        // (e.g. `verbatimfoo_1i_2o_bar`) would be classified as a verbatim instruction.
        let regex = regex::Regex::new(r"^_(\d+)i_(\d+)o$").expect("Always valid");
        let captures = regex.captures(verbatim)?;

        let input_size: usize = captures.get(1)?.as_str().parse().ok()?;
        let output_size: usize = captures.get(2)?.as_str().parse().ok()?;

        Some(Self::Verbatim {
            input_size,
            output_size,
        })
    }
}
impl From<&str> for Name {
    /// Maps a Yul identifier to the corresponding function name.
    ///
    /// Built-in functions are recognized by their exact spelling. Anything else is
    /// either a verbatim instruction or a user-defined function.
    fn from(input: &str) -> Self {
        match input {
            // Arithmetic.
            "add" => Self::Add,
            "sub" => Self::Sub,
            "mul" => Self::Mul,
            "div" => Self::Div,
            "mod" => Self::Mod,
            "sdiv" => Self::Sdiv,
            "smod" => Self::Smod,

            // Comparison.
            "lt" => Self::Lt,
            "gt" => Self::Gt,
            "eq" => Self::Eq,
            "iszero" => Self::IsZero,
            "slt" => Self::Slt,
            "sgt" => Self::Sgt,

            // Bitwise logic and shifts.
            "or" => Self::Or,
            "xor" => Self::Xor,
            "not" => Self::Not,
            "and" => Self::And,
            "shl" => Self::Shl,
            "shr" => Self::Shr,
            "sar" => Self::Sar,
            "byte" => Self::Byte,
            "pop" => Self::Pop,

            // Extended arithmetic and hashing.
            "addmod" => Self::AddMod,
            "mulmod" => Self::MulMod,
            "exp" => Self::Exp,
            "signextend" => Self::SignExtend,
            "keccak256" => Self::Keccak256,

            // Memory.
            "mload" => Self::MLoad,
            "mstore" => Self::MStore,
            "mstore8" => Self::MStore8,
            "mcopy" => Self::MCopy,

            // Storage, transient storage and immutables.
            "sload" => Self::SLoad,
            "sstore" => Self::SStore,
            "tload" => Self::TLoad,
            "tstore" => Self::TStore,
            "loadimmutable" => Self::LoadImmutable,
            "setimmutable" => Self::SetImmutable,

            // Call data, code and return data.
            "calldataload" => Self::CallDataLoad,
            "calldatasize" => Self::CallDataSize,
            "calldatacopy" => Self::CallDataCopy,
            "codesize" => Self::CodeSize,
            "codecopy" => Self::CodeCopy,
            "extcodesize" => Self::ExtCodeSize,
            "extcodehash" => Self::ExtCodeHash,
            "returndatasize" => Self::ReturnDataSize,
            "returndatacopy" => Self::ReturnDataCopy,

            // Control flow.
            "return" => Self::Return,
            "revert" => Self::Revert,
            "stop" => Self::Stop,
            "invalid" => Self::Invalid,

            // Events.
            "log0" => Self::Log0,
            "log1" => Self::Log1,
            "log2" => Self::Log2,
            "log3" => Self::Log3,
            "log4" => Self::Log4,

            // Calls and contract creation.
            "call" => Self::Call,
            "callcode" => Self::CallCode,
            "delegatecall" => Self::DelegateCall,
            "staticcall" => Self::StaticCall,
            "create" => Self::Create,
            "create2" => Self::Create2,

            // Data area and compiler stubs.
            "datasize" => Self::DataSize,
            "datacopy" => Self::DataCopy,
            "dataoffset" => Self::DataOffset,
            "linkersymbol" => Self::LinkerSymbol,
            "memoryguard" => Self::MemoryGuard,

            // Execution context.
            "address" => Self::Address,
            "caller" => Self::Caller,
            "callvalue" => Self::CallValue,
            "gas" => Self::Gas,
            "balance" => Self::Balance,
            "selfbalance" => Self::SelfBalance,

            // Block and transaction context.
            "gaslimit" => Self::GasLimit,
            "gasprice" => Self::GasPrice,
            "origin" => Self::Origin,
            "chainid" => Self::ChainId,
            "number" => Self::Number,
            "timestamp" => Self::Timestamp,
            "blockhash" => Self::BlockHash,
            "blobhash" => Self::BlobHash,
            "difficulty" => Self::Difficulty,
            "prevrandao" => Self::Prevrandao,
            "coinbase" => Self::CoinBase,
            "msize" => Self::MSize,
            "basefee" => Self::BaseFee,
            "blobbasefee" => Self::BlobBaseFee,

            // Remaining instructions.
            "pc" => Self::Pc,
            "extcodecopy" => Self::ExtCodeCopy,
            "selfdestruct" => Self::SelfDestruct,

            // None of the spellings above starts with `verbatim`, so trying the
            // verbatim parser only here yields the same result as trying it first.
            other => Self::parse_verbatim(other)
                .unwrap_or_else(|| Self::UserDefined(other.to_owned())),
        }
    }
}
@@ -0,0 +1,33 @@
//! Translates the verbatim simulations.
use crate::parser::statement::expression::function_call::FunctionCall;
/// Translates the verbatim simulations.
///
/// No verbatim simulation is implemented here, so every call ends in an error:
/// - more than one output value is rejected up front;
/// - otherwise the single argument is popped and its original literal is reported
///   as an unknown internal function, or as missing if the argument has none.
pub fn verbatim<'ctx, D>(
    context: &mut revive_llvm_context::PolkaVMContext<'ctx, D>,
    call: &mut FunctionCall,
    _input_size: usize,
    output_size: usize,
) -> anyhow::Result<Option<inkwell::values::BasicValueEnum<'ctx>>>
where
    D: revive_llvm_context::PolkaVMDependency + Clone,
{
    if output_size > 1 {
        return Err(anyhow::anyhow!(
            "{} Verbatim instructions with multiple return values are not supported",
            call.location
        ));
    }

    let mut popped = call.pop_arguments::<D, 1>(context)?;
    match popped[0].original.take() {
        Some(identifier) => Err(anyhow::anyhow!(
            "{} Found unknown internal function `{}`",
            call.location,
            identifier
        )),
        None => Err(anyhow::anyhow!(
            "{} Verbatim literal is missing",
            call.location
        )),
    }
}
@@ -0,0 +1,223 @@
//! The YUL source code literal.
use inkwell::values::BasicValue;
use num::Num;
use num::One;
use num::Zero;
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::lexer::token::lexeme::literal::boolean::Boolean as BooleanLiteral;
use crate::lexer::token::lexeme::literal::integer::Integer as IntegerLiteral;
use crate::lexer::token::lexeme::literal::Literal as LexicalLiteral;
use crate::lexer::token::lexeme::symbol::Symbol;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
use crate::parser::error::Error as ParserError;
use crate::parser::r#type::Type;
/// Represents a literal in YUL without differentiating its type.
///
/// The concrete kind (boolean, integer or string) is carried by the lexical
/// literal in `inner`; an optional explicit type may follow the literal.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct Literal {
/// The location of the literal token.
pub location: Location,
/// The lexical literal as taken from the token.
pub inner: LexicalLiteral,
/// The type, if it has been explicitly specified after a colon.
pub yul_type: Option<Type>,
}
impl Literal {
    /// The element parser.
    ///
    /// Obtains the token via `crate::parser::take_or_next` and expects it to be a
    /// literal. If the next token is a colon, it is consumed and the type following
    /// it is parsed as the explicitly specified literal type.
    ///
    /// # Errors
    ///
    /// Returns `ParserError::InvalidToken` if the token is not a literal, and
    /// propagates the errors of the lexer and of the type parser.
    pub fn parse(lexer: &mut Lexer, initial: Option<Token>) -> Result<Self, Error> {
        let token = crate::parser::take_or_next(initial, lexer)?;

        let (location, literal) = match token {
            Token {
                lexeme: Lexeme::Literal(literal),
                location,
                ..
            } => (location, literal),
            token => {
                return Err(ParserError::InvalidToken {
                    location: token.location,
                    expected: vec!["{literal}"],
                    found: token.lexeme.to_string(),
                }
                .into());
            }
        };

        let yul_type = match lexer.peek()? {
            Token {
                lexeme: Lexeme::Symbol(Symbol::Colon),
                ..
            } => {
                lexer.next()?;
                Some(Type::parse(lexer, None)?)
            }
            _ => None,
        };

        Ok(Self {
            location,
            inner: literal,
            yul_type,
        })
    }

    /// Converts the literal into its LLVM.
    ///
    /// - Booleans become the constants `0` or `1` of the literal type.
    /// - Integers are read from their decimal or `0x`-prefixed hexadecimal form.
    /// - Strings are turned into hexadecimal digits (resolving escape sequences
    ///   unless the literal is already hexadecimal), right-padded with zeros up to
    ///   `revive_common::BYTE_LENGTH_WORD` bytes and read as an integer constant.
    ///   Strings longer than that yield a zero value. In both cases the raw string
    ///   is attached to the argument via `with_original`.
    ///
    /// # Errors
    ///
    /// Returns an error if a string literal contains a malformed escape sequence.
    ///
    /// # Panics
    ///
    /// Panics if the digits of an integer or hexadecimal string literal are not
    /// valid for their radix, which the lexer is expected to rule out.
    pub fn into_llvm<'ctx, D>(
        self,
        context: &revive_llvm_context::PolkaVMContext<'ctx, D>,
    ) -> anyhow::Result<revive_llvm_context::PolkaVMArgument<'ctx>>
    where
        D: revive_llvm_context::PolkaVMDependency + Clone,
    {
        match self.inner {
            LexicalLiteral::Boolean(inner) => {
                let value = self
                    .yul_type
                    .unwrap_or_default()
                    .into_llvm(context)
                    .const_int(
                        match inner {
                            BooleanLiteral::False => 0,
                            BooleanLiteral::True => 1,
                        },
                        false,
                    )
                    .as_basic_value_enum();

                let constant = match inner {
                    BooleanLiteral::False => num::BigUint::zero(),
                    BooleanLiteral::True => num::BigUint::one(),
                };

                Ok(revive_llvm_context::PolkaVMArgument::value(value).with_constant(constant))
            }
            LexicalLiteral::Integer(inner) => {
                let r#type = self.yul_type.unwrap_or_default().into_llvm(context);

                let value = match inner {
                    IntegerLiteral::Decimal { ref inner } => r#type.const_int_from_string(
                        inner.as_str(),
                        inkwell::types::StringRadix::Decimal,
                    ),
                    IntegerLiteral::Hexadecimal { ref inner } => r#type.const_int_from_string(
                        &inner["0x".len()..],
                        inkwell::types::StringRadix::Hexadecimal,
                    ),
                }
                .expect("The value is valid")
                .as_basic_value_enum();

                let constant = match inner {
                    IntegerLiteral::Decimal { ref inner } => {
                        num::BigUint::from_str_radix(inner.as_str(), revive_common::BASE_DECIMAL)
                    }
                    IntegerLiteral::Hexadecimal { ref inner } => num::BigUint::from_str_radix(
                        &inner["0x".len()..],
                        revive_common::BASE_HEXADECIMAL,
                    ),
                }
                .expect("Always valid");

                Ok(revive_llvm_context::PolkaVMArgument::value(value).with_constant(constant))
            }
            LexicalLiteral::String(inner) => {
                let string = inner.inner;
                let r#type = self.yul_type.unwrap_or_default().into_llvm(context);

                let mut hex_string = if inner.is_hexadecimal {
                    string.clone()
                } else {
                    Self::escaped_string_to_hex(string.as_str())?
                };

                // Strings that do not fit into a single word are represented by a
                // zero value; only the original string is carried along.
                if hex_string.len() > revive_common::BYTE_LENGTH_WORD * 2 {
                    return Ok(revive_llvm_context::PolkaVMArgument::value(
                        r#type.const_zero().as_basic_value_enum(),
                    )
                    .with_original(string));
                }

                // Shorter strings are right-padded with zeros up to the full word.
                if hex_string.len() < revive_common::BYTE_LENGTH_WORD * 2 {
                    hex_string.push_str(
                        "0".repeat((revive_common::BYTE_LENGTH_WORD * 2) - hex_string.len())
                            .as_str(),
                    );
                }

                let value = r#type
                    .const_int_from_string(
                        hex_string.as_str(),
                        inkwell::types::StringRadix::Hexadecimal,
                    )
                    .expect("The value is valid")
                    .as_basic_value_enum();

                Ok(revive_llvm_context::PolkaVMArgument::value(value).with_original(string))
            }
        }
    }

    /// Converts the contents of a non-hexadecimal string literal into hexadecimal
    /// digits, two per byte, resolving the escape sequences on the way.
    ///
    /// The string is walked byte by byte, so non-ASCII content is emitted as its
    /// UTF-8 bytes instead of causing a slicing panic.
    fn escaped_string_to_hex(string: &str) -> anyhow::Result<String> {
        let bytes = string.as_bytes();
        let mut hex_string = String::with_capacity(revive_common::BYTE_LENGTH_WORD * 2);

        let mut index = 0;
        while index < bytes.len() {
            if bytes[index] != b'\\' {
                hex_string.push_str(format!("{:02x}", bytes[index]).as_str());
                index += 1;
                continue;
            }

            // Skip the backslash and look at the escape kind.
            index += 1;
            let escape = *bytes.get(index).ok_or_else(|| {
                anyhow::anyhow!("Incomplete escape sequence at the end of the string")
            })?;

            match escape {
                b'x' => {
                    // `\xNN`: the two hexadecimal digits are taken over as they are.
                    let digits = string
                        .get(index + 1..index + 3)
                        .filter(|digits| digits.bytes().all(|byte| byte.is_ascii_hexdigit()))
                        .ok_or_else(|| anyhow::anyhow!("Invalid `\\x` escape sequence"))?;
                    hex_string.push_str(digits);
                    index += 3;
                }
                b'u' => {
                    // `\uNNNN`: the code point is emitted in its UTF-8 encoding.
                    let codepoint_str = string
                        .get(index + 1..index + 5)
                        .ok_or_else(|| anyhow::anyhow!("Incomplete `\\u` escape sequence"))?;
                    let codepoint =
                        u32::from_str_radix(codepoint_str, revive_common::BASE_HEXADECIMAL)
                            .map_err(|error| {
                                anyhow::anyhow!(
                                    "Invalid codepoint `{}`: {}",
                                    codepoint_str,
                                    error
                                )
                            })?;
                    let unicode_char = char::from_u32(codepoint)
                        .ok_or_else(|| anyhow::anyhow!("Invalid codepoint {}", codepoint))?;

                    // Only the bytes actually produced by the encoder are emitted:
                    // code points below U+0800 take fewer than three bytes and must
                    // not be followed by padding zero bytes.
                    let mut buffer = [0u8; 4];
                    for byte in unicode_char.encode_utf8(&mut buffer).bytes() {
                        hex_string.push_str(format!("{:02x}", byte).as_str());
                    }
                    index += 5;
                }
                b't' => {
                    hex_string.push_str("09");
                    index += 1;
                }
                b'n' => {
                    hex_string.push_str("0a");
                    index += 1;
                }
                b'r' => {
                    hex_string.push_str("0d");
                    index += 1;
                }
                // A backslash followed by a line feed produces no output.
                b'\n' => index += 1,
                // Any other escaped byte stands for itself.
                other => {
                    hex_string.push_str(format!("{:02x}", other).as_str());
                    index += 1;
                }
            }
        }

        Ok(hex_string)
    }
}
@@ -0,0 +1,146 @@
//! The expression statement.
pub mod function_call;
pub mod literal;
use std::collections::HashSet;
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::lexer::token::lexeme::symbol::Symbol;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
use crate::parser::error::Error as ParserError;
use crate::parser::identifier::Identifier;
use self::function_call::FunctionCall;
use self::literal::Literal;
/// The Yul expression statement.
///
/// An expression is either a function call, a bare identifier or a literal.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Expression {
/// The function call subexpression (an identifier followed by a left parenthesis).
FunctionCall(FunctionCall),
/// The identifier operand.
Identifier(Identifier),
/// The literal operand.
Literal(Literal),
}
impl Expression {
    /// The element parser.
    ///
    /// A literal token is handed over to the literal parser. An identifier is
    /// parsed as a function call if a left parenthesis follows it, and as a plain
    /// identifier otherwise. Any other token is rejected as invalid.
    pub fn parse(lexer: &mut Lexer, initial: Option<Token>) -> Result<Self, Error> {
        let token = crate::parser::take_or_next(initial, lexer)?;

        if matches!(token.lexeme, Lexeme::Literal(_)) {
            return Literal::parse(lexer, Some(token)).map(Self::Literal);
        }

        let (location, identifier) = match token.lexeme {
            Lexeme::Identifier(identifier) => (token.location, identifier),
            unexpected => {
                return Err(ParserError::InvalidToken {
                    location: token.location,
                    expected: vec!["{literal}", "{identifier}"],
                    found: unexpected.to_string(),
                }
                .into());
            }
        };

        let length = identifier
            .inner
            .len()
            .try_into()
            .map_err(|_| Error::Parser(ParserError::InvalidLength))?;

        let is_call = matches!(
            lexer.peek()?.lexeme,
            Lexeme::Symbol(Symbol::ParenthesisLeft)
        );
        if !is_call {
            return Ok(Self::Identifier(Identifier::new(
                location,
                identifier.inner,
            )));
        }

        // Consume the parenthesis and pass the identifier on as the initial token.
        lexer.next()?;
        let callee = Token::new(location, Lexeme::Identifier(identifier), length);
        Ok(Self::FunctionCall(FunctionCall::parse(
            lexer,
            Some(callee),
        )?))
    }

    /// Get the list of missing deployable libraries.
    ///
    /// Only function calls are asked for them; identifiers and literals
    /// contribute an empty set.
    pub fn get_missing_libraries(&self) -> HashSet<String> {
        match self {
            Self::FunctionCall(call) => call.get_missing_libraries(),
            Self::Identifier(_) | Self::Literal(_) => HashSet::new(),
        }
    }

    /// Returns the statement location.
    pub fn location(&self) -> Location {
        match self {
            Self::Literal(literal) => literal.location,
            Self::Identifier(identifier) => identifier.location,
            Self::FunctionCall(call) => call.location,
        }
    }

    /// Converts the expression into an LLVM value.
    ///
    /// Literals and identifiers always produce an argument. Function calls produce
    /// one only if the call yields a value.
    pub fn into_llvm<'ctx, D>(
        self,
        context: &mut revive_llvm_context::PolkaVMContext<'ctx, D>,
    ) -> anyhow::Result<Option<revive_llvm_context::PolkaVMArgument<'ctx>>>
    where
        D: revive_llvm_context::PolkaVMDependency + Clone,
    {
        match self {
            Self::FunctionCall(call) => {
                let value = call.into_llvm(context)?;
                Ok(value.map(revive_llvm_context::PolkaVMArgument::value))
            }
            Self::Identifier(identifier) => {
                let name = identifier.inner;

                let pointer = context
                    .current_function()
                    .borrow()
                    .get_stack_pointer(&name)
                    .ok_or_else(|| {
                        anyhow::anyhow!("{} Undeclared variable `{}`", identifier.location, name)
                    })?;
                let constant = context
                    .current_function()
                    .borrow()
                    .yul()
                    .get_constant(&name);

                let argument = revive_llvm_context::PolkaVMArgument::pointer(pointer, name);
                // Attach the constant value if one is known for the variable.
                let argument = if let Some(constant) = constant {
                    argument.with_constant(constant)
                } else {
                    argument
                };
                Ok(Some(argument))
            }
            Self::Literal(literal) => {
                // The literal is cloned because the conversion consumes it while
                // the error message still needs its location and contents.
                let argument = literal.clone().into_llvm(context).map_err(|error| {
                    anyhow::anyhow!(
                        "{} Invalid literal `{}`: {}",
                        literal.location,
                        literal.inner.to_string(),
                        error
                    )
                })?;
                Ok(Some(argument))
            }
        }
    }
}