resolc crate (#328)

- Factor the YUL crate out of `revive-solidity`.
- `revive-solidity` is in reality not a Solidity implementation but the
revive Solidity compiler driver (`resolc`). Renaming it not only sets
this straight but also yields a binary with the same name as the crate,
which should be less confusing.

---------

Signed-off-by: Cyrill Leutwiler <bigcyrill@hotmail.com>
This commit is contained in:
xermicus
2025-05-27 09:48:43 +02:00
committed by GitHub
parent 090e3ac13c
commit bd4e108bb0
99 changed files with 599 additions and 624 deletions
+15
View File
@@ -0,0 +1,15 @@
//! The Yul IR error.
use crate::lexer::error::Error as LexerError;
use crate::parser::error::Error as ParserError;
/// The Yul IR error.
///
/// Umbrella error for the front end: wraps the lexer and parser errors so
/// callers can handle both through one type; each converts in via `#[from]`.
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
pub enum Error {
    /// The lexer error.
    #[error("Lexical error: {0}")]
    Lexer(#[from] LexerError),
    /// The parser error.
    #[error("Syntax error: {0}")]
    Parser(#[from] ParserError),
}
+16
View File
@@ -0,0 +1,16 @@
//! The Yul IR lexer error.
use crate::lexer::token::location::Location;
/// The Yul IR lexer error.
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
pub enum Error {
    /// The invalid lexeme error: no sub-lexer recognized the character
    /// sequence at `location`.
    #[error("{location} Invalid character sequence `{sequence}`")]
    InvalidLexeme {
        /// The lexeme location (line and column in the source).
        location: Location,
        /// The invalid sequence of characters.
        sequence: String,
    },
}
+137
View File
@@ -0,0 +1,137 @@
//! The compiler lexer.
pub mod error;
pub mod token;
#[cfg(test)]
mod tests;
use self::error::Error;
use self::token::lexeme::comment::Comment;
use self::token::lexeme::identifier::Identifier;
use self::token::lexeme::literal::integer::Integer as IntegerLiteral;
use self::token::lexeme::literal::string::String as StringLiteral;
use self::token::lexeme::symbol::Symbol;
use self::token::lexeme::Lexeme;
use self::token::location::Location;
use self::token::Token;
/// The compiler lexer.
pub struct Lexer {
    /// The input source code (a trailing newline is appended by `new`).
    input: String,
    /// The byte offset into `input` processed so far.
    /// NOTE(review): previously documented as a character count, but the code
    /// slices `input` with this value, so it is a byte offset.
    offset: u32,
    /// The current line/column location, used to tag emitted tokens.
    location: Location,
    /// The peeked token, waiting to be fetched by the next `next` call.
    peeked: Option<Token>,
}
impl Lexer {
    /// A shortcut constructor.
    ///
    /// A trailing newline is appended so that every lexeme — including one at
    /// the very end of the source — is terminated by whitespace.
    pub fn new(mut input: String) -> Self {
        input.push('\n');
        Self {
            input,
            offset: 0,
            location: Location::default(),
            peeked: None,
        }
    }

    /// Advances the lexer, returning the next lexeme.
    ///
    /// Returns the previously peeked token first, if any. Otherwise skips
    /// whitespace and comments while tracking the line/column location, then
    /// delegates to the string, integer, identifier, and symbol sub-lexers in
    /// that order. Returns an `EndOfFile` token once the input is exhausted,
    /// or `Error::InvalidLexeme` when no sub-lexer matches.
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> Result<Token, Error> {
        if let Some(peeked) = self.peeked.take() {
            return Ok(peeked);
        }

        // The input length never changes, so convert it to `u32` once instead
        // of on every loop iteration; absurdly large sources are rejected
        // up front (same outcome as the previous per-iteration check).
        let input_length: u32 = self
            .input
            .len()
            .try_into()
            .map_err(|_| Error::InvalidLexeme {
                location: self.location,
                sequence: Default::default(),
            })?;

        while self.offset < input_length {
            let input = &self.input[(self.offset as usize)..];

            if input.starts_with(|character| char::is_ascii_whitespace(&character)) {
                if input.starts_with('\n') {
                    self.location.line += 1;
                    self.location.column = 1;
                } else if !input.starts_with('\r') {
                    // `\r` does not advance the column, so `\r\n` line endings
                    // are counted the same as a plain `\n`.
                    self.location.column += 1;
                }
                self.offset += 1;
                continue;
            }

            if let Some(token) = Comment::parse(input) {
                self.offset += token.length;
                // A comment token's location carries the line/column span the
                // comment covers, not an absolute position.
                self.location
                    .shift_down(token.location.line, token.location.column);
                continue;
            }

            if let Some(mut token) = StringLiteral::parse(input) {
                token.location = self.location;
                self.offset += token.length;
                self.location.shift_right(token.length);
                return Ok(token);
            }

            if let Some(mut token) = IntegerLiteral::parse(input) {
                token.location = self.location;
                self.offset += token.length;
                self.location.shift_right(token.length);
                return Ok(token);
            }

            if let Some(mut token) = Identifier::parse(input) {
                token.location = self.location;
                self.offset += token.length;
                self.location.shift_right(token.length);
                return Ok(token);
            }

            if let Some(mut token) = Symbol::parse(input) {
                token.location = self.location;
                self.offset += token.length;
                self.location.shift_right(token.length);
                return Ok(token);
            }

            // No sub-lexer matched: report everything up to the next
            // whitespace as an invalid sequence. The fallback length must be
            // relative to the remaining slice — the previous version fell back
            // to the absolute input length, which could slice out of bounds.
            let end = input.find(char::is_whitespace).unwrap_or(input.len());
            return Err(Error::InvalidLexeme {
                location: self.location,
                sequence: input[..end].to_owned(),
            });
        }

        Ok(Token::new(self.location, Lexeme::EndOfFile, 0))
    }

    /// Peeks the next lexeme without advancing the iterator.
    pub fn peek(&mut self) -> Result<Token, Error> {
        match self.peeked {
            Some(ref peeked) => Ok(peeked.clone()),
            None => {
                let peeked = self.next()?;
                self.peeked = Some(peeked.clone());
                Ok(peeked)
            }
        }
    }
}
+89
View File
@@ -0,0 +1,89 @@
//! The Yul IR lexer tests.
use crate::lexer::error::Error;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::Lexer;
#[test]
fn default() {
    // Lex a representative Yul object. The fixture deliberately contains an
    // invalid `#` character inside `allocate_memory` so that the test can
    // assert the lexer reports `InvalidLexeme` with the offending `#,`
    // sequence instead of reaching the end of the input.
    let input = r#"
object "Test" {
code {
{
/*
The deploy code.
*/
mstore(64, 128)
if callvalue() { revert(0, 0) }
let _1 := datasize("Test_deployed")
codecopy(0, dataoffset("Test_deployed"), _1)
return(0, _1)
}
}
object "Test_deployed" {
code {
{
/*
The runtime code.
*/
mstore(64, 128)
if iszero(lt(calldatasize(), 4))
{
let _1 := 0
switch shr(224, calldataload(_1))
case 0x3df4ddf4 {
if callvalue() { revert(_1, _1) }
if slt(add(calldatasize(), not(3)), _1) { revert(_1, _1) }
let memPos := allocate_memory(_1)
mstore(memPos, 0x2a)
return(memPos, 32)
}
case 0x5a8ac02d {
if callvalue() { revert(_1, _1) }
if slt(add(calldatasize(), not(3)), _1) { revert(_1, _1) }
let memPos_1 := allocate_memory(_1)
return(memPos_1, sub(abi_encode_uint256(memPos_1, 0x63), memPos_1))
}
}
revert(0, 0)
}
function abi_encode_uint256(headStart, value0) -> tail
{
tail := add(headStart, 32)
mstore(headStart, value0)
}
function allocate_memory(size) -> memPtr
{
memPtr := mload(64)
let newFreePtr := add(memPtr, and(add(size, 31), not(31)))
if or(gt(newFreePtr, 0xffffffffffffffff)#, lt(newFreePtr, memPtr))
{
mstore(0, shl(224, 0x4e487b71))
mstore(4, 0x41)
revert(0, 0x24)
}
mstore(64, newFreePtr)
}
}
}
}
"#;
    let mut lexer = Lexer::new(input.to_owned());
    // Drain tokens until the expected lexical error surfaces; reaching
    // `EndOfFile` first would fail the `assert_ne!` below.
    loop {
        match lexer.next() {
            Ok(token) => assert_ne!(token.lexeme, Lexeme::EndOfFile),
            Err(error) => {
                assert_eq!(
                    error,
                    Error::InvalidLexeme {
                        location: Location::new(51, 57),
                        sequence: "#,".to_owned(),
                    }
                );
                break;
            }
        }
    }
}
@@ -0,0 +1,32 @@
//! The comment lexeme.
pub mod multi_line;
pub mod single_line;
use crate::lexer::token::Token;
use self::multi_line::Comment as MultiLineComment;
use self::single_line::Comment as SingleLineComment;
/// The comment lexeme.
///
/// NOTE(review): `parse` below returns a `Token` and never constructs these
/// variants, hence the `dead_code` allowance — presumably kept to document
/// the two comment forms; confirm whether the variants are still needed.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code)]
pub enum Comment {
    /// The single-line comment.
    SingleLine(SingleLineComment),
    /// The multi-line comment.
    MultiLine(MultiLineComment),
}
impl Comment {
    /// Returns the comment's length, including the trimmed whitespace around it.
    ///
    /// Dispatches to the single- or multi-line comment parser based on the
    /// opening delimiter; returns `None` when the input starts with neither.
    pub fn parse(input: &str) -> Option<Token> {
        if input.starts_with(SingleLineComment::START) {
            return Some(SingleLineComment::parse(input));
        }
        if input.starts_with(MultiLineComment::START) {
            return Some(MultiLineComment::parse(input));
        }
        None
    }
}
@@ -0,0 +1,39 @@
//! The multi-line comment lexeme.
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
/// The multi-line comment lexeme.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Comment {}

impl Comment {
    /// The start symbol.
    pub const START: &'static str = "/*";
    /// The end symbol.
    pub const END: &'static str = "*/";

    /// Returns the comment, including its length and number of lines.
    ///
    /// The returned token's location is not an absolute position: `line`
    /// holds the number of lines the comment spans and `column` the column
    /// offset after it, which the lexer feeds into `Location::shift_down`.
    pub fn parse(input: &str) -> Token {
        // Position of the closing `*/`; a missing terminator makes the
        // comment run to the end of the input.
        let end_position = input.find(Self::END).unwrap_or(input.len());
        // Restrict to the comment body for the line/column counts.
        let input = &input[..end_position];
        // NOTE(review): when `*/` is absent this still adds `END.len()`, so
        // the length can exceed the remaining input; the lexer's bounds check
        // makes it terminate anyway — confirm this is intentional.
        let length = (end_position + Self::END.len())
            .try_into()
            .expect("the YUL should be of reasonable size");
        // The number of newlines the comment spans.
        let lines = input
            .matches('\n')
            .count()
            .try_into()
            .expect("the YUL should be of reasonable size");
        // Column extent after the comment: measured from the last newline
        // inside it, or from the comment start when it is on one line.
        let columns = match input.rfind('\n') {
            Some(new_line) => end_position - (new_line + 1),
            None => end_position,
        }
        .try_into()
        .expect("the YUL should be of reasonable size");
        Token::new(Location::new(lines, columns), Lexeme::Comment, length)
    }
}
@@ -0,0 +1,26 @@
//! The single-line comment lexeme.
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
/// The single-line comment lexeme.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Comment {}

impl Comment {
    /// The start symbol.
    pub const START: &'static str = "//";
    /// The end symbol.
    pub const END: &'static str = "\n";

    /// Returns the comment's length, including the trimmed whitespace around it.
    ///
    /// The token's location encodes the consumed span: a single-line comment
    /// always runs through the end of the line, i.e. one line down, column one.
    pub fn parse(input: &str) -> Token {
        let terminator = match input.find(Self::END) {
            Some(position) => position,
            None => input.len(),
        };
        let length = (terminator + Self::END.len())
            .try_into()
            .expect("the YUL should be of reasonable size");
        Token::new(Location::new(1, 1), Lexeme::Comment, length)
    }
}
@@ -0,0 +1,69 @@
//! The identifier lexeme.
use crate::lexer::token::lexeme::keyword::Keyword;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
/// The identifier lexeme.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Identifier {
    /// The inner string.
    pub inner: String,
}

impl Identifier {
    /// A shortcut constructor.
    pub fn new(inner: String) -> Self {
        Self { inner }
    }

    /// Parses the identifier, returning it as a token.
    ///
    /// Keywords take precedence: when the scanned word is a keyword, the
    /// keyword token is returned instead of an identifier token.
    pub fn parse(input: &str) -> Option<Token> {
        if !input.starts_with(Self::can_begin) {
            return None;
        }
        let end = input.find(Self::cannot_continue).unwrap_or(input.len());
        let word = input[..end].to_string();
        let length = word
            .len()
            .try_into()
            .expect("the YUL should be of reasonable size");
        match Keyword::parse(word.as_str()) {
            Some(keyword_token) => Some(keyword_token),
            None => Some(Token::new(
                Location::new(0, length),
                Lexeme::Identifier(Self::new(word)),
                length,
            )),
        }
    }

    /// Checks whether the character can begin an identifier.
    pub fn can_begin(character: char) -> bool {
        character.is_alphabetic() || matches!(character, '_' | '$')
    }

    /// Checks whether the character can continue an identifier.
    pub fn can_continue(character: char) -> bool {
        Self::can_begin(character)
            || character.is_numeric()
            || matches!(character, '_' | '$' | '.')
    }

    /// Checks whether the character cannot continue an identifier.
    pub fn cannot_continue(character: char) -> bool {
        !Self::can_continue(character)
    }
}

impl std::fmt::Display for Identifier {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.inner.as_str())
    }
}
@@ -0,0 +1,147 @@
//! The keyword lexeme.
use crate::lexer::token::lexeme::literal::boolean::Boolean as BooleanLiteral;
use crate::lexer::token::lexeme::literal::Literal;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
/// The keyword lexeme.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Keyword {
    /// The `object` keyword.
    Object,
    /// The `code` keyword.
    Code,
    /// The `function` keyword.
    Function,
    /// The `let` keyword.
    Let,
    /// The `if` keyword.
    If,
    /// The `switch` keyword.
    Switch,
    /// The `case` keyword.
    Case,
    /// The `default` keyword.
    Default,
    /// The `for` keyword.
    For,
    /// The `break` keyword.
    Break,
    /// The `continue` keyword.
    Continue,
    /// The `leave` keyword.
    Leave,
    /// The `true` keyword.
    True,
    /// The `false` keyword.
    False,
    /// The `bool` keyword.
    Bool,
    /// The `int{N}` keyword.
    Int(usize),
    /// The `uint{N}` keyword.
    Uint(usize),
}

impl Keyword {
    /// Parses the keyword, returning it as a token.
    ///
    /// `true` and `false` become boolean literal lexemes rather than keyword
    /// lexemes. Returns `None` when the keyword does not cover the whole
    /// input word (i.e. the caller is lexing a longer identifier).
    pub fn parse(input: &str) -> Option<Token> {
        let keyword = Self::parse_keyword(input)?;
        let lexeme = match BooleanLiteral::try_from(keyword) {
            Ok(literal) => Lexeme::Literal(Literal::Boolean(literal)),
            Err(keyword) => Lexeme::Keyword(keyword),
        };
        // The rendered keyword must match the input length exactly.
        let length = lexeme.to_string().len();
        if length != input.len() {
            return None;
        }
        let length = length
            .try_into()
            .expect("the YUL should be of reasonable size");
        Some(Token::new(Location::new(0, length), lexeme, length))
    }

    /// Parses the keyword itself.
    fn parse_keyword(input: &str) -> Option<Self> {
        if !input.starts_with(Self::can_begin) {
            return None;
        }
        let end = input.find(Self::cannot_continue).unwrap_or(input.len());
        let word = &input[..end];

        // Sized integer types: `int{N}` and `uint{N}`.
        if let Some(bits) = word
            .strip_prefix("int")
            .and_then(|tail| tail.parse::<usize>().ok())
        {
            return Some(Self::Int(bits));
        }
        if let Some(bits) = word
            .strip_prefix("uint")
            .and_then(|tail| tail.parse::<usize>().ok())
        {
            return Some(Self::Uint(bits));
        }

        match word {
            "object" => Some(Self::Object),
            "code" => Some(Self::Code),
            "function" => Some(Self::Function),
            "let" => Some(Self::Let),
            "if" => Some(Self::If),
            "switch" => Some(Self::Switch),
            "case" => Some(Self::Case),
            "default" => Some(Self::Default),
            "for" => Some(Self::For),
            "break" => Some(Self::Break),
            "continue" => Some(Self::Continue),
            "leave" => Some(Self::Leave),
            "true" => Some(Self::True),
            "false" => Some(Self::False),
            "bool" => Some(Self::Bool),
            _ => None,
        }
    }

    /// Checks whether the character can begin a keyword.
    pub fn can_begin(character: char) -> bool {
        character.is_alphabetic()
    }

    /// Checks whether the character can continue a keyword.
    pub fn can_continue(character: char) -> bool {
        Self::can_begin(character) || character.is_numeric()
    }

    /// Checks whether the character cannot continue a keyword.
    pub fn cannot_continue(character: char) -> bool {
        !Self::can_continue(character)
    }
}

impl std::fmt::Display for Keyword {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = match self {
            Self::Object => "object",
            Self::Code => "code",
            Self::Function => "function",
            Self::Let => "let",
            Self::If => "if",
            Self::Switch => "switch",
            Self::Case => "case",
            Self::Default => "default",
            Self::For => "for",
            Self::Break => "break",
            Self::Continue => "continue",
            Self::Leave => "leave",
            Self::True => "true",
            Self::False => "false",
            Self::Bool => "bool",
            // The sized types carry a bit width and need formatting.
            Self::Int(bitlength) => return write!(f, "int{bitlength}"),
            Self::Uint(bitlength) => return write!(f, "uint{bitlength}"),
        };
        f.write_str(text)
    }
}
@@ -0,0 +1,58 @@
//! The boolean literal lexeme.
use serde::Deserialize;
use serde::Serialize;
use crate::lexer::token::lexeme::keyword::Keyword;
/// The boolean literal lexeme.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Boolean {
    /// Created from the `false` keyword.
    False,
    /// Created from the `true` keyword.
    True,
}

impl Boolean {
    /// Creates a `false` value.
    pub fn r#false() -> Self {
        Self::False
    }

    /// Creates a `true` value.
    pub fn r#true() -> Self {
        Self::True
    }
}

impl TryFrom<Keyword> for Boolean {
    type Error = Keyword;

    /// Converts the `true`/`false` keywords; any other keyword is handed back
    /// unchanged as the error value.
    fn try_from(keyword: Keyword) -> Result<Self, Self::Error> {
        match keyword {
            Keyword::False => Ok(Self::False),
            Keyword::True => Ok(Self::True),
            other => Err(other),
        }
    }
}

impl From<bool> for Boolean {
    fn from(value: bool) -> Self {
        match value {
            true => Self::True,
            false => Self::False,
        }
    }
}

impl std::fmt::Display for Boolean {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = match self {
            Self::False => "false",
            Self::True => "true",
        };
        f.write_str(text)
    }
}
@@ -0,0 +1,102 @@
//! The integer literal lexeme.
use serde::Deserialize;
use serde::Serialize;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::lexeme::Literal;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
/// The integer literal lexeme.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Integer {
    /// An integer literal, like `42`.
    Decimal {
        /// The inner literal contents.
        inner: String,
    },
    /// A hexadecimal literal, like `0xffff`.
    Hexadecimal {
        /// The inner literal contents.
        inner: String,
    },
}

impl Integer {
    /// Creates a decimal value.
    pub fn new_decimal(inner: String) -> Self {
        Self::Decimal { inner }
    }

    /// Creates a hexadecimal value.
    pub fn new_hexadecimal(inner: String) -> Self {
        Self::Hexadecimal { inner }
    }

    /// Parses the value from the source code slice.
    ///
    /// A `0x` prefix selects hexadecimal parsing (the prefix is kept verbatim
    /// in the lexeme); otherwise a run of decimal digits is consumed.
    pub fn parse(input: &str) -> Option<Token> {
        let (value, length) = if let Some(digits) = input.strip_prefix("0x") {
            let digits_end = digits
                .find(Self::cannot_continue_hexadecimal)
                .unwrap_or(digits.len());
            let length = "0x".len() + digits_end;
            (Self::new_hexadecimal(input[..length].to_owned()), length)
        } else if input.starts_with(Self::can_begin_decimal) {
            let length = input
                .find(Self::cannot_continue_decimal)
                .unwrap_or(input.len());
            (Self::new_decimal(input[..length].to_owned()), length)
        } else {
            return None;
        };
        let length = length
            .try_into()
            .expect("the YUL should be of reasonable size");
        Some(Token::new(
            Location::new(0, length),
            Lexeme::Literal(Literal::Integer(value)),
            length,
        ))
    }

    /// Checks whether the character can begin a decimal number.
    pub fn can_begin_decimal(character: char) -> bool {
        Self::can_continue_decimal(character)
    }

    /// Checks whether the character can continue a decimal number.
    pub fn can_continue_decimal(character: char) -> bool {
        character.is_digit(revive_common::BASE_DECIMAL)
    }

    /// Checks whether the character cannot continue a decimal number.
    pub fn cannot_continue_decimal(character: char) -> bool {
        !Self::can_continue_decimal(character)
    }

    /// Checks whether the character can continue a hexadecimal number.
    pub fn can_continue_hexadecimal(character: char) -> bool {
        character.is_digit(revive_common::BASE_HEXADECIMAL)
    }

    /// Checks whether the character cannot continue a hexadecimal number.
    pub fn cannot_continue_hexadecimal(character: char) -> bool {
        !Self::can_continue_hexadecimal(character)
    }
}

impl std::fmt::Display for Integer {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Decimal { inner } | Self::Hexadecimal { inner } => write!(f, "{inner}"),
        }
    }
}
@@ -0,0 +1,33 @@
//! The literal lexeme.
pub mod boolean;
pub mod integer;
pub mod string;
use serde::Deserialize;
use serde::Serialize;
use self::boolean::Boolean;
use self::integer::Integer;
use self::string::String;
/// The literal lexeme.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Literal {
    /// A boolean literal, like `true`, or `false`.
    Boolean(Boolean),
    /// An integer literal, like `42`, or `0xff`.
    Integer(Integer),
    /// A string literal, like `"message"`.
    String(String),
}

impl std::fmt::Display for Literal {
    /// Delegates to the inner lexeme's `Display` implementation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Boolean(inner) => std::fmt::Display::fmt(inner, f),
            Self::Integer(inner) => std::fmt::Display::fmt(inner, f),
            Self::String(inner) => std::fmt::Display::fmt(inner, f),
        }
    }
}
@@ -0,0 +1,88 @@
//! The string literal lexeme.
use serde::Deserialize;
use serde::Serialize;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::lexeme::Literal;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
/// The string literal lexeme.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct String {
    /// The inner string contents, without the surrounding quotes.
    pub inner: std::string::String,
    /// Whether the string is hexadecimal (`hex"..."`).
    pub is_hexadecimal: bool,
}

impl String {
    /// Creates a string literal value.
    pub fn new(inner: ::std::string::String, is_hexadecimal: bool) -> Self {
        Self {
            inner,
            is_hexadecimal,
        }
    }

    /// Parses the value from the source code slice.
    ///
    /// Returns `None` if the input does not start with `"` or `hex"`.
    /// Escape sequences are kept verbatim: the backslash and the escaped
    /// character are copied into the literal unchanged.
    pub fn parse(input: &str) -> Option<Token> {
        let is_string = input.starts_with('"');
        let is_hex_string = input.starts_with(r#"hex""#);
        if !is_string && !is_hex_string {
            return None;
        }

        // Skip the opening `"` or `hex"` prefix; `length` is a byte offset.
        let mut length = if is_hex_string { r#"hex""#.len() } else { 1 };
        let content_start = length;

        // Scan for the unescaped closing quote. Characters are advanced over
        // by their UTF-8 width — the previous version indexed `chars()` with a
        // byte offset, which corrupted literals containing (or preceded by)
        // multi-byte characters.
        loop {
            let remaining = &input[length..];
            if remaining.starts_with('\\') {
                // Keep the escape verbatim: the backslash plus one character.
                length += 1;
                length += remaining[1..]
                    .chars()
                    .next()
                    .expect("Always exists")
                    .len_utf8();
                continue;
            }
            if remaining.starts_with('"') {
                length += 1;
                break;
            }
            length += remaining.chars().next().expect("Always exists").len_utf8();
        }

        // The contents lie between the opening prefix and the closing quote,
        // so the delimiting quotes are never part of `inner`.
        let string = input[content_start..length - 1].to_owned();
        let literal = Self::new(string, is_hex_string);
        let length = length
            .try_into()
            .expect("the YUL should be of reasonable size");
        Some(Token::new(
            Location::new(0, length),
            Lexeme::Literal(Literal::String(literal)),
            length,
        ))
    }
}

impl std::fmt::Display for String {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.inner)
    }
}
+42
View File
@@ -0,0 +1,42 @@
//! The lexeme.
pub mod comment;
pub mod identifier;
pub mod keyword;
pub mod literal;
pub mod symbol;
use self::identifier::Identifier;
use self::keyword::Keyword;
use self::literal::Literal;
use self::symbol::Symbol;
/// The lexeme.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Lexeme {
    /// The keyword lexeme.
    Keyword(Keyword),
    /// The symbol lexeme.
    Symbol(Symbol),
    /// The identifier lexeme.
    Identifier(Identifier),
    /// The literal lexeme.
    Literal(Literal),
    /// The comment lexeme.
    Comment,
    /// The end-of-file lexeme.
    EndOfFile,
}

impl std::fmt::Display for Lexeme {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Keyword(inner) => std::fmt::Display::fmt(inner, f),
            Self::Symbol(inner) => std::fmt::Display::fmt(inner, f),
            Self::Identifier(inner) => std::fmt::Display::fmt(inner, f),
            Self::Literal(inner) => std::fmt::Display::fmt(inner, f),
            // Comments render as nothing.
            Self::Comment => Ok(()),
            Self::EndOfFile => f.write_str("EOF"),
        }
    }
}
@@ -0,0 +1,68 @@
//! The symbol lexeme.
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
/// The symbol lexeme.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Symbol {
    /// The `:=` symbol.
    Assignment,
    /// The `->` symbol.
    Arrow,
    /// The `{` symbol.
    BracketCurlyLeft,
    /// The `}` symbol.
    BracketCurlyRight,
    /// The `(` symbol.
    ParenthesisLeft,
    /// The `)` symbol.
    ParenthesisRight,
    /// The `,` symbol.
    Comma,
    /// The `:` symbol.
    Colon,
}

impl Symbol {
    /// Parses the symbol, returning it as a token.
    ///
    /// Two-character symbols are matched first so that `:=` is not lexed as
    /// `:` followed by `=`. `starts_with`/`chars` matching replaces the
    /// previous byte slicing (`&input[..2]`, `&input[..1]`), which panicked on
    /// inputs shorter than two bytes or starting with a multi-byte character.
    pub fn parse(input: &str) -> Option<Token> {
        let (symbol, length) = if input.starts_with(":=") {
            (Self::Assignment, 2)
        } else if input.starts_with("->") {
            (Self::Arrow, 2)
        } else {
            match input.chars().next()? {
                '{' => (Self::BracketCurlyLeft, 1),
                '}' => (Self::BracketCurlyRight, 1),
                '(' => (Self::ParenthesisLeft, 1),
                ')' => (Self::ParenthesisRight, 1),
                ',' => (Self::Comma, 1),
                ':' => (Self::Colon, 1),
                _ => return None,
            }
        };
        Some(Token::new(
            Location::new(0, length),
            Lexeme::Symbol(symbol),
            length,
        ))
    }
}

impl std::fmt::Display for Symbol {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Assignment => write!(f, ":="),
            Self::Arrow => write!(f, "->"),
            Self::BracketCurlyLeft => write!(f, "{{"),
            Self::BracketCurlyRight => write!(f, "}}"),
            Self::ParenthesisLeft => write!(f, "("),
            Self::ParenthesisRight => write!(f, ")"),
            Self::Comma => write!(f, ","),
            Self::Colon => write!(f, ":"),
        }
    }
}
+55
View File
@@ -0,0 +1,55 @@
//! The lexical token location.
use serde::Deserialize;
use serde::Serialize;
/// The token location in the source code file.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, Eq)]
pub struct Location {
    /// The line number, starting from 1.
    pub line: u32,
    /// The column number, starting from 1.
    pub column: u32,
}

impl Default for Location {
    /// The default location is the beginning of the file: line 1, column 1.
    fn default() -> Self {
        Self::new(1, 1)
    }
}

impl Location {
    /// Creates a location from a line and column pair.
    pub fn new(line: u32, column: u32) -> Self {
        Self { line, column }
    }

    /// Mutates the location by shifting the original one down by `lines` and
    /// setting the column to `column`. When `lines` is zero, the location
    /// only moves rightward by `column` instead.
    pub fn shift_down(&mut self, lines: u32, column: u32) {
        if lines == 0 {
            self.shift_right(column);
        } else {
            self.line += lines;
            self.column = column;
        }
    }

    /// Mutates the location by shifting the original one rightward by `columns`.
    pub fn shift_right(&mut self, columns: u32) {
        self.column += columns;
    }
}

impl PartialEq for Location {
    fn eq(&self, other: &Self) -> bool {
        (self.line, self.column) == (other.line, other.column)
    }
}

impl std::fmt::Display for Location {
    /// Renders the location as `line:column`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}:{}", self.line, self.column)
    }
}
+36
View File
@@ -0,0 +1,36 @@
//! The token.
pub mod lexeme;
pub mod location;
use self::lexeme::Lexeme;
use self::location::Location;
/// The token.
/// Contains a lexeme and its location.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
    /// The token location.
    pub location: Location,
    /// The lexeme.
    pub lexeme: Lexeme,
    /// The token length, including whitespaces.
    pub length: u32,
}

impl Token {
    /// A shortcut constructor.
    pub fn new(location: Location, lexeme: Lexeme, length: u32) -> Self {
        Self {
            location,
            lexeme,
            length,
        }
    }
}

impl std::fmt::Display for Token {
    /// Renders the token as its location followed by `:` and the lexeme.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(&self.location, f)?;
        f.write_str(":")?;
        std::fmt::Display::fmt(&self.lexeme, f)
    }
}
+5
View File
@@ -0,0 +1,5 @@
//! The Yul IR compiling tools.
pub mod error;
pub mod lexer;
pub mod parser;
+63
View File
@@ -0,0 +1,63 @@
//! The Yul IR parser error.
use std::collections::BTreeSet;
use crate::lexer::token::location::Location;
/// The Yul IR parser error.
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
pub enum Error {
    /// An invalid token received from the lexer.
    #[error("{location} Expected one of {expected:?}, found `{found}`")]
    InvalidToken {
        /// The invalid token location.
        location: Location,
        /// The list of expected tokens, as human-readable descriptions.
        expected: Vec<&'static str>,
        /// The invalid token, rendered as a string.
        found: String,
    },
    /// A reserved keyword cannot be used as an identifier.
    #[error("{location} The identifier `{identifier}` is reserved")]
    ReservedIdentifier {
        /// The invalid token location.
        location: Location,
        /// The invalid identifier.
        identifier: String,
    },
    /// Invalid number of function arguments.
    #[error("{location} Function `{identifier}` must have {expected} arguments, found {found}")]
    InvalidNumberOfArguments {
        /// The invalid function location.
        location: Location,
        /// The invalid function name.
        identifier: String,
        /// The expected number of arguments.
        expected: usize,
        /// The actual number of arguments.
        found: usize,
    },
    /// Invalid object name: the runtime object must be the deploy object's
    /// name with a `_deployed` suffix.
    #[error(
        "{location} Objects must be named as '<name>' (deploy) and '<name>_deployed' (runtime)"
    )]
    InvalidObjectName {
        /// The invalid token location.
        location: Location,
        /// The expected identifier.
        expected: String,
        /// The invalid identifier.
        found: String,
    },
    /// Invalid attributes.
    #[error("{location} Found invalid LLVM attributes: {values:?}")]
    InvalidAttributes {
        /// The invalid token location.
        location: Location,
        /// The list of invalid attributes.
        values: BTreeSet<String>,
    },
    /// Invalid code length: a line or column count does not fit into `u32`.
    #[error("The line or column length exceed the maximum of u32::MAX")]
    InvalidLength,
}
+115
View File
@@ -0,0 +1,115 @@
//! The YUL source code identifier.
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::lexer::token::lexeme::symbol::Symbol;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
use crate::parser::r#type::Type;
/// The YUL source code identifier.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct Identifier {
    /// The location.
    pub location: Location,
    /// The inner string.
    pub inner: String,
    /// The type, if it has been explicitly specified.
    pub r#type: Option<Type>,
}

impl Identifier {
    /// A shortcut constructor (no explicit type).
    pub fn new(location: Location, inner: String) -> Self {
        Self {
            location,
            inner,
            r#type: None,
        }
    }

    /// A shortcut constructor for a typed identifier.
    pub fn new_with_type(location: Location, inner: String, r#type: Option<Type>) -> Self {
        Self {
            location,
            inner,
            r#type,
        }
    }

    /// Parses the identifier list where the types cannot be specified.
    ///
    /// Consumes a comma-separated run of identifiers, starting with `initial`
    /// when supplied (otherwise the next lexer token). Returns the parsed
    /// identifiers together with the first token that does not belong to the
    /// list, handed back so the caller can continue from it.
    pub fn parse_list(
        lexer: &mut Lexer,
        mut initial: Option<Token>,
    ) -> Result<(Vec<Self>, Option<Token>), Error> {
        let mut result = Vec::new();
        // Alternates between expecting an identifier (false) and a comma (true).
        let mut expected_comma = false;
        loop {
            let token = crate::parser::take_or_next(initial.take(), lexer)?;
            match token {
                Token {
                    location,
                    lexeme: Lexeme::Identifier(identifier),
                    ..
                } if !expected_comma => {
                    result.push(Self::new(location, identifier.inner));
                    expected_comma = true;
                }
                Token {
                    lexeme: Lexeme::Symbol(Symbol::Comma),
                    ..
                } if expected_comma => {
                    expected_comma = false;
                }
                // Any other token ends the list and is returned unconsumed.
                token => return Ok((result, Some(token))),
            }
        }
    }

    /// Parses the identifier list where the types may be optionally specified.
    ///
    /// Like `parse_list`, but each identifier may be followed by an optional
    /// `: <type>` annotation.
    pub fn parse_typed_list(
        lexer: &mut Lexer,
        mut initial: Option<Token>,
    ) -> Result<(Vec<Self>, Option<Token>), Error> {
        let mut result = Vec::new();
        // Alternates between expecting an identifier (false) and a comma (true).
        let mut expected_comma = false;
        loop {
            let token = crate::parser::take_or_next(initial.take(), lexer)?;
            match token {
                Token {
                    lexeme: Lexeme::Identifier(identifier),
                    location,
                    ..
                } if !expected_comma => {
                    // A colon after the identifier introduces a type annotation.
                    let r#type = match lexer.peek()? {
                        Token {
                            lexeme: Lexeme::Symbol(Symbol::Colon),
                            ..
                        } => {
                            lexer.next()?;
                            Some(Type::parse(lexer, None)?)
                        }
                        _ => None,
                    };
                    result.push(Self::new_with_type(location, identifier.inner, r#type));
                    expected_comma = true;
                }
                Token {
                    lexeme: Lexeme::Symbol(Symbol::Comma),
                    ..
                } if expected_comma => {
                    expected_comma = false;
                }
                // Any other token ends the list and is returned unconsumed.
                token => return Ok((result, Some(token))),
            }
        }
    }
}
+18
View File
@@ -0,0 +1,18 @@
//! The YUL code block.
pub mod error;
pub mod identifier;
pub mod statement;
pub mod r#type;
use crate::lexer::error::Error as LexerError;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
/// Returns the `token` value if it is `Some(_)`, otherwise takes the next token from the `stream`.
///
/// Parsers use this to hand an already-consumed lookahead token back into the
/// stream. The previous `mut token` + `.take()` was redundant — the `Option`
/// is owned and consumed by value, so a plain `match` suffices.
pub fn take_or_next(token: Option<Token>, lexer: &mut Lexer) -> Result<Token, LexerError> {
    match token {
        Some(token) => Ok(token),
        None => lexer.next(),
    }
}
@@ -0,0 +1,186 @@
//! The assignment expression statement.
use std::collections::HashSet;
use inkwell::types::BasicType;
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::lexer::token::lexeme::symbol::Symbol;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
use crate::parser::error::Error as ParserError;
use crate::parser::identifier::Identifier;
use crate::parser::statement::expression::Expression;
/// The Yul assignment expression statement.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct Assignment {
    /// The location.
    pub location: Location,
    /// The variable bindings, one per assigned value.
    pub bindings: Vec<Identifier>,
    /// The initializing expression.
    pub initializer: Expression,
}

impl Assignment {
    /// The element parser.
    ///
    /// Handles both the single form `x := expr` and the tuple form
    /// `x, y, z := expr`. `initial` optionally supplies an already-consumed
    /// token to start from.
    pub fn parse(lexer: &mut Lexer, initial: Option<Token>) -> Result<Self, Error> {
        let token = crate::parser::take_or_next(initial, lexer)?;

        // The statement must begin with the first binding identifier.
        let (location, identifier) = match token {
            Token {
                location,
                lexeme: Lexeme::Identifier(identifier),
                ..
            } => (location, identifier),
            token => {
                return Err(ParserError::InvalidToken {
                    location: token.location,
                    expected: vec!["{identifier}"],
                    found: token.lexeme.to_string(),
                }
                .into());
            }
        };
        // The identifier's length, needed to reconstruct its token below.
        let length = identifier
            .inner
            .len()
            .try_into()
            .map_err(|_| Error::Parser(ParserError::InvalidLength))?;

        match lexer.peek()? {
            // `x := expr`: a single binding.
            Token {
                lexeme: Lexeme::Symbol(Symbol::Assignment),
                ..
            } => {
                lexer.next()?;
                Ok(Self {
                    location,
                    bindings: vec![Identifier::new(location, identifier.inner)],
                    initializer: Expression::parse(lexer, None)?,
                })
            }
            // `x, y, ... := expr`: multiple bindings.
            Token {
                lexeme: Lexeme::Symbol(Symbol::Comma),
                ..
            } => {
                // Re-feed the first identifier so the list parser sees it too.
                let (identifiers, next) = Identifier::parse_list(
                    lexer,
                    Some(Token::new(location, Lexeme::Identifier(identifier), length)),
                )?;
                // The binding list must be followed by `:=`.
                match crate::parser::take_or_next(next, lexer)? {
                    Token {
                        lexeme: Lexeme::Symbol(Symbol::Assignment),
                        ..
                    } => {}
                    token => {
                        return Err(ParserError::InvalidToken {
                            location: token.location,
                            expected: vec![":="],
                            found: token.lexeme.to_string(),
                        }
                        .into());
                    }
                }
                Ok(Self {
                    location,
                    bindings: identifiers,
                    initializer: Expression::parse(lexer, None)?,
                })
            }
            token => Err(ParserError::InvalidToken {
                location: token.location,
                expected: vec![":=", ","],
                found: token.lexeme.to_string(),
            }
            .into()),
        }
    }

    /// Get the list of missing deployable libraries.
    pub fn get_missing_libraries(&self) -> HashSet<String> {
        self.initializer.get_missing_libraries()
    }
}

impl<D> revive_llvm_context::PolkaVMWriteLLVM<D> for Assignment
where
    D: revive_llvm_context::PolkaVMDependency + Clone,
{
    /// Lowers the assignment to LLVM IR.
    ///
    /// A single binding is stored directly to its stack slot. Multiple
    /// bindings receive the initializer's struct value through a temporary
    /// alloca: each field is read back with a GEP + load and stored into the
    /// corresponding binding's stack slot.
    fn into_llvm(
        mut self,
        context: &mut revive_llvm_context::PolkaVMContext<D>,
    ) -> anyhow::Result<()> {
        context.set_debug_location(self.location.line, 0, None)?;
        // An initializer that produces no value makes the assignment a no-op.
        let value = match self.initializer.into_llvm(context)? {
            Some(value) => value,
            None => return Ok(()),
        };

        if self.bindings.len() == 1 {
            // Fast path: store the value straight into the single binding.
            let identifier = self.bindings.remove(0);
            let pointer = context
                .current_function()
                .borrow()
                .get_stack_pointer(identifier.inner.as_str())
                .ok_or_else(|| {
                    anyhow::anyhow!(
                        "{} Assignment to an undeclared variable `{}`",
                        identifier.location,
                        identifier.inner,
                    )
                })?;
            context.build_store(pointer, value.access(context)?)?;
            return Ok(());
        }

        // Tuple path: spill the struct value to a temporary alloca, then
        // unpack it field by field into the bindings.
        let value = value.access(context)?;
        let llvm_type = value.into_struct_value().get_type();
        let tuple_pointer = context.build_alloca(llvm_type, "assignment_pointer");
        context.build_store(tuple_pointer, value)?;

        for (index, binding) in self.bindings.into_iter().enumerate() {
            context.set_debug_location(self.location.line, 0, None)?;
            // Pointer to the `index`-th field of the temporary tuple.
            let field_pointer = context.build_gep(
                tuple_pointer,
                &[
                    context.word_const(0),
                    context
                        .integer_type(revive_common::BIT_LENGTH_X32)
                        .const_int(index as u64, false),
                ],
                context.word_type().as_basic_type_enum(),
                format!("assignment_binding_{index}_gep_pointer").as_str(),
            );
            let binding_pointer = context
                .current_function()
                .borrow()
                .get_stack_pointer(binding.inner.as_str())
                .ok_or_else(|| {
                    anyhow::anyhow!(
                        "{} Assignment to an undeclared variable `{}`",
                        binding.location,
                        binding.inner,
                    )
                })?;
            let value = context.build_load(
                field_pointer,
                format!("assignment_binding_{index}_value").as_str(),
            )?;
            context.build_store(binding_pointer, value)?;
        }
        Ok(())
    }
}
+295
View File
@@ -0,0 +1,295 @@
//! The source code block.
use std::collections::HashSet;
use serde::Deserialize;
use serde::Serialize;
use inkwell::debug_info::AsDIScope;
use crate::error::Error;
use crate::lexer::token::lexeme::symbol::Symbol;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
use crate::parser::error::Error as ParserError;
use crate::parser::statement::assignment::Assignment;
use crate::parser::statement::expression::Expression;
use crate::parser::statement::Statement;
/// The Yul source code block.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct Block {
    /// The location of the opening `{` in the source code.
    pub location: Location,
    /// The block statements, in source order.
    pub statements: Vec<Statement>,
}
impl Block {
    /// The element parser.
    ///
    /// Parses a curly-braced block: `{ <statements> }`. The leading `{` must
    /// be the `initial` token (or the next lexer token). Statements are
    /// dispatched on their first token; an identifier needs a one-token
    /// lookahead to tell an assignment (`:=`, or a `,`-separated binding
    /// list) apart from an expression statement.
    pub fn parse(lexer: &mut Lexer, initial: Option<Token>) -> Result<Self, Error> {
        let token = crate::parser::take_or_next(initial, lexer)?;
        let mut statements = Vec::new();

        let location = match token {
            Token {
                lexeme: Lexeme::Symbol(Symbol::BracketCurlyLeft),
                location,
                ..
            } => location,
            token => {
                return Err(ParserError::InvalidToken {
                    location: token.location,
                    expected: vec!["{"],
                    found: token.lexeme.to_string(),
                }
                .into());
            }
        };

        // A statement parser may consume one token beyond its own end; that
        // token is carried over into the next iteration instead of re-lexing.
        let mut remaining = None;

        loop {
            match crate::parser::take_or_next(remaining.take(), lexer)? {
                token @ Token {
                    lexeme: Lexeme::Keyword(_),
                    ..
                } => {
                    let (statement, next) = Statement::parse(lexer, Some(token))?;
                    remaining = next;
                    statements.push(statement);
                }
                token @ Token {
                    lexeme: Lexeme::Literal(_),
                    ..
                } => {
                    statements
                        .push(Expression::parse(lexer, Some(token)).map(Statement::Expression)?);
                }
                // An identifier starts either an assignment (`x := …`,
                // `x, y := …`) or a bare expression such as a function call.
                token @ Token {
                    lexeme: Lexeme::Identifier(_),
                    ..
                } => match lexer.peek()? {
                    // Both `:=` and `,` lookaheads lead to the same
                    // assignment parser, so the two arms are merged.
                    Token {
                        lexeme: Lexeme::Symbol(Symbol::Assignment | Symbol::Comma),
                        ..
                    } => {
                        statements.push(
                            Assignment::parse(lexer, Some(token)).map(Statement::Assignment)?,
                        );
                    }
                    _ => {
                        statements.push(
                            Expression::parse(lexer, Some(token)).map(Statement::Expression)?,
                        );
                    }
                },
                token @ Token {
                    lexeme: Lexeme::Symbol(Symbol::BracketCurlyLeft),
                    ..
                } => statements.push(Block::parse(lexer, Some(token)).map(Statement::Block)?),
                Token {
                    lexeme: Lexeme::Symbol(Symbol::BracketCurlyRight),
                    ..
                } => break,
                token => {
                    return Err(ParserError::InvalidToken {
                        location: token.location,
                        expected: vec!["{keyword}", "{expression}", "{identifier}", "{", "}"],
                        found: token.lexeme.to_string(),
                    }
                    .into());
                }
            }
        }

        Ok(Self {
            location,
            statements,
        })
    }

    /// Get the list of missing deployable libraries.
    pub fn get_missing_libraries(&self) -> HashSet<String> {
        let mut libraries = HashSet::new();
        for statement in self.statements.iter() {
            libraries.extend(statement.get_missing_libraries());
        }
        libraries
    }
}
impl<D> revive_llvm_context::PolkaVMWriteLLVM<D> for Block
where
    D: revive_llvm_context::PolkaVMDependency + Clone,
{
    /// Translates the block into LLVM IR.
    ///
    /// Function definitions are hoisted: they are declared and fully
    /// translated first, then the remaining statements are translated in
    /// order inside the enclosing function. Statement translation stops at
    /// the first terminator (`continue`, `break`, `leave`, or any statement
    /// that already terminated the basic block).
    fn into_llvm(self, context: &mut revive_llvm_context::PolkaVMContext<D>) -> anyhow::Result<()> {
        // Remember where we are: translating nested function definitions
        // below switches the current function and basic block.
        let current_function = context.current_function().borrow().name().to_owned();
        let current_block = context.basic_block();

        let mut functions = Vec::with_capacity(self.statements.len());
        let mut local_statements = Vec::with_capacity(self.statements.len());

        // Split hoisted function definitions from local statements and
        // declare every function before any body is translated.
        for statement in self.statements.into_iter() {
            match statement {
                Statement::FunctionDefinition(mut statement) => {
                    statement.declare(context)?;
                    functions.push(statement);
                }
                statement => local_statements.push(statement),
            }
        }

        // All signatures are declared by now, so bodies may call each other.
        for function in functions.into_iter() {
            function.into_llvm(context)?;
        }

        // Restore the enclosing function and continue with the local code.
        context.set_current_function(current_function.as_str(), Some(self.location.line))?;
        if let Some(debug_info) = context.debug_info() {
            // Open a lexical debug scope covering this block.
            let di_builder = debug_info.builder();
            let di_scope = debug_info.top_scope().expect("expected a debug-info scope");
            let di_block_scope = di_builder
                .create_lexical_block(
                    di_scope,
                    debug_info.compilation_unit().get_file(),
                    self.location.line,
                    0,
                )
                .as_debug_info_scope();
            context.push_debug_scope(di_block_scope);
            context.set_debug_location(self.location.line, 0, None)?;
        }
        context.set_basic_block(current_block);

        for statement in local_statements.into_iter() {
            context.set_debug_location(statement.location().line, 0, None)?;
            // A previous statement may have terminated the basic block
            // (branch, return); anything after it is unreachable.
            if context.basic_block().get_terminator().is_some() {
                break;
            }
            match statement {
                Statement::Block(block) => {
                    block.into_llvm(context)?;
                }
                Statement::Expression(expression) => {
                    expression.into_llvm(context)?;
                }
                Statement::VariableDeclaration(statement) => statement.into_llvm(context)?,
                Statement::Assignment(statement) => statement.into_llvm(context)?,
                Statement::IfConditional(statement) => statement.into_llvm(context)?,
                Statement::Switch(statement) => statement.into_llvm(context)?,
                Statement::ForLoop(statement) => statement.into_llvm(context)?,
                Statement::Continue(_location) => {
                    context.build_unconditional_branch(context.r#loop().continue_block);
                    break;
                }
                Statement::Break(_location) => {
                    context.build_unconditional_branch(context.r#loop().join_block);
                    break;
                }
                Statement::Leave(_location) => {
                    context.build_unconditional_branch(
                        context.current_function().borrow().return_block(),
                    );
                    break;
                }
                statement => anyhow::bail!(
                    "{} Unexpected local statement: {:?}",
                    statement.location(),
                    statement
                ),
            }
        }
        // NOTE(review): the debug scope is pushed only when debug info is
        // enabled, but popped unconditionally here — presumably
        // `pop_debug_scope` is a no-op without debug info; confirm the
        // push/pop stays balanced.
        context.pop_debug_scope();

        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use crate::lexer::token::location::Location;
    use crate::lexer::Lexer;
    use crate::parser::error::Error;
    use crate::parser::statement::object::Object;

    /// A stray `(` inside a block must be reported as an invalid token at
    /// its exact source location.
    #[test]
    fn error_invalid_token_bracket_curly_left() {
        let input = r#"
object "Test" {
    code {
        {
            return(0, 0)
        }
    }
    object "Test_deployed" {
        code {
            {
                (
                    return(0, 0)
            }
        }
    }
}
}
"#;

        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::InvalidToken {
                location: Location::new(11, 17),
                expected: vec!["{keyword}", "{expression}", "{identifier}", "{", "}"],
                found: "(".to_owned(),
            }
            .into())
        );
    }

    /// A bare `:=` with no preceding identifier must be reported as an
    /// invalid statement start.
    #[test]
    fn error_invalid_token_statement() {
        let input = r#"
object "Test" {
    code {
        {
            return(0, 0)
        }
    }
    object "Test_deployed" {
        code {
            {
                :=
                    return(0, 0)
            }
        }
    }
}
"#;

        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::InvalidToken {
                location: Location::new(11, 17),
                expected: vec!["{keyword}", "{expression}", "{identifier}", "{", "}"],
                found: ":=".to_owned(),
            }
            .into())
        );
    }
}
+107
View File
@@ -0,0 +1,107 @@
//! The YUL code.
use std::collections::HashSet;
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::lexer::token::lexeme::keyword::Keyword;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
use crate::parser::error::Error as ParserError;
use crate::parser::statement::block::Block;
/// The YUL code entity, which is the first block of the object.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct Code {
    /// The location of the `code` keyword in the source.
    pub location: Location,
    /// The main block with the object's executable statements.
    pub block: Block,
}
impl Code {
    /// Parses the `code { … }` section of a Yul object.
    ///
    /// The `code` keyword must be the `initial` token (or the next lexer
    /// token); the block that follows becomes the section's main block.
    pub fn parse(lexer: &mut Lexer, initial: Option<Token>) -> Result<Self, Error> {
        let token = crate::parser::take_or_next(initial, lexer)?;

        // Only the `code` keyword may open this section.
        let location = match token.lexeme {
            Lexeme::Keyword(Keyword::Code) => token.location,
            lexeme => {
                return Err(ParserError::InvalidToken {
                    location: token.location,
                    expected: vec!["code"],
                    found: lexeme.to_string(),
                }
                .into());
            }
        };

        let block = Block::parse(lexer, None)?;

        Ok(Self { location, block })
    }

    /// Get the list of missing deployable libraries.
    pub fn get_missing_libraries(&self) -> HashSet<String> {
        self.block.get_missing_libraries()
    }
}
impl<D> revive_llvm_context::PolkaVMWriteLLVM<D> for Code
where
    D: revive_llvm_context::PolkaVMDependency + Clone,
{
    /// Lowering a `code` section is exactly lowering its main block.
    fn into_llvm(self, context: &mut revive_llvm_context::PolkaVMContext<D>) -> anyhow::Result<()> {
        self.block.into_llvm(context)
    }
}
#[cfg(test)]
mod tests {
    use crate::lexer::token::location::Location;
    use crate::lexer::Lexer;
    use crate::parser::error::Error;
    use crate::parser::statement::object::Object;

    /// A `data` section where the `code` keyword is expected must be
    /// reported as an invalid token at the keyword's location.
    #[test]
    fn error_invalid_token_code() {
        let input = r#"
object "Test" {
    data {
        {
            return(0, 0)
        }
    }
    object "Test_deployed" {
        code {
            {
                return(0, 0)
            }
        }
    }
}
"#;

        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::InvalidToken {
                location: Location::new(3, 5),
                expected: vec!["code"],
                found: "data".to_owned(),
            }
            .into())
        );
    }
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,358 @@
//! The function name.
use serde::Deserialize;
use serde::Serialize;
/// The function name.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Name {
    /// The user-defined function.
    UserDefined(String),

    /// `x + y`
    Add,
    /// `x - y`
    Sub,
    /// `x * y`
    Mul,
    /// `x / y` or `0` if `y == 0`
    Div,
    /// `x % y` or `0` if `y == 0`
    Mod,
    /// `x / y`, for signed numbers in twos complement, `0` if `y == 0`
    Sdiv,
    /// `x % y`, for signed numbers in twos complement, `0` if `y == 0`
    Smod,

    /// `1` if `x < y`, `0` otherwise
    Lt,
    /// `1` if `x > y`, `0` otherwise
    Gt,
    /// `1` if `x == y`, `0` otherwise
    Eq,
    /// `1` if `x == 0`, `0` otherwise
    IsZero,
    /// `1` if `x < y`, `0` otherwise, for signed numbers in twos complement
    Slt,
    /// `1` if `x > y`, `0` otherwise, for signed numbers in twos complement
    Sgt,

    /// bitwise "or" of `x` and `y`
    Or,
    /// bitwise "xor" of `x` and `y`
    Xor,
    /// bitwise "not" of `x` (every bit of `x` is negated)
    Not,
    /// bitwise "and" of `x` and `y`
    And,
    /// logical shift left `y` by `x` bits
    Shl,
    /// logical shift right `y` by `x` bits
    Shr,
    /// signed arithmetic shift right `y` by `x` bits
    Sar,
    /// `n`th byte of `x`, where the most significant byte is the `0`th byte
    Byte,
    /// discard value x
    Pop,

    /// `(x + y) % m` with arbitrary precision arithmetic, `0` if `m == 0`
    AddMod,
    /// `(x * y) % m` with arbitrary precision arithmetic, `0` if `m == 0`
    MulMod,
    /// `x` to the power of `y`
    Exp,
    /// sign extend from `(i*8+7)`th bit counting from least significant
    SignExtend,

    /// `keccak(mem[p…(p+n)))`
    Keccak256,

    /// `mem[p…(p+32))`
    MLoad,
    /// `mem[p…(p+32)) := v`
    MStore,
    /// `mem[p] := v & 0xff` (only modifies a single byte)
    MStore8,
    /// heap memory copy
    MCopy,

    /// `storage[p]`
    SLoad,
    /// `storage[p] := v`
    SStore,
    /// transient `storage[p]`
    TLoad,
    /// transient `storage[p] := v`
    TStore,
    /// `loadimmutable` storage read
    LoadImmutable,
    /// `setimmutable` storage write
    SetImmutable,

    /// call data starting from position `p` (32 bytes)
    CallDataLoad,
    /// size of call data in bytes
    CallDataSize,
    /// copy `s` bytes from calldata at position `f` to memory at position `t`
    CallDataCopy,
    /// size of the code of the current contract / execution context
    CodeSize,
    /// copy `s` bytes from code at position `f` to mem at position `t`
    CodeCopy,
    /// size of the code at address `a`
    ExtCodeSize,
    /// code hash of address `a`
    ExtCodeHash,
    /// size of the last returndata
    ReturnDataSize,
    /// copy `s` bytes from returndata at position `f` to mem at position `t`
    ReturnDataCopy,

    /// end execution, return data `mem[p…(p+s))`
    Return,
    /// end execution, revert state changes, return data `mem[p…(p+s))`
    Revert,
    /// stop execution, identical to `return(0, 0)`
    Stop,
    /// end execution with invalid instruction
    Invalid,

    /// log without topics and data `mem[p…(p+s))`
    Log0,
    /// log with topic t1 and data `mem[p…(p+s))`
    Log1,
    /// log with topics t1, t2 and data `mem[p…(p+s))`
    Log2,
    /// log with topics t1, t2, t3 and data `mem[p…(p+s))`
    Log3,
    /// log with topics t1, t2, t3, t4 and data `mem[p…(p+s))`
    Log4,

    /// call contract at address a with input `mem[in…(in+insize))` providing `g` gas and `v` wei
    /// and output area `mem[out…(out+outsize))` returning 0 on error (e.g. out of gas)
    /// and 1 on success
    /// [See more](https://docs.soliditylang.org/en/v0.8.2/yul.html#yul-call-return-area)
    Call,
    /// identical to call but only use the code from a and stay in the context of the current
    /// contract otherwise
    CallCode,
    /// identical to `callcode` but also keeps `caller` and `callvalue`
    DelegateCall,
    /// identical to `call(g, a, 0, in, insize, out, outsize)` but does not allow state modifications
    StaticCall,

    /// create new contract with code `mem[p…(p+n))` and send `v` wei and return the new address
    /// Passes bytecode to the system contracts.
    Create,
    /// create new contract with code `mem[p…(p+n))` at address
    /// `keccak256(0xff . this . s . keccak256(mem[p…(p+n)))` and send `v` wei and return the
    /// new address, where `0xff` is a 1-byte value, `this` is the current contract's address as a
    /// 20-byte value and `s` is a big-endian 256-bit value
    /// Passes bytecode to the system contracts.
    Create2,
    /// returns the size in the data area
    DataSize,
    /// is equivalent to `CodeCopy`
    DataCopy,
    /// returns the offset in the data area
    DataOffset,

    /// `linkersymbol` is a stub call
    LinkerSymbol,
    /// `memoryguard` is a stub call
    MemoryGuard,

    /// address of the current contract / execution context
    Address,
    /// call sender (excluding `delegatecall`)
    Caller,

    /// wei sent together with the current call
    CallValue,
    /// gas still available to execution
    Gas,
    /// wei balance at address `a`
    Balance,
    /// equivalent to `balance(address())`, but cheaper
    SelfBalance,

    /// block gas limit of the current block
    GasLimit,
    /// gas price of the transaction
    GasPrice,
    /// transaction sender
    Origin,
    /// ID of the executing chain (EIP 1344)
    ChainId,
    /// current block number
    Number,
    /// timestamp of the current block in seconds since the epoch
    Timestamp,
    /// hash of block nr b - only for last 256 blocks excluding current
    BlockHash,
    /// versioned hash of transactions i-th blob
    BlobHash,
    /// difficulty of the current block
    Difficulty,
    /// https://eips.ethereum.org/EIPS/eip-4399
    Prevrandao,
    /// current mining beneficiary
    CoinBase,
    /// size of memory, i.e. largest accessed memory index
    MSize,

    /// verbatim instruction with 0 inputs and 0 outputs only works in the Yul mode,
    /// so it is mostly used as a tool for extending Yul for PolkaVM
    Verbatim {
        /// the number of input arguments
        input_size: usize,
        /// the number of output arguments
        output_size: usize,
    },

    /// current blocks base fee (EIP-3198 and EIP-1559)
    BaseFee,
    /// current blocks blob base fee (EIP-7516 and EIP-4844)
    BlobBaseFee,
    /// current position in code
    Pc,
    /// like `codecopy(t, f, s)` but take code at address `a`
    ExtCodeCopy,
    /// end execution, destroy current contract and send funds to `a`
    SelfDestruct,
}
impl Name {
/// Tries parsing the verbatim instruction.
fn parse_verbatim(input: &str) -> Option<Self> {
let verbatim = input.strip_prefix("verbatim")?;
let regex = regex::Regex::new(r"_(\d+)i_(\d+)o").expect("Always valid");
let captures = regex.captures(verbatim)?;
let input_size: usize = captures.get(1)?.as_str().parse().ok()?;
let output_size: usize = captures.get(2)?.as_str().parse().ok()?;
Some(Self::Verbatim {
input_size,
output_size,
})
}
}
impl From<&str> for Name {
    /// Maps a Yul identifier to the corresponding builtin, falling back to
    /// `UserDefined` for anything unknown.
    fn from(input: &str) -> Self {
        // Verbatim builtins encode their arity in the identifier itself.
        if let Some(verbatim) = Self::parse_verbatim(input) {
            return verbatim;
        }

        match input {
            "add" => Self::Add,
            "sub" => Self::Sub,
            "mul" => Self::Mul,
            "div" => Self::Div,
            "mod" => Self::Mod,
            "sdiv" => Self::Sdiv,
            "smod" => Self::Smod,

            "lt" => Self::Lt,
            "gt" => Self::Gt,
            "eq" => Self::Eq,
            "iszero" => Self::IsZero,
            "slt" => Self::Slt,
            "sgt" => Self::Sgt,

            "or" => Self::Or,
            "xor" => Self::Xor,
            "not" => Self::Not,
            "and" => Self::And,
            "shl" => Self::Shl,
            "shr" => Self::Shr,
            "sar" => Self::Sar,
            "byte" => Self::Byte,
            "pop" => Self::Pop,

            "addmod" => Self::AddMod,
            "mulmod" => Self::MulMod,
            "exp" => Self::Exp,
            "signextend" => Self::SignExtend,

            "keccak256" => Self::Keccak256,

            "mload" => Self::MLoad,
            "mstore" => Self::MStore,
            "mstore8" => Self::MStore8,
            "mcopy" => Self::MCopy,

            "sload" => Self::SLoad,
            "sstore" => Self::SStore,
            "tload" => Self::TLoad,
            "tstore" => Self::TStore,
            "loadimmutable" => Self::LoadImmutable,
            "setimmutable" => Self::SetImmutable,

            "calldataload" => Self::CallDataLoad,
            "calldatasize" => Self::CallDataSize,
            "calldatacopy" => Self::CallDataCopy,
            "codesize" => Self::CodeSize,
            "codecopy" => Self::CodeCopy,
            "returndatasize" => Self::ReturnDataSize,
            "returndatacopy" => Self::ReturnDataCopy,
            "extcodesize" => Self::ExtCodeSize,
            "extcodehash" => Self::ExtCodeHash,

            "return" => Self::Return,
            "revert" => Self::Revert,

            "log0" => Self::Log0,
            "log1" => Self::Log1,
            "log2" => Self::Log2,
            "log3" => Self::Log3,
            "log4" => Self::Log4,

            "call" => Self::Call,
            "delegatecall" => Self::DelegateCall,
            "staticcall" => Self::StaticCall,

            "create" => Self::Create,
            "create2" => Self::Create2,
            "datasize" => Self::DataSize,
            "dataoffset" => Self::DataOffset,
            "datacopy" => Self::DataCopy,

            "stop" => Self::Stop,
            "invalid" => Self::Invalid,

            "linkersymbol" => Self::LinkerSymbol,
            "memoryguard" => Self::MemoryGuard,

            "address" => Self::Address,
            "caller" => Self::Caller,

            "callvalue" => Self::CallValue,
            "gas" => Self::Gas,
            "balance" => Self::Balance,
            "selfbalance" => Self::SelfBalance,

            "gaslimit" => Self::GasLimit,
            "gasprice" => Self::GasPrice,
            "origin" => Self::Origin,
            "chainid" => Self::ChainId,
            "timestamp" => Self::Timestamp,
            "number" => Self::Number,
            "blockhash" => Self::BlockHash,
            "blobhash" => Self::BlobHash,
            "difficulty" => Self::Difficulty,
            "prevrandao" => Self::Prevrandao,
            "coinbase" => Self::CoinBase,
            "basefee" => Self::BaseFee,
            "blobbasefee" => Self::BlobBaseFee,
            "msize" => Self::MSize,

            "callcode" => Self::CallCode,
            "pc" => Self::Pc,
            "extcodecopy" => Self::ExtCodeCopy,
            "selfdestruct" => Self::SelfDestruct,

            // Anything else is a user-defined function name.
            input => Self::UserDefined(input.to_owned()),
        }
    }
}
@@ -0,0 +1,33 @@
//! Translates the verbatim simulations.
use crate::parser::statement::expression::function_call::FunctionCall;
/// Translates the verbatim simulations.
///
/// PolkaVM does not support verbatim bytecode: the literal argument is
/// popped only to produce a descriptive error message, so this translation
/// always fails.
pub fn verbatim<'ctx, D>(
    context: &mut revive_llvm_context::PolkaVMContext<'ctx, D>,
    call: &mut FunctionCall,
    _input_size: usize,
    output_size: usize,
) -> anyhow::Result<Option<inkwell::values::BasicValueEnum<'ctx>>>
where
    D: revive_llvm_context::PolkaVMDependency + Clone,
{
    // Multiple return values are rejected up front with a dedicated message.
    if output_size > 1 {
        anyhow::bail!(
            "{} Verbatim instructions with multiple return values are not supported",
            call.location
        );
    }

    // The first argument must carry the original verbatim literal text.
    let mut arguments = call.pop_arguments::<D, 1>(context)?;
    let identifier = arguments[0]
        .original
        .take()
        .ok_or_else(|| anyhow::anyhow!("{} Verbatim literal is missing", call.location))?;
    anyhow::bail!(
        "{} Found unknown internal function `{}`",
        call.location,
        identifier
    )
}
@@ -0,0 +1,223 @@
//! The YUL source code literal.
use inkwell::values::BasicValue;
use num::Num;
use num::One;
use num::Zero;
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::lexer::token::lexeme::literal::boolean::Boolean as BooleanLiteral;
use crate::lexer::token::lexeme::literal::integer::Integer as IntegerLiteral;
use crate::lexer::token::lexeme::literal::Literal as LexicalLiteral;
use crate::lexer::token::lexeme::symbol::Symbol;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
use crate::parser::error::Error as ParserError;
use crate::parser::r#type::Type;
/// Represents a literal in YUL without differentiating its type.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct Literal {
    /// The location of the literal in the source code.
    pub location: Location,
    /// The lexical literal: boolean, integer, or string.
    pub inner: LexicalLiteral,
    /// The type, if it has been explicitly specified (e.g. `0:u256`).
    pub yul_type: Option<Type>,
}
impl Literal {
    /// The element parser.
    ///
    /// Parses a literal token, optionally followed by an explicit `:type`
    /// suffix.
    pub fn parse(lexer: &mut Lexer, initial: Option<Token>) -> Result<Self, Error> {
        let token = crate::parser::take_or_next(initial, lexer)?;

        let (location, literal) = match token {
            Token {
                lexeme: Lexeme::Literal(literal),
                location,
                ..
            } => (location, literal),
            token => {
                return Err(ParserError::InvalidToken {
                    location: token.location,
                    expected: vec!["{literal}"],
                    found: token.lexeme.to_string(),
                }
                .into());
            }
        };

        // An optional `: <type>` annotation may follow the literal.
        let yul_type = match lexer.peek()? {
            Token {
                lexeme: Lexeme::Symbol(Symbol::Colon),
                ..
            } => {
                lexer.next()?;
                Some(Type::parse(lexer, None)?)
            }
            _ => None,
        };

        Ok(Self {
            location,
            inner: literal,
            yul_type,
        })
    }

    /// Converts the literal into its LLVM.
    ///
    /// Booleans and integers become word constants with the big-integer
    /// value attached for constant propagation. String literals are packed
    /// into a single word as left-aligned bytes; strings longer than one
    /// word fall back to a zero constant carrying the original text.
    pub fn into_llvm<'ctx, D>(
        self,
        context: &revive_llvm_context::PolkaVMContext<'ctx, D>,
    ) -> anyhow::Result<revive_llvm_context::PolkaVMArgument<'ctx>>
    where
        D: revive_llvm_context::PolkaVMDependency + Clone,
    {
        match self.inner {
            LexicalLiteral::Boolean(inner) => {
                let value = self
                    .yul_type
                    .unwrap_or_default()
                    .into_llvm(context)
                    .const_int(
                        match inner {
                            BooleanLiteral::False => 0,
                            BooleanLiteral::True => 1,
                        },
                        false,
                    )
                    .as_basic_value_enum();

                let constant = match inner {
                    BooleanLiteral::False => num::BigUint::zero(),
                    BooleanLiteral::True => num::BigUint::one(),
                };

                Ok(revive_llvm_context::PolkaVMArgument::value(value).with_constant(constant))
            }
            LexicalLiteral::Integer(inner) => {
                let r#type = self.yul_type.unwrap_or_default().into_llvm(context);
                // The same digits are parsed twice: once by LLVM for the IR
                // constant and once as a `BigUint` for constant propagation.
                let value = match inner {
                    IntegerLiteral::Decimal { ref inner } => r#type.const_int_from_string(
                        inner.as_str(),
                        inkwell::types::StringRadix::Decimal,
                    ),
                    IntegerLiteral::Hexadecimal { ref inner } => r#type.const_int_from_string(
                        &inner["0x".len()..],
                        inkwell::types::StringRadix::Hexadecimal,
                    ),
                }
                .expect("The value is valid")
                .as_basic_value_enum();

                let constant = match inner {
                    IntegerLiteral::Decimal { ref inner } => {
                        num::BigUint::from_str_radix(inner.as_str(), revive_common::BASE_DECIMAL)
                    }
                    IntegerLiteral::Hexadecimal { ref inner } => num::BigUint::from_str_radix(
                        &inner["0x".len()..],
                        revive_common::BASE_HEXADECIMAL,
                    ),
                }
                .expect("Always valid");

                Ok(revive_llvm_context::PolkaVMArgument::value(value).with_constant(constant))
            }
            LexicalLiteral::String(inner) => {
                let string = inner.inner;
                let r#type = self.yul_type.unwrap_or_default().into_llvm(context);

                // Hex string literals are already the byte representation;
                // ordinary strings are converted byte-by-byte, decoding the
                // escape sequences along the way.
                let mut hex_string = if inner.is_hexadecimal {
                    string.clone()
                } else {
                    let mut hex_string = String::with_capacity(revive_common::BYTE_LENGTH_WORD * 2);
                    let mut index = 0;
                    loop {
                        if index >= string.len() {
                            break;
                        }

                        if string[index..].starts_with('\\') {
                            index += 1;

                            if string[index..].starts_with('x') {
                                // `\xNN`: two hex digits, copied verbatim.
                                hex_string.push_str(&string[index + 1..index + 3]);
                                index += 3;
                            } else if string[index..].starts_with('u') {
                                // `\uNNNN`: a 4-hex-digit code point,
                                // re-encoded as UTF-8 bytes.
                                let codepoint_str = &string[index + 1..index + 5];
                                let codepoint = u32::from_str_radix(
                                    codepoint_str,
                                    revive_common::BASE_HEXADECIMAL,
                                )
                                .map_err(|error| {
                                    anyhow::anyhow!(
                                        "Invalid codepoint `{}`: {}",
                                        codepoint_str,
                                        error
                                    )
                                })?;
                                let unicode_char = char::from_u32(codepoint).ok_or_else(|| {
                                    anyhow::anyhow!("Invalid codepoint {}", codepoint)
                                })?;

                                // NOTE(review): the buffer is fixed at 3 bytes
                                // and all 3 bytes are emitted, so code points
                                // whose UTF-8 encoding is shorter get
                                // zero-padded (e.g. `\u0041` -> `410000`).
                                // Verify this padding is intentional.
                                let mut unicode_bytes = vec![0u8; 3];
                                unicode_char.encode_utf8(&mut unicode_bytes);

                                for byte in unicode_bytes.into_iter() {
                                    hex_string.push_str(format!("{:02x}", byte).as_str());
                                }
                                index += 5;
                            } else if string[index..].starts_with('t') {
                                // `\t`: horizontal tab.
                                hex_string.push_str("09");
                                index += 1;
                            } else if string[index..].starts_with('n') {
                                // `\n`: line feed.
                                hex_string.push_str("0a");
                                index += 1;
                            } else if string[index..].starts_with('r') {
                                // `\r`: carriage return.
                                hex_string.push_str("0d");
                                index += 1;
                            } else if string[index..].starts_with('\n') {
                                // A backslash before a literal newline: line
                                // continuation, which emits nothing.
                                index += 1;
                            } else {
                                // Any other escaped character stands for itself.
                                hex_string
                                    .push_str(format!("{:02x}", string.as_bytes()[index]).as_str());
                                index += 1;
                            }
                        } else {
                            hex_string
                                .push_str(format!("{:02x}", string.as_bytes()[index]).as_str());
                            index += 1;
                        }
                    }
                    hex_string
                };

                // Strings longer than one word cannot become a constant;
                // pass a zero value carrying the original text instead.
                if hex_string.len() > revive_common::BYTE_LENGTH_WORD * 2 {
                    return Ok(revive_llvm_context::PolkaVMArgument::value(
                        r#type.const_zero().as_basic_value_enum(),
                    )
                    .with_original(string));
                }

                // Left-align the bytes by zero-padding up to a full word.
                if hex_string.len() < revive_common::BYTE_LENGTH_WORD * 2 {
                    hex_string.push_str(
                        "0".repeat((revive_common::BYTE_LENGTH_WORD * 2) - hex_string.len())
                            .as_str(),
                    );
                }

                let value = r#type
                    .const_int_from_string(
                        hex_string.as_str(),
                        inkwell::types::StringRadix::Hexadecimal,
                    )
                    .expect("The value is valid")
                    .as_basic_value_enum();

                Ok(revive_llvm_context::PolkaVMArgument::value(value).with_original(string))
            }
        }
    }
}
@@ -0,0 +1,146 @@
//! The expression statement.
pub mod function_call;
pub mod literal;
use std::collections::HashSet;
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::lexer::token::lexeme::symbol::Symbol;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
use crate::parser::error::Error as ParserError;
use crate::parser::identifier::Identifier;
use self::function_call::FunctionCall;
use self::literal::Literal;
/// The Yul expression statement.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Expression {
    /// The function call subexpression, e.g. `f(x, y)`.
    FunctionCall(FunctionCall),
    /// The identifier operand, i.e. a variable reference.
    Identifier(Identifier),
    /// The literal operand: a boolean, integer, or string.
    Literal(Literal),
}
impl Expression {
/// The element parser.
pub fn parse(lexer: &mut Lexer, initial: Option<Token>) -> Result<Self, Error> {
let token = crate::parser::take_or_next(initial, lexer)?;
let (location, identifier) = match token {
Token {
lexeme: Lexeme::Literal(_),
..
} => return Ok(Self::Literal(Literal::parse(lexer, Some(token))?)),
Token {
location,
lexeme: Lexeme::Identifier(identifier),
..
} => (location, identifier),
token => {
return Err(ParserError::InvalidToken {
location: token.location,
expected: vec!["{literal}", "{identifier}"],
found: token.lexeme.to_string(),
}
.into());
}
};
let length = identifier
.inner
.len()
.try_into()
.map_err(|_| Error::Parser(ParserError::InvalidLength))?;
match lexer.peek()? {
Token {
lexeme: Lexeme::Symbol(Symbol::ParenthesisLeft),
..
} => {
lexer.next()?;
Ok(Self::FunctionCall(FunctionCall::parse(
lexer,
Some(Token::new(location, Lexeme::Identifier(identifier), length)),
)?))
}
_ => Ok(Self::Identifier(Identifier::new(
location,
identifier.inner,
))),
}
}
/// Get the list of missing deployable libraries.
pub fn get_missing_libraries(&self) -> HashSet<String> {
match self {
Self::FunctionCall(inner) => inner.get_missing_libraries(),
Self::Identifier(_) => HashSet::new(),
Self::Literal(_) => HashSet::new(),
}
}
/// Returns the statement location.
pub fn location(&self) -> Location {
match self {
Self::FunctionCall(inner) => inner.location,
Self::Identifier(inner) => inner.location,
Self::Literal(inner) => inner.location,
}
}
/// Converts the expression into an LLVM value.
pub fn into_llvm<'ctx, D>(
self,
context: &mut revive_llvm_context::PolkaVMContext<'ctx, D>,
) -> anyhow::Result<Option<revive_llvm_context::PolkaVMArgument<'ctx>>>
where
D: revive_llvm_context::PolkaVMDependency + Clone,
{
match self {
Self::Literal(literal) => literal
.clone()
.into_llvm(context)
.map_err(|error| {
anyhow::anyhow!(
"{} Invalid literal `{}`: {}",
literal.location,
literal.inner.to_string(),
error
)
})
.map(Some),
Self::Identifier(identifier) => {
let id = identifier.inner;
let pointer = context
.current_function()
.borrow()
.get_stack_pointer(&id)
.ok_or_else(|| {
anyhow::anyhow!("{} Undeclared variable `{}`", identifier.location, id)
})?;
let constant = context.current_function().borrow().yul().get_constant(&id);
let argument = revive_llvm_context::PolkaVMArgument::pointer(pointer, id);
Ok(Some(match constant {
Some(constant) => argument.with_constant(constant),
_ => argument,
}))
}
Self::FunctionCall(call) => Ok(call
.into_llvm(context)?
.map(revive_llvm_context::PolkaVMArgument::value)),
}
}
}
+111
View File
@@ -0,0 +1,111 @@
//! The for-loop statement.
use std::collections::HashSet;
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
use crate::parser::statement::block::Block;
use crate::parser::statement::expression::Expression;
/// The Yul for-loop statement:
/// `for <initializer> <condition> <finalizer> <body>`.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct ForLoop {
    /// The location.
    pub location: Location,
    /// The index variables initialization block, run once before the loop.
    pub initializer: Block,
    /// The continue condition expression, evaluated before every iteration.
    pub condition: Expression,
    /// The index variables mutating block, run after every iteration.
    pub finalizer: Block,
    /// The loop body.
    pub body: Block,
}
impl ForLoop {
    /// The element parser.
    ///
    /// Consumes, in order: the initializer block, the condition expression,
    /// the finalizer block, and the loop body.
    pub fn parse(lexer: &mut Lexer, initial: Option<Token>) -> Result<Self, Error> {
        let token = crate::parser::take_or_next(initial, lexer)?;
        let location = token.location;

        // Struct fields are evaluated top-to-bottom, which preserves the
        // required parsing order of the four loop components.
        Ok(Self {
            location,
            initializer: Block::parse(lexer, Some(token))?,
            condition: Expression::parse(lexer, None)?,
            finalizer: Block::parse(lexer, None)?,
            body: Block::parse(lexer, None)?,
        })
    }

    /// Get the list of missing deployable libraries.
    pub fn get_missing_libraries(&self) -> HashSet<String> {
        let mut libraries = HashSet::new();
        for part in [
            self.initializer.get_missing_libraries(),
            self.condition.get_missing_libraries(),
            self.finalizer.get_missing_libraries(),
            self.body.get_missing_libraries(),
        ] {
            libraries.extend(part);
        }
        libraries
    }
}
impl<D> revive_llvm_context::PolkaVMWriteLLVM<D> for ForLoop
where
    D: revive_llvm_context::PolkaVMDependency + Clone,
{
    /// Translates the for-loop into the classic four-block CFG:
    /// condition -> body -> increment -> condition, with a join block as the
    /// exit. `continue` branches to the increment block and `break` to the
    /// join block (registered via `push_loop`).
    fn into_llvm(self, context: &mut revive_llvm_context::PolkaVMContext<D>) -> anyhow::Result<()> {
        // The initializer runs once, in the current basic block.
        self.initializer.into_llvm(context)?;

        let condition_block = context.append_basic_block("for_condition");
        let body_block = context.append_basic_block("for_body");
        let increment_block = context.append_basic_block("for_increment");
        let join_block = context.append_basic_block("for_join");

        context.build_unconditional_branch(condition_block);
        context.set_basic_block(condition_block);
        // Widen the condition to a word and compare against zero to obtain
        // the branch flag.
        let condition = self
            .condition
            .into_llvm(context)?
            .expect("Always exists")
            .access(context)?
            .into_int_value();
        let condition = context.builder().build_int_z_extend_or_bit_cast(
            condition,
            context.word_type(),
            "for_condition_extended",
        )?;
        let condition = context.builder().build_int_compare(
            inkwell::IntPredicate::NE,
            condition,
            context.word_const(0),
            "for_condition_compared",
        )?;
        context.build_conditional_branch(condition, body_block, join_block)?;

        // Register the loop targets for `continue` and `break` statements
        // translated inside the body.
        context.push_loop(body_block, increment_block, join_block);

        context.set_basic_block(body_block);
        self.body.into_llvm(context)?;
        context.build_unconditional_branch(increment_block);

        context.set_basic_block(increment_block);
        self.finalizer.into_llvm(context)?;
        context.build_unconditional_branch(condition_block);

        context.pop_loop();
        context.set_basic_block(join_block);

        Ok(())
    }
}
@@ -0,0 +1,589 @@
//! The function definition statement.
use std::collections::BTreeSet;
use std::collections::HashSet;
use inkwell::types::BasicType;
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::lexer::token::lexeme::symbol::Symbol;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
use crate::parser::error::Error as ParserError;
use crate::parser::identifier::Identifier;
use crate::parser::statement::block::Block;
use crate::parser::statement::expression::function_call::name::Name as FunctionName;
/// The function definition statement.
/// All functions are translated in two steps:
/// 1. The hoisted declaration
/// 2. The definition, which now has the access to all function signatures
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct FunctionDefinition {
    /// The location.
    pub location: Location,
    /// The function identifier.
    pub identifier: String,
    /// The function formal arguments.
    pub arguments: Vec<Identifier>,
    /// The function return variables.
    pub result: Vec<Identifier>,
    /// The function body block.
    pub body: Block,
    /// The function LLVM attributes encoded in the identifier.
    /// Extracted from the `$llvm_..._llvm$` section of the name, if present
    /// (see [`FunctionDefinition::get_llvm_attributes`]).
    pub attributes: BTreeSet<revive_llvm_context::PolkaVMAttribute>,
}
impl FunctionDefinition {
    /// The LLVM attribute section prefix.
    pub const LLVM_ATTRIBUTE_PREFIX: &'static str = "$llvm_";
    /// The LLVM attribute section suffix.
    pub const LLVM_ATTRIBUTE_SUFFIX: &'static str = "_llvm$";
    /// The element parser.
    ///
    /// Parses `function <name>(<args>) [-> <returns>] { ... }`. The leading
    /// `function` keyword has already been consumed by the caller
    /// (see `Statement::parse`), so the first expected token is the identifier.
    pub fn parse(lexer: &mut Lexer, initial: Option<Token>) -> Result<Self, Error> {
        let token = crate::parser::take_or_next(initial, lexer)?;
        // The function name must be a plain identifier.
        let (location, identifier) = match token {
            Token {
                lexeme: Lexeme::Identifier(identifier),
                location,
                ..
            } => (location, identifier),
            token => {
                return Err(ParserError::InvalidToken {
                    location: token.location,
                    expected: vec!["{identifier}"],
                    found: token.lexeme.to_string(),
                }
                .into());
            }
        };
        let identifier = Identifier::new(location, identifier.inner);
        // Names of built-in functions may not be redefined by the user.
        match FunctionName::from(identifier.inner.as_str()) {
            FunctionName::UserDefined(_) => {}
            _function_name => {
                return Err(ParserError::ReservedIdentifier {
                    location,
                    identifier: identifier.inner,
                }
                .into())
            }
        }
        // Mandatory `(` opening the argument list.
        match lexer.next()? {
            Token {
                lexeme: Lexeme::Symbol(Symbol::ParenthesisLeft),
                ..
            } => {}
            token => {
                return Err(ParserError::InvalidToken {
                    location: token.location,
                    expected: vec!["("],
                    found: token.lexeme.to_string(),
                }
                .into());
            }
        }
        // The typed-list parser may read one token of lookahead, returned as `next`.
        let (arguments, next) = Identifier::parse_typed_list(lexer, None)?;
        match crate::parser::take_or_next(next, lexer)? {
            Token {
                lexeme: Lexeme::Symbol(Symbol::ParenthesisRight),
                ..
            } => {}
            token => {
                return Err(ParserError::InvalidToken {
                    location: token.location,
                    expected: vec![")"],
                    found: token.lexeme.to_string(),
                }
                .into());
            }
        }
        // The return list (after `->`) is optional; a `{` means no returns.
        let (result, next) = match lexer.peek()? {
            Token {
                lexeme: Lexeme::Symbol(Symbol::Arrow),
                ..
            } => {
                lexer.next()?;
                Identifier::parse_typed_list(lexer, None)?
            }
            Token {
                lexeme: Lexeme::Symbol(Symbol::BracketCurlyLeft),
                ..
            } => (vec![], None),
            token => {
                return Err(ParserError::InvalidToken {
                    location: token.location,
                    expected: vec!["->", "{"],
                    found: token.lexeme.to_string(),
                }
                .into());
            }
        };
        let body = Block::parse(lexer, next)?;
        // LLVM attributes are encoded in the identifier itself; this can fail
        // on unknown attribute names.
        let attributes = Self::get_llvm_attributes(&identifier)?;
        Ok(Self {
            location,
            identifier: identifier.inner,
            arguments,
            result,
            body,
            attributes,
        })
    }
    /// Gets the list of missing deployable libraries.
    pub fn get_missing_libraries(&self) -> HashSet<String> {
        self.body.get_missing_libraries()
    }
    /// Gets the list of LLVM attributes provided in the function name.
    ///
    /// The attributes are encoded as `$llvm_<attr>[_<attr>...]_llvm$` inside
    /// the identifier. Returns an empty set when no marker pair is present or
    /// the markers are out of order; returns `ParserError::InvalidAttributes`
    /// when any of the names is unknown to the LLVM context.
    pub fn get_llvm_attributes(
        identifier: &Identifier,
    ) -> Result<BTreeSet<revive_llvm_context::PolkaVMAttribute>, Error> {
        let mut valid_attributes = BTreeSet::new();
        // Only the first occurrence of each marker is considered.
        let llvm_begin = identifier.inner.find(Self::LLVM_ATTRIBUTE_PREFIX);
        let llvm_end = identifier.inner.find(Self::LLVM_ATTRIBUTE_SUFFIX);
        let attribute_string = if let (Some(llvm_begin), Some(llvm_end)) = (llvm_begin, llvm_end) {
            if llvm_begin < llvm_end {
                &identifier.inner[llvm_begin + Self::LLVM_ATTRIBUTE_PREFIX.len()..llvm_end]
            } else {
                // Suffix before prefix: treat as no attribute section.
                return Ok(valid_attributes);
            }
        } else {
            return Ok(valid_attributes);
        };
        // Collect all invalid names first so the error reports every one of them.
        let mut invalid_attributes = BTreeSet::new();
        for value in attribute_string.split('_') {
            match revive_llvm_context::PolkaVMAttribute::try_from(value) {
                Ok(attribute) => valid_attributes.insert(attribute),
                Err(value) => invalid_attributes.insert(value),
            };
        }
        if !invalid_attributes.is_empty() {
            return Err(ParserError::InvalidAttributes {
                location: identifier.location,
                values: invalid_attributes,
            }
            .into());
        }
        Ok(valid_attributes)
    }
}
impl<D> revive_llvm_context::PolkaVMWriteLLVM<D> for FunctionDefinition
where
    D: revive_llvm_context::PolkaVMDependency + Clone,
{
    /// Hoists the function declaration so that all signatures are known
    /// before any body is translated.
    fn declare(
        &mut self,
        context: &mut revive_llvm_context::PolkaVMContext<D>,
    ) -> anyhow::Result<()> {
        // Arguments without an explicit Yul type default to the default type.
        let argument_types: Vec<_> = self
            .arguments
            .iter()
            .map(|argument| {
                let yul_type = argument.r#type.to_owned().unwrap_or_default();
                yul_type.into_llvm(context).as_basic_type_enum()
            })
            .collect();
        let function_type = context.function_type(argument_types, self.result.len());
        let function = context.add_function(
            self.identifier.as_str(),
            function_type,
            self.result.len(),
            Some(inkwell::module::Linkage::External),
        )?;
        // Apply the attributes that were encoded in the Yul identifier.
        revive_llvm_context::PolkaVMFunction::set_attributes(
            context.llvm(),
            function.borrow().declaration(),
            &self.attributes.clone().into_iter().collect::<Vec<_>>(),
            true,
        );
        function
            .borrow_mut()
            .set_yul_data(revive_llvm_context::PolkaVMFunctionYulData::default());
        Ok(())
    }
    /// Translates the function body, wiring up return variables and spilling
    /// the formal arguments into stack slots.
    fn into_llvm(
        mut self,
        context: &mut revive_llvm_context::PolkaVMContext<D>,
    ) -> anyhow::Result<()> {
        context.set_current_function(self.identifier.as_str(), Some(self.location.line))?;
        context.set_basic_block(context.current_function().borrow().entry_block());
        let r#return = context.current_function().borrow().r#return();
        // Zero-initialize the return variable(s) and register them as stack
        // pointers so the body can assign to them by name.
        match r#return {
            revive_llvm_context::PolkaVMFunctionReturn::None => {}
            revive_llvm_context::PolkaVMFunctionReturn::Primitive { pointer } => {
                let identifier = self.result.pop().expect("Always exists");
                let r#type = identifier.r#type.unwrap_or_default();
                context.build_store(pointer, r#type.into_llvm(context).const_zero())?;
                context
                    .current_function()
                    .borrow_mut()
                    .insert_stack_pointer(identifier.inner, pointer);
            }
            revive_llvm_context::PolkaVMFunctionReturn::Compound { pointer, .. } => {
                // Multiple returns live in one aggregate; each element gets a GEP.
                for (index, identifier) in self.result.into_iter().enumerate() {
                    let r#type = identifier.r#type.unwrap_or_default().into_llvm(context);
                    let pointer = context.build_gep(
                        pointer,
                        &[
                            context.word_const(0),
                            context
                                .integer_type(revive_common::BIT_LENGTH_X32)
                                .const_int(index as u64, false),
                        ],
                        context.word_type(),
                        format!("return_{index}_gep_pointer").as_str(),
                    );
                    context.build_store(pointer, r#type.const_zero())?;
                    context
                        .current_function()
                        .borrow_mut()
                        .insert_stack_pointer(identifier.inner.clone(), pointer);
                }
            }
        };
        let argument_types: Vec<_> = self
            .arguments
            .iter()
            .map(|argument| {
                let yul_type = argument.r#type.to_owned().unwrap_or_default();
                yul_type.into_llvm(context)
            })
            .collect();
        // Spill every formal argument into an alloca so it is addressable
        // like any other Yul variable.
        for (index, argument) in self.arguments.iter().enumerate() {
            let pointer = context.build_alloca(argument_types[index], argument.inner.as_str());
            context
                .current_function()
                .borrow_mut()
                .insert_stack_pointer(argument.inner.clone(), pointer);
            context.build_store(
                pointer,
                context.current_function().borrow().get_nth_param(index),
            )?;
        }
        self.body.into_llvm(context)?;
        context.set_debug_location(self.location.line, 0, None)?;
        // Only add the fall-through branch if the body did not already end
        // the block with a terminator (branch or switch).
        match context
            .basic_block()
            .get_last_instruction()
            .map(|instruction| instruction.get_opcode())
        {
            Some(inkwell::values::InstructionOpcode::Br) => {}
            Some(inkwell::values::InstructionOpcode::Switch) => {}
            _ => context
                .build_unconditional_branch(context.current_function().borrow().return_block()),
        }
        // Emit the shared return block: load the return slot(s) and return.
        context.set_basic_block(context.current_function().borrow().return_block());
        match context.current_function().borrow().r#return() {
            revive_llvm_context::PolkaVMFunctionReturn::None => {
                context.build_return(None);
            }
            revive_llvm_context::PolkaVMFunctionReturn::Primitive { pointer } => {
                let return_value = context.build_load(pointer, "return_value")?;
                context.build_return(Some(&return_value));
            }
            revive_llvm_context::PolkaVMFunctionReturn::Compound { pointer, .. } => {
                let return_value = context.build_load(pointer, "return_value")?;
                context.build_return(Some(&return_value));
            }
        }
        context.pop_debug_scope();
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use std::collections::BTreeSet;
    use crate::lexer::token::location::Location;
    use crate::lexer::Lexer;
    use crate::parser::error::Error;
    use crate::parser::statement::object::Object;
    // A numeric literal is not a valid function name.
    #[test]
    fn error_invalid_token_identifier() {
        let input = r#"
object "Test" {
    code {
        {
            return(0, 0)
        }
    }
    object "Test_deployed" {
        code {
            {
                return(0, 0)
            }
            function 256() -> result {
                result := 42
            }
        }
    }
}
    "#;
        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::InvalidToken {
                location: Location::new(14, 22),
                expected: vec!["{identifier}"],
                found: "256".to_owned(),
            }
            .into())
        );
    }
    // The argument list must open with `(`, not `{`.
    #[test]
    fn error_invalid_token_parenthesis_left() {
        let input = r#"
object "Test" {
    code {
        {
            return(0, 0)
        }
    }
    object "Test_deployed" {
        code {
            {
                return(0, 0)
            }
            function test{) -> result {
                result := 42
            }
        }
    }
}
    "#;
        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::InvalidToken {
                location: Location::new(14, 26),
                expected: vec!["("],
                found: "{".to_owned(),
            }
            .into())
        );
    }
    // The argument list must close with `)`, not `}`.
    #[test]
    fn error_invalid_token_parenthesis_right() {
        let input = r#"
object "Test" {
    code {
        {
            return(0, 0)
        }
    }
    object "Test_deployed" {
        code {
            {
                return(0, 0)
            }
            function test(} -> result {
                result := 42
            }
        }
    }
}
    "#;
        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::InvalidToken {
                location: Location::new(14, 27),
                expected: vec![")"],
                found: "}".to_owned(),
            }
            .into())
        );
    }
    // After the argument list only `->` or `{` may follow.
    #[test]
    fn error_invalid_token_arrow_or_bracket_curly_left() {
        let input = r#"
object "Test" {
    code {
        {
            return(0, 0)
        }
    }
    object "Test_deployed" {
        code {
            {
                return(0, 0)
            }
            function test() := result {
                result := 42
            }
        }
    }
}
    "#;
        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::InvalidToken {
                location: Location::new(14, 29),
                expected: vec!["->", "{"],
                found: ":=".to_owned(),
            }
            .into())
        );
    }
    // A built-in name (`basefee`) may not be redefined by the user.
    #[test]
    fn error_reserved_identifier() {
        let input = r#"
object "Test" {
    code {
        {
            return(0, 0)
        }
    }
    object "Test_deployed" {
        code {
            {
                return(0, 0)
            }
            function basefee() -> result {
                result := 42
            }
        }
    }
}
    "#;
        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::ReservedIdentifier {
                location: Location::new(14, 22),
                identifier: "basefee".to_owned()
            }
            .into())
        );
    }
    // A single unknown attribute inside `$llvm_..._llvm$` is rejected.
    #[test]
    fn error_invalid_attributes_single() {
        let input = r#"
object "Test" {
    code {
        {
            return(0, 0)
        }
    }
    object "Test_deployed" {
        code {
            {
                return(0, 0)
            }
            function test_$llvm_UnknownAttribute_llvm$_test() -> result {
                result := 42
            }
        }
    }
}
    "#;
        let mut invalid_attributes = BTreeSet::new();
        invalid_attributes.insert("UnknownAttribute".to_owned());
        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::InvalidAttributes {
                location: Location::new(14, 22),
                values: invalid_attributes,
            }
            .into())
        );
    }
    // Repeated unknown attributes are deduplicated in the error report.
    #[test]
    fn error_invalid_attributes_multiple_repeated() {
        let input = r#"
object "Test" {
    code {
        {
            return(0, 0)
        }
    }
    object "Test_deployed" {
        code {
            {
                return(0, 0)
            }
            function test_$llvm_UnknownAttribute1_UnknownAttribute1_UnknownAttribute2_llvm$_test() -> result {
                result := 42
            }
        }
    }
}
    "#;
        let mut invalid_attributes = BTreeSet::new();
        invalid_attributes.insert("UnknownAttribute1".to_owned());
        invalid_attributes.insert("UnknownAttribute2".to_owned());
        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::InvalidAttributes {
                location: Location::new(14, 22),
                values: invalid_attributes,
            }
            .into())
        );
    }
}
@@ -0,0 +1,83 @@
//! The if-conditional statement.
use std::collections::HashSet;
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
use crate::parser::statement::block::Block;
use crate::parser::statement::expression::Expression;
/// The Yul if-conditional statement.
/// Yul has no `else`; an `if` consists of exactly one condition and one block.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct IfConditional {
    /// The location.
    pub location: Location,
    /// The condition expression.
    pub condition: Expression,
    /// The conditional block, executed when the condition is non-zero.
    pub block: Block,
}
impl IfConditional {
    /// Parses an `if` statement: a condition expression followed by a block.
    pub fn parse(lexer: &mut Lexer, initial: Option<Token>) -> Result<Self, Error> {
        // The first token anchors the statement location and also begins the
        // condition expression, so it is handed back to the expression parser.
        let first = crate::parser::take_or_next(initial, lexer)?;
        let statement_location = first.location;
        let parsed_condition = Expression::parse(lexer, Some(first))?;
        let parsed_block = Block::parse(lexer, None)?;
        Ok(Self {
            location: statement_location,
            condition: parsed_condition,
            block: parsed_block,
        })
    }
    /// Collects the deployable libraries referenced by the condition and the
    /// conditional block that are not yet provided.
    pub fn get_missing_libraries(&self) -> HashSet<String> {
        let mut missing = self.block.get_missing_libraries();
        missing.extend(self.condition.get_missing_libraries());
        missing
    }
}
impl<D> revive_llvm_context::PolkaVMWriteLLVM<D> for IfConditional
where
    D: revive_llvm_context::PolkaVMDependency + Clone,
{
    /// Lowers the `if` into a conditional branch over two basic blocks:
    /// `if_main` (the taken body) and `if_join` (the fall-through).
    fn into_llvm(self, context: &mut revive_llvm_context::PolkaVMContext<D>) -> anyhow::Result<()> {
        // Evaluate the condition, widen it to the VM word type, and compare
        // against zero: any non-zero value takes the branch.
        let condition = self
            .condition
            .into_llvm(context)?
            .expect("Always exists")
            .access(context)?
            .into_int_value();
        let condition = context.builder().build_int_z_extend_or_bit_cast(
            condition,
            context.word_type(),
            "if_condition_extended",
        )?;
        let condition = context.builder().build_int_compare(
            inkwell::IntPredicate::NE,
            condition,
            context.word_const(0),
            "if_condition_compared",
        )?;
        let main_block = context.append_basic_block("if_main");
        let join_block = context.append_basic_block("if_join");
        context.build_conditional_branch(condition, main_block, join_block)?;
        context.set_basic_block(main_block);
        self.block.into_llvm(context)?;
        context.build_unconditional_branch(join_block);
        // Subsequent statements continue in the join block.
        context.set_basic_block(join_block);
        Ok(())
    }
}
+179
View File
@@ -0,0 +1,179 @@
//! The block statement.
pub mod assignment;
pub mod block;
pub mod code;
pub mod expression;
pub mod for_loop;
pub mod function_definition;
pub mod if_conditional;
pub mod object;
pub mod switch;
pub mod variable_declaration;
use std::collections::HashSet;
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::lexer::token::lexeme::keyword::Keyword;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
use crate::parser::error::Error as ParserError;
use self::assignment::Assignment;
use self::block::Block;
use self::code::Code;
use self::expression::Expression;
use self::for_loop::ForLoop;
use self::function_definition::FunctionDefinition;
use self::if_conditional::IfConditional;
use self::object::Object;
use self::switch::Switch;
use self::variable_declaration::VariableDeclaration;
/// The Yul block statement.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Statement {
    /// The object element.
    Object(Object),
    /// The code element.
    Code(Code),
    /// The code block.
    Block(Block),
    /// The expression.
    Expression(Expression),
    /// The `function` statement.
    FunctionDefinition(FunctionDefinition),
    /// The `let` statement.
    VariableDeclaration(VariableDeclaration),
    /// The `:=` existing variables reassignment statement.
    Assignment(Assignment),
    /// The `if` statement.
    IfConditional(IfConditional),
    /// The `switch` statement.
    Switch(Switch),
    /// The `for` statement.
    ForLoop(ForLoop),
    /// The `continue` statement. Only the source location is retained.
    Continue(Location),
    /// The `break` statement. Only the source location is retained.
    Break(Location),
    /// The `leave` statement. Only the source location is retained.
    Leave(Location),
}
impl Statement {
    /// The element parser.
    ///
    /// Dispatches on the leading keyword. Returns the parsed statement plus an
    /// optional lookahead token: some sub-parsers (`let`) must read one token
    /// past their end, which is handed back here for the caller to consume.
    /// Note: expressions and nested blocks are not handled here; any
    /// non-keyword token is rejected with `InvalidToken`.
    pub fn parse(
        lexer: &mut Lexer,
        initial: Option<Token>,
    ) -> Result<(Self, Option<Token>), Error> {
        let token = crate::parser::take_or_next(initial, lexer)?;
        match token {
            // `object` keeps its token because the object parser re-checks it.
            token @ Token {
                lexeme: Lexeme::Keyword(Keyword::Object),
                ..
            } => Ok((Statement::Object(Object::parse(lexer, Some(token))?), None)),
            Token {
                lexeme: Lexeme::Keyword(Keyword::Code),
                ..
            } => Ok((Statement::Code(Code::parse(lexer, None)?), None)),
            Token {
                lexeme: Lexeme::Keyword(Keyword::Function),
                ..
            } => Ok((
                Statement::FunctionDefinition(FunctionDefinition::parse(lexer, None)?),
                None,
            )),
            Token {
                lexeme: Lexeme::Keyword(Keyword::Let),
                ..
            } => {
                // The only variant that can produce a lookahead token.
                let (statement, next) = VariableDeclaration::parse(lexer, None)?;
                Ok((Statement::VariableDeclaration(statement), next))
            }
            Token {
                lexeme: Lexeme::Keyword(Keyword::If),
                ..
            } => Ok((
                Statement::IfConditional(IfConditional::parse(lexer, None)?),
                None,
            )),
            Token {
                lexeme: Lexeme::Keyword(Keyword::Switch),
                ..
            } => Ok((Statement::Switch(Switch::parse(lexer, None)?), None)),
            Token {
                lexeme: Lexeme::Keyword(Keyword::For),
                ..
            } => Ok((Statement::ForLoop(ForLoop::parse(lexer, None)?), None)),
            // The flow-control keywords carry no payload except their location.
            Token {
                lexeme: Lexeme::Keyword(Keyword::Continue),
                location,
                ..
            } => Ok((Statement::Continue(location), None)),
            Token {
                lexeme: Lexeme::Keyword(Keyword::Break),
                location,
                ..
            } => Ok((Statement::Break(location), None)),
            Token {
                lexeme: Lexeme::Keyword(Keyword::Leave),
                location,
                ..
            } => Ok((Statement::Leave(location), None)),
            token => Err(ParserError::InvalidToken {
                location: token.location,
                expected: vec![
                    "object", "code", "function", "let", "if", "switch", "for", "continue",
                    "break", "leave",
                ],
                found: token.lexeme.to_string(),
            }
            .into()),
        }
    }
    /// Get the list of missing deployable libraries.
    pub fn get_missing_libraries(&self) -> HashSet<String> {
        match self {
            Self::Object(inner) => inner.get_missing_libraries(),
            Self::Code(inner) => inner.get_missing_libraries(),
            Self::Block(inner) => inner.get_missing_libraries(),
            Self::Expression(inner) => inner.get_missing_libraries(),
            Self::FunctionDefinition(inner) => inner.get_missing_libraries(),
            Self::VariableDeclaration(inner) => inner.get_missing_libraries(),
            Self::Assignment(inner) => inner.get_missing_libraries(),
            Self::IfConditional(inner) => inner.get_missing_libraries(),
            Self::Switch(inner) => inner.get_missing_libraries(),
            Self::ForLoop(inner) => inner.get_missing_libraries(),
            // Flow-control statements cannot reference libraries.
            Self::Continue(_) => HashSet::new(),
            Self::Break(_) => HashSet::new(),
            Self::Leave(_) => HashSet::new(),
        }
    }
    /// Returns the statement location.
    pub fn location(&self) -> Location {
        match self {
            Self::Object(inner) => inner.location,
            Self::Code(inner) => inner.location,
            Self::Block(inner) => inner.location,
            Self::Expression(inner) => inner.location(),
            Self::FunctionDefinition(inner) => inner.location,
            Self::VariableDeclaration(inner) => inner.location,
            Self::Assignment(inner) => inner.location,
            Self::IfConditional(inner) => inner.location,
            Self::Switch(inner) => inner.location,
            Self::ForLoop(inner) => inner.location,
            Self::Continue(location) => *location,
            Self::Break(location) => *location,
            Self::Leave(location) => *location,
        }
    }
}
+466
View File
@@ -0,0 +1,466 @@
//! The YUL object.
use std::collections::HashSet;
use inkwell::debug_info::AsDIScope;
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::lexer::token::lexeme::keyword::Keyword;
use crate::lexer::token::lexeme::literal::Literal;
use crate::lexer::token::lexeme::symbol::Symbol;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
use crate::parser::error::Error as ParserError;
use crate::parser::statement::code::Code;
/// The upper-level YUL object, representing the deploy code.
/// Runtime-code objects are distinguished by the `_deployed` name suffix.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct Object {
    /// The location.
    pub location: Location,
    /// The identifier.
    pub identifier: String,
    /// The code.
    pub code: Code,
    /// The optional inner object, representing the runtime code.
    /// Its identifier must be `<identifier>_deployed`.
    pub inner_object: Option<Box<Self>>,
    /// The factory dependency objects, which are represented by nested Yul object. The nested
    /// objects are duplicates of the upper-level objects describing the dependencies, so only
    /// their identifiers are preserved. The identifiers are used to address upper-level objects.
    pub factory_dependencies: HashSet<String>,
}
impl Object {
    /// The element parser.
    ///
    /// Parses `object "<name>" { code { ... } [object ...] [data ...] }`.
    /// An object whose name does not end in `_deployed` may contain one inner
    /// runtime object plus any number of dependency objects and `data`
    /// sections; a runtime object may only contain dependencies and data.
    pub fn parse(lexer: &mut Lexer, initial: Option<Token>) -> Result<Self, Error> {
        let token = crate::parser::take_or_next(initial, lexer)?;
        // `object` keyword.
        let location = match token {
            Token {
                lexeme: Lexeme::Keyword(Keyword::Object),
                location,
                ..
            } => location,
            token => {
                return Err(ParserError::InvalidToken {
                    location: token.location,
                    expected: vec!["object"],
                    found: token.lexeme.to_string(),
                }
                .into());
            }
        };
        // The object name must be a string literal.
        let identifier = match lexer.next()? {
            Token {
                lexeme: Lexeme::Literal(Literal::String(literal)),
                ..
            } => literal.inner,
            token => {
                return Err(ParserError::InvalidToken {
                    location: token.location,
                    expected: vec!["{string}"],
                    found: token.lexeme.to_string(),
                }
                .into());
            }
        };
        // The `_deployed` suffix marks the runtime-code object.
        let is_runtime_code = identifier.ends_with("_deployed");
        match lexer.next()? {
            Token {
                lexeme: Lexeme::Symbol(Symbol::BracketCurlyLeft),
                ..
            } => {}
            token => {
                return Err(ParserError::InvalidToken {
                    location: token.location,
                    expected: vec!["{"],
                    found: token.lexeme.to_string(),
                }
                .into());
            }
        }
        let code = Code::parse(lexer, None)?;
        let mut inner_object = None;
        let mut factory_dependencies = HashSet::new();
        // Only the deploy-code object may carry the runtime object.
        if !is_runtime_code {
            inner_object = match lexer.peek()? {
                Token {
                    lexeme: Lexeme::Keyword(Keyword::Object),
                    ..
                } => {
                    let mut object = Self::parse(lexer, None)?;
                    // The runtime object's name is fixed by convention.
                    if format!("{identifier}_deployed") != object.identifier {
                        return Err(ParserError::InvalidObjectName {
                            location: object.location,
                            expected: format!("{identifier}_deployed"),
                            found: object.identifier,
                        }
                        .into());
                    }
                    // Hoist the inner object's dependencies to this level.
                    factory_dependencies.extend(object.factory_dependencies.drain());
                    Some(Box::new(object))
                }
                _ => None,
            };
            // Skip an optional `data "<id>" <metadata>` section: the `data`
            // keyword, the identifier, and the payload are each one token.
            if let Token {
                lexeme: Lexeme::Identifier(identifier),
                ..
            } = lexer.peek()?
            {
                if identifier.inner.as_str() == "data" {
                    let _data = lexer.next()?;
                    let _identifier = lexer.next()?;
                    let _metadata = lexer.next()?;
                }
            };
        }
        // Consume the remainder of the object: dependency objects and data
        // sections, until the closing `}`.
        loop {
            match lexer.next()? {
                Token {
                    lexeme: Lexeme::Symbol(Symbol::BracketCurlyRight),
                    ..
                } => break,
                token @ Token {
                    lexeme: Lexeme::Keyword(Keyword::Object),
                    ..
                } => {
                    // Dependencies are parsed fully but only their names are kept.
                    let dependency = Self::parse(lexer, Some(token))?;
                    factory_dependencies.insert(dependency.identifier);
                }
                Token {
                    lexeme: Lexeme::Identifier(identifier),
                    ..
                } if identifier.inner.as_str() == "data" => {
                    // `data` was already consumed; skip the identifier and payload.
                    let _identifier = lexer.next()?;
                    let _metadata = lexer.next()?;
                }
                token => {
                    return Err(ParserError::InvalidToken {
                        location: token.location,
                        expected: vec!["object", "}"],
                        found: token.lexeme.to_string(),
                    }
                    .into());
                }
            }
        }
        Ok(Self {
            location,
            identifier,
            code,
            inner_object,
            factory_dependencies,
        })
    }
    /// Get the list of missing deployable libraries.
    pub fn get_missing_libraries(&self) -> HashSet<String> {
        let mut missing_libraries = self.code.get_missing_libraries();
        if let Some(inner_object) = &self.inner_object {
            missing_libraries.extend(inner_object.get_missing_libraries());
        }
        missing_libraries
    }
}
impl<D> revive_llvm_context::PolkaVMWriteLLVM<D> for Object
where
    D: revive_llvm_context::PolkaVMDependency + Clone,
{
    /// Declares the PolkaVM runtime-support functions, the entry point, and
    /// the deploy/runtime code wrappers.
    ///
    /// NOTE(review): despite the name, this method also emits the *bodies* of
    /// the runtime-support functions (`into_llvm` calls below) — confirm this
    /// is intended to happen during the declaration pass.
    fn declare(
        &mut self,
        context: &mut revive_llvm_context::PolkaVMContext<D>,
    ) -> anyhow::Result<()> {
        revive_llvm_context::PolkaVMLoadImmutableDataFunction.declare(context)?;
        revive_llvm_context::PolkaVMStoreImmutableDataFunction.declare(context)?;
        revive_llvm_context::PolkaVMLoadHeapWordFunction.declare(context)?;
        revive_llvm_context::PolkaVMStoreHeapWordFunction.declare(context)?;
        revive_llvm_context::PolkaVMLoadStorageWordFunction.declare(context)?;
        revive_llvm_context::PolkaVMStoreStorageWordFunction.declare(context)?;
        revive_llvm_context::PolkaVMLoadTransientStorageWordFunction.declare(context)?;
        revive_llvm_context::PolkaVMStoreTransientStorageWordFunction.declare(context)?;
        revive_llvm_context::PolkaVMWordToPointerFunction.declare(context)?;
        revive_llvm_context::PolkaVMExitFunction.declare(context)?;
        // Event-log helpers for 0 through 4 indexed topics.
        revive_llvm_context::PolkaVMEventLogFunction::<0>.declare(context)?;
        revive_llvm_context::PolkaVMEventLogFunction::<1>.declare(context)?;
        revive_llvm_context::PolkaVMEventLogFunction::<2>.declare(context)?;
        revive_llvm_context::PolkaVMEventLogFunction::<3>.declare(context)?;
        revive_llvm_context::PolkaVMEventLogFunction::<4>.declare(context)?;
        revive_llvm_context::PolkaVMDivisionFunction.declare(context)?;
        revive_llvm_context::PolkaVMSignedDivisionFunction.declare(context)?;
        revive_llvm_context::PolkaVMRemainderFunction.declare(context)?;
        revive_llvm_context::PolkaVMSignedRemainderFunction.declare(context)?;
        revive_llvm_context::PolkaVMSbrkFunction.declare(context)?;
        let mut entry = revive_llvm_context::PolkaVMEntryFunction::default();
        entry.declare(context)?;
        // Declare the deploy/runtime wrappers with dummy bodies; the real
        // bodies are supplied later in `into_llvm`.
        revive_llvm_context::PolkaVMDeployCodeFunction::new(
            revive_llvm_context::PolkaVMDummyLLVMWritable::default(),
        )
        .declare(context)?;
        revive_llvm_context::PolkaVMRuntimeCodeFunction::new(
            revive_llvm_context::PolkaVMDummyLLVMWritable::default(),
        )
        .declare(context)?;
        // Tag the three well-known functions with default Yul metadata.
        for name in [
            revive_llvm_context::PolkaVMFunctionDeployCode,
            revive_llvm_context::PolkaVMFunctionRuntimeCode,
            revive_llvm_context::PolkaVMFunctionEntry,
        ]
        .into_iter()
        {
            context
                .get_function(name)
                .expect("Always exists")
                .borrow_mut()
                .set_yul_data(revive_llvm_context::PolkaVMFunctionYulData::default());
        }
        entry.into_llvm(context)?;
        revive_llvm_context::PolkaVMLoadImmutableDataFunction.into_llvm(context)?;
        revive_llvm_context::PolkaVMStoreImmutableDataFunction.into_llvm(context)?;
        revive_llvm_context::PolkaVMLoadHeapWordFunction.into_llvm(context)?;
        revive_llvm_context::PolkaVMStoreHeapWordFunction.into_llvm(context)?;
        revive_llvm_context::PolkaVMLoadStorageWordFunction.into_llvm(context)?;
        revive_llvm_context::PolkaVMStoreStorageWordFunction.into_llvm(context)?;
        revive_llvm_context::PolkaVMLoadTransientStorageWordFunction.into_llvm(context)?;
        revive_llvm_context::PolkaVMStoreTransientStorageWordFunction.into_llvm(context)?;
        revive_llvm_context::PolkaVMWordToPointerFunction.into_llvm(context)?;
        revive_llvm_context::PolkaVMExitFunction.into_llvm(context)?;
        revive_llvm_context::PolkaVMEventLogFunction::<0>.into_llvm(context)?;
        revive_llvm_context::PolkaVMEventLogFunction::<1>.into_llvm(context)?;
        revive_llvm_context::PolkaVMEventLogFunction::<2>.into_llvm(context)?;
        revive_llvm_context::PolkaVMEventLogFunction::<3>.into_llvm(context)?;
        revive_llvm_context::PolkaVMEventLogFunction::<4>.into_llvm(context)?;
        revive_llvm_context::PolkaVMDivisionFunction.into_llvm(context)?;
        revive_llvm_context::PolkaVMSignedDivisionFunction.into_llvm(context)?;
        revive_llvm_context::PolkaVMRemainderFunction.into_llvm(context)?;
        revive_llvm_context::PolkaVMSignedRemainderFunction.into_llvm(context)?;
        revive_llvm_context::PolkaVMSbrkFunction.into_llvm(context)?;
        Ok(())
    }
    /// Translates the object: its code becomes the deploy- or runtime-code
    /// function (chosen by the `_deployed` suffix), then the inner runtime
    /// object is translated recursively.
    fn into_llvm(self, context: &mut revive_llvm_context::PolkaVMContext<D>) -> anyhow::Result<()> {
        // Each object gets its own debug-info namespace, nested in the parent scope.
        if let Some(debug_info) = context.debug_info() {
            let di_builder = debug_info.builder();
            let object_name: &str = self.identifier.as_str();
            let di_parent_scope = debug_info
                .top_scope()
                .expect("expected an existing debug-info scope");
            let object_scope = di_builder.create_namespace(di_parent_scope, object_name, true);
            context.push_debug_scope(object_scope.as_debug_info_scope());
        }
        if self.identifier.ends_with("_deployed") {
            revive_llvm_context::PolkaVMRuntimeCodeFunction::new(self.code).into_llvm(context)?;
        } else {
            revive_llvm_context::PolkaVMDeployCodeFunction::new(self.code).into_llvm(context)?;
        }
        context.set_debug_location(self.location.line, 0, None)?;
        if let Some(object) = self.inner_object {
            object.into_llvm(context)?;
        }
        context.set_debug_location(self.location.line, 0, None)?;
        // NOTE(review): the scope is pushed only when debug info is enabled,
        // but popped unconditionally — presumably pop is a no-op without
        // debug info; confirm in PolkaVMContext::pop_debug_scope.
        context.pop_debug_scope();
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use crate::lexer::token::location::Location;
    use crate::lexer::Lexer;
    use crate::parser::error::Error;
    use crate::parser::statement::object::Object;
    // The top-level element must start with the `object` keyword.
    #[test]
    fn error_invalid_token_object() {
        let input = r#"
class "Test" {
    code {
        {
            return(0, 0)
        }
    }
    object "Test_deployed" {
        code {
            {
                return(0, 0)
            }
        }
    }
}
    "#;
        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::InvalidToken {
                location: Location::new(2, 1),
                expected: vec!["object"],
                found: "class".to_owned(),
            }
            .into())
        );
    }
    // The object name must be a string literal, not a number.
    #[test]
    fn error_invalid_token_identifier() {
        let input = r#"
object 256 {
    code {
        {
            return(0, 0)
        }
    }
    object "Test_deployed" {
        code {
            {
                return(0, 0)
            }
        }
    }
}
    "#;
        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::InvalidToken {
                location: Location::new(2, 8),
                expected: vec!["{string}"],
                found: "256".to_owned(),
            }
            .into())
        );
    }
    // The object body must open with `{`.
    #[test]
    fn error_invalid_token_bracket_curly_left() {
        let input = r#"
object "Test" (
    code {
        {
            return(0, 0)
        }
    }
    object "Test_deployed" {
        code {
            {
                return(0, 0)
            }
        }
    }
}
    "#;
        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::InvalidToken {
                location: Location::new(2, 15),
                expected: vec!["{"],
                found: "(".to_owned(),
            }
            .into())
        );
    }
    // After the code section only `object`, `data`, or `}` may follow.
    #[test]
    fn error_invalid_token_object_inner() {
        let input = r#"
object "Test" {
    code {
        {
            return(0, 0)
        }
    }
    class "Test_deployed" {
        code {
            {
                return(0, 0)
            }
        }
    }
}
    "#;
        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::InvalidToken {
                location: Location::new(8, 5),
                expected: vec!["object", "}"],
                found: "class".to_owned(),
            }
            .into())
        );
    }
    // The inner object's name must be `<outer>_deployed`.
    #[test]
    fn error_invalid_object_name() {
        let input = r#"
object "Test" {
    code {
        {
            return(0, 0)
        }
    }
    object "Invalid" {
        code {
            {
                return(0, 0)
            }
        }
    }
}
    "#;
        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::InvalidObjectName {
                location: Location::new(8, 5),
                expected: "Test_deployed".to_owned(),
                found: "Invalid".to_owned(),
            }
            .into())
        );
    }
}
@@ -0,0 +1,105 @@
//! The switch statement case.
use std::collections::HashSet;
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
use crate::parser::error::Error as ParserError;
use crate::parser::statement::block::Block;
use crate::parser::statement::expression::literal::Literal;
/// The Yul switch statement case.
/// One `case <literal> { ... }` arm of a `switch` statement.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct Case {
    /// The location.
    pub location: Location,
    /// The matched constant.
    pub literal: Literal,
    /// The case block.
    pub block: Block,
}
impl Case {
    /// Parses one `case` arm: a literal to match followed by its block.
    /// The `case` keyword itself has already been consumed by the caller.
    pub fn parse(lexer: &mut Lexer, initial: Option<Token>) -> Result<Self, Error> {
        let next = crate::parser::take_or_next(initial, lexer)?;
        // Anything other than a literal at this position is a syntax error.
        if !matches!(next.lexeme, Lexeme::Literal(_)) {
            return Err(ParserError::InvalidToken {
                location: next.location,
                expected: vec!["{literal}"],
                found: next.lexeme.to_string(),
            }
            .into());
        }
        let case_location = next.location;
        // The literal parser re-consumes the token we just inspected.
        let matched_literal = Literal::parse(lexer, Some(next))?;
        let case_block = Block::parse(lexer, None)?;
        Ok(Self {
            location: case_location,
            literal: matched_literal,
            block: case_block,
        })
    }
    /// Collects the deployable libraries referenced by the case block
    /// that are not yet provided.
    pub fn get_missing_libraries(&self) -> HashSet<String> {
        self.block.get_missing_libraries()
    }
}
#[cfg(test)]
mod tests {
    use crate::lexer::token::location::Location;
    use crate::lexer::Lexer;
    use crate::parser::error::Error;
    use crate::parser::statement::object::Object;
    // A `case` arm must be introduced by a literal, not an identifier.
    #[test]
    fn error_invalid_token_literal() {
        let input = r#"
object "Test" {
    code {
        {
            return(0, 0)
        }
    }
    object "Test_deployed" {
        code {
            {
                switch 42
                    case x {}
                    default {}
            }
        }
    }
}
}
    "#;
        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::InvalidToken {
                location: Location::new(12, 26),
                expected: vec!["{literal}"],
                found: "x".to_owned(),
            }
            .into())
        );
    }
}
@@ -0,0 +1,219 @@
//! The switch statement.
pub mod case;
use std::collections::HashSet;
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::lexer::token::lexeme::keyword::Keyword;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
use crate::parser::error::Error as ParserError;
use crate::parser::statement::block::Block;
use crate::parser::statement::expression::Expression;
use self::case::Case;
/// The Yul switch statement.
///
/// Produced by [`Switch::parse`]; a valid switch has at least one `case`
/// arm or a `default` block.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct Switch {
    /// The location.
    pub location: Location,
    /// The expression being matched.
    pub expression: Expression,
    /// The non-default cases.
    pub cases: Vec<Case>,
    /// The optional default case, if `cases` do not cover all possible values.
    pub default: Option<Block>,
}
/// The parsing state.
///
/// [`Switch::parse`] runs a small state machine over these states while
/// consuming `case` and `default` arms.
pub enum State {
    /// After match expression.
    CaseOrDefaultKeyword,
    /// After `case`.
    CaseBlock,
    /// After `default`.
    DefaultBlock,
}
impl Switch {
    /// The element parser.
    ///
    /// Parses the scrutinee expression, then any number of `case` arms,
    /// optionally terminated by a single `default` block. A switch with
    /// neither a `case` nor a `default` is rejected with `InvalidToken`.
    pub fn parse(lexer: &mut Lexer, initial: Option<Token>) -> Result<Self, Error> {
        let mut token = crate::parser::take_or_next(initial, lexer)?;
        let location = token.location;
        let mut state = State::CaseOrDefaultKeyword;
        // The same token also begins the scrutinee expression, hence the clone.
        let expression = Expression::parse(lexer, Some(token.clone()))?;
        let mut cases = Vec::new();
        let mut default = None;
        loop {
            match state {
                // Peek first: a token that is neither `case` nor `default`
                // ends the switch and must stay available in the lexer.
                State::CaseOrDefaultKeyword => match lexer.peek()? {
                    _token @ Token {
                        lexeme: Lexeme::Keyword(Keyword::Case),
                        ..
                    } => {
                        token = _token;
                        state = State::CaseBlock;
                    }
                    _token @ Token {
                        lexeme: Lexeme::Keyword(Keyword::Default),
                        ..
                    } => {
                        token = _token;
                        state = State::DefaultBlock;
                    }
                    _token => {
                        token = _token;
                        break;
                    }
                },
                State::CaseBlock => {
                    // Consume the peeked `case` keyword, then parse the arm.
                    lexer.next()?;
                    cases.push(Case::parse(lexer, None)?);
                    state = State::CaseOrDefaultKeyword;
                }
                State::DefaultBlock => {
                    // Consume the peeked `default` keyword; the default block
                    // always terminates the switch.
                    lexer.next()?;
                    default = Some(Block::parse(lexer, None)?);
                    break;
                }
            }
        }
        // A bare `switch <expression>` with no arms at all is a syntax error.
        if cases.is_empty() && default.is_none() {
            return Err(ParserError::InvalidToken {
                location: token.location,
                expected: vec!["case", "default"],
                found: token.lexeme.to_string(),
            }
            .into());
        }
        Ok(Self {
            location,
            expression,
            cases,
            default,
        })
    }
    /// Get the list of missing deployable libraries.
    ///
    /// The result is the union over all `case` blocks and the `default`
    /// block, if present.
    pub fn get_missing_libraries(&self) -> HashSet<String> {
        let mut libraries = HashSet::new();
        for case in self.cases.iter() {
            libraries.extend(case.get_missing_libraries());
        }
        if let Some(default) = &self.default {
            libraries.extend(default.get_missing_libraries());
        }
        libraries
    }
}
impl<D> revive_llvm_context::PolkaVMWriteLLVM<D> for Switch
where
    D: revive_llvm_context::PolkaVMDependency + Clone,
{
    /// Translates the switch into an LLVM IR `switch` instruction.
    fn into_llvm(self, context: &mut revive_llvm_context::PolkaVMContext<D>) -> anyhow::Result<()> {
        let scrutinee = self.expression.into_llvm(context)?;
        // Without any `case` arms the switch degenerates into the default
        // block (or into nothing at all).
        if self.cases.is_empty() {
            if let Some(block) = self.default {
                block.into_llvm(context)?;
            }
            return Ok(());
        }
        // Remember the dispatching block; the `switch` instruction itself is
        // only emitted there after all target blocks have been generated.
        let current_block = context.basic_block();
        let join_block = context.append_basic_block("switch_join_block");
        let mut branches = Vec::with_capacity(self.cases.len());
        // Emit one basic block per case; every case falls through to the
        // join block after its body.
        for (index, case) in self.cases.into_iter().enumerate() {
            let constant = case.literal.into_llvm(context)?.access(context)?;
            let expression_block = context
                .append_basic_block(format!("switch_case_branch_{}_block", index + 1).as_str());
            context.set_basic_block(expression_block);
            case.block.into_llvm(context)?;
            context.build_unconditional_branch(join_block);
            branches.push((constant.into_int_value(), expression_block));
        }
        // A missing `default` branches straight to the join block.
        let default_block = match self.default {
            Some(default) => {
                let default_block = context.append_basic_block("switch_default_block");
                context.set_basic_block(default_block);
                default.into_llvm(context)?;
                context.build_unconditional_branch(join_block);
                default_block
            }
            None => join_block,
        };
        // Emit the dispatch in the original block, then continue in the join.
        context.set_basic_block(current_block);
        context.builder().build_switch(
            scrutinee
                .expect("Always exists")
                .access(context)?
                .into_int_value(),
            default_block,
            branches.as_slice(),
        )?;
        context.set_basic_block(join_block);
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use crate::lexer::token::location::Location;
    use crate::lexer::Lexer;
    use crate::parser::error::Error;
    use crate::parser::statement::object::Object;
    /// A switch arm must start with `case` or `default`; the identifier
    /// `branch` here must be rejected with an `InvalidToken` error.
    #[test]
    fn error_invalid_token_case() {
        let input = r#"
object "Test" {
code {
{
return(0, 0)
}
}
object "Test_deployed" {
code {
{
switch 42
branch x {}
default {}
}
}
}
}
}
"#;
        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::InvalidToken {
                location: Location::new(12, 21),
                expected: vec!["case", "default"],
                found: "branch".to_owned(),
            }
            .into())
        );
    }
}
@@ -0,0 +1,259 @@
//! The variable declaration statement.
use std::collections::HashSet;
use inkwell::types::BasicType;
use inkwell::values::BasicValue;
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::lexer::token::lexeme::symbol::Symbol;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::location::Location;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
use crate::parser::error::Error as ParserError;
use crate::parser::identifier::Identifier;
use crate::parser::statement::expression::function_call::name::Name as FunctionName;
use crate::parser::statement::expression::Expression;
/// The Yul variable declaration statement.
///
/// A `let` statement with one or more bindings and an optional
/// initializing expression.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct VariableDeclaration {
    /// The location.
    pub location: Location,
    /// The variable bindings list.
    pub bindings: Vec<Identifier>,
    /// The variable initializing expression.
    /// `None` when the variables are only declared without an initializer.
    pub expression: Option<Expression>,
}
impl VariableDeclaration {
    /// The element parser.
    ///
    /// Returns the declaration together with the look-ahead token that
    /// terminated the binding list whenever no initializer follows.
    pub fn parse(
        lexer: &mut Lexer,
        initial: Option<Token>,
    ) -> Result<(Self, Option<Token>), Error> {
        let token = crate::parser::take_or_next(initial, lexer)?;
        let location = token.location;
        let (bindings, next) = Identifier::parse_typed_list(lexer, Some(token))?;
        // Built-in function names must not be shadowed by variable bindings.
        if let Some(reserved) = bindings.iter().find(|binding| {
            !matches!(
                FunctionName::from(binding.inner.as_str()),
                FunctionName::UserDefined(_)
            )
        }) {
            return Err(ParserError::ReservedIdentifier {
                location: reserved.location,
                identifier: reserved.inner.to_owned(),
            }
            .into());
        }
        let token = crate::parser::take_or_next(next, lexer)?;
        // Without `:=` the declaration has no initializer; hand the
        // look-ahead token back to the caller.
        if !matches!(token.lexeme, Lexeme::Symbol(Symbol::Assignment)) {
            return Ok((
                Self {
                    location,
                    bindings,
                    expression: None,
                },
                Some(token),
            ));
        }
        let expression = Expression::parse(lexer, None)?;
        Ok((
            Self {
                location,
                bindings,
                expression: Some(expression),
            },
            None,
        ))
    }
    /// Get the list of missing deployable libraries.
    pub fn get_missing_libraries(&self) -> HashSet<String> {
        match &self.expression {
            Some(expression) => expression.get_missing_libraries(),
            None => HashSet::new(),
        }
    }
}
impl<D> revive_llvm_context::PolkaVMWriteLLVM<D> for VariableDeclaration
where
    D: revive_llvm_context::PolkaVMDependency + Clone,
{
    /// Allocates a stack slot per binding and stores the initializer value,
    /// or zero when no initializer expression is present.
    fn into_llvm<'ctx>(
        mut self,
        context: &mut revive_llvm_context::PolkaVMContext<'ctx, D>,
    ) -> anyhow::Result<()> {
        // Fast path: a single binding is stored directly, without the
        // intermediate structure used for multi-binding declarations below.
        if self.bindings.len() == 1 {
            let identifier = self.bindings.remove(0);
            context.set_debug_location(self.location.line, 0, None)?;
            let identifier_type = identifier.r#type.clone().unwrap_or_default();
            let r#type = identifier_type.into_llvm(context);
            let pointer = context.build_alloca(r#type, identifier.inner.as_str());
            context
                .current_function()
                .borrow_mut()
                .insert_stack_pointer(identifier.inner.clone(), pointer);
            let value = if let Some(expression) = self.expression {
                match expression.into_llvm(context)? {
                    Some(mut value) => {
                        // If the initializer carries a constant, record it in
                        // the function's Yul data under the variable's name.
                        if let Some(constant) = value.constant.take() {
                            context
                                .current_function()
                                .borrow_mut()
                                .yul_mut()
                                .insert_constant(identifier.inner.clone(), constant);
                        }
                        value.access(context)?
                    }
                    // An initializer that produces no value falls back to zero.
                    None => r#type.const_zero().as_basic_value_enum(),
                }
            } else {
                // No initializer: the variable starts out as zero.
                r#type.const_zero().as_basic_value_enum()
            };
            context.build_store(pointer, value)?;
            return Ok(());
        }
        // Multiple bindings: allocate and zero-initialize one slot per
        // binding before evaluating the initializer expression.
        for (index, binding) in self.bindings.iter().enumerate() {
            context.set_debug_location(self.location.line, 0, None)?;
            let yul_type = binding
                .r#type
                .to_owned()
                .unwrap_or_default()
                .into_llvm(context);
            let pointer = context.build_alloca(
                yul_type.as_basic_type_enum(),
                format!("binding_{index}_pointer").as_str(),
            );
            context.build_store(pointer, yul_type.const_zero())?;
            context
                .current_function()
                .borrow_mut()
                .insert_stack_pointer(binding.inner.to_owned(), pointer);
        }
        // Without an initializer the zero stores above are the final values.
        let expression = match self.expression.take() {
            Some(expression) => expression,
            None => return Ok(()),
        };
        let location = expression.location();
        let expression = match expression.into_llvm(context)? {
            Some(expression) => expression,
            None => return Ok(()),
        };
        // The initializer of a multi-binding declaration yields a structure
        // with one field per binding.
        let llvm_type = context.structure_type(
            self.bindings
                .iter()
                .map(|binding| {
                    binding
                        .r#type
                        .to_owned()
                        .unwrap_or_default()
                        .into_llvm(context)
                        .as_basic_type_enum()
                })
                .collect::<Vec<inkwell::types::BasicTypeEnum<'ctx>>>()
                .as_slice(),
        );
        let value = expression.access(context)?;
        // A type mismatch here means the initializer returned a different
        // number (or shape) of values than there are bindings.
        if value.get_type() != llvm_type.as_basic_type_enum() {
            anyhow::bail!(
                "{} Assignment to {:?} received an invalid number of arguments",
                location,
                self.bindings
            );
        }
        // Spill the structure to a temporary, then copy each field into the
        // stack slot of the corresponding binding.
        let pointer = context.build_alloca(llvm_type, "bindings_pointer");
        context.build_store(pointer, value)?;
        for (index, binding) in self.bindings.into_iter().enumerate() {
            let pointer = context.build_gep(
                pointer,
                &[
                    context.word_const(0),
                    context
                        .integer_type(revive_common::BIT_LENGTH_X32)
                        .const_int(index as u64, false),
                ],
                binding.r#type.unwrap_or_default().into_llvm(context),
                format!("binding_{index}_gep_pointer").as_str(),
            );
            let value = context.build_load(pointer, format!("binding_{index}_value").as_str())?;
            let pointer = context
                .current_function()
                .borrow_mut()
                .get_stack_pointer(binding.inner.as_str())
                .ok_or_else(|| {
                    anyhow::anyhow!(
                        "{} Assignment to an undeclared variable `{}`",
                        binding.location,
                        binding.inner
                    )
                })?;
            context.build_store(pointer, value)?;
        }
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use crate::lexer::token::location::Location;
    use crate::lexer::Lexer;
    use crate::parser::error::Error;
    use crate::parser::statement::object::Object;
    /// `basefee` is a built-in function name, so binding it in a `let`
    /// declaration must fail with a `ReservedIdentifier` error.
    #[test]
    fn error_reserved_identifier() {
        let input = r#"
object "Test" {
code {
{
return(0, 0)
}
}
object "Test_deployed" {
code {
{
let basefee := 42
return(0, 0)
}
}
}
}
"#;
        let mut lexer = Lexer::new(input.to_owned());
        let result = Object::parse(&mut lexer, None);
        assert_eq!(
            result,
            Err(Error::ReservedIdentifier {
                location: Location::new(11, 21),
                identifier: "basefee".to_owned()
            }
            .into())
        );
    }
}
+79
View File
@@ -0,0 +1,79 @@
//! The YUL source code type.
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::lexer::token::lexeme::keyword::Keyword;
use crate::lexer::token::lexeme::Lexeme;
use crate::lexer::token::Token;
use crate::lexer::Lexer;
use crate::parser::error::Error as ParserError;
/// The YUL source code type.
/// The type is not currently in use, so all values have the `uint256` type by default.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Type {
    /// The `bool` type.
    Bool,
    /// The `int{N}` type, where `N` is the bit length.
    Int(usize),
    /// The `uint{N}` type, where `N` is the bit length.
    UInt(usize),
    /// The custom user-defined type, stored by its identifier.
    Custom(String),
}
impl Default for Type {
    /// The default type is the word-sized unsigned integer, i.e. `uint256`.
    fn default() -> Self {
        Self::UInt(revive_common::BIT_LENGTH_WORD)
    }
}
impl Type {
    /// The element parser.
    ///
    /// Accepts the `bool`, `int{N}`, and `uint{N}` keywords as well as a
    /// plain identifier for user-defined types.
    pub fn parse(lexer: &mut Lexer, initial: Option<Token>) -> Result<Self, Error> {
        let Token {
            lexeme, location, ..
        } = crate::parser::take_or_next(initial, lexer)?;
        match lexeme {
            Lexeme::Keyword(Keyword::Bool) => Ok(Self::Bool),
            Lexeme::Keyword(Keyword::Int(bitlength)) => Ok(Self::Int(bitlength)),
            Lexeme::Keyword(Keyword::Uint(bitlength)) => Ok(Self::UInt(bitlength)),
            Lexeme::Identifier(identifier) => Ok(Self::Custom(identifier.inner)),
            lexeme => Err(ParserError::InvalidToken {
                location,
                expected: vec!["{type}"],
                found: lexeme.to_string(),
            }
            .into()),
        }
    }
    /// Converts the type into its LLVM.
    pub fn into_llvm<'ctx, D>(
        self,
        context: &revive_llvm_context::PolkaVMContext<'ctx, D>,
    ) -> inkwell::types::IntType<'ctx>
    where
        D: revive_llvm_context::PolkaVMDependency + Clone,
    {
        let bit_length = match self {
            Self::Bool => revive_common::BIT_LENGTH_BOOLEAN,
            Self::Int(bit_length) | Self::UInt(bit_length) => bit_length,
            // Custom types carry no width information here; fall back to the
            // full word type.
            Self::Custom(_) => return context.word_type(),
        };
        context.integer_type(bit_length)
    }
}