Emerge Yul recompiler (#1)

Provide a modified (and incomplete) version of ZKSync zksolc that can compile the most basic contracts
This commit is contained in:
Cyrill Leutwiler
2024-03-12 12:06:02 +01:00
committed by GitHub
parent d238d8f39e
commit cffa14a4d2
247 changed files with 35357 additions and 4905 deletions
@@ -0,0 +1,38 @@
//!
//! The comment lexeme.
//!
pub mod multi_line;
pub mod single_line;
use crate::yul::lexer::token::Token;
use self::multi_line::Comment as MultiLineComment;
use self::single_line::Comment as SingleLineComment;
///
/// A comment lexeme of either supported flavor.
///
#[derive(Debug, Clone, PartialEq, Eq)]
// NOTE(review): presumably required because `Comment::parse` returns plain tokens and never
// constructs these variants - confirm before removing.
#[allow(dead_code)]
pub enum Comment {
    /// A comment introduced by the single-line start symbol.
    SingleLine(SingleLineComment),
    /// A comment introduced by the multi-line start symbol.
    MultiLine(MultiLineComment),
}
impl Comment {
    ///
    /// Tries to parse a comment located at the very beginning of `input`.
    ///
    /// The single-line start symbol is checked first, then the multi-line one; the matching
    /// parser produces the resulting token. Returns `None` if `input` starts with neither.
    ///
    pub fn parse(input: &str) -> Option<Token> {
        if input.starts_with(SingleLineComment::START) {
            return Some(SingleLineComment::parse(input));
        }
        if input.starts_with(MultiLineComment::START) {
            return Some(MultiLineComment::parse(input));
        }
        None
    }
}
@@ -0,0 +1,37 @@
//!
//! The multi-line comment lexeme.
//!
use crate::yul::lexer::token::lexeme::Lexeme;
use crate::yul::lexer::token::location::Location;
use crate::yul::lexer::token::Token;
///
/// The multi-line comment lexeme.
///
/// A field-less type that only hosts the delimiters and the parsing routine.
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Comment {}
impl Comment {
    /// The start symbol.
    pub const START: &'static str = "/*";

    /// The end symbol.
    pub const END: &'static str = "*/";

    ///
    /// Parses the multi-line comment at the beginning of `input`, returning it as a token.
    ///
    /// The token location is built from the number of line breaks inside the comment and the
    /// number of bytes between the last line break (or the beginning of `input`) and the end
    /// symbol. The token length is the number of bytes consumed, including the end symbol.
    ///
    /// The end symbol is only looked for after the start symbol, so `/*/` does not count as a
    /// complete comment. If the comment is never closed, it extends to the end of `input`, and
    /// the reported length never exceeds `input.len()`.
    ///
    pub fn parse(input: &str) -> Token {
        // Skip the start symbol, so that its `*` cannot be mistaken for the beginning of the
        // end symbol.
        let body_start = if input.starts_with(Self::START) {
            Self::START.len()
        } else {
            0
        };

        let (end_position, length) = match input[body_start..].find(Self::END) {
            Some(offset) => {
                let end_position = body_start + offset;
                (end_position, end_position + Self::END.len())
            }
            // Unterminated comment: consume the rest of the input, but not more than that.
            None => (input.len(), input.len()),
        };

        let body = &input[..end_position];
        let lines = body.matches('\n').count();
        let columns = match body.rfind('\n') {
            Some(new_line) => end_position - (new_line + 1),
            None => end_position,
        };

        Token::new(Location::new(lines, columns), Lexeme::Comment, length)
    }
}
@@ -0,0 +1,30 @@
//!
//! The single-line comment lexeme.
//!
use crate::yul::lexer::token::lexeme::Lexeme;
use crate::yul::lexer::token::location::Location;
use crate::yul::lexer::token::Token;
///
/// The single-line comment lexeme.
///
/// A field-less type that only hosts the delimiters and the parsing routine.
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Comment {}
impl Comment {
    /// The start symbol.
    pub const START: &'static str = "//";

    /// The end symbol.
    pub const END: &'static str = "\n";

    ///
    /// Parses the single-line comment at the beginning of `input`, returning it as a token.
    ///
    /// The token length covers everything up to and including the terminating line break.
    /// If the comment runs until the end of `input` without a line break, the length is
    /// exactly `input.len()`, so it never points past the end of the input.
    ///
    pub fn parse(input: &str) -> Token {
        let length = match input.find(Self::END) {
            Some(end_position) => end_position + Self::END.len(),
            // No line break: the comment is the last thing in the input.
            None => input.len(),
        };

        Token::new(Location::new(1, 1), Lexeme::Comment, length)
    }
}
@@ -0,0 +1,80 @@
//!
//! The identifier lexeme.
//!
use crate::yul::lexer::token::lexeme::keyword::Keyword;
use crate::yul::lexer::token::lexeme::Lexeme;
use crate::yul::lexer::token::location::Location;
use crate::yul::lexer::token::Token;
///
/// The identifier lexeme.
///
/// Wraps the identifier text exactly as it was sliced from the source code.
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Identifier {
    /// The identifier text.
    pub inner: String,
}
impl Identifier {
    ///
    /// Wraps the given text into an identifier.
    ///
    pub fn new(inner: String) -> Self {
        Self { inner }
    }

    ///
    /// Parses the identifier at the beginning of `input`, returning it as a token.
    ///
    /// Returns `None` if the first character cannot begin an identifier. If the scanned word
    /// is accepted by `Keyword::parse`, the keyword token is returned instead of an identifier
    /// token.
    ///
    pub fn parse(input: &str) -> Option<Token> {
        let first = input.chars().next()?;
        if !Self::can_begin(first) {
            return None;
        }

        let word = match input.find(Self::cannot_continue) {
            Some(end) => &input[..end],
            None => input,
        };
        let inner = word.to_string();
        let length = inner.len();

        match Keyword::parse(inner.as_str()) {
            Some(keyword_token) => Some(keyword_token),
            None => {
                let lexeme = Lexeme::Identifier(Self::new(inner));
                Some(Token::new(Location::new(0, length), lexeme, length))
            }
        }
    }

    ///
    /// Tells whether `character` is allowed as the first character of an identifier:
    /// an alphabetic character, `_`, or `$`.
    ///
    pub fn can_begin(character: char) -> bool {
        matches!(character, '_' | '$') || character.is_alphabetic()
    }

    ///
    /// Tells whether `character` is allowed after the first character of an identifier:
    /// anything that can begin one, a numeric character, or `.`.
    ///
    pub fn can_continue(character: char) -> bool {
        character == '.' || character.is_numeric() || Self::can_begin(character)
    }

    ///
    /// The negation of `can_continue`, usable directly as a search predicate.
    ///
    pub fn cannot_continue(character: char) -> bool {
        !Self::can_continue(character)
    }
}
/// Prints the identifier text as is.
impl std::fmt::Display for Identifier {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.inner.as_str())
    }
}
@@ -0,0 +1,158 @@
//!
//! The keyword lexeme.
//!
use crate::yul::lexer::token::lexeme::literal::boolean::Boolean as BooleanLiteral;
use crate::yul::lexer::token::lexeme::literal::Literal;
use crate::yul::lexer::token::lexeme::Lexeme;
use crate::yul::lexer::token::location::Location;
use crate::yul::lexer::token::Token;
///
/// The keyword lexeme.
///
/// The sized integer type keywords carry their bit length as written in the source code.
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Keyword {
    /// `object`
    Object,
    /// `code`
    Code,
    /// `function`
    Function,
    /// `let`
    Let,
    /// `if`
    If,
    /// `switch`
    Switch,
    /// `case`
    Case,
    /// `default`
    Default,
    /// `for`
    For,
    /// `break`
    Break,
    /// `continue`
    Continue,
    /// `leave`
    Leave,
    /// `true`
    True,
    /// `false`
    False,
    /// `bool`
    Bool,
    /// `int{N}`, where `N` is the bit length.
    Int(usize),
    /// `uint{N}`, where `N` is the bit length.
    Uint(usize),
}
impl Keyword {
    ///
    /// Parses the keyword, returning it as a token.
    ///
    /// The `true` and `false` keywords are turned into boolean literal lexemes. Returns `None`
    /// if no keyword is recognized, or if the canonical spelling of the recognized keyword is
    /// not as long as the whole `input` (for example, when the keyword is only a prefix of it).
    ///
    pub fn parse(input: &str) -> Option<Token> {
        let lexeme = BooleanLiteral::try_from(Self::parse_keyword(input)?)
            .map(|literal| Lexeme::Literal(Literal::Boolean(literal)))
            .unwrap_or_else(Lexeme::Keyword);

        let length = lexeme.to_string().len();
        (length == input.len()).then(|| Token::new(Location::new(0, length), lexeme, length))
    }

    ///
    /// Recognizes the keyword formed by the leading alphanumeric run of `input`.
    ///
    fn parse_keyword(input: &str) -> Option<Self> {
        if !input.starts_with(Self::can_begin) {
            return None;
        }

        let word = match input.find(Self::cannot_continue) {
            Some(end) => &input[..end],
            None => input,
        };

        // The sized integer types: the prefix followed by a number that fits into `usize`.
        let bitlength_after = |prefix: &str| {
            word.strip_prefix(prefix)
                .and_then(|digits| digits.parse::<usize>().ok())
        };
        if let Some(bitlength) = bitlength_after("int") {
            return Some(Self::Int(bitlength));
        }
        if let Some(bitlength) = bitlength_after("uint") {
            return Some(Self::Uint(bitlength));
        }

        match word {
            "object" => Some(Self::Object),
            "code" => Some(Self::Code),
            "function" => Some(Self::Function),
            "let" => Some(Self::Let),
            "if" => Some(Self::If),
            "switch" => Some(Self::Switch),
            "case" => Some(Self::Case),
            "default" => Some(Self::Default),
            "for" => Some(Self::For),
            "break" => Some(Self::Break),
            "continue" => Some(Self::Continue),
            "leave" => Some(Self::Leave),
            "true" => Some(Self::True),
            "false" => Some(Self::False),
            "bool" => Some(Self::Bool),
            _ => None,
        }
    }

    ///
    /// Tells whether `character` is allowed as the first character of a keyword.
    ///
    pub fn can_begin(character: char) -> bool {
        character.is_alphabetic()
    }

    ///
    /// Tells whether `character` is allowed after the first character of a keyword.
    ///
    pub fn can_continue(character: char) -> bool {
        character.is_numeric() || Self::can_begin(character)
    }

    ///
    /// The negation of `can_continue`, usable directly as a search predicate.
    ///
    pub fn cannot_continue(character: char) -> bool {
        !Self::can_continue(character)
    }
}
/// Prints the keyword in its canonical source code spelling.
impl std::fmt::Display for Keyword {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = match self {
            // The sized types are the only ones that need actual formatting.
            Self::Int(bitlength) => return write!(f, "int{bitlength}"),
            Self::Uint(bitlength) => return write!(f, "uint{bitlength}"),
            Self::Object => "object",
            Self::Code => "code",
            Self::Function => "function",
            Self::Let => "let",
            Self::If => "if",
            Self::Switch => "switch",
            Self::Case => "case",
            Self::Default => "default",
            Self::For => "for",
            Self::Break => "break",
            Self::Continue => "continue",
            Self::Leave => "leave",
            Self::True => "true",
            Self::False => "false",
            Self::Bool => "bool",
        };
        f.write_str(text)
    }
}
@@ -0,0 +1,66 @@
//!
//! The boolean literal lexeme.
//!
use serde::Deserialize;
use serde::Serialize;
use crate::yul::lexer::token::lexeme::keyword::Keyword;
///
/// The boolean literal lexeme.
///
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Boolean {
    /// The value of the `false` keyword.
    False,
    /// The value of the `true` keyword.
    True,
}
impl Boolean {
///
/// Creates a `false` value.
///
pub fn r#false() -> Self {
Self::False
}
///
/// Creates a `true` value.
///
pub fn r#true() -> Self {
Self::True
}
}
/// Converts the `true` and `false` keywords into literals, handing any other keyword back
/// to the caller unchanged as the error.
impl TryFrom<Keyword> for Boolean {
    type Error = Keyword;

    fn try_from(keyword: Keyword) -> Result<Self, Self::Error> {
        match keyword {
            Keyword::True => Ok(Self::True),
            Keyword::False => Ok(Self::False),
            other => Err(other),
        }
    }
}
/// Maps the native boolean onto the matching literal.
impl From<bool> for Boolean {
    fn from(value: bool) -> Self {
        match value {
            true => Self::True,
            false => Self::False,
        }
    }
}
/// Prints the literal in its source code spelling.
impl std::fmt::Display for Boolean {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::True => "true",
            Self::False => "false",
        })
    }
}
@@ -0,0 +1,118 @@
//!
//! The integer literal lexeme.
//!
use serde::Deserialize;
use serde::Serialize;
use crate::yul::lexer::token::lexeme::Lexeme;
use crate::yul::lexer::token::lexeme::Literal;
use crate::yul::lexer::token::location::Location;
use crate::yul::lexer::token::Token;
///
/// The integer literal lexeme.
///
/// The digits are stored as text exactly as sliced from the source code.
///
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Integer {
    /// A decimal literal, like `42`.
    Decimal {
        /// The literal text.
        inner: String,
    },
    /// A hexadecimal literal, like `0xffff`.
    Hexadecimal {
        /// The literal text, including the `0x` prefix.
        inner: String,
    },
}
impl Integer {
    ///
    /// A shortcut constructor of the decimal literal.
    ///
    pub fn new_decimal(inner: String) -> Self {
        Self::Decimal { inner }
    }

    ///
    /// A shortcut constructor of the hexadecimal literal.
    ///
    pub fn new_hexadecimal(inner: String) -> Self {
        Self::Hexadecimal { inner }
    }

    ///
    /// Parses the integer literal at the beginning of `input`, returning it as a token.
    ///
    /// An input starting with `0x` is read as hexadecimal, with the prefix kept in the literal
    /// text; an input starting with a decimal digit is read as decimal. Returns `None` for
    /// anything else.
    ///
    pub fn parse(input: &str) -> Option<Token> {
        const HEXADECIMAL_PREFIX: &str = "0x";

        let (value, length) = match input.strip_prefix(HEXADECIMAL_PREFIX) {
            Some(digits) => {
                let digits_length = digits
                    .find(Self::cannot_continue_hexadecimal)
                    .unwrap_or(digits.len());
                let length = HEXADECIMAL_PREFIX.len() + digits_length;
                (Self::new_hexadecimal(input[..length].to_owned()), length)
            }
            None if input.starts_with(Self::can_begin_decimal) => {
                let length = input
                    .find(Self::cannot_continue_decimal)
                    .unwrap_or(input.len());
                (Self::new_decimal(input[..length].to_owned()), length)
            }
            None => return None,
        };

        let lexeme = Lexeme::Literal(Literal::Integer(value));
        Some(Token::new(Location::new(0, length), lexeme, length))
    }

    ///
    /// Tells whether `character` is allowed as the first character of a decimal number.
    ///
    pub fn can_begin_decimal(character: char) -> bool {
        Self::can_continue_decimal(character)
    }

    ///
    /// Tells whether `character` is a digit in the decimal base.
    ///
    pub fn can_continue_decimal(character: char) -> bool {
        character.is_digit(era_compiler_common::BASE_DECIMAL)
    }

    ///
    /// The negation of `can_continue_decimal`, usable directly as a search predicate.
    ///
    pub fn cannot_continue_decimal(character: char) -> bool {
        !Self::can_continue_decimal(character)
    }

    ///
    /// Tells whether `character` is a digit in the hexadecimal base.
    ///
    pub fn can_continue_hexadecimal(character: char) -> bool {
        character.is_digit(era_compiler_common::BASE_HEXADECIMAL)
    }

    ///
    /// The negation of `can_continue_hexadecimal`, usable directly as a search predicate.
    ///
    pub fn cannot_continue_hexadecimal(character: char) -> bool {
        !Self::can_continue_hexadecimal(character)
    }
}
/// Prints the literal text as stored, regardless of the base.
impl std::fmt::Display for Integer {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (Self::Decimal { inner } | Self::Hexadecimal { inner }) = self;
        f.write_str(inner.as_str())
    }
}
@@ -0,0 +1,37 @@
//!
//! The literal lexeme.
//!
pub mod boolean;
pub mod integer;
pub mod string;
use serde::Deserialize;
use serde::Serialize;
use self::boolean::Boolean;
use self::integer::Integer;
use self::string::String;
///
/// The literal lexeme of any supported kind.
///
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Literal {
    /// A boolean literal: `true` or `false`.
    Boolean(Boolean),
    /// An integer literal, e.g. `42` or `0xff`.
    Integer(Integer),
    /// A string literal, e.g. `"message"`.
    String(String),
}
/// Prints the wrapped literal using its own `Display` implementation.
impl std::fmt::Display for Literal {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let inner: &dyn std::fmt::Display = match self {
            Self::Boolean(inner) => inner,
            Self::Integer(inner) => inner,
            Self::String(inner) => inner,
        };
        write!(f, "{inner}")
    }
}
@@ -0,0 +1,93 @@
//!
//! The string literal lexeme.
//!
use serde::Deserialize;
use serde::Serialize;
use crate::yul::lexer::token::lexeme::Lexeme;
use crate::yul::lexer::token::lexeme::Literal;
use crate::yul::lexer::token::location::Location;
use crate::yul::lexer::token::Token;
///
/// The string literal lexeme.
///
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct String {
    /// The text between the quotes, with escape sequences kept unprocessed.
    pub inner: std::string::String,
    /// Whether the literal was written with the `hex` prefix.
    pub is_hexadecimal: bool,
}
impl String {
    ///
    /// Creates a string literal value.
    ///
    pub fn new(inner: ::std::string::String, is_hexadecimal: bool) -> Self {
        Self {
            inner,
            is_hexadecimal,
        }
    }

    ///
    /// Parses the string literal at the beginning of `input`, returning it as a token.
    ///
    /// Both plain (`"..."`) and hexadecimal (`hex"..."`) strings are accepted. A backslash and
    /// the character following it are copied into the literal verbatim, so an escaped quote
    /// does not terminate the string. The token length is the number of bytes consumed,
    /// including the prefix and both quotes.
    ///
    /// Returns `None` if `input` does not start with a string literal, or if the literal is
    /// not terminated before the end of `input`.
    ///
    pub fn parse(input: &str) -> Option<Token> {
        const HEXADECIMAL_PREFIX: &str = r#"hex""#;

        let (is_hex_string, body_start) = if input.starts_with(HEXADECIMAL_PREFIX) {
            (true, HEXADECIMAL_PREFIX.len())
        } else if input.starts_with('"') {
            (false, '"'.len_utf8())
        } else {
            return None;
        };

        // Walk the characters together with their byte offsets: the token length is measured
        // in bytes, whereas the contents may include multi-byte characters.
        let mut string = std::string::String::new();
        let mut characters = input[body_start..].char_indices();
        let length = loop {
            // Running out of input before the closing quote means an unterminated literal.
            let (offset, character) = characters.next()?;
            match character {
                '"' => break body_start + offset + character.len_utf8(),
                '\\' => {
                    // Keep the escape sequence unprocessed, skipping over the escaped
                    // character so that `\"` does not end the string.
                    string.push(character);
                    let (_, escaped) = characters.next()?;
                    string.push(escaped);
                }
                _ => string.push(character),
            }
        };

        let literal = Self::new(string, is_hex_string);

        Some(Token::new(
            Location::new(0, length),
            Lexeme::Literal(Literal::String(literal)),
            length,
        ))
    }
}
/// Prints the literal contents without the quotes or the `hex` prefix.
impl std::fmt::Display for String {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.inner.as_str())
    }
}
@@ -0,0 +1,46 @@
//!
//! The lexeme.
//!
pub mod comment;
pub mod identifier;
pub mod keyword;
pub mod literal;
pub mod symbol;
use self::identifier::Identifier;
use self::keyword::Keyword;
use self::literal::Literal;
use self::symbol::Symbol;
///
/// The lexeme: the payload of a token.
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Lexeme {
    /// A keyword.
    Keyword(Keyword),
    /// A symbol.
    Symbol(Symbol),
    /// An identifier.
    Identifier(Identifier),
    /// A literal.
    Literal(Literal),
    /// A comment; its text is not kept.
    Comment,
    /// The end of the input.
    EndOfFile,
}
/// Prints the wrapped lexeme; a comment prints nothing and the end of file prints `EOF`.
impl std::fmt::Display for Lexeme {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Comment => Ok(()),
            Self::EndOfFile => f.write_str("EOF"),
            Self::Keyword(keyword) => write!(f, "{keyword}"),
            Self::Symbol(symbol) => write!(f, "{symbol}"),
            Self::Identifier(identifier) => write!(f, "{identifier}"),
            Self::Literal(literal) => write!(f, "{literal}"),
        }
    }
}
@@ -0,0 +1,74 @@
//!
//! The symbol lexeme.
//!
use crate::yul::lexer::token::lexeme::Lexeme;
use crate::yul::lexer::token::location::Location;
use crate::yul::lexer::token::Token;
///
/// The symbol lexeme: punctuation and operators.
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Symbol {
    /// `:=`
    Assignment,
    /// `->`
    Arrow,
    /// `{`
    BracketCurlyLeft,
    /// `}`
    BracketCurlyRight,
    /// `(`
    ParenthesisLeft,
    /// `)`
    ParenthesisRight,
    /// `,`
    Comma,
    /// `:`
    Colon,
}
impl Symbol {
    ///
    /// Parses the symbol at the beginning of `input`, returning it as a token.
    ///
    /// The two-character symbols are tried first, so that `:=` is not read as `:`. Returns
    /// `None` if `input` is empty or does not start with a known symbol. Prefix checks are
    /// used instead of byte slicing, so an input shorter than two bytes (such as a closing
    /// `}` at the very end of the file) or one starting with a multi-byte character cannot
    /// cause a panic.
    ///
    pub fn parse(input: &str) -> Option<Token> {
        let (symbol, length) = if input.starts_with(":=") {
            (Self::Assignment, 2)
        } else if input.starts_with("->") {
            (Self::Arrow, 2)
        } else {
            let symbol = match input.chars().next()? {
                '{' => Self::BracketCurlyLeft,
                '}' => Self::BracketCurlyRight,
                '(' => Self::ParenthesisLeft,
                ')' => Self::ParenthesisRight,
                ',' => Self::Comma,
                ':' => Self::Colon,
                _ => return None,
            };
            (symbol, 1)
        };

        Some(Token::new(
            Location::new(0, length),
            Lexeme::Symbol(symbol),
            length,
        ))
    }
}
/// Prints the symbol in its source code spelling.
impl std::fmt::Display for Symbol {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::Assignment => ":=",
            Self::Arrow => "->",
            Self::BracketCurlyLeft => "{",
            Self::BracketCurlyRight => "}",
            Self::ParenthesisLeft => "(",
            Self::ParenthesisRight => ")",
            Self::Comma => ",",
            Self::Colon => ":",
        })
    }
}