mirror of
https://github.com/pezkuwichain/revive.git
synced 2026-06-17 13:31:06 +00:00
Emerge Yul recompiler (#1)
Provide a modified (and incomplete) version of ZKSync zksolc that can compile the most basic contracts
This commit is contained in:
@@ -0,0 +1,20 @@
|
||||
//!
|
||||
//! The Yul IR lexer error.
|
||||
//!
|
||||
|
||||
use crate::yul::lexer::token::location::Location;
|
||||
|
||||
///
|
||||
/// The Yul IR lexer error.
|
||||
///
|
||||
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
|
||||
pub enum Error {
|
||||
/// The invalid lexeme error.
|
||||
#[error("{location} Invalid character sequence `{sequence}`")]
|
||||
InvalidLexeme {
|
||||
/// The lexeme location.
|
||||
location: Location,
|
||||
/// The invalid sequence of characters.
|
||||
sequence: String,
|
||||
},
|
||||
}
|
||||
@@ -0,0 +1,137 @@
|
||||
//!
|
||||
//! The compiler lexer.
|
||||
//!
|
||||
|
||||
pub mod error;
|
||||
pub mod token;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
use self::error::Error;
|
||||
use self::token::lexeme::comment::Comment;
|
||||
use self::token::lexeme::identifier::Identifier;
|
||||
use self::token::lexeme::literal::integer::Integer as IntegerLiteral;
|
||||
use self::token::lexeme::literal::string::String as StringLiteral;
|
||||
use self::token::lexeme::symbol::Symbol;
|
||||
use self::token::lexeme::Lexeme;
|
||||
use self::token::location::Location;
|
||||
use self::token::Token;
|
||||
|
||||
///
|
||||
/// The compiler lexer.
|
||||
///
|
||||
pub struct Lexer {
|
||||
/// The input source code.
|
||||
input: String,
|
||||
/// The number of characters processed so far.
|
||||
offset: usize,
|
||||
/// The current location.
|
||||
location: Location,
|
||||
/// The peeked lexeme, waiting to be fetched.
|
||||
peeked: Option<Token>,
|
||||
}
|
||||
|
||||
impl Lexer {
|
||||
///
|
||||
/// A shortcut constructor.
|
||||
///
|
||||
pub fn new(mut input: String) -> Self {
|
||||
input.push('\n');
|
||||
|
||||
Self {
|
||||
input,
|
||||
offset: 0,
|
||||
location: Location::default(),
|
||||
peeked: None,
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Advances the lexer, returning the next lexeme.
|
||||
///
|
||||
#[allow(clippy::should_implement_trait)]
|
||||
pub fn next(&mut self) -> Result<Token, Error> {
|
||||
if let Some(peeked) = self.peeked.take() {
|
||||
return Ok(peeked);
|
||||
}
|
||||
|
||||
while self.offset < self.input.len() {
|
||||
let input = &self.input[self.offset..];
|
||||
|
||||
if input.starts_with(|character| char::is_ascii_whitespace(&character)) {
|
||||
if input.starts_with('\n') {
|
||||
self.location.line += 1;
|
||||
self.location.column = 1;
|
||||
} else if !input.starts_with('\r') {
|
||||
self.location.column += 1;
|
||||
}
|
||||
self.offset += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(token) = Comment::parse(input) {
|
||||
self.offset += token.length;
|
||||
self.location
|
||||
.shift_down(token.location.line, token.location.column);
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(mut token) = StringLiteral::parse(input) {
|
||||
token.location = self.location;
|
||||
|
||||
self.offset += token.length;
|
||||
self.location.shift_right(token.length);
|
||||
return Ok(token);
|
||||
}
|
||||
|
||||
if let Some(mut token) = IntegerLiteral::parse(input) {
|
||||
token.location = self.location;
|
||||
|
||||
self.offset += token.length;
|
||||
self.location.shift_right(token.length);
|
||||
return Ok(token);
|
||||
}
|
||||
|
||||
if let Some(mut token) = Identifier::parse(input) {
|
||||
token.location = self.location;
|
||||
|
||||
self.offset += token.length;
|
||||
self.location.shift_right(token.length);
|
||||
return Ok(token);
|
||||
}
|
||||
|
||||
if let Some(mut token) = Symbol::parse(input) {
|
||||
token.location = self.location;
|
||||
|
||||
self.offset += token.length;
|
||||
self.location.shift_right(token.length);
|
||||
return Ok(token);
|
||||
}
|
||||
|
||||
let end = self.input[self.offset..]
|
||||
.find(char::is_whitespace)
|
||||
.unwrap_or(self.input.len());
|
||||
return Err(Error::InvalidLexeme {
|
||||
location: self.location,
|
||||
sequence: self.input[self.offset..self.offset + end].to_owned(),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(Token::new(self.location, Lexeme::EndOfFile, 0))
|
||||
}
|
||||
|
||||
///
|
||||
/// Peeks the next lexeme without advancing the iterator.
|
||||
///
|
||||
pub fn peek(&mut self) -> Result<Token, Error> {
|
||||
match self.peeked {
|
||||
Some(ref peeked) => Ok(peeked.clone()),
|
||||
None => {
|
||||
let peeked = self.next()?;
|
||||
self.peeked = Some(peeked.clone());
|
||||
Ok(peeked)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,91 @@
|
||||
//!
|
||||
//! The Yul IR lexer tests.
|
||||
//!
|
||||
|
||||
use crate::yul::lexer::error::Error;
|
||||
use crate::yul::lexer::token::lexeme::Lexeme;
|
||||
use crate::yul::lexer::token::location::Location;
|
||||
use crate::yul::lexer::Lexer;
|
||||
|
||||
/// Lexes a Yul object with an invalid `#` character injected and checks that
/// the lexer reports it at the expected location.
///
/// The fixture is whitespace-sensitive: the asserted location (line 51,
/// column 57) requires the offending line to be the 51st line of the raw
/// string and to be indented by 16 spaces.
#[test]
fn default() {
    let input = r#"
object "Test" {
    code {
        {
            /*
                The deploy code.
            */
            mstore(64, 128)
            if callvalue() { revert(0, 0) }
            let _1 := datasize("Test_deployed")
            codecopy(0, dataoffset("Test_deployed"), _1)
            return(0, _1)
        }
    }
    object "Test_deployed" {
        code {
            {
                /*
                    The runtime code.
                */
                mstore(64, 128)
                if iszero(lt(calldatasize(), 4))
                {
                    let _1 := 0
                    switch shr(224, calldataload(_1))
                    case 0x3df4ddf4 {
                        if callvalue() { revert(_1, _1) }
                        if slt(add(calldatasize(), not(3)), _1) { revert(_1, _1) }
                        let memPos := allocate_memory(_1)
                        mstore(memPos, 0x2a)
                        return(memPos, 32)
                    }
                    case 0x5a8ac02d {
                        if callvalue() { revert(_1, _1) }
                        if slt(add(calldatasize(), not(3)), _1) { revert(_1, _1) }
                        let memPos_1 := allocate_memory(_1)
                        return(memPos_1, sub(abi_encode_uint256(memPos_1, 0x63), memPos_1))
                    }
                }
                revert(0, 0)
            }
            function abi_encode_uint256(headStart, value0) -> tail
            {
                tail := add(headStart, 32)
                mstore(headStart, value0)
            }
            function allocate_memory(size) -> memPtr
            {
                memPtr := mload(64)
                let newFreePtr := add(memPtr, and(add(size, 31), not(31)))
                if or(gt(newFreePtr, 0xffffffffffffffff)#, lt(newFreePtr, memPtr))
                {
                    mstore(0, shl(224, 0x4e487b71))
                    mstore(4, 0x41)
                    revert(0, 0x24)
                }
                mstore(64, newFreePtr)
            }
        }
    }
}
    "#;

    let mut lexer = Lexer::new(input.to_owned());
    loop {
        match lexer.next() {
            // Reaching the end of file means the invalid lexeme was missed.
            Ok(token) => assert_ne!(token.lexeme, Lexeme::EndOfFile),
            Err(error) => {
                assert_eq!(
                    error,
                    Error::InvalidLexeme {
                        location: Location::new(51, 57),
                        sequence: "#,".to_owned(),
                    }
                );
                break;
            }
        }
    }
}
|
||||
@@ -0,0 +1,38 @@
|
||||
//!
|
||||
//! The comment lexeme.
|
||||
//!
|
||||
|
||||
pub mod multi_line;
|
||||
pub mod single_line;
|
||||
|
||||
use crate::yul::lexer::token::Token;
|
||||
|
||||
use self::multi_line::Comment as MultiLineComment;
|
||||
use self::single_line::Comment as SingleLineComment;
|
||||
|
||||
///
|
||||
/// The comment lexeme.
|
||||
///
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
#[allow(dead_code)]
|
||||
pub enum Comment {
|
||||
/// The single-line comment.
|
||||
SingleLine(SingleLineComment),
|
||||
/// The multi-line comment.
|
||||
MultiLine(MultiLineComment),
|
||||
}
|
||||
|
||||
impl Comment {
|
||||
///
|
||||
/// Returns the comment's length, including the trimmed whitespace around it.
|
||||
///
|
||||
pub fn parse(input: &str) -> Option<Token> {
|
||||
if input.starts_with(SingleLineComment::START) {
|
||||
Some(SingleLineComment::parse(input))
|
||||
} else if input.starts_with(MultiLineComment::START) {
|
||||
Some(MultiLineComment::parse(input))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
//!
|
||||
//! The multi-line comment lexeme.
|
||||
//!
|
||||
|
||||
use crate::yul::lexer::token::lexeme::Lexeme;
|
||||
use crate::yul::lexer::token::location::Location;
|
||||
use crate::yul::lexer::token::Token;
|
||||
|
||||
///
|
||||
/// The multi-line comment lexeme.
|
||||
///
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Comment {}
|
||||
|
||||
impl Comment {
|
||||
/// The start symbol.
|
||||
pub const START: &'static str = "/*";
|
||||
/// The end symbol.
|
||||
pub const END: &'static str = "*/";
|
||||
|
||||
///
|
||||
/// Returns the comment, including its length and number of lines.
|
||||
///
|
||||
pub fn parse(input: &str) -> Token {
|
||||
let end_position = input.find(Self::END).unwrap_or(input.len());
|
||||
let input = &input[..end_position];
|
||||
|
||||
let length = end_position + Self::END.len();
|
||||
let lines = input.matches('\n').count();
|
||||
let columns = match input.rfind('\n') {
|
||||
Some(new_line) => end_position - (new_line + 1),
|
||||
None => end_position,
|
||||
};
|
||||
|
||||
Token::new(Location::new(lines, columns), Lexeme::Comment, length)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
//!
|
||||
//! The single-line comment lexeme.
|
||||
//!
|
||||
|
||||
use crate::yul::lexer::token::lexeme::Lexeme;
|
||||
use crate::yul::lexer::token::location::Location;
|
||||
use crate::yul::lexer::token::Token;
|
||||
|
||||
///
|
||||
/// The single-line comment lexeme.
|
||||
///
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Comment {}
|
||||
|
||||
impl Comment {
|
||||
/// The start symbol.
|
||||
pub const START: &'static str = "//";
|
||||
/// The end symbol.
|
||||
pub const END: &'static str = "\n";
|
||||
|
||||
///
|
||||
/// Returns the comment's length, including the trimmed whitespace around it.
|
||||
///
|
||||
pub fn parse(input: &str) -> Token {
|
||||
let end_position = input.find(Self::END).unwrap_or(input.len());
|
||||
let length = end_position + Self::END.len();
|
||||
|
||||
Token::new(Location::new(1, 1), Lexeme::Comment, length)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,80 @@
|
||||
//!
|
||||
//! The identifier lexeme.
|
||||
//!
|
||||
|
||||
use crate::yul::lexer::token::lexeme::keyword::Keyword;
|
||||
use crate::yul::lexer::token::lexeme::Lexeme;
|
||||
use crate::yul::lexer::token::location::Location;
|
||||
use crate::yul::lexer::token::Token;
|
||||
|
||||
///
/// The identifier lexeme.
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Identifier {
    /// The identifier text as it appears in the source code.
    pub inner: String,
}
|
||||
|
||||
impl Identifier {
|
||||
///
|
||||
/// A shortcut constructor.
|
||||
///
|
||||
pub fn new(inner: String) -> Self {
|
||||
Self { inner }
|
||||
}
|
||||
|
||||
///
|
||||
/// Parses the identifier, returning it as a token.
|
||||
///
|
||||
pub fn parse(input: &str) -> Option<Token> {
|
||||
if !input.starts_with(Self::can_begin) {
|
||||
return None;
|
||||
}
|
||||
let end = input.find(Self::cannot_continue).unwrap_or(input.len());
|
||||
|
||||
let inner = input[..end].to_string();
|
||||
let length = inner.len();
|
||||
|
||||
if let Some(token) = Keyword::parse(inner.as_str()) {
|
||||
return Some(token);
|
||||
}
|
||||
|
||||
Some(Token::new(
|
||||
Location::new(0, length),
|
||||
Lexeme::Identifier(Self::new(inner)),
|
||||
length,
|
||||
))
|
||||
}
|
||||
|
||||
///
|
||||
/// Checks whether the character can begin an identifier.
|
||||
///
|
||||
pub fn can_begin(character: char) -> bool {
|
||||
character.is_alphabetic() || character == '_' || character == '$'
|
||||
}
|
||||
|
||||
///
|
||||
/// Checks whether the character can continue an identifier.
|
||||
///
|
||||
pub fn can_continue(character: char) -> bool {
|
||||
Self::can_begin(character)
|
||||
|| character.is_numeric()
|
||||
|| character == '_'
|
||||
|| character == '$'
|
||||
|| character == '.'
|
||||
}
|
||||
|
||||
///
|
||||
/// Checks whether the character cannot continue an identifier.
|
||||
///
|
||||
pub fn cannot_continue(character: char) -> bool {
|
||||
!Self::can_continue(character)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Identifier {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.inner)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,158 @@
|
||||
//!
|
||||
//! The keyword lexeme.
|
||||
//!
|
||||
|
||||
use crate::yul::lexer::token::lexeme::literal::boolean::Boolean as BooleanLiteral;
|
||||
use crate::yul::lexer::token::lexeme::literal::Literal;
|
||||
use crate::yul::lexer::token::lexeme::Lexeme;
|
||||
use crate::yul::lexer::token::location::Location;
|
||||
use crate::yul::lexer::token::Token;
|
||||
|
||||
///
/// The keyword lexeme.
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Keyword {
    /// `object`
    Object,
    /// `code`
    Code,
    /// `function`
    Function,
    /// `let`
    Let,
    /// `if`
    If,
    /// `switch`
    Switch,
    /// `case`
    Case,
    /// `default`
    Default,
    /// `for`
    For,
    /// `break`
    Break,
    /// `continue`
    Continue,
    /// `leave`
    Leave,
    /// `true`
    True,
    /// `false`
    False,
    /// `bool`
    Bool,
    /// `int{N}`, where `N` is the number following the `int` prefix.
    Int(usize),
    /// `uint{N}`, where `N` is the number following the `uint` prefix.
    Uint(usize),
}
|
||||
|
||||
impl Keyword {
|
||||
///
|
||||
/// Parses the keyword, returning it as a token.
|
||||
///
|
||||
pub fn parse(input: &str) -> Option<Token> {
|
||||
let keyword = Self::parse_keyword(input)?;
|
||||
let lexeme = match BooleanLiteral::try_from(keyword) {
|
||||
Ok(literal) => Lexeme::Literal(Literal::Boolean(literal)),
|
||||
Err(keyword) => Lexeme::Keyword(keyword),
|
||||
};
|
||||
|
||||
let length = lexeme.to_string().len();
|
||||
if length != input.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Token::new(Location::new(0, length), lexeme, length))
|
||||
}
|
||||
|
||||
///
|
||||
/// Parses the keyword itself.
|
||||
///
|
||||
fn parse_keyword(input: &str) -> Option<Self> {
|
||||
if !input.starts_with(Self::can_begin) {
|
||||
return None;
|
||||
}
|
||||
let end = input.find(Self::cannot_continue).unwrap_or(input.len());
|
||||
let input = &input[..end];
|
||||
|
||||
if let Some(input) = input.strip_prefix("int") {
|
||||
if let Ok(bitlength) = input.parse::<usize>() {
|
||||
return Some(Self::Int(bitlength));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(input) = input.strip_prefix("uint") {
|
||||
if let Ok(bitlength) = input.parse::<usize>() {
|
||||
return Some(Self::Uint(bitlength));
|
||||
}
|
||||
}
|
||||
|
||||
Some(match input {
|
||||
"object" => Self::Object,
|
||||
"code" => Self::Code,
|
||||
"function" => Self::Function,
|
||||
"let" => Self::Let,
|
||||
"if" => Self::If,
|
||||
"switch" => Self::Switch,
|
||||
"case" => Self::Case,
|
||||
"default" => Self::Default,
|
||||
"for" => Self::For,
|
||||
"break" => Self::Break,
|
||||
"continue" => Self::Continue,
|
||||
"leave" => Self::Leave,
|
||||
"true" => Self::True,
|
||||
"false" => Self::False,
|
||||
"bool" => Self::Bool,
|
||||
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
|
||||
///
|
||||
/// Checks whether the character can begin a keyword.
|
||||
///
|
||||
pub fn can_begin(character: char) -> bool {
|
||||
character.is_alphabetic()
|
||||
}
|
||||
|
||||
///
|
||||
/// Checks whether the character can continue a keyword.
|
||||
///
|
||||
pub fn can_continue(character: char) -> bool {
|
||||
Self::can_begin(character) || character.is_numeric()
|
||||
}
|
||||
|
||||
///
|
||||
/// Checks whether the character cannot continue a keyword.
|
||||
///
|
||||
pub fn cannot_continue(character: char) -> bool {
|
||||
!Self::can_continue(character)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Keyword {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Object => write!(f, "object"),
|
||||
Self::Code => write!(f, "code"),
|
||||
Self::Function => write!(f, "function"),
|
||||
Self::Let => write!(f, "let"),
|
||||
Self::If => write!(f, "if"),
|
||||
Self::Switch => write!(f, "switch"),
|
||||
Self::Case => write!(f, "case"),
|
||||
Self::Default => write!(f, "default"),
|
||||
Self::For => write!(f, "for"),
|
||||
Self::Break => write!(f, "break"),
|
||||
Self::Continue => write!(f, "continue"),
|
||||
Self::Leave => write!(f, "leave"),
|
||||
Self::True => write!(f, "true"),
|
||||
Self::False => write!(f, "false"),
|
||||
Self::Bool => write!(f, "bool"),
|
||||
Self::Int(bitlength) => write!(f, "int{bitlength}"),
|
||||
Self::Uint(bitlength) => write!(f, "uint{bitlength}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,66 @@
|
||||
//!
|
||||
//! The boolean literal lexeme.
|
||||
//!
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::yul::lexer::token::lexeme::keyword::Keyword;
|
||||
|
||||
///
|
||||
/// The boolean literal lexeme.
|
||||
///
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
|
||||
pub enum Boolean {
|
||||
/// Created from the `false` keyword.
|
||||
False,
|
||||
/// Created from the `true` keyword.
|
||||
True,
|
||||
}
|
||||
|
||||
impl Boolean {
|
||||
///
|
||||
/// Creates a `false` value.
|
||||
///
|
||||
pub fn r#false() -> Self {
|
||||
Self::False
|
||||
}
|
||||
|
||||
///
|
||||
/// Creates a `true` value.
|
||||
///
|
||||
pub fn r#true() -> Self {
|
||||
Self::True
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Keyword> for Boolean {
|
||||
type Error = Keyword;
|
||||
|
||||
fn try_from(keyword: Keyword) -> Result<Self, Self::Error> {
|
||||
Ok(match keyword {
|
||||
Keyword::False => Self::False,
|
||||
Keyword::True => Self::True,
|
||||
unknown => return Err(unknown),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl From<bool> for Boolean {
|
||||
fn from(value: bool) -> Self {
|
||||
if value {
|
||||
Self::True
|
||||
} else {
|
||||
Self::False
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Boolean {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::False => write!(f, "false"),
|
||||
Self::True => write!(f, "true"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,118 @@
|
||||
//!
|
||||
//! The integer literal lexeme.
|
||||
//!
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::yul::lexer::token::lexeme::Lexeme;
|
||||
use crate::yul::lexer::token::lexeme::Literal;
|
||||
use crate::yul::lexer::token::location::Location;
|
||||
use crate::yul::lexer::token::Token;
|
||||
|
||||
///
|
||||
/// The integer literal lexeme.
|
||||
///
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
|
||||
pub enum Integer {
|
||||
/// An integer literal, like `42`.
|
||||
Decimal {
|
||||
/// The inner literal contents.
|
||||
inner: String,
|
||||
},
|
||||
/// A hexadecimal literal, like `0xffff`.
|
||||
Hexadecimal {
|
||||
/// The inner literal contents.
|
||||
inner: String,
|
||||
},
|
||||
}
|
||||
|
||||
impl Integer {
|
||||
///
|
||||
/// Creates a decimal value.
|
||||
///
|
||||
pub fn new_decimal(inner: String) -> Self {
|
||||
Self::Decimal { inner }
|
||||
}
|
||||
|
||||
///
|
||||
/// Creates a hexadecimal value.
|
||||
///
|
||||
pub fn new_hexadecimal(inner: String) -> Self {
|
||||
Self::Hexadecimal { inner }
|
||||
}
|
||||
|
||||
///
|
||||
/// Parses the value from the source code slice.
|
||||
///
|
||||
pub fn parse(input: &str) -> Option<Token> {
|
||||
let (value, length) = if let Some(body) = input.strip_prefix("0x") {
|
||||
let end = body
|
||||
.find(Self::cannot_continue_hexadecimal)
|
||||
.unwrap_or(body.len());
|
||||
let length = "0x".len() + end;
|
||||
let value = Self::new_hexadecimal(input[..length].to_owned());
|
||||
(value, length)
|
||||
} else if input.starts_with(Self::can_begin_decimal) {
|
||||
let end = input
|
||||
.find(Self::cannot_continue_decimal)
|
||||
.unwrap_or(input.len());
|
||||
let length = end;
|
||||
let value = Self::new_decimal(input[..length].to_owned());
|
||||
(value, length)
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
|
||||
let token = Token::new(
|
||||
Location::new(0, length),
|
||||
Lexeme::Literal(Literal::Integer(value)),
|
||||
length,
|
||||
);
|
||||
Some(token)
|
||||
}
|
||||
|
||||
///
|
||||
/// Checks whether the character can begin a decimal number.
|
||||
///
|
||||
pub fn can_begin_decimal(character: char) -> bool {
|
||||
Self::can_continue_decimal(character)
|
||||
}
|
||||
|
||||
///
|
||||
/// Checks whether the character can continue a decimal number.
|
||||
///
|
||||
pub fn can_continue_decimal(character: char) -> bool {
|
||||
character.is_digit(era_compiler_common::BASE_DECIMAL)
|
||||
}
|
||||
|
||||
///
|
||||
/// Checks whether the character cannot continue a decimal number.
|
||||
///
|
||||
pub fn cannot_continue_decimal(character: char) -> bool {
|
||||
!Self::can_continue_decimal(character)
|
||||
}
|
||||
|
||||
///
|
||||
/// Checks whether the character can continue a hexadecimal number.
|
||||
///
|
||||
pub fn can_continue_hexadecimal(character: char) -> bool {
|
||||
character.is_digit(era_compiler_common::BASE_HEXADECIMAL)
|
||||
}
|
||||
|
||||
///
|
||||
/// Checks whether the character cannot continue a hexadecimal number.
|
||||
///
|
||||
pub fn cannot_continue_hexadecimal(character: char) -> bool {
|
||||
!Self::can_continue_hexadecimal(character)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Integer {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Decimal { inner } => write!(f, "{inner}"),
|
||||
Self::Hexadecimal { inner } => write!(f, "{inner}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
//!
|
||||
//! The literal lexeme.
|
||||
//!
|
||||
|
||||
pub mod boolean;
|
||||
pub mod integer;
|
||||
pub mod string;
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use self::boolean::Boolean;
|
||||
use self::integer::Integer;
|
||||
use self::string::String;
|
||||
|
||||
///
|
||||
/// The literal lexeme.
|
||||
///
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
|
||||
pub enum Literal {
|
||||
/// A boolean literal, like `true`, or `false`.
|
||||
Boolean(Boolean),
|
||||
/// An integer literal, like `42`, or `0xff`.
|
||||
Integer(Integer),
|
||||
/// A string literal, like `"message"`.
|
||||
String(String),
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Literal {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Boolean(inner) => write!(f, "{inner}"),
|
||||
Self::Integer(inner) => write!(f, "{inner}"),
|
||||
Self::String(inner) => write!(f, "{inner}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,93 @@
|
||||
//!
|
||||
//! The string literal lexeme.
|
||||
//!
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::yul::lexer::token::lexeme::Lexeme;
|
||||
use crate::yul::lexer::token::lexeme::Literal;
|
||||
use crate::yul::lexer::token::location::Location;
|
||||
use crate::yul::lexer::token::Token;
|
||||
|
||||
///
|
||||
/// The string literal lexeme.
|
||||
///
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
|
||||
pub struct String {
|
||||
/// The inner string contents.
|
||||
pub inner: std::string::String,
|
||||
/// Whether the string is hexadecimal.
|
||||
pub is_hexadecimal: bool,
|
||||
}
|
||||
|
||||
impl String {
|
||||
///
|
||||
/// Creates a string literal value.
|
||||
///
|
||||
pub fn new(inner: ::std::string::String, is_hexadecimal: bool) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
is_hexadecimal,
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Parses the value from the source code slice.
|
||||
///
|
||||
pub fn parse(input: &str) -> Option<Token> {
|
||||
let mut length = 0;
|
||||
|
||||
let is_string = input[length..].starts_with('"');
|
||||
let is_hex_string = input[length..].starts_with(r#"hex""#);
|
||||
|
||||
if !is_string && !is_hex_string {
|
||||
return None;
|
||||
}
|
||||
|
||||
if is_string {
|
||||
length += 1;
|
||||
}
|
||||
if is_hex_string {
|
||||
length += r#"hex""#.len();
|
||||
}
|
||||
|
||||
let mut string = std::string::String::new();
|
||||
loop {
|
||||
if input[length..].starts_with('\\') {
|
||||
string.push(input.chars().nth(length).expect("Always exists"));
|
||||
string.push(input.chars().nth(length + 1).expect("Always exists"));
|
||||
length += 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
if input[length..].starts_with('"') {
|
||||
length += 1;
|
||||
break;
|
||||
}
|
||||
|
||||
string.push(input.chars().nth(length).expect("Always exists"));
|
||||
length += 1;
|
||||
}
|
||||
|
||||
let string = string
|
||||
.strip_prefix('"')
|
||||
.and_then(|string| string.strip_suffix('"'))
|
||||
.unwrap_or(string.as_str())
|
||||
.to_owned();
|
||||
|
||||
let literal = Self::new(string, is_hex_string);
|
||||
|
||||
Some(Token::new(
|
||||
Location::new(0, length),
|
||||
Lexeme::Literal(Literal::String(literal)),
|
||||
length,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for String {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.inner)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
//!
|
||||
//! The lexeme.
|
||||
//!
|
||||
|
||||
pub mod comment;
|
||||
pub mod identifier;
|
||||
pub mod keyword;
|
||||
pub mod literal;
|
||||
pub mod symbol;
|
||||
|
||||
use self::identifier::Identifier;
|
||||
use self::keyword::Keyword;
|
||||
use self::literal::Literal;
|
||||
use self::symbol::Symbol;
|
||||
|
||||
///
|
||||
/// The lexeme.
|
||||
///
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Lexeme {
|
||||
/// The keyword lexeme.
|
||||
Keyword(Keyword),
|
||||
/// The symbol lexeme.
|
||||
Symbol(Symbol),
|
||||
/// The identifier lexeme.
|
||||
Identifier(Identifier),
|
||||
/// The literal lexeme.
|
||||
Literal(Literal),
|
||||
/// The comment lexeme.
|
||||
Comment,
|
||||
/// The end-of-file lexeme.
|
||||
EndOfFile,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Lexeme {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Keyword(inner) => write!(f, "{inner}"),
|
||||
Self::Symbol(inner) => write!(f, "{inner}"),
|
||||
Self::Identifier(inner) => write!(f, "{inner}"),
|
||||
Self::Literal(inner) => write!(f, "{inner}"),
|
||||
Self::Comment => Ok(()),
|
||||
Self::EndOfFile => write!(f, "EOF"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,74 @@
|
||||
//!
|
||||
//! The symbol lexeme.
|
||||
//!
|
||||
|
||||
use crate::yul::lexer::token::lexeme::Lexeme;
|
||||
use crate::yul::lexer::token::location::Location;
|
||||
use crate::yul::lexer::token::Token;
|
||||
|
||||
///
/// The symbol lexeme.
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Symbol {
    /// `:=`
    Assignment,
    /// `->`
    Arrow,
    /// `{`
    BracketCurlyLeft,
    /// `}`
    BracketCurlyRight,
    /// `(`
    ParenthesisLeft,
    /// `)`
    ParenthesisRight,
    /// `,`
    Comma,
    /// `:`
    Colon,
}
|
||||
|
||||
impl Symbol {
|
||||
///
|
||||
/// Parses the symbol, returning it as a token.
|
||||
///
|
||||
pub fn parse(input: &str) -> Option<Token> {
|
||||
let (symbol, length) = match &input[..2] {
|
||||
":=" => (Self::Assignment, 2),
|
||||
"->" => (Self::Arrow, 2),
|
||||
|
||||
_ => match &input[..1] {
|
||||
"{" => (Self::BracketCurlyLeft, 1),
|
||||
"}" => (Self::BracketCurlyRight, 1),
|
||||
"(" => (Self::ParenthesisLeft, 1),
|
||||
")" => (Self::ParenthesisRight, 1),
|
||||
"," => (Self::Comma, 1),
|
||||
":" => (Self::Colon, 1),
|
||||
|
||||
_ => return None,
|
||||
},
|
||||
};
|
||||
|
||||
Some(Token::new(
|
||||
Location::new(0, length),
|
||||
Lexeme::Symbol(symbol),
|
||||
length,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Symbol {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Assignment => write!(f, ":="),
|
||||
Self::Arrow => write!(f, "->"),
|
||||
Self::BracketCurlyLeft => write!(f, "{{"),
|
||||
Self::BracketCurlyRight => write!(f, "}}"),
|
||||
Self::ParenthesisLeft => write!(f, "("),
|
||||
Self::ParenthesisRight => write!(f, ")"),
|
||||
Self::Comma => write!(f, ","),
|
||||
Self::Colon => write!(f, ":"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,65 @@
|
||||
//!
|
||||
//! The lexical token location.
|
||||
//!
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
///
|
||||
/// The token location in the source code file.
|
||||
///
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Copy, Eq)]
|
||||
pub struct Location {
|
||||
/// The line number, starting from 1.
|
||||
pub line: usize,
|
||||
/// The column number, starting from 1.
|
||||
pub column: usize,
|
||||
}
|
||||
|
||||
impl Default for Location {
|
||||
fn default() -> Self {
|
||||
Self { line: 1, column: 1 }
|
||||
}
|
||||
}
|
||||
|
||||
impl Location {
|
||||
///
|
||||
/// Creates a default location.
|
||||
///
|
||||
pub fn new(line: usize, column: usize) -> Self {
|
||||
Self { line, column }
|
||||
}
|
||||
|
||||
///
|
||||
/// Mutates the location by shifting the original one down by `lines` and
|
||||
/// setting the column to `column`.
|
||||
///
|
||||
pub fn shift_down(&mut self, lines: usize, column: usize) {
|
||||
if lines == 0 {
|
||||
self.shift_right(column);
|
||||
return;
|
||||
}
|
||||
|
||||
self.line += lines;
|
||||
self.column = column;
|
||||
}
|
||||
|
||||
///
|
||||
/// Mutates the location by shifting the original one rightward by `columns`.
|
||||
///
|
||||
pub fn shift_right(&mut self, columns: usize) {
|
||||
self.column += columns;
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for Location {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.line == other.line && self.column == other.column
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Location {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}:{}", self.line, self.column)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
//!
|
||||
//! The token.
|
||||
//!
|
||||
|
||||
pub mod lexeme;
|
||||
pub mod location;
|
||||
|
||||
use self::lexeme::Lexeme;
|
||||
use self::location::Location;
|
||||
|
||||
///
|
||||
/// The token.
|
||||
///
|
||||
/// Contains a lexeme and its location.
|
||||
///
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Token {
|
||||
/// The token location.
|
||||
pub location: Location,
|
||||
/// The lexeme.
|
||||
pub lexeme: Lexeme,
|
||||
/// The token length, including whitespaces.
|
||||
pub length: usize,
|
||||
}
|
||||
|
||||
impl Token {
|
||||
///
|
||||
/// A shortcut constructor.
|
||||
///
|
||||
pub fn new(location: Location, lexeme: Lexeme, length: usize) -> Self {
|
||||
Self {
|
||||
location,
|
||||
lexeme,
|
||||
length,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Token {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}:{}", self.location, self.lexeme)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user