Emerge Yul recompiler (#1)

Provide a modified (and incomplete) version of ZKSync zksolc that can compile the most basic contracts
This commit is contained in:
Cyrill Leutwiler
2024-03-12 12:06:02 +01:00
committed by GitHub
parent d238d8f39e
commit cffa14a4d2
247 changed files with 35357 additions and 4905 deletions
+20
View File
@@ -0,0 +1,20 @@
//!
//! The Yul IR lexer error.
//!
use crate::yul::lexer::token::location::Location;
///
/// The Yul IR lexer error.
///
/// The `thiserror::Error` derive uses the `#[error(...)]` attribute on each
/// variant as its display message.
///
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
pub enum Error {
/// The invalid lexeme error, carrying where the unrecognized text starts
/// and what it is.
#[error("{location} Invalid character sequence `{sequence}`")]
InvalidLexeme {
/// The location of the unrecognized sequence in the source code.
location: Location,
/// The invalid sequence of characters.
sequence: String,
},
}
+137
View File
@@ -0,0 +1,137 @@
//!
//! The compiler lexer.
//!
pub mod error;
pub mod token;
#[cfg(test)]
mod tests;
use self::error::Error;
use self::token::lexeme::comment::Comment;
use self::token::lexeme::identifier::Identifier;
use self::token::lexeme::literal::integer::Integer as IntegerLiteral;
use self::token::lexeme::literal::string::String as StringLiteral;
use self::token::lexeme::symbol::Symbol;
use self::token::lexeme::Lexeme;
use self::token::location::Location;
use self::token::Token;
///
/// The compiler lexer.
///
/// Holds the source text together with the scanning state needed to hand out
/// tokens one at a time and to look one token ahead.
///
pub struct Lexer {
/// The input source code.
input: String,
/// The byte offset into `input` of the first text not yet consumed
/// (it is used directly to slice `input`).
offset: usize,
/// The current location, i.e. the line and column matching `offset`.
location: Location,
/// The peeked lexeme, waiting to be fetched.
peeked: Option<Token>,
}
impl Lexer {
///
/// A shortcut constructor.
///
pub fn new(mut input: String) -> Self {
input.push('\n');
Self {
input,
offset: 0,
location: Location::default(),
peeked: None,
}
}
///
/// Advances the lexer, returning the next lexeme.
///
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> Result<Token, Error> {
if let Some(peeked) = self.peeked.take() {
return Ok(peeked);
}
while self.offset < self.input.len() {
let input = &self.input[self.offset..];
if input.starts_with(|character| char::is_ascii_whitespace(&character)) {
if input.starts_with('\n') {
self.location.line += 1;
self.location.column = 1;
} else if !input.starts_with('\r') {
self.location.column += 1;
}
self.offset += 1;
continue;
}
if let Some(token) = Comment::parse(input) {
self.offset += token.length;
self.location
.shift_down(token.location.line, token.location.column);
continue;
}
if let Some(mut token) = StringLiteral::parse(input) {
token.location = self.location;
self.offset += token.length;
self.location.shift_right(token.length);
return Ok(token);
}
if let Some(mut token) = IntegerLiteral::parse(input) {
token.location = self.location;
self.offset += token.length;
self.location.shift_right(token.length);
return Ok(token);
}
if let Some(mut token) = Identifier::parse(input) {
token.location = self.location;
self.offset += token.length;
self.location.shift_right(token.length);
return Ok(token);
}
if let Some(mut token) = Symbol::parse(input) {
token.location = self.location;
self.offset += token.length;
self.location.shift_right(token.length);
return Ok(token);
}
let end = self.input[self.offset..]
.find(char::is_whitespace)
.unwrap_or(self.input.len());
return Err(Error::InvalidLexeme {
location: self.location,
sequence: self.input[self.offset..self.offset + end].to_owned(),
});
}
Ok(Token::new(self.location, Lexeme::EndOfFile, 0))
}
///
/// Peeks the next lexeme without advancing the iterator.
///
pub fn peek(&mut self) -> Result<Token, Error> {
match self.peeked {
Some(ref peeked) => Ok(peeked.clone()),
None => {
let peeked = self.next()?;
self.peeked = Some(peeked.clone());
Ok(peeked)
}
}
}
}
+91
View File
@@ -0,0 +1,91 @@
//!
//! The Yul IR lexer tests.
//!
use crate::yul::lexer::error::Error;
use crate::yul::lexer::token::lexeme::Lexeme;
use crate::yul::lexer::token::location::Location;
use crate::yul::lexer::Lexer;
// Lexes a Yul object that contains a stray `#` inside `allocate_memory` and
// checks that every token before it is a regular (non end-of-file) token and
// that the first error is an invalid lexeme with the expected location and
// character sequence.
#[test]
fn default() {
let input = r#"
object "Test" {
code {
{
/*
The deploy code.
*/
mstore(64, 128)
if callvalue() { revert(0, 0) }
let _1 := datasize("Test_deployed")
codecopy(0, dataoffset("Test_deployed"), _1)
return(0, _1)
}
}
object "Test_deployed" {
code {
{
/*
The runtime code.
*/
mstore(64, 128)
if iszero(lt(calldatasize(), 4))
{
let _1 := 0
switch shr(224, calldataload(_1))
case 0x3df4ddf4 {
if callvalue() { revert(_1, _1) }
if slt(add(calldatasize(), not(3)), _1) { revert(_1, _1) }
let memPos := allocate_memory(_1)
mstore(memPos, 0x2a)
return(memPos, 32)
}
case 0x5a8ac02d {
if callvalue() { revert(_1, _1) }
if slt(add(calldatasize(), not(3)), _1) { revert(_1, _1) }
let memPos_1 := allocate_memory(_1)
return(memPos_1, sub(abi_encode_uint256(memPos_1, 0x63), memPos_1))
}
}
revert(0, 0)
}
function abi_encode_uint256(headStart, value0) -> tail
{
tail := add(headStart, 32)
mstore(headStart, value0)
}
function allocate_memory(size) -> memPtr
{
memPtr := mload(64)
let newFreePtr := add(memPtr, and(add(size, 31), not(31)))
if or(gt(newFreePtr, 0xffffffffffffffff)#, lt(newFreePtr, memPtr))
{
mstore(0, shl(224, 0x4e487b71))
mstore(4, 0x41)
revert(0, 0x24)
}
mstore(64, newFreePtr)
}
}
}
}
"#;
let mut lexer = Lexer::new(input.to_owned());
// Keep pulling tokens until the lexer reports the error; reaching the end of
// the file first fails the test.
loop {
match lexer.next() {
Ok(token) => assert_ne!(token.lexeme, Lexeme::EndOfFile),
Err(error) => {
// The invalid sequence runs from the `#` up to the next whitespace.
assert_eq!(
error,
Error::InvalidLexeme {
location: Location::new(51, 57),
sequence: "#,".to_owned(),
}
);
break;
}
}
}
}
@@ -0,0 +1,38 @@
//!
//! The comment lexeme.
//!
pub mod multi_line;
pub mod single_line;
use crate::yul::lexer::token::Token;
use self::multi_line::Comment as MultiLineComment;
use self::single_line::Comment as SingleLineComment;
///
/// The comment lexeme.
///
/// Groups the two comment flavours, each of which is parsed by its own
/// submodule type.
///
#[derive(Debug, Clone, PartialEq, Eq)]
// NOTE(review): presumably needed because `parse` returns a `Token` and never
// constructs these variants; confirm before removing.
#[allow(dead_code)]
pub enum Comment {
/// The single-line comment.
SingleLine(SingleLineComment),
/// The multi-line comment.
MultiLine(MultiLineComment),
}
impl Comment {
    ///
    /// Parses a comment located at the very beginning of `input`.
    ///
    /// Returns the token produced by the single-line or the multi-line comment
    /// parser, depending on which start symbol `input` begins with, or `None`
    /// if `input` does not begin with a comment.
    ///
    pub fn parse(input: &str) -> Option<Token> {
        if input.starts_with(SingleLineComment::START) {
            return Some(SingleLineComment::parse(input));
        }

        input
            .starts_with(MultiLineComment::START)
            .then(|| MultiLineComment::parse(input))
    }
}
@@ -0,0 +1,37 @@
//!
//! The multi-line comment lexeme.
//!
use crate::yul::lexer::token::lexeme::Lexeme;
use crate::yul::lexer::token::location::Location;
use crate::yul::lexer::token::Token;
///
/// The multi-line comment lexeme.
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Comment {}

impl Comment {
    /// The start symbol.
    pub const START: &'static str = "/*";

    /// The end symbol.
    pub const END: &'static str = "*/";

    ///
    /// Parses the comment at the beginning of `input`, returning it as a token.
    ///
    /// The token length is the number of bytes occupied by the comment,
    /// including the end symbol. An unterminated comment extends to the end
    /// of `input`.
    ///
    /// The token location does not hold an absolute position. Its `line` is
    /// the number of line breaks inside the comment. Its `column` is the
    /// column following the comment if the comment spans several lines, and
    /// otherwise the number of columns the comment occupies. This is the form
    /// expected by `Location::shift_down`.
    ///
    pub fn parse(input: &str) -> Token {
        // Look for the end symbol after the start symbol only, so that `/*/`
        // is not mistaken for a complete comment.
        let body_start = if input.starts_with(Self::START) {
            Self::START.len()
        } else {
            0
        };
        let length = match input[body_start..].find(Self::END) {
            Some(position) => body_start + position + Self::END.len(),
            None => input.len(),
        };

        let comment = &input[..length];
        let lines = comment.matches('\n').count();
        let column = match comment.rfind('\n') {
            // The number of bytes after the last line break, plus one, since
            // columns start from 1.
            Some(new_line) => length - new_line,
            None => length,
        };

        Token::new(Location::new(lines, column), Lexeme::Comment, length)
    }
}
@@ -0,0 +1,30 @@
//!
//! The single-line comment lexeme.
//!
use crate::yul::lexer::token::lexeme::Lexeme;
use crate::yul::lexer::token::location::Location;
use crate::yul::lexer::token::Token;
///
/// The single-line comment lexeme.
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Comment {}

impl Comment {
    /// The start symbol.
    pub const START: &'static str = "//";

    /// The end symbol.
    pub const END: &'static str = "\n";

    ///
    /// Parses the comment at the beginning of `input`, returning it as a token.
    ///
    /// The token length covers the comment and the line feed terminating it.
    /// The token location does not hold an absolute position; it tells the
    /// lexer to move one line down to column 1.
    ///
    /// If `input` contains no line feed, the comment extends to the end of
    /// `input`, and the location only shifts the column by the comment length.
    ///
    pub fn parse(input: &str) -> Token {
        match input.find(Self::END) {
            Some(position) => Token::new(
                Location::new(1, 1),
                Lexeme::Comment,
                position + Self::END.len(),
            ),
            // Do not report a line break or bytes that are not there.
            None => Token::new(Location::new(0, input.len()), Lexeme::Comment, input.len()),
        }
    }
}
@@ -0,0 +1,80 @@
//!
//! The identifier lexeme.
//!
use crate::yul::lexer::token::lexeme::keyword::Keyword;
use crate::yul::lexer::token::lexeme::Lexeme;
use crate::yul::lexer::token::location::Location;
use crate::yul::lexer::token::Token;
///
/// The identifier lexeme.
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Identifier {
/// The inner string, i.e. the identifier text as written in the source.
pub inner: String,
}
impl Identifier {
    ///
    /// Wraps the identifier text.
    ///
    pub fn new(inner: String) -> Self {
        Self { inner }
    }

    ///
    /// Parses the identifier at the beginning of `input`, returning it as a token.
    ///
    /// Returns `None` if `input` does not begin with an identifier character.
    /// If the whole identifier text is a keyword, the keyword token is returned
    /// instead. The location of an identifier token is a placeholder.
    ///
    pub fn parse(input: &str) -> Option<Token> {
        if !input.starts_with(Self::can_begin) {
            return None;
        }

        let name = match input.find(Self::cannot_continue) {
            Some(end) => &input[..end],
            None => input,
        };

        let token = Keyword::parse(name).unwrap_or_else(|| {
            Token::new(
                Location::new(0, name.len()),
                Lexeme::Identifier(Self::new(name.to_owned())),
                name.len(),
            )
        });
        Some(token)
    }

    ///
    /// Tells whether an identifier may start with `character`:
    /// an alphabetic character, `_` or `$`.
    ///
    pub fn can_begin(character: char) -> bool {
        matches!(character, '_' | '$') || character.is_alphabetic()
    }

    ///
    /// Tells whether `character` may appear after the first character of an
    /// identifier: anything that may begin one, a numeric character or `.`.
    ///
    pub fn can_continue(character: char) -> bool {
        character == '.' || character.is_numeric() || Self::can_begin(character)
    }

    ///
    /// The negation of `can_continue`, usable as a search pattern for the end
    /// of an identifier.
    ///
    pub fn cannot_continue(character: char) -> bool {
        let continues = Self::can_continue(character);
        !continues
    }
}
impl std::fmt::Display for Identifier {
    /// Writes the identifier text as is.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.inner.as_str())
    }
}
@@ -0,0 +1,158 @@
//!
//! The keyword lexeme.
//!
use crate::yul::lexer::token::lexeme::literal::boolean::Boolean as BooleanLiteral;
use crate::yul::lexer::token::lexeme::literal::Literal;
use crate::yul::lexer::token::lexeme::Lexeme;
use crate::yul::lexer::token::location::Location;
use crate::yul::lexer::token::Token;
///
/// The keyword lexeme.
///
/// `Keyword::parse` turns the `True` and `False` variants into boolean
/// literal lexemes rather than keyword lexemes.
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Keyword {
/// The `object` keyword.
Object,
/// The `code` keyword.
Code,
/// The `function` keyword.
Function,
/// The `let` keyword.
Let,
/// The `if` keyword.
If,
/// The `switch` keyword.
Switch,
/// The `case` keyword.
Case,
/// The `default` keyword.
Default,
/// The `for` keyword.
For,
/// The `break` keyword.
Break,
/// The `continue` keyword.
Continue,
/// The `leave` keyword.
Leave,
/// The `true` keyword.
True,
/// The `false` keyword.
False,
/// The `bool` keyword.
Bool,
/// The `int{N}` keyword, holding the number `N` parsed from its suffix.
Int(usize),
/// The `uint{N}` keyword, holding the number `N` parsed from its suffix.
Uint(usize),
}
impl Keyword {
    ///
    /// Parses the keyword, returning it as a token.
    ///
    /// `true` and `false` become boolean literal lexemes; every other keyword
    /// becomes a keyword lexeme. Returns `None` if `input` does not begin
    /// with a keyword, or if the textual form of the recognized keyword is
    /// not exactly as long as `input`. The token location is a placeholder.
    ///
    pub fn parse(input: &str) -> Option<Token> {
        let lexeme = Self::parse_keyword(input).map(|keyword| {
            BooleanLiteral::try_from(keyword)
                .map(|literal| Lexeme::Literal(Literal::Boolean(literal)))
                .unwrap_or_else(Lexeme::Keyword)
        })?;

        // Rejects inputs with trailing text after the keyword, as well as
        // non-canonical spellings such as an `int` suffix with leading zeros.
        let length = lexeme.to_string().len();
        (length == input.len()).then(|| Token::new(Location::new(0, length), lexeme, length))
    }

    ///
    /// Parses the keyword itself.
    ///
    /// Only the leading run of alphabetic and numeric characters is examined.
    ///
    fn parse_keyword(input: &str) -> Option<Self> {
        if !input.starts_with(Self::can_begin) {
            return None;
        }

        let word = input
            .find(Self::cannot_continue)
            .map_or(input, |end| &input[..end]);

        if let Some(Ok(bitlength)) = word.strip_prefix("int").map(str::parse::<usize>) {
            return Some(Self::Int(bitlength));
        }
        if let Some(Ok(bitlength)) = word.strip_prefix("uint").map(str::parse::<usize>) {
            return Some(Self::Uint(bitlength));
        }

        let keyword = match word {
            "object" => Self::Object,
            "code" => Self::Code,
            "function" => Self::Function,
            "let" => Self::Let,
            "if" => Self::If,
            "switch" => Self::Switch,
            "case" => Self::Case,
            "default" => Self::Default,
            "for" => Self::For,
            "break" => Self::Break,
            "continue" => Self::Continue,
            "leave" => Self::Leave,
            "true" => Self::True,
            "false" => Self::False,
            "bool" => Self::Bool,
            _ => return None,
        };
        Some(keyword)
    }

    ///
    /// Tells whether a keyword may start with `character`, i.e. whether it is
    /// alphabetic.
    ///
    pub fn can_begin(character: char) -> bool {
        char::is_alphabetic(character)
    }

    ///
    /// Tells whether `character` may appear after the first character of a
    /// keyword, i.e. whether it is alphabetic or numeric.
    ///
    pub fn can_continue(character: char) -> bool {
        character.is_numeric() || Self::can_begin(character)
    }

    ///
    /// The negation of `can_continue`, usable as a search pattern for the end
    /// of a keyword.
    ///
    pub fn cannot_continue(character: char) -> bool {
        let continues = Self::can_continue(character);
        !continues
    }
}
impl std::fmt::Display for Keyword {
    /// Writes the keyword the way it is spelled in the source code.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            // The sized integer types carry their bit length as a suffix.
            Self::Int(bitlength) => return write!(f, "int{bitlength}"),
            Self::Uint(bitlength) => return write!(f, "uint{bitlength}"),
            Self::Object => "object",
            Self::Code => "code",
            Self::Function => "function",
            Self::Let => "let",
            Self::If => "if",
            Self::Switch => "switch",
            Self::Case => "case",
            Self::Default => "default",
            Self::For => "for",
            Self::Break => "break",
            Self::Continue => "continue",
            Self::Leave => "leave",
            Self::True => "true",
            Self::False => "false",
            Self::Bool => "bool",
        };
        f.write_str(name)
    }
}
@@ -0,0 +1,66 @@
//!
//! The boolean literal lexeme.
//!
use serde::Deserialize;
use serde::Serialize;
use crate::yul::lexer::token::lexeme::keyword::Keyword;
///
/// The boolean literal lexeme.
///
/// Can be obtained from the `true` / `false` keywords via `TryFrom<Keyword>`
/// and from a native `bool` via `From<bool>`.
///
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Boolean {
/// Created from the `false` keyword.
False,
/// Created from the `true` keyword.
True,
}
impl Boolean {
///
/// Creates a `false` value.
///
pub fn r#false() -> Self {
Self::False
}
///
/// Creates a `true` value.
///
pub fn r#true() -> Self {
Self::True
}
}
impl TryFrom<Keyword> for Boolean {
    /// Any keyword other than `true` and `false` is handed back unchanged.
    type Error = Keyword;

    /// Converts the `true` and `false` keywords into the matching literal.
    fn try_from(keyword: Keyword) -> Result<Self, Self::Error> {
        match keyword {
            Keyword::True => Ok(Self::True),
            Keyword::False => Ok(Self::False),
            other => Err(other),
        }
    }
}
impl From<bool> for Boolean {
    /// Maps a native boolean onto the matching literal.
    fn from(value: bool) -> Self {
        match value {
            true => Self::True,
            false => Self::False,
        }
    }
}
impl std::fmt::Display for Boolean {
    /// Writes the literal the way it is spelled in the source code.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::True => "true",
            Self::False => "false",
        })
    }
}
@@ -0,0 +1,118 @@
//!
//! The integer literal lexeme.
//!
use serde::Deserialize;
use serde::Serialize;
use crate::yul::lexer::token::lexeme::Lexeme;
use crate::yul::lexer::token::lexeme::Literal;
use crate::yul::lexer::token::location::Location;
use crate::yul::lexer::token::Token;
///
/// The integer literal lexeme.
///
/// Both variants keep the literal as source text; `Integer::parse` does not
/// convert it to a number.
///
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Integer {
/// An integer literal, like `42`.
Decimal {
/// The inner literal contents.
inner: String,
},
/// A hexadecimal literal, like `0xffff`.
Hexadecimal {
/// The inner literal contents. `Integer::parse` stores them together
/// with the `0x` prefix.
inner: String,
},
}
impl Integer {
///
/// Creates a decimal value.
///
pub fn new_decimal(inner: String) -> Self {
Self::Decimal { inner }
}
///
/// Creates a hexadecimal value.
///
pub fn new_hexadecimal(inner: String) -> Self {
Self::Hexadecimal { inner }
}
///
/// Parses the value from the source code slice.
///
pub fn parse(input: &str) -> Option<Token> {
let (value, length) = if let Some(body) = input.strip_prefix("0x") {
let end = body
.find(Self::cannot_continue_hexadecimal)
.unwrap_or(body.len());
let length = "0x".len() + end;
let value = Self::new_hexadecimal(input[..length].to_owned());
(value, length)
} else if input.starts_with(Self::can_begin_decimal) {
let end = input
.find(Self::cannot_continue_decimal)
.unwrap_or(input.len());
let length = end;
let value = Self::new_decimal(input[..length].to_owned());
(value, length)
} else {
return None;
};
let token = Token::new(
Location::new(0, length),
Lexeme::Literal(Literal::Integer(value)),
length,
);
Some(token)
}
///
/// Checks whether the character can begin a decimal number.
///
pub fn can_begin_decimal(character: char) -> bool {
Self::can_continue_decimal(character)
}
///
/// Checks whether the character can continue a decimal number.
///
pub fn can_continue_decimal(character: char) -> bool {
character.is_digit(era_compiler_common::BASE_DECIMAL)
}
///
/// Checks whether the character cannot continue a decimal number.
///
pub fn cannot_continue_decimal(character: char) -> bool {
!Self::can_continue_decimal(character)
}
///
/// Checks whether the character can continue a hexadecimal number.
///
pub fn can_continue_hexadecimal(character: char) -> bool {
character.is_digit(era_compiler_common::BASE_HEXADECIMAL)
}
///
/// Checks whether the character cannot continue a hexadecimal number.
///
pub fn cannot_continue_hexadecimal(character: char) -> bool {
!Self::can_continue_hexadecimal(character)
}
}
impl std::fmt::Display for Integer {
    /// Writes the stored literal text, whichever the base is.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (Self::Decimal { inner } | Self::Hexadecimal { inner }) = self;
        f.write_str(inner)
    }
}
@@ -0,0 +1,37 @@
//!
//! The literal lexeme.
//!
pub mod boolean;
pub mod integer;
pub mod string;
use serde::Deserialize;
use serde::Serialize;
use self::boolean::Boolean;
use self::integer::Integer;
use self::string::String;
///
/// The literal lexeme.
///
/// Wraps one of the literal kinds defined in the submodules of this module.
///
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Literal {
/// A boolean literal, like `true`, or `false`.
Boolean(Boolean),
/// An integer literal, like `42`, or `0xff`.
Integer(Integer),
/// A string literal, like `"message"`. Note that `String` here is the
/// string literal type of this module, not the standard library string.
String(String),
}
impl std::fmt::Display for Literal {
    /// Writes the wrapped literal using its own textual representation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let inner: &dyn std::fmt::Display = match self {
            Self::Boolean(inner) => inner,
            Self::Integer(inner) => inner,
            Self::String(inner) => inner,
        };
        write!(f, "{inner}")
    }
}
@@ -0,0 +1,93 @@
//!
//! The string literal lexeme.
//!
use serde::Deserialize;
use serde::Serialize;
use crate::yul::lexer::token::lexeme::Lexeme;
use crate::yul::lexer::token::lexeme::Literal;
use crate::yul::lexer::token::location::Location;
use crate::yul::lexer::token::Token;
///
/// The string literal lexeme.
///
/// Shadows the standard library `String` within this module, which is why
/// the standard type is referred to by its full path here.
///
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct String {
/// The inner string contents.
pub inner: std::string::String,
/// Whether the string is hexadecimal, i.e. written as `hex"..."`.
pub is_hexadecimal: bool,
}
impl String {
    ///
    /// Creates a string literal value.
    ///
    pub fn new(inner: ::std::string::String, is_hexadecimal: bool) -> Self {
        Self {
            inner,
            is_hexadecimal,
        }
    }

    ///
    /// Parses the value from the source code slice.
    ///
    /// The literal must start at the beginning of `input`, either with `"` or
    /// with `hex"`, and extends to the first `"` that is not preceded by a
    /// backslash. Escape sequences are not interpreted: the backslash and the
    /// character following it are stored as they are.
    ///
    /// The token length is the number of bytes occupied by the literal,
    /// including the prefix and the quotes. The token location is a
    /// placeholder.
    ///
    /// Returns `None` if `input` does not begin with a string literal, or if
    /// the literal is not terminated.
    ///
    pub fn parse(input: &str) -> Option<Token> {
        let (body, prefix_length, is_hex_string) = if let Some(body) = input.strip_prefix('"') {
            (body, '"'.len_utf8(), false)
        } else if let Some(body) = input.strip_prefix(r#"hex""#) {
            (body, r#"hex""#.len(), true)
        } else {
            return None;
        };

        let mut string = std::string::String::new();
        // Byte indexes are tracked alongside the characters, so that the
        // length stays correct for non-ASCII contents.
        let mut characters = body.char_indices();
        let body_length = loop {
            // Running out of characters means the closing quote is missing.
            let (index, character) = characters.next()?;
            match character {
                '"' => break index + character.len_utf8(),
                '\\' => {
                    // The escaped character never terminates the literal.
                    let (_, escaped) = characters.next()?;
                    string.push(character);
                    string.push(escaped);
                }
                _ => string.push(character),
            }
        };

        let length = prefix_length + body_length;
        let literal = Self::new(string, is_hex_string);

        Some(Token::new(
            Location::new(0, length),
            Lexeme::Literal(Literal::String(literal)),
            length,
        ))
    }
}
impl std::fmt::Display for String {
    /// Writes the literal contents without the surrounding quotes.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.inner.as_str())
    }
}
@@ -0,0 +1,46 @@
//!
//! The lexeme.
//!
pub mod comment;
pub mod identifier;
pub mod keyword;
pub mod literal;
pub mod symbol;
use self::identifier::Identifier;
use self::keyword::Keyword;
use self::literal::Literal;
use self::symbol::Symbol;
///
/// The lexeme.
///
/// The payload-free variants mark the pieces of input that carry no value:
/// comments and the end of the file.
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Lexeme {
/// The keyword lexeme.
Keyword(Keyword),
/// The symbol lexeme.
Symbol(Symbol),
/// The identifier lexeme.
Identifier(Identifier),
/// The literal lexeme.
Literal(Literal),
/// The comment lexeme. The comment text is not stored.
Comment,
/// The end-of-file lexeme.
EndOfFile,
}
impl std::fmt::Display for Lexeme {
    /// Writes the wrapped lexeme; a comment produces no output and the end
    /// of the file is written as `EOF`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let inner: &dyn std::fmt::Display = match self {
            Self::Comment => return Ok(()),
            Self::EndOfFile => return write!(f, "EOF"),
            Self::Keyword(inner) => inner,
            Self::Symbol(inner) => inner,
            Self::Identifier(inner) => inner,
            Self::Literal(inner) => inner,
        };
        write!(f, "{inner}")
    }
}
@@ -0,0 +1,74 @@
//!
//! The symbol lexeme.
//!
use crate::yul::lexer::token::lexeme::Lexeme;
use crate::yul::lexer::token::location::Location;
use crate::yul::lexer::token::Token;
///
/// The symbol lexeme.
///
/// Covers the punctuation recognized by `Symbol::parse`: two two-character
/// symbols and six single-character ones.
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Symbol {
/// The `:=` symbol.
Assignment,
/// The `->` symbol.
Arrow,
/// The `{` symbol.
BracketCurlyLeft,
/// The `}` symbol.
BracketCurlyRight,
/// The `(` symbol.
ParenthesisLeft,
/// The `)` symbol.
ParenthesisRight,
/// The `,` symbol.
Comma,
/// The `:` symbol.
Colon,
}
impl Symbol {
    ///
    /// Parses the symbol, returning it as a token.
    ///
    /// The two-character symbols are tried before the single-character ones,
    /// so `:=` is never split into `:` and a stray `=`. Returns `None` if
    /// `input` does not begin with a symbol, including when it is empty. The
    /// token location is a placeholder.
    ///
    pub fn parse(input: &str) -> Option<Token> {
        // Prefix checks are used instead of slicing by byte count, which
        // panics on short inputs and inside multi-byte characters.
        let (symbol, length) = if input.starts_with(":=") {
            (Self::Assignment, 2)
        } else if input.starts_with("->") {
            (Self::Arrow, 2)
        } else {
            let symbol = match input.chars().next()? {
                '{' => Self::BracketCurlyLeft,
                '}' => Self::BracketCurlyRight,
                '(' => Self::ParenthesisLeft,
                ')' => Self::ParenthesisRight,
                ',' => Self::Comma,
                ':' => Self::Colon,
                _ => return None,
            };
            (symbol, 1)
        };

        Some(Token::new(
            Location::new(0, length),
            Lexeme::Symbol(symbol),
            length,
        ))
    }
}
impl std::fmt::Display for Symbol {
    /// Writes the symbol the way it is spelled in the source code.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = match self {
            Self::Assignment => ":=",
            Self::Arrow => "->",
            Self::BracketCurlyLeft => "{",
            Self::BracketCurlyRight => "}",
            Self::ParenthesisLeft => "(",
            Self::ParenthesisRight => ")",
            Self::Comma => ",",
            Self::Colon => ":",
        };
        f.write_str(text)
    }
}
@@ -0,0 +1,65 @@
//!
//! The lexical token location.
//!
use serde::Deserialize;
use serde::Serialize;
///
/// The token location in the source code file.
///
/// `Eq` is derived here, whereas `PartialEq` is implemented by hand further
/// down in this file.
///
#[derive(Debug, Serialize, Deserialize, Clone, Copy, Eq)]
pub struct Location {
/// The line number, starting from 1.
pub line: usize,
/// The column number, starting from 1.
pub column: usize,
}
impl Default for Location {
    /// The default location is the very beginning of the file: line 1, column 1.
    fn default() -> Self {
        Self::new(1, 1)
    }
}
impl Location {
///
/// Creates a default location.
///
pub fn new(line: usize, column: usize) -> Self {
Self { line, column }
}
///
/// Mutates the location by shifting the original one down by `lines` and
/// setting the column to `column`.
///
pub fn shift_down(&mut self, lines: usize, column: usize) {
if lines == 0 {
self.shift_right(column);
return;
}
self.line += lines;
self.column = column;
}
///
/// Mutates the location by shifting the original one rightward by `columns`.
///
pub fn shift_right(&mut self, columns: usize) {
self.column += columns;
}
}
impl PartialEq for Location {
    /// Two locations are equal if both their lines and their columns match.
    fn eq(&self, other: &Self) -> bool {
        (self.line, self.column) == (other.line, other.column)
    }
}
impl std::fmt::Display for Location {
    /// Writes the location as `line:column`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { line, column } = self;
        write!(f, "{line}:{column}")
    }
}
@@ -0,0 +1,43 @@
//!
//! The token.
//!
pub mod lexeme;
pub mod location;
use self::lexeme::Lexeme;
use self::location::Location;
///
/// The token.
///
/// Contains a lexeme and its location.
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
/// The token location. The lexeme parsers fill it with a placeholder, which
/// the lexer replaces with the actual position; for comment tokens the lexer
/// reads it as a line and column shift instead.
pub location: Location,
/// The lexeme.
pub lexeme: Lexeme,
/// The token length: the amount by which the lexer advances its offset
/// into the source code after producing the token.
pub length: usize,
}
impl Token {
///
/// A shortcut constructor.
///
pub fn new(location: Location, lexeme: Lexeme, length: usize) -> Self {
Self {
location,
lexeme,
length,
}
}
}
impl std::fmt::Display for Token {
    /// Writes the token as its location and its lexeme, separated by a colon.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            location, lexeme, ..
        } = self;
        write!(f, "{location}:{lexeme}")
    }
}