diff --git a/Cargo.lock b/Cargo.lock index 4be0063..bb7b194 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4102,6 +4102,7 @@ dependencies = [ "alloy-sol-types", "anyhow", "indoc", + "regex", "revive-common", "revive-dt-common", "semver 1.0.26", diff --git a/Cargo.toml b/Cargo.toml index e8b3b3b..1a252c8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ foundry-compilers-artifacts = { version = "0.18.0" } futures = { version = "0.3.31" } hex = "0.4.3" reqwest = { version = "0.12.15", features = ["json"] } +regex = { version = "1.11.1" } once_cell = "1.21" semver = { version = "1.0", features = ["serde"] } serde = { version = "1.0", default-features = false, features = ["derive"] } diff --git a/crates/format/Cargo.toml b/crates/format/Cargo.toml index 1fe1ffc..f96b705 100644 --- a/crates/format/Cargo.toml +++ b/crates/format/Cargo.toml @@ -16,6 +16,7 @@ alloy-primitives = { workspace = true } alloy-sol-types = { workspace = true } anyhow = { workspace = true } tracing = { workspace = true } +regex = { workspace = true } semver = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } diff --git a/crates/format/src/semantic_tests/function_parser.rs b/crates/format/src/semantic_tests/function_parser.rs new file mode 100644 index 0000000..c46fb78 --- /dev/null +++ b/crates/format/src/semantic_tests/function_parser.rs @@ -0,0 +1,649 @@
+//! Hand-written parser for the function-call lines of semantic tests.
+//!
+//! Every syntactic element implements the private `Parse` trait and consumes bytes from a
+//! seekable reader. Being able to seek is what makes look-ahead possible: a speculative parse
+//! always rewinds the stream to the position it started from.
+
+use std::io::{Read, Seek};
+
+use anyhow::{Result, anyhow};
+
+use revive_dt_common::define_wrapper_type;
+
+/// Byte-level helpers for readers that can also seek.
+trait ReadExt: Read + Seek {
+    /// Reads the next byte, returning `None` once the end of the stream is reached.
+    ///
+    /// Interrupted reads are retried.
+    fn next_byte(&mut self) -> std::io::Result<Option<u8>> {
+        let mut byte = [0u8; 1];
+        loop {
+            match self.read(&mut byte) {
+                Ok(0) => return Ok(None),
+                Ok(_) => return Ok(Some(byte[0])),
+                Err(error) if error.kind() == std::io::ErrorKind::Interrupted => continue,
+                Err(error) => return Err(error),
+            }
+        }
+    }
+
+    /// Appends bytes to `buf` for as long as `callback` returns `true`.
+    ///
+    /// The first rejected byte is left in the stream so that the next read sees it again.
+    fn read_while(
+        &mut self,
+        buf: &mut Vec<u8>,
+        callback: impl Fn(&u8) -> bool + Clone,
+    ) -> std::io::Result<()> {
+        while let Some(byte) = self.next_byte()? {
+            if callback(&byte) {
+                buf.push(byte);
+            } else {
+                // Step back over the rejected byte so that it is not consumed.
+                self.seek(std::io::SeekFrom::Current(-1))?;
+                break;
+            }
+        }
+        Ok(())
+    }
+
+    /// Discards bytes for as long as `callback` returns `true`.
+    ///
+    /// The first rejected byte is left in the stream.
+    fn skip_while(&mut self, callback: impl Fn(&u8) -> bool + Clone) -> std::io::Result<()> {
+        while let Some(byte) = self.next_byte()? {
+            if !callback(&byte) {
+                self.seek(std::io::SeekFrom::Current(-1))?;
+                break;
+            }
+        }
+        Ok(())
+    }
+}
+
+impl<R> ReadExt for R where R: Read + Seek {}
+
+/// A syntactic element that can be parsed from a byte stream.
+trait Parse: Sized {
+    /// Parses `Self`, consuming the bytes that make it up.
+    ///
+    /// On failure the stream position is unspecified; use [`Parse::peek`] to look ahead.
+    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self>;
+
+    /// Parses `Self` and then rewinds the stream to where it was before the call.
+    fn peek(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+        let pos = token_stream.stream_position()?;
+        let this = Self::parse(token_stream);
+        // Rewind whether or not parsing succeeded.
+        token_stream.seek(std::io::SeekFrom::Start(pos))?;
+        this
+    }
+}
+
+/// Implements [`Parse`] for tuples by parsing every element in order.
+///
+/// The macro recurses on the tail of the identifier list, so a single invocation covers every
+/// tuple arity from the full list down to one element.
+macro_rules! impl_parse_for_tuple {
+    ($first_ident: ident $(, $($ident: ident),*)?) => {
+        impl<$first_ident: Parse, $($($ident: Parse),*)?> Parse for ($first_ident, $($($ident),*)?) {
+            fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+                Ok((
+                    $first_ident::parse(token_stream)?,
+                    $(
+                        $($ident::parse(token_stream)?),*
+                    )?
+                ))
+            }
+        }
+
+        $(impl_parse_for_tuple!( $($ident),* );)?
+    };
+    () => {}
+}
+
+impl_parse_for_tuple!(
+    A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z
+);
+
+impl Parse for String {
+    /// Reads a run of ASCII alphanumeric and whitespace bytes and returns it trimmed.
+    ///
+    /// Fails if the run contains nothing but whitespace.
+    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+        let mut buffer = Vec::new();
+        token_stream.read_while(&mut buffer, |char| {
+            char.is_ascii_alphanumeric() || char.is_ascii_whitespace()
+        })?;
+        let string = String::from_utf8(buffer)?;
+        let trimmed = string.trim();
+        if trimmed.is_empty() {
+            Err(anyhow!("Parsing string resulted in an empty string"))
+        } else {
+            Ok(trimmed.to_owned())
+        }
+    }
+}
+
+impl Parse for u64 {
+    /// Skips leading whitespace and parses the run of ASCII digits that follows.
+    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+        token_stream.skip_while(u8::is_ascii_whitespace)?;
+
+        let mut buffer = Vec::new();
+        token_stream.read_while(&mut buffer, u8::is_ascii_digit)?;
+        let string = String::from_utf8(buffer)?;
+        string.parse().map_err(Into::into)
+    }
+}
+
+/// A parsed function-call line together with the expectations that follow it.
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+struct Function {
+    /// Name of the called function.
+    ident: FunctionIdent,
+    /// Parenthesized list of argument types.
+    arg_types: Parenthesized<FunctionArgumentType>,
+    colon: ColonToken,
+    /// Comma-separated call arguments; may be empty.
+    function_arguments: Vec<FunctionArgument>,
+    arrow_token: ArrowToken,
+    /// Comma-separated expected return values.
+    function_returns: Vec<FunctionReturn>,
+    /// `gas ...` annotations that follow the call.
+    functions_options: Vec<PostFunctionOptions>,
+}
+
+impl Parse for Function {
+    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+        Ok(Self {
+            ident: Parse::parse(token_stream)?,
+            arg_types: Parse::parse(token_stream)?,
+            colon: Parse::parse(token_stream)?,
+            function_arguments: {
+                let mut arguments = Vec::default();
+                // The argument list may be empty, so only enter the comma-separated loop once
+                // a first argument has been seen. Looping unconditionally would never
+                // terminate for a call without arguments.
+                if FunctionArgument::peek(token_stream).is_ok() {
+                    arguments.push(FunctionArgument::parse(token_stream)?);
+                    while CommaToken::peek(token_stream).is_ok() {
+                        CommaToken::parse(token_stream)?;
+                        arguments.push(FunctionArgument::parse(token_stream)?);
+                    }
+                }
+                arguments
+            },
+            arrow_token: Parse::parse(token_stream)?,
+            function_returns: {
+                let mut returns = Vec::default();
+                loop {
+                    // Every return value after the first one is introduced by a comma.
+                    if !returns.is_empty() {
+                        if CommaToken::peek(token_stream).is_err() {
+                            break;
+                        }
+                        CommaToken::parse(token_stream)?;
+                    }
+
+                    let mut buf = Vec::new();
+                    token_stream
+                        .read_while(&mut buf, |byte| *byte != b'\n' && *byte != b',')?;
+                    // Consume a line terminator; a comma is left for the next iteration.
+                    if NewLineToken::peek(token_stream).is_ok() {
+                        NewLineToken::parse(token_stream)?;
+                    }
+                    let string = String::from_utf8(buf)?;
+                    let trimmed = string.trim();
+                    if trimmed.is_empty() {
+                        break;
+                    }
+                    returns.push(FunctionReturn(trimmed.to_string()));
+                }
+                returns
+            },
+            functions_options: {
+                let mut options = Vec::default();
+                while PostFunctionOptions::peek(token_stream).is_ok() {
+                    options.push(PostFunctionOptions::parse(token_stream)?);
+                }
+                options
+            },
+        })
+    }
+}
+
+/// A comma-separated list of `T` enclosed in parentheses; the list may be empty.
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+struct Parenthesized<T>(pub Vec<T>);
+
+impl<T> Parse for Parenthesized<T>
+where
+    T: Parse,
+{
+    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+        OpenParenToken::parse(token_stream)?;
+
+        let mut inner = Vec::new();
+        while CloseParenToken::peek(token_stream).is_err() {
+            inner.push(T::parse(token_stream)?);
+            // An element is followed by either the closing parenthesis or a comma. A comma
+            // directly before the closing parenthesis is accepted.
+            if CloseParenToken::peek(token_stream).is_err() {
+                CommaToken::parse(token_stream)?;
+            }
+        }
+
+        CloseParenToken::parse(token_stream)?;
+
+        Ok(Self(inner))
+    }
+}
+
+define_wrapper_type!(
+    /// A wrapper type for a function identifier token.
+    #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+    struct FunctionIdent(String);
+);
+
+impl Parse for FunctionIdent {
+    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+        Parse::parse(token_stream).map(Self)
+    }
+}
+
+define_wrapper_type!(
+    /// A wrapper type for a function argument type token.
+    #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+    struct FunctionArgumentType(String);
+);
+
+impl Parse for FunctionArgumentType {
+    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+        Parse::parse(token_stream).map(Self)
+    }
+}
+
+define_wrapper_type!(
+    /// A wrapper type for a function argument token.
+    #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+    struct FunctionArgument(String);
+);
+
+impl Parse for FunctionArgument {
+    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+        Parse::parse(token_stream).map(Self)
+    }
+}
+
+define_wrapper_type!(
+    /// A wrapper type for a function return token.
+    #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+    struct FunctionReturn(String);
+);
+
+impl Parse for FunctionReturn {
+    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+        Parse::parse(token_stream).map(Self)
+    }
+}
+
+/// A token made of the single character `CHAR`, optionally preceded by ASCII whitespace.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
+struct SingleCharToken<const CHAR: char>;
+
+impl<const CHAR: char> Parse for SingleCharToken<CHAR> {
+    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+        let mut buf = [0; 1];
+        loop {
+            // A zero-length read means the stream ended before the token was found.
+            if token_stream.read(&mut buf)? == 0 {
+                return Err(anyhow!("Unexpected end of input - expected {}", CHAR));
+            }
+            let [byte] = buf;
+            // The comparison is checked first so that whitespace tokens can match themselves.
+            if byte == CHAR as u8 {
+                return Ok(Self);
+            } else if byte.is_ascii_whitespace() {
+                continue;
+            } else {
+                return Err(anyhow!(
+                    "Invalid character encountered {} expected {}",
+                    byte as char,
+                    CHAR
+                ));
+            }
+        }
+    }
+}
+
+// Bit of a hack, but I do this because Rust analyzer doesn't like `SingleCharToken<'>'>` and it
+// messes up the syntax highlighting.
+const GT_CHAR: char = '>';
+
+type ColonToken = SingleCharToken<':'>;
+type CommaToken = SingleCharToken<','>;
+type OpenParenToken = SingleCharToken<'('>;
+type CloseParenToken = SingleCharToken<')'>;
+type DashToken = SingleCharToken<'-'>;
+type GtToken = SingleCharToken<{ GT_CHAR }>;
+type NewLineToken = SingleCharToken<'\n'>;
+type SpaceToken = SingleCharToken<' '>;
+type ArrowToken = (DashToken, GtToken);
+
+/// Declares unit structs that parse as a fixed string, ignoring leading ASCII whitespace and
+/// then consuming `SpaceToken`s for as long as they match.
+macro_rules! string_literal_token {
+    (
+        $($ty_ident: ident => $str: expr),* $(,)?
+    ) => {
+        $(
+            #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
+            pub struct $ty_ident;
+
+            impl Parse for $ty_ident {
+                fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+                    token_stream.skip_while(u8::is_ascii_whitespace)?;
+
+                    let mut buffer = [0; $str.len()];
+                    // `read` may return fewer bytes than requested; `read_exact` either
+                    // fills the whole buffer or fails.
+                    token_stream.read_exact(&mut buffer)?;
+                    while SpaceToken::peek(token_stream).is_ok() {
+                        SpaceToken::parse(token_stream)?;
+                    }
+                    if $str.as_bytes() == buffer {
+                        Ok(Self)
+                    } else {
+                        Err(anyhow!("Invalid string - expected {} but got {:?}", $str, str::from_utf8(&buffer)))
+                    }
+                }
+            }
+        )*
+    };
+}
+string_literal_token! {
+    GasLiteralStringToken => "gas",
+    IrOptimizedLiteralStringToken => "irOptimized",
+    LegacyLiteralStringToken => "legacy",
+    LegacyOptimizedLiteralStringToken => "legacyOptimized",
+    CodeLiteralStringToken => "code",
+}
+
+/// The `gas ...` annotations that may follow a function call.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum PostFunctionOptions {
+    IrOptimizedGasOption(IrOptimizedGasOption),
+    IrOptimizedGasCodeOption(IrOptimizedGasCodeOption),
+    LegacyGasOption(LegacyGasOption),
+    LegacyGasCodeOption(LegacyGasCodeOption),
+    LegacyOptimizedGasOption(LegacyOptimizedGasOption),
+    LegacyOptimizedGasCodeOption(LegacyOptimizedGasCodeOption),
+}
+
+impl Parse for PostFunctionOptions {
+    /// Tries every option kind in turn and parses the first one that matches.
+    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+        if IrOptimizedGasOption::peek(token_stream).is_ok() {
+            IrOptimizedGasOption::parse(token_stream).map(Self::IrOptimizedGasOption)
+        } else if IrOptimizedGasCodeOption::peek(token_stream).is_ok() {
+            IrOptimizedGasCodeOption::parse(token_stream).map(Self::IrOptimizedGasCodeOption)
+        } else if LegacyGasOption::peek(token_stream).is_ok() {
+            LegacyGasOption::parse(token_stream).map(Self::LegacyGasOption)
+        } else if LegacyGasCodeOption::peek(token_stream).is_ok() {
+            LegacyGasCodeOption::parse(token_stream).map(Self::LegacyGasCodeOption)
+        } else if LegacyOptimizedGasOption::peek(token_stream).is_ok() {
+            LegacyOptimizedGasOption::parse(token_stream).map(Self::LegacyOptimizedGasOption)
+        } else if LegacyOptimizedGasCodeOption::peek(token_stream).is_ok() {
+            LegacyOptimizedGasCodeOption::parse(token_stream)
+                .map(Self::LegacyOptimizedGasCodeOption)
+        } else {
+            Err(anyhow!("Failed to parse post function options"))
+        }
+    }
+}
+
+/// `gas irOptimized: <value>`
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
+struct IrOptimizedGasOption {
+    pub gas_token: GasLiteralStringToken,
+    pub gas_option: IrOptimizedLiteralStringToken,
+    pub colon: ColonToken,
+    pub value: u64,
+}
+
+impl Parse for IrOptimizedGasOption {
+    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+        Ok(Self {
+            gas_token: Parse::parse(token_stream)?,
+            gas_option: Parse::parse(token_stream)?,
+            colon: Parse::parse(token_stream)?,
+            value: Parse::parse(token_stream)?,
+        })
+    }
+}
+
+/// `gas irOptimized code: <value>`
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
+struct IrOptimizedGasCodeOption {
+    pub gas_token: GasLiteralStringToken,
+    pub gas_option: IrOptimizedLiteralStringToken,
+    pub code: CodeLiteralStringToken,
+    pub colon: ColonToken,
+    pub value: u64,
+}
+
+impl Parse for IrOptimizedGasCodeOption {
+    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+        Ok(Self {
+            gas_token: Parse::parse(token_stream)?,
+            gas_option: Parse::parse(token_stream)?,
+            code: Parse::parse(token_stream)?,
+            colon: Parse::parse(token_stream)?,
+            value: Parse::parse(token_stream)?,
+        })
+    }
+}
+
+/// `gas legacy: <value>`
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
+struct LegacyGasOption {
+    pub gas_token: GasLiteralStringToken,
+    pub gas_option: LegacyLiteralStringToken,
+    pub colon: ColonToken,
+    pub value: u64,
+}
+
+impl Parse for LegacyGasOption {
+    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+        Ok(Self {
+            gas_token: Parse::parse(token_stream)?,
+            gas_option: Parse::parse(token_stream)?,
+            colon: Parse::parse(token_stream)?,
+            value: Parse::parse(token_stream)?,
+        })
+    }
+}
+
+/// `gas legacy code: <value>`
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
+struct LegacyGasCodeOption {
+    pub gas_token: GasLiteralStringToken,
+    pub gas_option: LegacyLiteralStringToken,
+    pub code: CodeLiteralStringToken,
+    pub colon: ColonToken,
+    pub value: u64,
+}
+
+impl Parse for LegacyGasCodeOption {
+    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+        Ok(Self {
+            gas_token: Parse::parse(token_stream)?,
+            gas_option: Parse::parse(token_stream)?,
+            code: Parse::parse(token_stream)?,
+            colon: Parse::parse(token_stream)?,
+            value: Parse::parse(token_stream)?,
+        })
+    }
+}
+
+/// `gas legacyOptimized: <value>`
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
+struct LegacyOptimizedGasOption {
+    pub gas_token: GasLiteralStringToken,
+    pub gas_option: LegacyOptimizedLiteralStringToken,
+    pub colon: ColonToken,
+    pub value: u64,
+}
+
+impl Parse for LegacyOptimizedGasOption {
+    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+        Ok(Self {
+            gas_token: Parse::parse(token_stream)?,
+            gas_option: Parse::parse(token_stream)?,
+            colon: Parse::parse(token_stream)?,
+            value: Parse::parse(token_stream)?,
+        })
+    }
+}
+
+/// `gas legacyOptimized code: <value>`
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
+struct LegacyOptimizedGasCodeOption {
+    pub gas_token: GasLiteralStringToken,
+    pub gas_option: LegacyOptimizedLiteralStringToken,
+    pub code: CodeLiteralStringToken,
+    pub colon: ColonToken,
+    pub value: u64,
+}
+
+impl Parse for LegacyOptimizedGasCodeOption {
+    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
+        Ok(Self {
+            gas_token: Parse::parse(token_stream)?,
+            gas_option: Parse::parse(token_stream)?,
+            code: Parse::parse(token_stream)?,
+            colon: Parse::parse(token_stream)?,
+            value: Parse::parse(token_stream)?,
+        })
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::io::Cursor;
+
+    use indoc::indoc;
+
+    use super::*;
+
+    #[test]
+    fn complex_function_can_be_parsed() {
+        // Arrange
+        let string = indoc!(
+            r#"
+            myFunction(uint256, uint64,
+            )
+            :
+            1, 2
+            , 3
+            -> 1, 2, 3, 4
+            gas irOptimized: 135499
+            gas legacy: 137095
+            gas legacyOptimized: 135823
+            gas irOptimized code: 135499
+            gas legacy code: 137095
+            gas legacyOptimized code: 135823
+            "#
+        );
+        let mut token_stream = Cursor::new(string);
+
+        // Act
+        let function = Function::parse(&mut token_stream);
+
+        // Assert
+        let function = function.expect("Function parsing failed");
+        assert_eq!(
+            function,
+            Function {
+                ident: FunctionIdent::new("myFunction"),
+                arg_types: Parenthesized(vec![
+                    FunctionArgumentType::new("uint256"),
+                    FunctionArgumentType::new("uint64")
+                ]),
+                colon: ColonToken::default(),
+                function_arguments: vec![
+                    FunctionArgument::new("1"),
+                    FunctionArgument::new("2"),
+                    FunctionArgument::new("3")
+                ],
+                arrow_token: ArrowToken::default(),
+                function_returns: vec![
+                    FunctionReturn::new("1"),
+                    FunctionReturn::new("2"),
+                    FunctionReturn::new("3"),
+                    FunctionReturn::new("4"),
+                ],
+                functions_options: vec![
+                    PostFunctionOptions::IrOptimizedGasOption(IrOptimizedGasOption {
+                        gas_token: Default::default(),
+                        gas_option: Default::default(),
+                        colon: Default::default(),
+                        value: 135499
+                    }),
+                    PostFunctionOptions::LegacyGasOption(LegacyGasOption {
+                        gas_token: Default::default(),
+                        gas_option: Default::default(),
+                        colon: Default::default(),
+                        value: 137095
+                    }),
+                    PostFunctionOptions::LegacyOptimizedGasOption(LegacyOptimizedGasOption {
+                        gas_token: Default::default(),
+                        gas_option: Default::default(),
+                        colon: Default::default(),
+                        value: 135823
+                    }),
+                    PostFunctionOptions::IrOptimizedGasCodeOption(IrOptimizedGasCodeOption {
+                        gas_token: Default::default(),
+                        gas_option: Default::default(),
+                        code: Default::default(),
+                        colon: Default::default(),
+                        value: 135499
+                    }),
+                    PostFunctionOptions::LegacyGasCodeOption(LegacyGasCodeOption {
+                        gas_token: Default::default(),
+                        gas_option: Default::default(),
+                        code: Default::default(),
+                        colon: Default::default(),
+                        value: 137095
+                    }),
+                    PostFunctionOptions::LegacyOptimizedGasCodeOption(
+                        LegacyOptimizedGasCodeOption {
+                            gas_token: Default::default(),
+                            gas_option: Default::default(),
+                            code: Default::default(),
+                            colon: Default::default(),
+                            value: 135823
+                        }
+                    ),
+                ]
+            }
+        );
+    }
+
+    #[test]
+    fn function_without_arguments_can_be_parsed() {
+        // Arrange
+        let mut token_stream = Cursor::new("f(): -> 1\n");
+
+        // Act
+        let function = Function::parse(&mut token_stream);
+
+        // Assert
+        let function = function.expect("Function parsing failed");
+        assert!(function.arg_types.0.is_empty());
+        assert!(function.function_arguments.is_empty());
+        assert_eq!(function.function_returns, vec![FunctionReturn::new("1")]);
+        assert!(function.functions_options.is_empty());
+    }
+}
diff --git a/crates/format/src/semantic_tests/mod.rs b/crates/format/src/semantic_tests/mod.rs index fd30fce..f34f1f2 100644 --- a/crates/format/src/semantic_tests/mod.rs +++ b/crates/format/src/semantic_tests/mod.rs @@ -3,8 +3,10 @@ //! //! [`Metadata`]: crate::metadata::Metadata +mod function_parser; mod sections; mod test_configuration; +pub use function_parser::*; pub use sections::*; pub use test_configuration::*; diff --git a/crates/format/src/semantic_tests/sections.rs b/crates/format/src/semantic_tests/sections.rs index 57f031f..c1e7a4e 100644 --- a/crates/format/src/semantic_tests/sections.rs +++ b/crates/format/src/semantic_tests/sections.rs @@ -1,6 +1,7 @@ -use std::{collections::VecDeque, path::PathBuf}; +use std::{collections::VecDeque, path::PathBuf, sync::LazyLock}; use anyhow::{Context, Result, anyhow}; +use regex::Regex; use crate::semantic_tests::TestConfiguration; @@ -146,6 +147,9 @@ impl SemanticTestSection { /// section then no changes will be made to the current section and instead the line will be /// interpreted according to the rules of new sections. pub fn append_line(&mut self, line: impl AsRef) -> Result> { + static COMMENT_REPLACEMENT_REGEX: LazyLock = + LazyLock::new(|| Regex::new("#.*#$").unwrap()); + let line = line.as_ref(); if line.is_empty() { Ok(None) @@ -207,8 +211,9 @@ impl SemanticTestSection { .strip_prefix("//") .ok_or_else(|| anyhow!("Line doesn't contain test input prefix: {line}")) .map(str::trim)?; - if !line.starts_with('#') { - lines.push(line.to_owned()); + let line = COMMENT_REPLACEMENT_REGEX.replace_all(line, ""); + if !line.starts_with('#') && !line.chars().all(|char| char.is_whitespace()) { + lines.push(line.to_string()); } Ok(None) }