Compare commits

...

4 Commits

Author SHA1 Message Date
Omar Abdulla bb718fedfc Implement part of the function parser 2025-08-07 22:06:06 +03:00
Omar Abdulla ac8051b03e refactor logic into a module 2025-08-07 15:36:10 +03:00
Omar Abdulla 55322165ad Add parsing logic for the configuration 2025-08-07 14:12:11 +03:00
Omar Abdulla f7ca7a1de5 Initial parser backbone 2025-08-07 10:38:33 +03:00
10 changed files with 1160 additions and 5 deletions
Generated
+9
View File
@@ -2929,6 +2929,12 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "indoc"
version = "2.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd"
[[package]] [[package]]
name = "integer-sqrt" name = "integer-sqrt"
version = "0.1.5" version = "0.1.5"
@@ -4095,6 +4101,9 @@ dependencies = [
"alloy-primitives", "alloy-primitives",
"alloy-sol-types", "alloy-sol-types",
"anyhow", "anyhow",
"indoc",
"regex",
"revive-common",
"revive-dt-common", "revive-dt-common",
"semver 1.0.26", "semver 1.0.26",
"serde", "serde",
+2
View File
@@ -30,6 +30,7 @@ foundry-compilers-artifacts = { version = "0.18.0" }
futures = { version = "0.3.31" } futures = { version = "0.3.31" }
hex = "0.4.3" hex = "0.4.3"
reqwest = { version = "0.12.15", features = ["json"] } reqwest = { version = "0.12.15", features = ["json"] }
regex = { version = "1.11.1" }
once_cell = "1.21" once_cell = "1.21"
semver = { version = "1.0", features = ["serde"] } semver = { version = "1.0", features = ["serde"] }
serde = { version = "1.0", default-features = false, features = ["derive"] } serde = { version = "1.0", default-features = false, features = ["derive"] }
@@ -55,6 +56,7 @@ tracing-subscriber = { version = "0.3.19", default-features = false, features =
"env-filter", "env-filter",
] } ] }
indexmap = { version = "2.10.0", default-features = false } indexmap = { version = "2.10.0", default-features = false }
indoc = { version = "2.0.6", default-features = false }
# revive compiler # revive compiler
revive-solc-json-interface = { git = "https://github.com/paritytech/revive", rev = "3389865af7c3ff6f29a586d82157e8bc573c1a8e" } revive-solc-json-interface = { git = "https://github.com/paritytech/revive", rev = "3389865af7c3ff6f29a586d82157e8bc573c1a8e" }
@@ -1,4 +1,8 @@
use std::{borrow::Cow, collections::HashSet, path::PathBuf}; use std::{
borrow::Cow,
collections::HashSet,
path::{Path, PathBuf},
};
/// An iterator that finds files of a certain extension in the provided directory. You can think of /// An iterator that finds files of a certain extension in the provided directory. You can think of
/// this a glob pattern similar to: `${path}/**/*.md` /// this a glob pattern similar to: `${path}/**/*.md`
@@ -18,10 +22,10 @@ pub struct FilesWithExtensionIterator {
} }
impl FilesWithExtensionIterator { impl FilesWithExtensionIterator {
pub fn new(root_directory: PathBuf) -> Self { pub fn new(root_directory: impl AsRef<Path>) -> Self {
Self { Self {
allowed_extensions: Default::default(), allowed_extensions: Default::default(),
directories_to_search: vec![root_directory], directories_to_search: vec![root_directory.as_ref().to_path_buf()],
files_matching_allowed_extensions: Default::default(), files_matching_allowed_extensions: Default::default(),
} }
} }
+4
View File
@@ -16,9 +16,13 @@ alloy-primitives = { workspace = true }
alloy-sol-types = { workspace = true } alloy-sol-types = { workspace = true }
anyhow = { workspace = true } anyhow = { workspace = true }
tracing = { workspace = true } tracing = { workspace = true }
regex = { workspace = true }
semver = { workspace = true } semver = { workspace = true }
serde = { workspace = true, features = ["derive"] } serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true } serde_json = { workspace = true }
revive-common = { workspace = true }
[dev-dependencies] [dev-dependencies]
tokio = { workspace = true } tokio = { workspace = true }
indoc = { workspace = true }
+2
View File
@@ -6,3 +6,5 @@ pub mod input;
pub mod metadata; pub mod metadata;
pub mod mode; pub mod mode;
pub mod traits; pub mod traits;
mod semantic_tests;
+2 -2
View File
@@ -291,7 +291,7 @@ impl FromStr for ContractPathAndIdent {
type Err = anyhow::Error; type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> { fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut splitted_string = s.split(":").peekable(); let mut splitted_string = s.split(':').peekable();
let mut path = None::<String>; let mut path = None::<String>;
let mut identifier = None::<String>; let mut identifier = None::<String>;
loop { loop {
@@ -316,7 +316,7 @@ impl FromStr for ContractPathAndIdent {
contract_ident: ContractIdent::new(identifier), contract_ident: ContractIdent::new(identifier),
}), }),
(None, Some(path)) | (Some(path), None) => { (None, Some(path)) | (Some(path), None) => {
let Some(identifier) = path.split(".").next().map(ToOwned::to_owned) else { let Some(identifier) = path.split('.').next().map(ToOwned::to_owned) else {
anyhow::bail!("Failed to find identifier"); anyhow::bail!("Failed to find identifier");
}; };
Ok(Self { Ok(Self {
@@ -0,0 +1,584 @@
use std::io::{Read, Seek};
use anyhow::{Result, anyhow};
use revive_dt_common::define_wrapper_type;
/// An extension trait over [`Read`] + [`Seek`] adding conditional-consumption helpers used by
/// the parser: read (or skip) bytes while a predicate holds, leaving the stream positioned at
/// the first byte that failed the predicate.
trait ReadExt: Read + Seek {
    /// Appends bytes to `buf` for as long as `callback` returns `true`.
    ///
    /// The first byte that fails the predicate is NOT consumed: the stream is seeked back one
    /// byte so the caller can parse it. Reading stops silently at end-of-stream.
    fn read_while(
        &mut self,
        buf: &mut Vec<u8>,
        callback: impl Fn(&u8) -> bool + Clone,
    ) -> std::io::Result<()> {
        // Read one byte at a time instead of iterating `self.bytes()`: the `Bytes` iterator
        // would hold a mutable borrow of the stream across the in-loop `seek` call.
        let mut byte = [0u8; 1];
        loop {
            // `read` returning Ok(0) means end-of-stream.
            if self.read(&mut byte)? == 0 {
                break;
            }
            if callback(&byte[0]) {
                buf.push(byte[0]);
            } else {
                // Un-consume the byte that failed the predicate.
                self.seek(std::io::SeekFrom::Current(-1))?;
                break;
            }
        }
        Ok(())
    }
    /// Skips bytes for as long as `callback` returns `true`; the first byte that fails the
    /// predicate is left unconsumed. Stops silently at end-of-stream.
    fn skip_while(&mut self, callback: impl Fn(&u8) -> bool + Clone) -> std::io::Result<()> {
        let mut byte = [0u8; 1];
        loop {
            if self.read(&mut byte)? == 0 {
                break;
            }
            if !callback(&byte[0]) {
                self.seek(std::io::SeekFrom::Current(-1))?;
                break;
            }
        }
        Ok(())
    }
}
impl<R> ReadExt for R where R: Read + Seek {}
/// A minimal parsing abstraction: every token / AST node knows how to pull itself out of a
/// seekable byte stream.
trait Parse: Sized {
    /// Consumes bytes from `token_stream` and produces `Self`, leaving the stream positioned
    /// right after the parsed token. On error the stream position is unspecified.
    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self>;
    /// Runs [`Self::parse`] without consuming input: the stream position is restored to where
    /// it was before the call, regardless of whether parsing succeeded.
    fn peek(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
        let pos = token_stream.stream_position()?;
        let this = Self::parse(token_stream);
        token_stream.seek(std::io::SeekFrom::Start(pos))?;
        this
    }
}
// Implements `Parse` for tuples by parsing each element in order. The recursion peels off the
// first identifier at each step, so a single invocation with N identifiers generates impls for
// tuples of arity N, N-1, ..., 1.
macro_rules! impl_parse_for_tuple {
    ($first_ident: ident $(, $($ident: ident),*)?) => {
        impl<$first_ident: Parse, $($($ident: Parse),*)?> Parse for ($first_ident, $($($ident),*)?) {
            fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
                Ok((
                    $first_ident::parse(token_stream)?,
                    $(
                        $($ident::parse(token_stream)?),*
                    )?
                ))
            }
        }
        $(impl_parse_for_tuple!( $($ident),* );)?
    };
    () => {}
}
// Tuples up to arity 26 become parseable; e.g. `ArrowToken` below is `(DashToken, GtToken)`.
impl_parse_for_tuple!(
    A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z
);
impl Parse for String {
    /// Parses a run of ASCII alphanumeric and whitespace bytes, trimming the surrounding
    /// whitespace from the result.
    ///
    /// Note: this consumes trailing whitespace (including newlines) from the stream, and it
    /// fails when the run is empty or whitespace-only.
    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
        let mut buffer = Vec::new();
        token_stream.read_while(&mut buffer, |char| {
            char.is_ascii_alphanumeric() || char.is_ascii_whitespace()
        })?;
        let string = String::from_utf8(buffer)?;
        if string.trim().is_empty() {
            Err(anyhow!("Parsing string resulted in an empty string"))
        } else {
            Ok(string.trim().to_owned())
        }
    }
}
impl Parse for u64 {
    /// Parses an unsigned decimal integer, skipping any leading ASCII whitespace first.
    /// Fails when no digit follows or when the digits overflow a `u64`.
    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
        token_stream.skip_while(u8::is_ascii_whitespace)?;
        let mut buffer = Vec::new();
        token_stream.read_while(&mut buffer, |char| matches!(char, b'0'..=b'9'))?;
        let string = String::from_utf8(buffer)?;
        string.parse().map_err(Into::into)
    }
}
/// A parsed semantic-test function entry such as
/// `f(uint256): 1, 2 -> 3 gas irOptimized: 135499`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct Function {
    // The function's name, e.g. `f`.
    ident: FunctionIdent,
    // The comma-separated argument types inside the parentheses.
    arg_types: Parenthesized<FunctionArgumentType, ','>,
    // The `:` separating the signature from the call arguments.
    colon: ColonToken,
    // The concrete call arguments, one per comma-separated item.
    function_arguments: Vec<FunctionArgument>,
    // The `->` separating call arguments from expected returns.
    arrow_token: ArrowToken,
    // The expected return values.
    function_returns: Vec<FunctionReturn>,
    // Trailing `gas ...` option entries, if any.
    functions_options: Vec<PostFunctionOptions>,
}
impl Parse for Function {
    /// Parses one complete function entry: identifier, parenthesized argument types, `:`, the
    /// call arguments, `->`, the expected returns, and any trailing `gas ...` option entries.
    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
        Ok(Self {
            ident: Parse::parse(token_stream)?,
            arg_types: Parse::parse(token_stream)?,
            colon: Parse::parse(token_stream)?,
            function_arguments: {
                // Comma-separated call arguments; the list may be empty.
                let mut arguments = Vec::default();
                loop {
                    if arguments.is_empty() {
                        if FunctionArgument::peek(token_stream).is_ok() {
                            arguments.push(FunctionArgument::parse(token_stream)?);
                        } else {
                            // No first argument: the list is empty. The previous code had no
                            // `break` here and would re-peek the same position forever.
                            break;
                        }
                    } else if CommaToken::peek(token_stream).is_ok() {
                        // Subsequent arguments are preceded by a comma.
                        CommaToken::parse(token_stream)?;
                        arguments.push(FunctionArgument::parse(token_stream)?);
                    } else {
                        break;
                    }
                }
                arguments
            },
            arrow_token: Parse::parse(token_stream)?,
            function_returns: {
                // Return values: each entry is everything up to the next `,` or newline. A
                // comma continues the list (possibly across lines); anything else ends it.
                let mut returns = Vec::default();
                loop {
                    if returns.is_empty() || CommaToken::peek(token_stream).is_ok() {
                        if !returns.is_empty() {
                            CommaToken::parse(token_stream)?;
                        }
                        let mut buf = Vec::new();
                        token_stream
                            .read_while(&mut buf, |byte| *byte != b'\n' && *byte != b',')?;
                        // Consume a terminating newline. A terminating comma is deliberately
                        // left in the stream so the loop head sees and consumes it (the old
                        // `CommaToken::peek(...)?` branch here was a no-op and was removed).
                        if NewLineToken::peek(token_stream).is_ok() {
                            NewLineToken::parse(token_stream)?;
                        }
                        let string = String::from_utf8(buf)?;
                        let trimmed = string.trim();
                        if trimmed.is_empty() {
                            // A blank entry terminates the list.
                            break;
                        } else {
                            returns.push(FunctionReturn(trimmed.to_string()));
                        }
                    } else {
                        break;
                    }
                }
                returns
            },
            functions_options: {
                // Zero or more trailing `gas ...` option entries.
                let mut options = Vec::default();
                while PostFunctionOptions::peek(token_stream).is_ok() {
                    options.push(PostFunctionOptions::parse(token_stream)?)
                }
                options
            },
        })
    }
}
/// A `SEP`-separated list of `T`s enclosed in parentheses, e.g. `(uint256, uint64)`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct Parenthesized<T, const SEP: char>(pub Vec<T>);
impl<T, const SEP: char> Parse for Parenthesized<T, SEP>
where
    T: Parse,
{
    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
        OpenParenToken::parse(token_stream)?;
        let mut inner = Vec::new();
        loop {
            // An immediate `)` means an empty list, or a trailing separator before `)`.
            if CloseParenToken::peek(token_stream).is_ok() {
                break;
            }
            inner.push(T::parse(token_stream)?);
            let reached_the_end = CloseParenToken::peek(token_stream).is_ok();
            if reached_the_end {
                break;
            } else {
                // Not at the closing paren yet, so a separator must follow.
                SingleCharToken::<SEP>::parse(token_stream)?;
            }
        }
        CloseParenToken::parse(token_stream)?;
        Ok(Self(inner))
    }
}
define_wrapper_type!(
    /// A wrapper type for a function identifier token.
    #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
    struct FunctionIdent(String);
);
impl Parse for FunctionIdent {
    // Delegates to the `String` parser: an alphanumeric/whitespace run, trimmed.
    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
        Parse::parse(token_stream).map(Self)
    }
}
define_wrapper_type!(
    /// A wrapper type for a function argument type token (e.g. `uint256`).
    #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
    struct FunctionArgumentType(String);
);
impl Parse for FunctionArgumentType {
    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
        Parse::parse(token_stream).map(Self)
    }
}
define_wrapper_type!(
    /// A wrapper type for a function argument token.
    #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
    struct FunctionArgument(String);
);
impl Parse for FunctionArgument {
    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
        Parse::parse(token_stream).map(Self)
    }
}
define_wrapper_type!(
    /// A wrapper type for a function return token.
    #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
    struct FunctionReturn(String);
);
impl Parse for FunctionReturn {
    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
        Parse::parse(token_stream).map(Self)
    }
}
/// A zero-sized token matching exactly one occurrence of `CHAR`, skipping any ASCII whitespace
/// that precedes it.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
struct SingleCharToken<const CHAR: char>;
impl<const CHAR: char> Parse for SingleCharToken<CHAR> {
    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
        let mut buf = [0; 1];
        loop {
            // Check the number of bytes read: `read` returns Ok(0) at end-of-stream, and the
            // previous implementation then re-inspected the stale buffer contents — when the
            // stream ended right after whitespace that looped forever.
            if token_stream.read(&mut buf)? == 0 {
                return Err(anyhow!("Unexpected end of stream while looking for {}", CHAR));
            }
            let [byte] = buf;
            // NOTE(review): `CHAR as u8` truncates non-ASCII chars; all current
            // instantiations in this file are ASCII.
            if byte == CHAR as u8 {
                return Ok(Self);
            } else if byte.is_ascii_whitespace() {
                continue;
            } else {
                return Err(anyhow!(
                    "Invalid character encountered {} expected {}",
                    byte as char,
                    CHAR
                ));
            }
        }
    }
}
// Bit of a hack, but I do this because Rust analyzer doesn't like `SingleCharToken<'>'>` and it
// messes up with the syntax highlighting.
const GT_CHAR: char = '>';
// Single-character token aliases used throughout the grammar.
type ColonToken = SingleCharToken<':'>;
type CommaToken = SingleCharToken<','>;
type OpenParenToken = SingleCharToken<'('>;
type CloseParenToken = SingleCharToken<')'>;
type DashToken = SingleCharToken<'-'>;
type GtToken = SingleCharToken<{ GT_CHAR }>;
type NewLineToken = SingleCharToken<'\n'>;
type SpaceToken = SingleCharToken<' '>;
// `->`, parsed via the tuple impl as a dash immediately followed by a greater-than sign.
type ArrowToken = (DashToken, GtToken);
// Generates a zero-sized token type for each `Ident => "literal"` pair. The generated `Parse`
// impl skips leading whitespace, reads exactly `literal.len()` bytes, consumes trailing
// spaces, and succeeds only when the bytes equal the literal.
//
// NOTE(review): this matches a fixed-length prefix without inspecting the byte that follows,
// so e.g. "legacy" also matches the start of "legacyOptimized"; callers currently recover
// because the token expected afterwards (a ':') then fails to parse — confirm this stays true
// for any newly added literals. Also, the `read` return count is not checked, so a short read
// near end-of-stream is reported as a literal mismatch rather than an EOF error.
macro_rules! string_literal_token {
    (
        $($ty_ident: ident => $str: expr),* $(,)?
    ) => {
        $(
            #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
            pub struct $ty_ident;
            impl Parse for $ty_ident {
                fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
                    token_stream.skip_while(u8::is_ascii_whitespace)?;
                    let mut buffer = [0; $str.len()];
                    token_stream.read(&mut buffer)?;
                    while SpaceToken::peek(token_stream).is_ok() {
                        SpaceToken::parse(token_stream)?;
                    }
                    if $str.as_bytes() == buffer {
                        Ok(Self)
                    } else {
                        Err(anyhow!("Invalid string - expected {} but got {:?}", $str, str::from_utf8(&buffer)))
                    }
                }
            }
        )*
    };
}
// The literal keywords appearing in post-function `gas ...` option entries.
string_literal_token! {
    GasLiteralStringToken => "gas",
    IrOptimizedLiteralStringToken => "irOptimized",
    LegacyLiteralStringToken => "legacy",
    LegacyOptimizedLiteralStringToken => "legacyOptimized",
    CodeLiteralStringToken => "code",
}
/// A trailing `gas <mode> [code]: <value>` option attached to a function entry.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum PostFunctionOptions {
    IrOptimizedGasOption(IrOptimizedGasOption),
    IrOptimizedGasCodeOption(IrOptimizedGasCodeOption),
    LegacyGasOption(LegacyGasOption),
    LegacyGasCodeOption(LegacyGasCodeOption),
    LegacyOptimizedGasOption(LegacyOptimizedGasOption),
    LegacyOptimizedGasCodeOption(LegacyOptimizedGasCodeOption),
}
impl Parse for PostFunctionOptions {
    // Tries each concrete option via `peek` and commits to the first that matches. Each
    // non-`code` variant is tried before its `code` counterpart; when `code` follows the mode
    // keyword, the non-`code` variant's colon parse fails and its peek is rejected, so the
    // `code` variant is reached.
    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
        if IrOptimizedGasOption::peek(token_stream).is_ok() {
            IrOptimizedGasOption::parse(token_stream).map(Self::IrOptimizedGasOption)
        } else if IrOptimizedGasCodeOption::peek(token_stream).is_ok() {
            IrOptimizedGasCodeOption::parse(token_stream).map(Self::IrOptimizedGasCodeOption)
        } else if LegacyGasOption::peek(token_stream).is_ok() {
            LegacyGasOption::parse(token_stream).map(Self::LegacyGasOption)
        } else if LegacyGasCodeOption::peek(token_stream).is_ok() {
            LegacyGasCodeOption::parse(token_stream).map(Self::LegacyGasCodeOption)
        } else if LegacyOptimizedGasOption::peek(token_stream).is_ok() {
            LegacyOptimizedGasOption::parse(token_stream).map(Self::LegacyOptimizedGasOption)
        } else if LegacyOptimizedGasCodeOption::peek(token_stream).is_ok() {
            LegacyOptimizedGasCodeOption::parse(token_stream)
                .map(Self::LegacyOptimizedGasCodeOption)
        } else {
            Err(anyhow!("Failed to parse post function options"))
        }
    }
}
/// The `gas irOptimized: <value>` option.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
struct IrOptimizedGasOption {
    pub gas_token: GasLiteralStringToken,
    pub gas_option: IrOptimizedLiteralStringToken,
    pub colon: ColonToken,
    pub value: u64,
}
impl Parse for IrOptimizedGasOption {
    // Fields are parsed in declaration order.
    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
        Ok(Self {
            gas_token: Parse::parse(token_stream)?,
            gas_option: Parse::parse(token_stream)?,
            colon: Parse::parse(token_stream)?,
            value: Parse::parse(token_stream)?,
        })
    }
}
/// The `gas irOptimized code: <value>` option.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
struct IrOptimizedGasCodeOption {
    pub gas_token: GasLiteralStringToken,
    pub gas_option: IrOptimizedLiteralStringToken,
    pub code: CodeLiteralStringToken,
    pub colon: ColonToken,
    pub value: u64,
}
impl Parse for IrOptimizedGasCodeOption {
    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
        Ok(Self {
            gas_token: Parse::parse(token_stream)?,
            gas_option: Parse::parse(token_stream)?,
            code: Parse::parse(token_stream)?,
            colon: Parse::parse(token_stream)?,
            value: Parse::parse(token_stream)?,
        })
    }
}
/// The `gas legacy: <value>` option.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
struct LegacyGasOption {
    pub gas_token: GasLiteralStringToken,
    pub gas_option: LegacyLiteralStringToken,
    pub colon: ColonToken,
    pub value: u64,
}
impl Parse for LegacyGasOption {
    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
        Ok(Self {
            gas_token: Parse::parse(token_stream)?,
            gas_option: Parse::parse(token_stream)?,
            colon: Parse::parse(token_stream)?,
            value: Parse::parse(token_stream)?,
        })
    }
}
/// The `gas legacy code: <value>` option.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
struct LegacyGasCodeOption {
    pub gas_token: GasLiteralStringToken,
    pub gas_option: LegacyLiteralStringToken,
    pub code: CodeLiteralStringToken,
    pub colon: ColonToken,
    pub value: u64,
}
impl Parse for LegacyGasCodeOption {
    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
        Ok(Self {
            gas_token: Parse::parse(token_stream)?,
            gas_option: Parse::parse(token_stream)?,
            code: Parse::parse(token_stream)?,
            colon: Parse::parse(token_stream)?,
            value: Parse::parse(token_stream)?,
        })
    }
}
/// The `gas legacyOptimized: <value>` option.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
struct LegacyOptimizedGasOption {
    pub gas_token: GasLiteralStringToken,
    pub gas_option: LegacyOptimizedLiteralStringToken,
    pub colon: ColonToken,
    pub value: u64,
}
impl Parse for LegacyOptimizedGasOption {
    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
        Ok(Self {
            gas_token: Parse::parse(token_stream)?,
            gas_option: Parse::parse(token_stream)?,
            colon: Parse::parse(token_stream)?,
            value: Parse::parse(token_stream)?,
        })
    }
}
/// The `gas legacyOptimized code: <value>` option.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
struct LegacyOptimizedGasCodeOption {
    pub gas_token: GasLiteralStringToken,
    pub gas_option: LegacyOptimizedLiteralStringToken,
    pub code: CodeLiteralStringToken,
    pub colon: ColonToken,
    pub value: u64,
}
impl Parse for LegacyOptimizedGasCodeOption {
    fn parse(token_stream: &mut (impl Read + Seek)) -> Result<Self> {
        Ok(Self {
            gas_token: Parse::parse(token_stream)?,
            gas_option: Parse::parse(token_stream)?,
            code: Parse::parse(token_stream)?,
            colon: Parse::parse(token_stream)?,
            value: Parse::parse(token_stream)?,
        })
    }
}
#[cfg(test)]
mod test {
    use std::io::Cursor;
    use indoc::indoc;
    use super::*;
    #[test]
    fn complex_function_can_be_parsed() {
        // Arrange
        // Deliberately exercises the tolerant whitespace handling: the argument-type list,
        // the `:`, the arguments, and the returns are spread across multiple lines.
        let string = indoc!(
            r#"
            myFunction(uint256, uint64,
            )
            :
            1, 2
            , 3
            -> 1, 2, 3, 4
            gas irOptimized: 135499
            gas legacy: 137095
            gas legacyOptimized: 135823
            gas irOptimized code: 135499
            gas legacy code: 137095
            gas legacyOptimized code: 135823
            "#
        );
        let mut token_stream = Cursor::new(string);
        // Act
        let function = Function::parse(&mut token_stream);
        // Assert
        let function = function.expect("Function parsing failed");
        assert_eq!(
            function,
            Function {
                ident: FunctionIdent::new("myFunction"),
                arg_types: Parenthesized(vec![
                    FunctionArgumentType::new("uint256"),
                    FunctionArgumentType::new("uint64")
                ]),
                colon: ColonToken::default(),
                function_arguments: vec![
                    FunctionArgument::new("1"),
                    FunctionArgument::new("2"),
                    FunctionArgument::new("3")
                ],
                arrow_token: ArrowToken::default(),
                function_returns: vec![
                    FunctionReturn::new("1"),
                    FunctionReturn::new("2"),
                    FunctionReturn::new("3"),
                    FunctionReturn::new("4"),
                ],
                // The six `gas ...` lines above, in file order.
                functions_options: vec![
                    PostFunctionOptions::IrOptimizedGasOption(IrOptimizedGasOption {
                        gas_token: Default::default(),
                        gas_option: Default::default(),
                        colon: Default::default(),
                        value: 135499
                    }),
                    PostFunctionOptions::LegacyGasOption(LegacyGasOption {
                        gas_token: Default::default(),
                        gas_option: Default::default(),
                        colon: Default::default(),
                        value: 137095
                    }),
                    PostFunctionOptions::LegacyOptimizedGasOption(LegacyOptimizedGasOption {
                        gas_token: Default::default(),
                        gas_option: Default::default(),
                        colon: Default::default(),
                        value: 135823
                    }),
                    PostFunctionOptions::IrOptimizedGasCodeOption(IrOptimizedGasCodeOption {
                        gas_token: Default::default(),
                        gas_option: Default::default(),
                        code: Default::default(),
                        colon: Default::default(),
                        value: 135499
                    }),
                    PostFunctionOptions::LegacyGasCodeOption(LegacyGasCodeOption {
                        gas_token: Default::default(),
                        gas_option: Default::default(),
                        code: Default::default(),
                        colon: Default::default(),
                        value: 137095
                    }),
                    PostFunctionOptions::LegacyOptimizedGasCodeOption(
                        LegacyOptimizedGasCodeOption {
                            gas_token: Default::default(),
                            gas_option: Default::default(),
                            code: Default::default(),
                            colon: Default::default(),
                            value: 135823
                        }
                    ),
                ]
            }
        );
    }
}
+12
View File
@@ -0,0 +1,12 @@
//! This module contains a parser for the Solidity semantic tests allowing them to be parsed into
//! regular [`Metadata`] objects that can be executed by the testing framework.
//!
//! [`Metadata`]: crate::metadata::Metadata
// Parser for the function-call entries found in the test-inputs section.
mod function_parser;
// Splits a semantic test file into its top-level sections.
mod sections;
// Typed representation of the `// ====` configuration section.
mod test_configuration;
pub use function_parser::*;
pub use sections::*;
pub use test_configuration::*;
@@ -0,0 +1,338 @@
use std::{collections::VecDeque, path::PathBuf, sync::LazyLock};
use anyhow::{Context, Result, anyhow};
use regex::Regex;
use crate::semantic_tests::TestConfiguration;
/// This enum describes the various sections that a semantic test can contain.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum SemanticTestSection {
    /// A source code section that consists of Solidity code.
    ///
    /// Source code sections might have a file name and they might not. Take the following section
    /// as an example which doesn't contain a filename
    ///
    /// ```solidity
    /// contract C {
    ///     bytes data;
    ///     function () pure returns (bytes memory) f;
    ///     constructor() {
    ///         data = M.longdata();
    ///         f = M.longdata;
    ///     }
    ///     function test() public view returns (bool) {
    ///         return keccak256(data) == keccak256(f());
    ///     }
    /// }
    /// ```
    ///
    /// The above will translate into this enum variant and without a defined filename for the code.
    /// However, the following will translate into this variant of the enum with a defined file name
    ///
    /// ```solidity
    /// ==== Source: main.sol ====
    /// contract C {
    ///     bytes data;
    ///     function () pure returns (bytes memory) f;
    ///     constructor() {
    ///         data = M.longdata();
    ///         f = M.longdata;
    ///     }
    ///     function test() public view returns (bool) {
    ///         return keccak256(data) == keccak256(f());
    ///     }
    /// }
    /// ```
    ///
    /// This is because of the use of the `Source` directive at the start of the section.
    ///
    /// Note the following: All tests will be run on the last declared contract in the semantic test
    /// and therefore the order of the contracts matters.
    SourceCode {
        /// The path from the `==== Source: ... ====` directive, or `None` for the implicit
        /// unnamed section at the start of a file.
        file_name: Option<PathBuf>,
        /// The accumulated source text of the section.
        content: String,
    },
    /// An external source section from the solidity semantic tests.
    ///
    /// External source sections from the solidity semantic tests are the simplest sections out of
    /// them all. They look like the following:
    ///
    /// ```solidity
    /// ==== ExternalSource: _prbmath/PRBMathSD59x18.sol ====
    /// ```
    ///
    /// And they can be thought of as a directive to the compiler to include these contracts when
    /// compiling the test contract.
    ExternalSource { path: PathBuf },
    /// A test configuration section
    ///
    /// This section contains various configuration and filters that are used for the tests and its
    /// always the section that comes right before the actual tests. This section looks like the
    /// following:
    ///
    /// ```solidity
    /// // ====
    /// // ABIEncoderV1Only: true
    /// // compileViaYul: false
    /// // ----
    /// ```
    TestConfiguration { configuration: TestConfiguration },
    /// A test inputs section.
    ///
    /// This section consists of all of the lines that make up the test inputs or the test steps
    /// which is the final section found in the semantic test files. This section looks like the
    /// following:
    ///
    /// ```solidity
    /// // ----
    /// // f1() -> 0x20, 0x40, 0x20, 0
    /// // f2(string): 0x20, 0 -> 0x20, 0x40, 0x20, 0
    /// // f2(string): 0x20, 0, 0 -> 0x20, 0x40, 0x20, 0
    /// // g1() -> 32, 0
    /// // g2(string): 0x20, 0 -> 0x20, 0
    /// // g2(string): 0x20, 0, 0 -> 0x20, 0
    /// ```
    TestInputs {
        /// The retained input lines with the leading `//` prefix stripped and trimmed, and
        /// comment lines filtered out.
        lines: Vec<String>,
    },
}
impl SemanticTestSection {
    /// Marker introducing a named source-code section: `==== Source: <path> ====`.
    const SOURCE_SECTION_MARKER: &str = "==== Source:";
    /// Marker introducing an external-source directive: `==== ExternalSource: <path> ====`.
    const EXTERNAL_SOURCE_SECTION_MARKER: &str = "==== ExternalSource:";
    /// Marker (an entire line) introducing the test-configuration section.
    const TEST_CONFIGURATION_SECTION_MARKER: &str = "// ====";
    /// Marker (an entire line) introducing the test-inputs section.
    const TEST_INPUTS_SECTION_MARKER: &str = "// ----";
    /// Splits the full text of a semantic test file into its ordered sections.
    ///
    /// Parsing starts inside an implicit unnamed source-code section; if that section is still
    /// empty once the whole file is consumed (i.e. the file opened with an explicit section
    /// marker) it is dropped from the result.
    ///
    /// NOTE(review): lines are split on '\n' only, so CRLF files would keep a trailing '\r'
    /// that defeats the exact marker comparisons below — confirm the corpus is LF-only.
    pub fn parse_source_into_sections(source: impl AsRef<str>) -> Result<Vec<Self>> {
        let mut sections = VecDeque::<Self>::new();
        sections.push_back(Self::SourceCode {
            file_name: None,
            content: Default::default(),
        });
        for line in source.as_ref().split('\n') {
            // `append_line` returns `Some` when the line opened a brand new section.
            if let Some(new_section) = sections
                .back_mut()
                .expect("Impossible case - we have at least one item in the sections")
                .append_line(line)?
            {
                sections.push_back(new_section);
            }
        }
        // Drop the implicit leading source-code section if nothing was ever appended to it.
        let first_section = sections
            .front()
            .expect("Impossible case - there's always at least one section");
        let remove_first_section = match first_section {
            SemanticTestSection::SourceCode { file_name, content } => {
                file_name.is_none() && content.is_empty()
            }
            SemanticTestSection::ExternalSource { .. }
            | SemanticTestSection::TestConfiguration { .. }
            | SemanticTestSection::TestInputs { .. } => false,
        };
        if remove_first_section {
            sections.pop_front();
        }
        Ok(sections.into_iter().collect())
    }
    /// Appends a line to a semantic test section.
    ///
    /// This method takes in the current section and a new line and attempts to parse it and
    /// append it to the current section. If the line is found to be the start of a new section
    /// then no changes will be made to the current section and instead the line will be
    /// interpreted according to the rules of new sections, and the freshly-started section is
    /// returned as `Some(..)`.
    pub fn append_line(&mut self, line: impl AsRef<str>) -> Result<Option<Self>> {
        // Strips trailing `# ... #` comments from test-input lines.
        static COMMENT_REPLACEMENT_REGEX: LazyLock<Regex> =
            LazyLock::new(|| Regex::new("#.*#$").unwrap());
        let line = line.as_ref();
        if line.is_empty() {
            // NOTE(review): blank lines are dropped even inside source-code sections, so they
            // never appear in the reassembled source — confirm this is intended.
            Ok(None)
        } else if let Some(source_path) = line.strip_prefix(Self::SOURCE_SECTION_MARKER) {
            // `==== Source: path ====` — take the first whitespace-separated word as the path,
            // which also drops the trailing `====`.
            let source_code_file_path = source_path
                .trim()
                .split(' ')
                .next()
                .context("Failed to find the source code file path")?;
            Ok(Some(Self::SourceCode {
                file_name: Some(PathBuf::from(source_code_file_path)),
                content: Default::default(),
            }))
        } else if let Some(external_source_path) =
            line.strip_prefix(Self::EXTERNAL_SOURCE_SECTION_MARKER)
        {
            let source_code_file_path = external_source_path
                .trim()
                .split(' ')
                .next()
                .context("Failed to find the source code file path")?;
            Ok(Some(Self::ExternalSource {
                path: PathBuf::from(source_code_file_path),
            }))
        } else if line == Self::TEST_CONFIGURATION_SECTION_MARKER {
            Ok(Some(Self::TestConfiguration {
                configuration: Default::default(),
            }))
        } else if line == Self::TEST_INPUTS_SECTION_MARKER {
            Ok(Some(Self::TestInputs {
                lines: Default::default(),
            }))
        } else {
            match self {
                SemanticTestSection::SourceCode { content, .. } => {
                    // Re-insert the newline that `split('\n')` removed.
                    content.push('\n');
                    content.push_str(line);
                    Ok(None)
                }
                // A non-marker line after an external-source directive implicitly opens a new
                // unnamed source-code section.
                SemanticTestSection::ExternalSource { .. } => Ok(Some(Self::SourceCode {
                    file_name: None,
                    content: line.to_owned(),
                })),
                SemanticTestSection::TestConfiguration { configuration } => {
                    let line = line
                        .strip_prefix("//")
                        .with_context(|| {
                            format!("Line doesn't contain test configuration prefix: {line}")
                        })?
                        .trim();
                    // `split_once` keeps any further ':' characters inside the value; the
                    // previous `split(':')` + two `next()` calls silently truncated such
                    // values at the second colon.
                    let (key, value) = line
                        .split_once(':')
                        .context("Failed to find the value")?;
                    configuration.with_config(key.trim(), value.trim())?;
                    Ok(None)
                }
                SemanticTestSection::TestInputs { lines } => {
                    let line = line
                        .strip_prefix("//")
                        .ok_or_else(|| anyhow!("Line doesn't contain test input prefix: {line}"))
                        .map(str::trim)?;
                    // Drop trailing `# ... #` comments, then skip full-line comments and
                    // whitespace-only leftovers.
                    let line = COMMENT_REPLACEMENT_REGEX.replace_all(line, "");
                    if !line.starts_with('#') && !line.chars().all(|char| char.is_whitespace()) {
                        lines.push(line.to_string());
                    }
                    Ok(None)
                }
            }
        }
    }
}
#[cfg(test)]
mod test {
    use indoc::indoc;
    use super::*;
    // NOTE(review): the `#[ignore]`d scratch test that walked an absolute local directory
    // (self-annotated "should be removed before making a PR") has been removed.
    #[test]
    fn parses_a_simple_file_correctly() {
        // Arrange
        const SIMPLE_FILE: &str = indoc!(
            r#"
            ==== Source: main.sol ====
            contract C {
                function f() public pure returns (uint) {
                    return 1;
                }
            }
            // ====
            // compileViaYul: true
            // ----
            // f() -> 1
            "#
        );
        // Act
        let sections =
            SemanticTestSection::parse_source_into_sections(SIMPLE_FILE).expect("Failed to parse");
        // Assert
        assert_eq!(
            sections,
            vec![
                SemanticTestSection::SourceCode {
                    file_name: Some("main.sol".into()),
                    content: "\ncontract C {\n    function f() public pure returns (uint) {\n        return 1;\n    }\n}".to_string()
                },
                SemanticTestSection::TestConfiguration {
                    configuration: TestConfiguration { compile_via_yul: Some(true.into()), ..Default::default() },
                },
                SemanticTestSection::TestInputs {
                    lines: vec!["f() -> 1".to_string()]
                }
            ]
        )
    }
    #[test]
    fn parses_a_complex_file_correctly() {
        // Arrange
        const COMPLEX_FILE: &str = indoc!(
            r#"
            ==== Source: main.sol ====
            import "./lib.sol";
            contract C {
                function f() public pure returns (uint) {
                    return Lib.f();
                }
            }
            ==== Source: lib.sol ====
            library Lib {
                function f() internal pure returns (uint) {
                    return 1;
                }
            }
            // ====
            // compileViaYul: true
            // ----
            // # This is a comment
            // f() -> 1
            "#
        );
        // Act
        let sections =
            SemanticTestSection::parse_source_into_sections(COMPLEX_FILE).expect("Failed to parse");
        // Assert
        assert_eq!(
            sections,
            vec![
                SemanticTestSection::SourceCode {
                    file_name: Some("main.sol".into()),
                    content: "\nimport \"./lib.sol\";\ncontract C {\n    function f() public pure returns (uint) {\n        return Lib.f();\n    }\n}".to_string()
                },
                SemanticTestSection::SourceCode {
                    file_name: Some("lib.sol".into()),
                    content: "\nlibrary Lib {\n    function f() internal pure returns (uint) {\n        return 1;\n    }\n}".to_string()
                },
                SemanticTestSection::TestConfiguration {
                    configuration: TestConfiguration { compile_via_yul: Some(true.into()), ..Default::default() },
                },
                SemanticTestSection::TestInputs {
                    lines: vec!["f() -> 1".to_string()]
                }
            ]
        )
    }
}
@@ -0,0 +1,200 @@
use std::str::FromStr;
use revive_common::EVMVersion;
use anyhow::{Error, Result, bail};
/// The configuration parameters provided in the solidity semantic tests.
///
/// Each field corresponds to one `// key: value` line in a test's `// ====` configuration
/// section; `None` means the key was not present in the file.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub struct TestConfiguration {
    /// Controls if the test case compiles through the Yul IR.
    pub compile_via_yul: Option<ItemConfig>,
    /// Controls if the compilation should be done to EWASM.
    pub compile_to_ewasm: Option<ItemConfig>,
    /// Controls if ABI encoding should be restricted to the V1 ABI encoder.
    pub abi_encoder_v1_only: Option<ItemConfig>,
    /// Controls the EVM Version that the test is compatible with.
    pub evm_version: Option<EvmVersionRequirement>,
    /// Controls how the revert strings should be handled.
    pub revert_strings: Option<RevertString>,
    /// Controls if non-existent functions should be permitted or not.
    pub allow_non_existing_functions: Option<bool>,
    /// The list of bytecode formats that this test should be run against.
    pub bytecode_format: Option<Vec<BytecodeFormat>>,
}
impl TestConfiguration {
pub fn new() -> Self {
Self::default()
}
pub fn with_config(
&mut self,
key: impl AsRef<str>,
value: impl AsRef<str>,
) -> Result<&mut Self> {
match key.as_ref() {
"compileViaYul" => self.compile_via_yul = Some(value.as_ref().parse()?),
"compileToEwasm" => self.compile_to_ewasm = Some(value.as_ref().parse()?),
"ABIEncoderV1Only" => self.abi_encoder_v1_only = Some(value.as_ref().parse()?),
"EVMVersion" => self.evm_version = Some(value.as_ref().parse()?),
"revertStrings" => self.revert_strings = Some(value.as_ref().parse()?),
"allowNonExistingFunctions" => {
self.allow_non_existing_functions = Some(value.as_ref().parse()?)
}
"bytecodeFormat" => {
self.bytecode_format = Some(
value
.as_ref()
.split(',')
.map(str::trim)
.map(FromStr::from_str)
.collect::<Result<Vec<_>>>()?,
)
}
_ => bail!("Unknown test configuration {}", key.as_ref()),
};
Ok(self)
}
pub fn new_from_pairs(
pairs: impl IntoIterator<Item = (impl AsRef<str>, impl AsRef<str>)>,
) -> Result<Self> {
let mut this = Self::default();
pairs
.into_iter()
.try_fold(&mut this, |this, (key, value)| this.with_config(key, value))?;
Ok(this)
}
}
/// The configuration of a single item in the test configuration.
#[derive(Clone, Debug, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ItemConfig {
    /// The configuration is set to a boolean that's either `true` or `false`.
    Boolean(bool),
    /// The `also` value. NOTE(review): presumably means the test should run both with and
    /// without the option enabled — confirm against the solidity semantic test format.
    Also,
}
impl FromStr for ItemConfig {
    type Err = Error;
    // Accepts exactly `true`, `false`, or `also`; anything else is rejected.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        match s {
            "true" => Ok(Self::Boolean(true)),
            "false" => Ok(Self::Boolean(false)),
            "also" => Ok(Self::Also),
            _ => bail!("Invalid ItemConfig {s}"),
        }
    }
}
impl From<bool> for ItemConfig {
    fn from(value: bool) -> Self {
        Self::Boolean(value)
    }
}
impl TryFrom<String> for ItemConfig {
    type Error = <ItemConfig as FromStr>::Err;
    // Delegates to the `FromStr` implementation.
    fn try_from(value: String) -> std::result::Result<Self, Self::Error> {
        value.as_str().parse()
    }
}
/// The options available for the revert strings.
#[derive(Clone, Debug, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub enum RevertString {
    #[default]
    Default,
    Debug,
    Strip,
    VerboseDebug,
}
impl FromStr for RevertString {
    type Err = Error;
    /// Parses the exact spellings used by the semantic tests: `default`, `debug`, `strip`,
    /// and `verboseDebug`; anything else is rejected.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        let parsed = match s {
            "default" => Self::Default,
            "debug" => Self::Debug,
            "strip" => Self::Strip,
            "verboseDebug" => Self::VerboseDebug,
            _ => bail!("Invalid RevertString {s}"),
        };
        Ok(parsed)
    }
}
impl TryFrom<String> for RevertString {
    type Error = <RevertString as FromStr>::Err;
    /// Delegates to the `FromStr` implementation.
    fn try_from(value: String) -> std::result::Result<Self, Self::Error> {
        RevertString::from_str(value.as_str())
    }
}
/// The set of available bytecode formats.
#[derive(Clone, Debug, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum BytecodeFormat {
    /// The pre-EOF bytecode format, spelled `legacy` in the tests.
    Legacy,
    /// An EOF bytecode format of version one or above, spelled `>=EOFv1` in the tests.
    EofVersionGreaterThanOne,
}
impl FromStr for BytecodeFormat {
    type Err = Error;
    // Accepts exactly the two spellings used by the semantic tests.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        match s {
            "legacy" => Ok(Self::Legacy),
            ">=EOFv1" => Ok(Self::EofVersionGreaterThanOne),
            _ => bail!("Invalid BytecodeFormat {s}"),
        }
    }
}
impl TryFrom<String> for BytecodeFormat {
    type Error = <BytecodeFormat as FromStr>::Err;
    // Delegates to the `FromStr` implementation.
    fn try_from(value: String) -> std::result::Result<Self, Self::Error> {
        value.as_str().parse()
    }
}
/// A comparison-based requirement on the EVM version a test supports, parsed from strings such
/// as `>=constantinople` or `=byzantium`.
#[derive(Clone, Debug, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum EvmVersionRequirement {
    GreaterThan(EVMVersion),
    GreaterThanOrEqual(EVMVersion),
    LessThan(EVMVersion),
    LessThanOrEqual(EVMVersion),
    EqualTo(EVMVersion),
}
impl FromStr for EvmVersionRequirement {
    type Err = Error;
    // Matches on the raw bytes so the one- and two-character operator prefixes can be pattern
    // matched. Order matters: the `>=` / `<=` arms must come before `>` / `<`, otherwise the
    // single-character arms would claim the `=` as part of the version string.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        match s.as_bytes() {
            [b'>', b'=', remaining @ ..] => Ok(Self::GreaterThanOrEqual(
                str::from_utf8(remaining)?.try_into()?,
            )),
            [b'>', remaining @ ..] => Ok(Self::GreaterThan(str::from_utf8(remaining)?.try_into()?)),
            [b'<', b'=', remaining @ ..] => Ok(Self::LessThanOrEqual(
                str::from_utf8(remaining)?.try_into()?,
            )),
            [b'<', remaining @ ..] => Ok(Self::LessThan(str::from_utf8(remaining)?.try_into()?)),
            [b'=', remaining @ ..] => Ok(Self::EqualTo(str::from_utf8(remaining)?.try_into()?)),
            _ => bail!("Invalid EVM version requirement {s}"),
        }
    }
}
impl TryFrom<String> for EvmVersionRequirement {
    type Error = <EvmVersionRequirement as FromStr>::Err;
    // Delegates to the `FromStr` implementation.
    fn try_from(value: String) -> std::result::Result<Self, Self::Error> {
        value.as_str().parse()
    }
}