factor out solc JSON interface crate (#264)

The differential testing framework will make a second consumer. There
seems to be no re-usable Rust crate for this. But we already have
everything here, just needs a small refactor to make it fully re-usable.

- Mostly decouple the solc JSON-input-output interface types from the
`solidity` frontend crate
- Expose the JSON-input-output interface types in a dedicated crate

---------

Signed-off-by: Cyrill Leutwiler <bigcyrill@hotmail.com>
This commit is contained in:
xermicus
2025-03-20 17:11:40 +01:00
committed by GitHub
parent 36ea69b50f
commit 497dae2494
45 changed files with 328 additions and 245 deletions
@@ -0,0 +1,34 @@
//! The `solc --standard-json` output contract EVM bytecode.
use serde::Deserialize;
use serde::Serialize;
/// The `solc --standard-json` output contract EVM bytecode.
///
/// A thin wrapper around a single string so that it (de)serializes as a JSON
/// object with one `object` key.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Bytecode {
/// The bytecode object, stored exactly as the string found in the JSON.
pub object: String,
}
impl Bytecode {
/// A shortcut constructor.
pub fn new(object: String) -> Self {
Self { object }
}
}
/// The `solc --standard-json` output contract EVM deployed bytecode.
///
/// Structurally identical to `Bytecode`: a single string that (de)serializes
/// as a JSON object with one `object` key.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct DeployedBytecode {
/// The bytecode object, stored exactly as the string found in the JSON.
pub object: String,
}
impl DeployedBytecode {
/// A shortcut constructor.
pub fn new(object: String) -> Self {
Self { object }
}
}
@@ -0,0 +1,42 @@
//! The `solc --standard-json` output contract EVM data.
pub mod bytecode;
use std::collections::BTreeMap;
use serde::Deserialize;
use serde::Serialize;
use self::bytecode::Bytecode;
use self::bytecode::DeployedBytecode;
/// The `solc --standard-json` output contract EVM data.
/// It is replaced by PolkaVM data after compiling (see `EVM::modify`).
///
/// Every field is optional; `None` fields are omitted when serializing.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct EVM {
/// The contract PolkaVM assembly code.
/// Serialized under the JSON key `assembly` rather than the field name.
#[serde(rename = "assembly", skip_serializing_if = "Option::is_none")]
pub assembly_text: Option<String>,
/// The contract bytecode.
/// Is reset by that of PolkaVM before yielding the compiled project artifacts.
#[serde(skip_serializing_if = "Option::is_none")]
pub bytecode: Option<Bytecode>,
/// The deployed bytecode of the contract.
/// It is overwritten with the PolkaVM blob before yielding the compiled project artifacts.
/// `EVM::modify` fills it from the same string as `bytecode`, so both carry the same
/// content; both are kept for compatibility reasons.
#[serde(skip_serializing_if = "Option::is_none")]
pub deployed_bytecode: Option<DeployedBytecode>,
/// The contract function signatures.
/// NOTE(review): presumably maps a function signature to its selector — confirm against solc output.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub method_identifiers: Option<BTreeMap<String, String>>,
}
impl EVM {
    /// Replaces the assembly text and both bytecode objects with the given PolkaVM artifacts.
    ///
    /// `bytecode` is stored twice: once as the creation bytecode and once as the
    /// deployed bytecode. `method_identifiers` is left untouched.
    pub fn modify(&mut self, assembly_text: String, bytecode: String) {
        let deployed = DeployedBytecode::new(bytecode.clone());
        let creation = Bytecode::new(bytecode);

        self.assembly_text = Some(assembly_text);
        self.bytecode = Some(creation);
        self.deployed_bytecode = Some(deployed);
    }
}
@@ -0,0 +1,50 @@
//! The `solc --standard-json` output contract.
pub mod evm;
use std::collections::BTreeMap;
use std::collections::HashSet;
use serde::Deserialize;
use serde::Serialize;
use self::evm::EVM;
/// The `solc --standard-json` output contract.
///
/// Every field is optional: a key missing from the input deserializes to `None`,
/// and `None` fields are omitted when serializing. Field names are converted to
/// camelCase JSON keys (e.g. `storage_layout` is `storageLayout`).
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Contract {
/// The contract ABI, kept as untyped JSON.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub abi: Option<serde_json::Value>,
/// The contract metadata, kept as untyped JSON.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub metadata: Option<serde_json::Value>,
/// The contract developer documentation, kept as untyped JSON.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub devdoc: Option<serde_json::Value>,
/// The contract user documentation, kept as untyped JSON.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub userdoc: Option<serde_json::Value>,
/// The contract storage layout, kept as untyped JSON.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub storage_layout: Option<serde_json::Value>,
/// The contract's bytecode and related objects.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub evm: Option<EVM>,
/// The contract IR code.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub ir: Option<String>,
/// The contract optimized IR code.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub ir_optimized: Option<String>,
/// The contract PolkaVM bytecode hash.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub hash: Option<String>,
/// The contract factory dependencies.
/// NOTE(review): the meaning of the map's keys and values is not visible here — confirm against the producer.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub factory_dependencies: Option<BTreeMap<String, String>>,
/// The contract missing libraries.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub missing_libraries: Option<HashSet<String>>,
}
@@ -0,0 +1,135 @@
//! The `solc --standard-json` output error.
pub mod source_location;
use std::str::FromStr;
use serde::Deserialize;
use serde::Serialize;
use self::source_location::SourceLocation;
/// The `solc --standard-json` output error.
///
/// Despite the name, this type carries any diagnostic: the constructors in this
/// file build entries whose `severity` is `"warning"`. Field names are converted
/// to camelCase JSON keys (e.g. `formatted_message` is `formattedMessage`).
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Error {
/// The component type (the constructors in this file use `"general"`).
pub component: String,
/// The error code, if any.
pub error_code: Option<String>,
/// The formatted error message; this is what the `Display` implementation prints.
pub formatted_message: String,
/// The non-formatted error message.
pub message: String,
/// The error severity (the constructors in this file use `"warning"`).
pub severity: String,
/// The error location data, if known.
pub source_location: Option<SourceLocation>,
/// The error type (the constructors in this file use `"Warning"`).
pub r#type: String,
}
impl Error {
    /// Builds a `general` component diagnostic with `warning` severity.
    ///
    /// `message` is used for both the plain and the formatted message. `src` is
    /// parsed into a source location when present; a location that fails to
    /// parse is dropped rather than reported.
    fn general_warning(message: &str, src: Option<&str>) -> Self {
        Self {
            component: "general".to_owned(),
            error_code: None,
            formatted_message: message.to_owned(),
            message: message.to_owned(),
            severity: "warning".to_owned(),
            source_location: src.and_then(|src| SourceLocation::from_str(src).ok()),
            r#type: "Warning".to_owned(),
        }
    }

    /// Returns the `ecrecover` function usage warning.
    ///
    /// `src` is the optional `src` attribute of the offending AST node.
    pub fn message_ecrecover(src: Option<&str>) -> Self {
        Self::general_warning(
            r#"
Warning: It looks like you are using 'ecrecover' to validate a signature of a user account.
Polkadot comes with native account abstraction support, therefore it is highly recommended NOT
to rely on the fact that the account has an ECDSA private key attached to it since accounts might
implement other signature schemes.
"#,
            src,
        )
    }

    /// Returns the `<address payable>`'s `send` and `transfer` methods usage warning.
    ///
    /// `src` is the optional `src` attribute of the offending AST node.
    pub fn message_send_and_transfer(src: Option<&str>) -> Self {
        Self::general_warning(
            r#"
Warning: It looks like you are using '<address payable>.send/transfer(<X>)'.
Using '<address payable>.send/transfer(<X>)' is deprecated and strongly discouraged!
The resolc compiler uses a heuristic to detect '<address payable>.send/transfer(<X>)' calls,
which disables call re-entrancy and supplies all remaining gas instead of the 2300 gas stipend.
However, detection is not guaranteed. You are advised to carefully test this, employ
re-entrancy guards or use the withdrawal pattern instead!
Learn more on https://docs.soliditylang.org/en/latest/security-considerations.html#reentrancy
and https://docs.soliditylang.org/en/latest/common-patterns.html#withdrawal-from-contracts
"#,
            src,
        )
    }

    /// Returns the `extcodesize` instruction usage warning.
    ///
    /// `src` is the optional `src` attribute of the offending AST node.
    pub fn message_extcodesize(src: Option<&str>) -> Self {
        // The message text previously misspelled "governed" as "coverned".
        Self::general_warning(
            r#"
Warning: Your code or one of its dependencies uses the 'extcodesize' instruction, which is
usually needed in the following cases:
1. To detect whether an address belongs to a smart contract.
2. To detect whether the deploy code execution has finished.
Polkadot comes with native account abstraction support (so smart contracts are just accounts
governed by code), and you should avoid differentiating between contracts and non-contract
addresses.
"#,
            src,
        )
    }

    /// Returns the `origin` instruction usage warning.
    ///
    /// `src` is the optional `src` attribute of the offending AST node.
    pub fn message_tx_origin(src: Option<&str>) -> Self {
        Self::general_warning(
            r#"
Warning: You are checking for 'tx.origin' in your code, which might lead to unexpected behavior.
Polkadot comes with native account abstraction support, and therefore the initiator of a
transaction might be different from the contract calling your code. It is highly recommended NOT
to rely on tx.origin, but use msg.sender instead.
"#,
            src,
        )
    }

    /// Appends the contract path to the formatted message.
    ///
    /// Only `formatted_message` is extended (with `"\n--> {path}\n"`); the plain
    /// `message` is left as it was.
    pub fn push_contract_path(&mut self, path: &str) {
        self.formatted_message.push_str("\n--> ");
        self.formatted_message.push_str(path);
        self.formatted_message.push('\n');
    }
}
impl std::fmt::Display for Error {
    /// Prints the formatted message verbatim; formatter width and padding are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.formatted_message.as_str())
    }
}
@@ -0,0 +1,43 @@
//! The `solc --standard-json` output error source location.
use std::str::FromStr;
use serde::Deserialize;
use serde::Serialize;
/// The `solc --standard-json` output error source location.
///
/// Can also be parsed from a colon-separated `start:length:file` string via
/// the `FromStr` implementation.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct SourceLocation {
/// The source file path.
pub file: String,
/// The start location.
/// NOTE(review): presumably an offset into the source text; signed, so negative values are representable — confirm.
pub start: isize,
/// The end location. When parsed from a string it is derived as start plus length.
pub end: isize,
}
impl FromStr for SourceLocation {
    type Err = anyhow::Error;

    /// Parses a colon-separated `start:length:file` string.
    ///
    /// Parsing is deliberately lenient and never fails:
    /// - a missing or non-numeric `start` / `length` is treated as `0`;
    /// - a missing third component yields an empty `file`;
    /// - any components after the third are ignored.
    ///
    /// `end` is `start + length`, saturating at the `isize` bounds so that
    /// extreme values cannot overflow (which would panic in debug builds).
    ///
    /// NOTE(review): in solc's AST `src` attribute the third component is a
    /// source index rather than a path; it is stored in `file` as-is — confirm
    /// that callers expect this.
    fn from_str(string: &str) -> Result<Self, Self::Err> {
        let mut parts = string.split(':');

        // Best-effort numeric read of the next component, defaulting to zero.
        let mut next_number = || -> isize {
            parts
                .next()
                .and_then(|part| part.parse::<isize>().ok())
                .unwrap_or_default()
        };
        let start = next_number();
        let length = next_number();

        let file = parts.next().unwrap_or_default().to_owned();

        Ok(Self {
            file,
            start,
            end: start.saturating_add(length),
        })
    }
}
@@ -0,0 +1,71 @@
//! The `solc --standard-json` output.
pub mod contract;
pub mod error;
pub mod source;
use std::collections::BTreeMap;
use serde::Deserialize;
use serde::Serialize;
#[cfg(feature = "resolc")]
use crate::warning::Warning;
use self::contract::Contract;
use self::error::Error as SolcStandardJsonOutputError;
use self::source::Source;
/// The `solc --standard-json` output.
///
/// Every field is optional and `None` fields are omitted when serializing.
/// No `rename_all` is applied here, so the JSON keys equal the field names
/// (e.g. `long_version`, `revive_version`).
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct Output {
/// The file-contract hashmap: outer key is the file, inner key is the contract.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub contracts: Option<BTreeMap<String, BTreeMap<String, Contract>>>,
/// The source code mapping data, keyed by path.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub sources: Option<BTreeMap<String, Source>>,
/// The compilation errors and warnings.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub errors: Option<Vec<SolcStandardJsonOutputError>>,
/// The `solc` compiler version.
#[serde(skip_serializing_if = "Option::is_none")]
pub version: Option<String>,
/// The `solc` compiler long version.
#[serde(skip_serializing_if = "Option::is_none")]
pub long_version: Option<String>,
/// The `resolc` compiler version.
#[serde(skip_serializing_if = "Option::is_none")]
pub revive_version: Option<String>,
}
impl Output {
    /// Traverses the AST of every source and appends the resulting additional
    /// warnings to `self.errors`.
    ///
    /// Each generated message gets the path of its source appended. Warnings
    /// listed in `suppressed_warnings` are not generated. When `sources` is
    /// `None` nothing is changed; otherwise `errors` is always `Some` afterwards,
    /// even if no message was produced.
    ///
    /// The visible body always returns `Ok(())`.
    #[cfg(feature = "resolc")]
    pub fn preprocess_ast(&mut self, suppressed_warnings: &[Warning]) -> anyhow::Result<()> {
        let Some(sources) = self.sources.as_ref() else {
            return Ok(());
        };

        let messages: Vec<SolcStandardJsonOutputError> = sources
            .iter()
            // Sources without an AST contribute nothing.
            .filter_map(|(path, source)| Some((path, source.ast.as_ref()?)))
            .flat_map(|(path, ast)| {
                Source::get_messages(ast, suppressed_warnings)
                    .into_iter()
                    .map(move |mut message| {
                        message.push_contract_path(path.as_str());
                        message
                    })
            })
            .collect();

        self.errors.get_or_insert_with(Vec::new).extend(messages);
        Ok(())
    }
}
@@ -0,0 +1,202 @@
//! The `solc --standard-json` output source.
use serde::Deserialize;
use serde::Serialize;
use crate::standard_json::output::error::Error as SolcStandardJsonOutputError;
#[cfg(feature = "resolc")]
use crate::warning::Warning;
/// The `solc --standard-json` output source.
///
/// The AST is kept as untyped JSON; the associated functions walk it looking
/// for specific node shapes.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Source {
/// The source code ID.
pub id: usize,
/// The source code AST, if the output contains one.
pub ast: Option<serde_json::Value>,
}
impl Source {
    /// Returns `true` if `node[key]` exists and is a string equal to `expected`.
    ///
    /// A non-object `node`, a missing key or a non-string value all yield `false`.
    fn field_is(node: &serde_json::Value, key: &str, expected: &str) -> bool {
        node.get(key).and_then(serde_json::Value::as_str) == Some(expected)
    }

    /// Returns `true` if `ast` is a `YulFunctionCall` node whose
    /// `functionName.name` equals `instruction`.
    fn is_yul_call_to(ast: &serde_json::Value, instruction: &str) -> bool {
        Self::field_is(ast, "nodeType", "YulFunctionCall")
            && ast
                .get("functionName")
                .map_or(false, |callee| Self::field_is(callee, "name", instruction))
    }

    /// Checks the AST node for the `ecrecover` function usage.
    ///
    /// Matches a `FunctionCall` whose callee is the identifier `ecrecover`.
    /// Returns `None` if the node does not match or has no `src` key.
    pub fn check_ecrecover(ast: &serde_json::Value) -> Option<SolcStandardJsonOutputError> {
        let callee = ast.get("expression")?;
        let matched = Self::field_is(ast, "nodeType", "FunctionCall")
            && Self::field_is(callee, "nodeType", "Identifier")
            && Self::field_is(callee, "name", "ecrecover");
        if !matched {
            return None;
        }

        let src = ast.get("src")?;
        Some(SolcStandardJsonOutputError::message_ecrecover(src.as_str()))
    }

    /// Checks the AST node for the `<address payable>`'s `send` and `transfer` methods usage.
    ///
    /// Matches a `FunctionCall` whose callee is a `MemberAccess` named `send`
    /// or `transfer`. Returns `None` if the node does not match or has no `src` key.
    pub fn check_send_and_transfer(ast: &serde_json::Value) -> Option<SolcStandardJsonOutputError> {
        let callee = ast.get("expression")?;
        let is_member_call = Self::field_is(ast, "nodeType", "FunctionCall")
            && Self::field_is(callee, "nodeType", "MemberAccess");
        let member = callee.get("memberName").and_then(serde_json::Value::as_str);
        if !is_member_call || !matches!(member, Some("send" | "transfer")) {
            return None;
        }

        let src = ast.get("src")?;
        Some(SolcStandardJsonOutputError::message_send_and_transfer(
            src.as_str(),
        ))
    }

    /// Checks the AST node for the `extcodesize` assembly instruction usage.
    ///
    /// Returns `None` if the node does not match or has no `src` key.
    pub fn check_assembly_extcodesize(
        ast: &serde_json::Value,
    ) -> Option<SolcStandardJsonOutputError> {
        if !Self::is_yul_call_to(ast, "extcodesize") {
            return None;
        }

        let src = ast.get("src")?;
        Some(SolcStandardJsonOutputError::message_extcodesize(
            src.as_str(),
        ))
    }

    /// Checks the AST node for the `origin` assembly instruction usage.
    ///
    /// Returns `None` if the node does not match or has no `src` key.
    pub fn check_assembly_origin(ast: &serde_json::Value) -> Option<SolcStandardJsonOutputError> {
        if !Self::is_yul_call_to(ast, "origin") {
            return None;
        }

        let src = ast.get("src")?;
        Some(SolcStandardJsonOutputError::message_tx_origin(src.as_str()))
    }

    /// Checks the AST node for the `tx.origin` value usage.
    ///
    /// Matches a `MemberAccess` named `origin` on the identifier `tx`.
    /// Returns `None` if the node does not match or has no `src` key.
    pub fn check_tx_origin(ast: &serde_json::Value) -> Option<SolcStandardJsonOutputError> {
        let target = ast.get("expression")?;
        let matched = Self::field_is(ast, "nodeType", "MemberAccess")
            && Self::field_is(ast, "memberName", "origin")
            && Self::field_is(target, "nodeType", "Identifier")
            && Self::field_is(target, "name", "tx");
        if !matched {
            return None;
        }

        let src = ast.get("src")?;
        Some(SolcStandardJsonOutputError::message_tx_origin(src.as_str()))
    }

    /// Returns the list of messages for some specific parts of the AST.
    ///
    /// Runs every non-suppressed check on `ast` itself, then recurses into all
    /// array elements or object values, in order. Messages for a node precede
    /// those of its children.
    #[cfg(feature = "resolc")]
    pub fn get_messages(
        ast: &serde_json::Value,
        suppressed_warnings: &[Warning],
    ) -> Vec<SolcStandardJsonOutputError> {
        let enabled = |warning: &Warning| !suppressed_warnings.contains(warning);
        let mut messages = Vec::new();

        // `extend` with an `Option` pushes the message only when the check matched.
        if enabled(&Warning::EcRecover) {
            messages.extend(Self::check_ecrecover(ast));
        }
        if enabled(&Warning::SendTransfer) {
            messages.extend(Self::check_send_and_transfer(ast));
        }
        if enabled(&Warning::ExtCodeSize) {
            messages.extend(Self::check_assembly_extcodesize(ast));
        }
        if enabled(&Warning::TxOrigin) {
            messages.extend(Self::check_assembly_origin(ast));
            messages.extend(Self::check_tx_origin(ast));
        }

        match ast {
            serde_json::Value::Array(elements) => elements
                .iter()
                .for_each(|child| messages.extend(Self::get_messages(child, suppressed_warnings))),
            serde_json::Value::Object(fields) => fields
                .values()
                .for_each(|child| messages.extend(Self::get_messages(child, suppressed_warnings))),
            _ => {}
        }

        messages
    }

    /// Returns the name of the last contract.
    ///
    /// Looks at the top-level `nodes` array of the AST and picks the last
    /// `ContractDefinition` entry that has a string `name`.
    ///
    /// # Errors
    ///
    /// Fails if there is no AST, if `nodes` is missing or not an array, or if
    /// no such contract definition exists.
    pub fn last_contract_name(&self) -> anyhow::Result<String> {
        let ast = self
            .ast
            .as_ref()
            .ok_or_else(|| anyhow::anyhow!("The AST is empty"))?;
        let nodes = ast
            .get("nodes")
            .and_then(serde_json::Value::as_array)
            .ok_or_else(|| {
                anyhow::anyhow!("The last contract cannot be found in an empty list of nodes")
            })?;

        // Scanning from the back and stopping at the first hit yields the last match.
        nodes
            .iter()
            .rev()
            .filter(|node| Self::field_is(node, "nodeType", "ContractDefinition"))
            .find_map(|node| node.get("name")?.as_str())
            .map(str::to_owned)
            .ok_or_else(|| anyhow::anyhow!("The last contract not found in the AST"))
    }
}