Separate compilation and linker phases (#376)

Separate the compilation and linker phases to allow deploy-time
linking and back-porting era compiler changes to fix #91. Unlinked
contract binaries (caused by missing libraries or, in turn, missing
factory dependencies) are emitted as raw ELF objects.

A few drive-by fixes:
- #98
- A compiler panic on missing library definitions.
- Fixes some inconsistent type forwarding in JSON output (empty string
vs. null object).
- Remove the unused fallback for size optimization setting.
- Remove the broken `--llvm-ir` mode.
- CI workflow fixes.

---------

Signed-off-by: Cyrill Leutwiler <bigcyrill@hotmail.com>
Signed-off-by: xermicus <bigcyrill@hotmail.com>
Signed-off-by: xermicus <cyrill@parity.io>
This commit is contained in:
xermicus
2025-09-27 20:52:22 +02:00
committed by GitHub
parent 13faedf08a
commit 94ec34c4d5
169 changed files with 6288 additions and 5206 deletions
+45 -145
View File
@@ -1,22 +1,5 @@
//! The LLVM IR generator context.
pub mod address_space;
pub mod argument;
pub mod attribute;
pub mod build;
pub mod code_type;
pub mod debug_info;
pub mod function;
pub mod global;
pub mod r#loop;
pub mod pointer;
pub mod runtime;
pub mod solidity_data;
pub mod yul_data;
#[cfg(test)]
mod tests;
use std::cell::RefCell;
use std::collections::HashMap;
use std::rc::Rc;
@@ -32,7 +15,6 @@ use revive_solc_json_interface::SolcStandardJsonInputSettingsPolkaVMMemory;
use crate::optimizer::settings::Settings as OptimizerSettings;
use crate::optimizer::Optimizer;
use crate::polkavm::DebugConfig;
use crate::polkavm::Dependency;
use crate::target_machine::target::Target;
use crate::target_machine::TargetMachine;
use crate::PolkaVMLoadHeapWordFunction;
@@ -58,13 +40,27 @@ use self::runtime::RuntimeFunction;
use self::solidity_data::SolidityData;
use self::yul_data::YulData;
pub mod address_space;
pub mod argument;
pub mod attribute;
pub mod build;
pub mod code_type;
pub mod debug_info;
pub mod function;
pub mod global;
pub mod r#loop;
pub mod pointer;
pub mod runtime;
pub mod solidity_data;
pub mod yul_data;
#[cfg(test)]
mod tests;
/// The LLVM IR generator context.
/// It is a not-so-big god-like object gluing together all the compilers' complexity and acting
/// as an adapter and a superstructure over the inner `inkwell` LLVM context.
pub struct Context<'ctx, D>
where
D: Dependency + Clone,
{
pub struct Context<'ctx> {
/// The inner LLVM context.
llvm: &'ctx inkwell::context::Context,
/// The inner LLVM context builder.
@@ -87,17 +83,9 @@ where
current_function: Option<Rc<RefCell<Function<'ctx>>>>,
/// The loop context stack.
loop_stack: Vec<Loop<'ctx>>,
/// The extra LLVM arguments that were used during target initialization.
llvm_arguments: &'ctx [String],
/// The PVM memory configuration.
memory_config: SolcStandardJsonInputSettingsPolkaVMMemory,
/// The project dependency manager. It can be any entity implementing the trait.
/// The manager is used to get information about contracts and their dependencies during
/// the multi-threaded compilation process.
dependency_manager: Option<D>,
/// Whether to append the metadata hash at the end of bytecode.
include_metadata_hash: bool,
/// The debug info of the current module.
debug_info: Option<DebugInfo<'ctx>>,
/// The debug configuration telling whether to dump the needed IRs.
@@ -109,10 +97,7 @@ where
yul_data: Option<YulData>,
}
impl<'ctx, D> Context<'ctx, D>
where
D: Dependency + Clone,
{
impl<'ctx> Context<'ctx> {
/// The functions hashmap default capacity.
const FUNCTIONS_HASHMAP_INITIAL_CAPACITY: usize = 64;
@@ -221,15 +206,11 @@ where
}
/// Initializes a new LLVM context.
#[allow(clippy::too_many_arguments)]
pub fn new(
llvm: &'ctx inkwell::context::Context,
module: inkwell::module::Module<'ctx>,
optimizer: Optimizer,
dependency_manager: Option<D>,
include_metadata_hash: bool,
debug_config: DebugConfig,
llvm_arguments: &'ctx [String],
memory_config: SolcStandardJsonInputSettingsPolkaVMMemory,
) -> Self {
Self::set_data_layout(llvm, &module);
@@ -264,12 +245,8 @@ where
functions: HashMap::with_capacity(Self::FUNCTIONS_HASHMAP_INITIAL_CAPACITY),
current_function: None,
loop_stack: Vec::with_capacity(Self::LOOP_STACK_INITIAL_CAPACITY),
llvm_arguments,
memory_config,
dependency_manager,
include_metadata_hash,
debug_info,
debug_config,
@@ -280,12 +257,10 @@ where
/// Builds the LLVM IR module, returning the build artifacts.
pub fn build(
mut self,
self,
contract_path: &str,
metadata_hash: Option<[u8; revive_common::BYTE_LENGTH_WORD]>,
metadata_hash: Option<revive_common::Keccak256>,
) -> anyhow::Result<Build> {
let module_clone = self.module.clone();
self.link_polkavm_exports(contract_path)?;
self.link_immutable_data(contract_path)?;
@@ -334,33 +309,16 @@ where
)
})?;
let shared_object = revive_linker::link(buffer.as_slice())?;
let object = buffer.as_slice().to_vec();
self.debug_config
.dump_object(contract_path, &shared_object)?;
self.debug_config.dump_object(contract_path, &object)?;
let polkavm_bytecode =
revive_linker::polkavm_linker(shared_object, !self.debug_config().emit_debug_info)?;
let build = match crate::polkavm::build_assembly_text(
contract_path,
&polkavm_bytecode,
metadata_hash,
self.debug_config(),
) {
Ok(build) => build,
Err(_error)
if self.optimizer.settings() != &OptimizerSettings::size()
&& self.optimizer.settings().is_fallback_to_size_enabled() =>
{
self.optimizer = Optimizer::new(OptimizerSettings::size());
self.module = module_clone;
self.build(contract_path, metadata_hash)?
}
Err(error) => Err(error)?,
};
Ok(build)
crate::polkavm::build(
&object,
metadata_hash
.as_ref()
.map(|hash| hash.as_bytes().try_into().unwrap()),
)
}
/// Verifies the current LLVM IR module.
@@ -437,11 +395,15 @@ where
}
}
/// Declares an external global called `name` and registers it in the context.
///
/// The call is idempotent: when a global with the same name is already
/// registered, nothing is declared and the existing entry is left untouched.
pub fn declare_global<T>(&mut self, name: &str, r#type: T, address_space: AddressSpace)
where
    T: BasicType<'ctx> + Clone + Copy,
{
    // Only the first declaration for a given name takes effect.
    if !self.globals.contains_key(name) {
        let declared = Global::declare(self, r#type, address_space, name);
        self.globals.insert(name.to_owned(), declared);
    }
}
@@ -650,54 +612,6 @@ where
.expect("The current context is not in a loop")
}
/// Compiles the contract dependency `name` through the dependency manager.
///
/// A clone of the manager is handed to `Dependency::compile` together with the
/// context's optimizer settings, metadata hash flag, debug configuration,
/// LLVM arguments and memory configuration.
///
/// # Errors
/// Fails if the dependency manager is unset, or with whatever error
/// `Dependency::compile` reports.
pub fn compile_dependency(&mut self, name: &str) -> anyhow::Result<String> {
    let manager = self
        .dependency_manager
        .to_owned()
        .ok_or_else(|| anyhow::anyhow!("The dependency manager is unset"))?;

    Dependency::compile(
        manager,
        name,
        self.optimizer.settings().to_owned(),
        self.include_metadata_hash,
        self.debug_config.clone(),
        self.llvm_arguments,
        self.memory_config,
    )
}
/// Asks the dependency manager for the full contract path of `identifier`.
///
/// # Errors
/// Fails if the dependency manager is unset, or if the manager cannot
/// resolve the identifier.
pub fn resolve_path(&self, identifier: &str) -> anyhow::Result<String> {
    let manager = self
        .dependency_manager
        .to_owned()
        .ok_or_else(|| anyhow::anyhow!("The dependency manager is unset"))?;

    let full_path = manager.resolve_path(identifier)?;
    Ok(full_path)
}
/// Asks the dependency manager for the deployed address of the library at
/// `path` and turns the returned hexadecimal string into a word constant.
///
/// # Errors
/// Fails if the dependency manager is unset, or if the manager cannot
/// resolve the library.
pub fn resolve_library(&self, path: &str) -> anyhow::Result<inkwell::values::IntValue<'ctx>> {
    let manager = self
        .dependency_manager
        .to_owned()
        .ok_or_else(|| anyhow::anyhow!("The dependency manager is unset"))?;

    let hex_address = manager.resolve_library(path)?;
    Ok(self.word_const_str_hex(hex_address.as_str()))
}
/// Moves the dependency manager out of the context, leaving `None` behind.
///
/// # Panics
/// If the dependency manager is unset.
pub fn take_dependency_manager(&mut self) -> D {
    let manager = self.dependency_manager.take();
    manager.expect("The dependency manager is unset")
}
/// Returns the debug info.
pub fn debug_info(&self) -> Option<&DebugInfo<'ctx>> {
self.debug_info.as_ref()
@@ -808,9 +722,9 @@ where
) -> anyhow::Result<inkwell::values::BasicValueEnum<'ctx>> {
match pointer.address_space {
AddressSpace::Heap => {
let name = <PolkaVMLoadHeapWordFunction as RuntimeFunction<D>>::NAME;
let name = <PolkaVMLoadHeapWordFunction as RuntimeFunction>::NAME;
let declaration =
<PolkaVMLoadHeapWordFunction as RuntimeFunction<D>>::declaration(self);
<PolkaVMLoadHeapWordFunction as RuntimeFunction>::declaration(self);
let arguments = [self
.builder()
.build_ptr_to_int(pointer.value, self.xlen_type(), "offset_ptrtoint")?
@@ -846,7 +760,7 @@ where
match pointer.address_space {
AddressSpace::Heap => {
let declaration =
<PolkaVMStoreHeapWordFunction as RuntimeFunction<D>>::declaration(self);
<PolkaVMStoreHeapWordFunction as RuntimeFunction>::declaration(self);
let arguments = [
pointer.to_int(self).as_basic_value_enum(),
value.as_basic_value_enum(),
@@ -966,10 +880,7 @@ where
pub fn build_runtime_call_to_getter(
&self,
import: &'static str,
) -> anyhow::Result<inkwell::values::BasicValueEnum<'ctx>>
where
D: Dependency + Clone,
{
) -> anyhow::Result<inkwell::values::BasicValueEnum<'ctx>> {
let pointer = self.build_alloca_at_entry(self.word_type(), &format!("{import}_output"));
self.build_runtime_call(import, &[pointer.to_int(self).into()]);
self.build_load(pointer, import)
@@ -1064,7 +975,7 @@ where
length: inkwell::values::IntValue<'ctx>,
) -> anyhow::Result<()> {
self.build_call(
<Exit as RuntimeFunction<D>>::declaration(self),
<Exit as RuntimeFunction>::declaration(self),
&[flags.into(), offset.into(), length.into()],
"exit",
);
@@ -1088,14 +999,14 @@ where
Ok(self
.build_call(
<WordToPointer as RuntimeFunction<D>>::declaration(self),
<WordToPointer as RuntimeFunction>::declaration(self),
&[value.into()],
"word_to_pointer",
)
.unwrap_or_else(|| {
panic!(
"revive runtime function {} should return a value",
<WordToPointer as RuntimeFunction<D>>::NAME,
<WordToPointer as RuntimeFunction>::NAME,
)
})
.into_int_value())
@@ -1111,7 +1022,7 @@ where
size: inkwell::values::IntValue<'ctx>,
) -> anyhow::Result<inkwell::values::PointerValue<'ctx>> {
let call_site_value = self.builder().build_call(
<PolkaVMSbrkFunction as RuntimeFunction<D>>::declaration(self).function_value(),
<PolkaVMSbrkFunction as RuntimeFunction>::declaration(self).function_value(),
&[offset.into(), size.into()],
"alloc_start",
)?;
@@ -1133,7 +1044,7 @@ where
.unwrap_or_else(|| {
panic!(
"revive runtime function {} should return a value",
<PolkaVMSbrkFunction as RuntimeFunction<D>>::NAME,
<PolkaVMSbrkFunction as RuntimeFunction>::NAME,
)
})
.into_pointer_value())
@@ -1433,19 +1344,8 @@ where
/// Borrows the Yul data stored in the context.
///
/// # Panics
/// If the Yul data has not been initialized.
pub fn yul(&self) -> &YulData {
    let data = self.yul_data.as_ref();
    data.expect("The Yul data must have been initialized")
}
/// Returns the Yul data mutable reference.
/// # Panics
/// If the Yul data has not been initialized.
pub fn yul_mut(&mut self) -> &mut YulData {
self.yul_data
.as_mut()
.expect("The Yul data must have been initialized")
/// Returns a reference to the Yul data, or `None` if no Yul data is set.
///
/// This accessor never panics; callers decide how to handle the absent case.
pub fn yul(&self) -> Option<&YulData> {
self.yul_data.as_ref()
}
/// Returns the current number of immutables values in the contract.