allow dynamic configuration of the heap memory (#287)

This PR changes the implementation of the emulated EVM heap memory:
Instead of linking in a C implementation the code is emitted directly
into the contract module. Which allows making it configurable via a
compiler parameter (a follow up PR to this).

---------

Signed-off-by: Cyrill Leutwiler <bigcyrill@hotmail.com>
This commit is contained in:
xermicus
2025-04-23 20:25:55 +02:00
committed by GitHub
parent f937188991
commit 357bf58868
11 changed files with 216 additions and 66 deletions
+8 -8
View File
@@ -1,10 +1,10 @@
{
"Baseline": 950,
"Computation": 2222,
"DivisionArithmetics": 8802,
"ERC20": 17602,
"Events": 1628,
"FibonacciIterative": 1485,
"Flipper": 2082,
"SHA1": 8230
"Baseline": 938,
"Computation": 2281,
"DivisionArithmetics": 8848,
"ERC20": 18307,
"Events": 1639,
"FibonacciIterative": 1496,
"Flipper": 2098,
"SHA1": 8242
}
+1
View File
@@ -28,6 +28,7 @@ pub use self::polkavm::context::function::runtime::entry::Entry as PolkaVMEntryF
pub use self::polkavm::context::function::runtime::revive::Exit as PolkaVMExitFunction;
pub use self::polkavm::context::function::runtime::revive::WordToPointer as PolkaVMWordToPointerFunction;
pub use self::polkavm::context::function::runtime::runtime_code::RuntimeCode as PolkaVMRuntimeCodeFunction;
pub use self::polkavm::context::function::runtime::sbrk::Sbrk as PolkaVMSbrkFunction;
pub use self::polkavm::context::function::runtime::FUNCTION_DEPLOY_CODE as PolkaVMFunctionDeployCode;
pub use self::polkavm::context::function::runtime::FUNCTION_ENTRY as PolkaVMFunctionEntry;
pub use self::polkavm::context::function::runtime::FUNCTION_RUNTIME_CODE as PolkaVMFunctionRuntimeCode;
@@ -9,6 +9,12 @@ pub static XLEN: usize = revive_common::BIT_LENGTH_X32;
/// The calldata size global variable name.
pub static GLOBAL_CALLDATA_SIZE: &str = "calldatasize";
/// The heap size global variable name.
pub static GLOBAL_HEAP_SIZE: &str = "__heap_size";
/// The heap memory global variable name.
pub static GLOBAL_HEAP_MEMORY: &str = "__heap_memory";
/// The spill buffer global variable name.
pub static GLOBAL_ADDRESS_SPILL_BUFFER: &str = "address_spill_buffer";
@@ -31,6 +31,21 @@ impl Entry {
context.xlen_type().get_undef(),
);
context.set_global(
crate::polkavm::GLOBAL_HEAP_SIZE,
context.xlen_type(),
AddressSpace::Stack,
context.xlen_type().const_zero(),
);
let heap_memory_type = context.byte_type().array_type(context.heap_size);
context.set_global(
crate::polkavm::GLOBAL_HEAP_MEMORY,
heap_memory_type,
AddressSpace::Stack,
heap_memory_type.const_zero(),
);
let address_type = context.integer_type(revive_common::BIT_LENGTH_ETH_ADDRESS);
context.set_global(
crate::polkavm::GLOBAL_ADDRESS_SPILL_BUFFER,
@@ -5,6 +5,7 @@ pub mod deploy_code;
pub mod entry;
pub mod revive;
pub mod runtime_code;
pub mod sbrk;
/// The main entry function name.
pub const FUNCTION_ENTRY: &str = "__entry";
@@ -0,0 +1,144 @@
//! Emulates the linear EVM heap memory via a simulated `sbrk` system call.
use inkwell::values::BasicValue;
use crate::polkavm::context::attribute::Attribute;
use crate::polkavm::context::runtime::RuntimeFunction;
use crate::polkavm::context::Context;
use crate::polkavm::Dependency;
use crate::polkavm::WriteLLVM;
/// Simulates the `sbrk` system call, reproducing the semantics of the EVM heap memory.
///
/// Parameters:
/// - The `offset` into the emulated EVM heap memory.
/// - The `size` of the allocation emulated EVM heap memory.
///
/// Returns:
/// - A pointer to the EVM heap memory at given `offset`.
///
/// Semantics:
/// - Traps if the offset is out of bounds.
/// - Aligns the total heap memory size to the EVM word size.
/// - Traps if the memory size would be greater than the configured EVM heap memory size.
/// - Maintains the total memory size (`msize`) in global heap size value.
pub struct Sbrk;
impl<D> RuntimeFunction<D> for Sbrk
where
D: Dependency + Clone,
{
const NAME: &'static str = "__sbrk_internal";
const ATTRIBUTES: &'static [Attribute] = &[
Attribute::NoFree,
Attribute::NoRecurse,
Attribute::WillReturn,
];
fn r#type<'ctx>(context: &Context<'ctx, D>) -> inkwell::types::FunctionType<'ctx> {
context.llvm().ptr_type(Default::default()).fn_type(
&[context.xlen_type().into(), context.xlen_type().into()],
false,
)
}
fn emit_body<'ctx>(
&self,
context: &mut Context<'ctx, D>,
) -> anyhow::Result<Option<inkwell::values::BasicValueEnum<'ctx>>> {
let offset = Self::paramater(context, 0).into_int_value();
let size = Self::paramater(context, 1).into_int_value();
let trap_block = context.append_basic_block("trap");
let offset_in_bounds_block = context.append_basic_block("offset_in_bounds");
let is_offset_out_of_bounds = context.builder().build_int_compare(
inkwell::IntPredicate::UGE,
offset,
context.heap_size(),
"offset_out_of_bounds",
)?;
context.build_conditional_branch(
is_offset_out_of_bounds,
trap_block,
offset_in_bounds_block,
)?;
context.set_basic_block(trap_block);
context.build_call(context.intrinsics().trap, &[], "invalid_trap");
context.build_unreachable();
context.set_basic_block(offset_in_bounds_block);
let mask = context
.xlen_type()
.const_int(revive_common::BYTE_LENGTH_WORD as u64 - 1, false);
let total_size = context
.builder()
.build_int_add(offset, size, "total_size")?;
let memory_size = context.builder().build_and(
context.builder().build_int_add(total_size, mask, "mask")?,
context.builder().build_not(mask, "mask_not")?,
"memory_size",
)?;
let size_in_bounds_block = context.append_basic_block("size_in_bounds");
let is_size_out_of_bounds = context.builder().build_int_compare(
inkwell::IntPredicate::UGT,
memory_size,
context.heap_size(),
"size_out_of_bounds",
)?;
context.build_conditional_branch(
is_size_out_of_bounds,
trap_block,
size_in_bounds_block,
)?;
context.set_basic_block(size_in_bounds_block);
let return_block = context.append_basic_block("return_pointer");
let new_size_block = context.append_basic_block("new_size");
let is_new_size = context.builder().build_int_compare(
inkwell::IntPredicate::UGT,
memory_size,
context
.get_global_value(crate::polkavm::GLOBAL_HEAP_SIZE)?
.into_int_value(),
"is_new_size",
)?;
context.build_conditional_branch(is_new_size, new_size_block, return_block)?;
context.set_basic_block(new_size_block);
context.build_store(
context.get_global(crate::polkavm::GLOBAL_HEAP_SIZE)?.into(),
memory_size,
)?;
context.build_unconditional_branch(return_block);
context.set_basic_block(return_block);
Ok(Some(
context
.build_gep(
context
.get_global(crate::polkavm::GLOBAL_HEAP_MEMORY)?
.into(),
&[context.xlen_type().const_zero(), offset],
context.byte_type(),
"allocation_start_pointer",
)
.value
.as_basic_value_enum(),
))
}
}
impl<D> WriteLLVM<D> for Sbrk
where
D: Dependency + Clone,
{
fn declare(&mut self, context: &mut Context<D>) -> anyhow::Result<()> {
<Self as RuntimeFunction<_>>::declare(self, context)
}
fn into_llvm(self, context: &mut Context<D>) -> anyhow::Result<()> {
<Self as RuntimeFunction<_>>::emit(&self, context)
}
}
+35 -19
View File
@@ -33,6 +33,7 @@ use crate::polkavm::Dependency;
use crate::target_machine::target::Target;
use crate::target_machine::TargetMachine;
use crate::PolkaVMLoadHeapWordFunction;
use crate::PolkaVMSbrkFunction;
use crate::PolkaVMStoreHeapWordFunction;
use self::address_space::AddressSpace;
@@ -85,6 +86,8 @@ where
loop_stack: Vec<Loop<'ctx>>,
/// The extra LLVM arguments that were used during target initialization.
llvm_arguments: &'ctx [String],
/// The emulated EVM linear heap memory size.
heap_size: u32,
/// The project dependency manager. It can be any entity implementing the trait.
/// The manager is used to get information about contracts and their dependencies during
@@ -254,6 +257,7 @@ where
current_function: None,
loop_stack: Vec::with_capacity(Self::LOOP_STACK_INITIAL_CAPACITY),
llvm_arguments,
heap_size: 65536,
dependency_manager,
include_metadata_hash,
@@ -1084,32 +1088,40 @@ where
offset: inkwell::values::IntValue<'ctx>,
size: inkwell::values::IntValue<'ctx>,
) -> anyhow::Result<inkwell::values::PointerValue<'ctx>> {
Ok(self
.builder()
.build_call(
self.runtime_api_method(revive_runtime_api::polkavm_imports::SBRK),
&[offset.into(), size.into()],
"call_sbrk",
)?
let call_site_value = self.builder().build_call(
<PolkaVMSbrkFunction as RuntimeFunction<D>>::declaration(self).function_value(),
&[offset.into(), size.into()],
"alloc_start",
)?;
call_site_value.add_attribute(
inkwell::attributes::AttributeLoc::Return,
self.llvm
.create_enum_attribute(Attribute::NonNull as u32, 0),
);
call_site_value.add_attribute(
inkwell::attributes::AttributeLoc::Return,
self.llvm
.create_enum_attribute(Attribute::NoUndef as u32, 0),
);
Ok(call_site_value
.try_as_basic_value()
.left()
.expect("sbrk returns a pointer")
.unwrap_or_else(|| {
panic!(
"revive runtime function {} should return a value",
<PolkaVMSbrkFunction as RuntimeFunction<D>>::NAME,
)
})
.into_pointer_value())
}
/// Build a call to PolkaVM `msize` for querying the linear memory size.
pub fn build_msize(&self) -> anyhow::Result<inkwell::values::IntValue<'ctx>> {
let memory_size_pointer = self
.module()
.get_global(revive_runtime_api::polkavm_imports::MEMORY_SIZE)
.expect("the memory size symbol should have been declared")
.as_pointer_value();
let memory_size_value = self.builder().build_load(
self.xlen_type(),
memory_size_pointer,
"memory_size_value",
)?;
Ok(memory_size_value.into_int_value())
Ok(self
.get_global_value(crate::polkavm::GLOBAL_HEAP_SIZE)?
.into_int_value())
}
/// Returns a pointer to `offset` into the heap, allocating
@@ -1428,4 +1440,8 @@ where
pub fn optimizer_settings(&self) -> &OptimizerSettings {
self.optimizer.settings()
}
pub fn heap_size(&self) -> inkwell::values::IntValue<'ctx> {
self.xlen_type().const_int(self.heap_size as u64, false)
}
}
-28
View File
@@ -5,34 +5,6 @@
// Missing builtins
#define EVM_WORD_SIZE 32
#define ALIGN(size) ((size + EVM_WORD_SIZE - 1) & ~(EVM_WORD_SIZE - 1))
#define MAX_MEMORY_SIZE (64 * 1024)
char __memory[MAX_MEMORY_SIZE];
uint32_t __memory_size = 0;
void * __sbrk_internal(uint32_t offset, uint32_t size) {
if (offset >= MAX_MEMORY_SIZE || size > MAX_MEMORY_SIZE) {
POLKAVM_TRAP();
}
uint32_t new_size = ALIGN(offset + size);
if (new_size > MAX_MEMORY_SIZE) {
POLKAVM_TRAP();
}
if (new_size > __memory_size) {
__memory_size = new_size;
}
return (void *)&__memory[offset];
}
void * memset(void *b, int c, size_t len) {
uint8_t *dest = b;
while (len-- > 0) *dest++ = c;
return b;
}
void * memcpy(void *dst, const void *_src, size_t len) {
uint8_t *dest = dst;
const uint8_t *src = _src;
+1 -10
View File
@@ -2,14 +2,6 @@ use inkwell::{context::Context, memory_buffer::MemoryBuffer, module::Module, sup
include!(concat!(env!("OUT_DIR"), "/polkavm_imports.rs"));
/// The emulated EVM heap memory global symbol.
pub static MEMORY: &str = "__memory";
/// The emulated EVM heap memory size global symbol.
pub static MEMORY_SIZE: &str = "__memory_size";
pub static SBRK: &str = "__sbrk_internal";
pub static ADDRESS: &str = "address";
pub static BALANCE: &str = "balance";
@@ -78,8 +70,7 @@ pub static WEIGHT_TO_FEE: &str = "weight_to_fee";
/// All imported runtime API symbols.
/// Useful for configuring common attributes and linkage.
pub static IMPORTS: [&str; 34] = [
SBRK,
pub static IMPORTS: [&str; 33] = [
ADDRESS,
BALANCE,
BALANCE_OF,
+1 -1
View File
@@ -167,8 +167,8 @@ pub struct Arguments {
#[arg(long = "recursive-process-input")]
pub recursive_process_input: Option<String>,
#[arg(long = "llvm-arg")]
/// These are passed to LLVM as the command line to allow manual control.
#[arg(long = "llvm-arg")]
pub llvm_arguments: Vec<String>,
}
@@ -209,6 +209,8 @@ where
revive_llvm_context::PolkaVMRemainderFunction.declare(context)?;
revive_llvm_context::PolkaVMSignedRemainderFunction.declare(context)?;
revive_llvm_context::PolkaVMSbrkFunction.declare(context)?;
let mut entry = revive_llvm_context::PolkaVMEntryFunction::default();
entry.declare(context)?;
@@ -261,6 +263,8 @@ where
revive_llvm_context::PolkaVMRemainderFunction.into_llvm(context)?;
revive_llvm_context::PolkaVMSignedRemainderFunction.into_llvm(context)?;
revive_llvm_context::PolkaVMSbrkFunction.into_llvm(context)?;
Ok(())
}