llvm-context: modularize compiler builtin functions (#234)

- Add the revive runtime function interface to minimize boilerplate
code.
- Outline heavily repeated code into dedicated functions to bring down
code size.
- The code size tests now build optimized for size.
- Function attributes are passed as slices.

This significantly brings down the code size for all OpenZeppelin wizard
contracts (using all possible features) compiled against OpenZeppelin
`v5.0.0` with size optimizations.

|contract|| `-Oz` main | `-Oz` PR || `-O3` main | `-O3` PR |
|-|-|-|-|-|-|-|
|erc1155.sol||100K|67K||114K|147K|
|erc20.sol||120K|90K||160K|191K|
|erc721.sol||128K|101K||178K|214K|
|governor.sol||226K|165K||293K|349K|
|rwa.sol||116K|85K||154K|185K|
|stable.sol||116K|86K||155K|192K|

On the flip side this introduces a heavy penalty for cycle optimized
builds. Setting the no-inline attributes for cycle optimized builds
helps a lot but heavily penalizes runtime speed (LLVM does not yet
inline everything properly - to be investigated later on).

Next steps:
- Modularize more functions
- Refactor the YUL function arguments to use pointers instead of values
- Afterwards check if LLVM still has trouble inlining properly on O3
or set the no-inline attribute if it does not penalize runtime
performance too badly.
This commit is contained in:
xermicus
2025-02-25 16:47:01 +01:00
committed by GitHub
parent 7ffe64ed7c
commit a07968205b
32 changed files with 1444 additions and 655 deletions
@@ -0,0 +1,110 @@
//! The revive simulated EVM linear memory pointer functions.
use inkwell::values::BasicValueEnum;
use crate::polkavm::context::runtime::RuntimeFunction;
use crate::polkavm::context::Context;
use crate::polkavm::Dependency;
use crate::polkavm::WriteLLVM;
/// Runtime function that loads a word sized value from a heap offset.
pub struct LoadWord;

impl<D> RuntimeFunction<D> for LoadWord
where
    D: Dependency + Clone,
{
    const NAME: &'static str = "__revive_load_heap_word";

    /// The function takes a single xlen sized heap offset and returns a word.
    fn r#type<'ctx>(context: &Context<'ctx, D>) -> inkwell::types::FunctionType<'ctx> {
        let offset_type = context.xlen_type();
        context.word_type().fn_type(&[offset_type.into()], false)
    }

    /// Emits the function body: resolve the heap pointer for the offset,
    /// load a word through it and return the byte swapped result.
    ///
    /// # Errors
    ///
    /// Propagates any error of the heap GEP, the load or the byte swap.
    fn emit_body<'ctx>(
        &self,
        context: &mut Context<'ctx, D>,
    ) -> anyhow::Result<Option<BasicValueEnum<'ctx>>> {
        let heap_offset = Self::paramater(context, 0).into_int_value();
        let word_size = context
            .xlen_type()
            .const_int(revive_common::BYTE_LENGTH_WORD as u64, false);
        let heap_pointer = context.build_heap_gep(heap_offset, word_size)?;

        let loaded = context
            .builder()
            .build_load(context.word_type(), heap_pointer.value, "value")?;

        // The load built right above is the last instruction of the block;
        // its alignment is set to `BYTE_LENGTH_BYTE`.
        // NOTE(review): presumably because heap offsets are not word aligned - confirm.
        let load_instruction = context
            .basic_block()
            .get_last_instruction()
            .expect("Always exists");
        load_instruction
            .set_alignment(revive_common::BYTE_LENGTH_BYTE as u32)
            .expect("Alignment is valid");

        Ok(Some(context.build_byte_swap(loaded)?))
    }
}
/// Forwards the `WriteLLVM` entry points to the `RuntimeFunction` implementation.
impl<D> WriteLLVM<D> for LoadWord
where
    D: Dependency + Clone,
{
    /// Declares the runtime function.
    fn declare(&mut self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<D>>::declare(self, context)
    }

    /// Emits the runtime function.
    fn into_llvm(self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<D>>::emit(&self, context)
    }
}
/// Runtime function that stores a word sized value at a heap offset.
pub struct StoreWord;

impl<D> RuntimeFunction<D> for StoreWord
where
    D: Dependency + Clone,
{
    const NAME: &'static str = "__revive_store_heap_word";

    /// The function takes an xlen sized heap offset and the word to store;
    /// it returns nothing.
    fn r#type<'ctx>(context: &Context<'ctx, D>) -> inkwell::types::FunctionType<'ctx> {
        let offset_type = context.xlen_type();
        let value_type = context.word_type();
        context
            .void_type()
            .fn_type(&[offset_type.into(), value_type.into()], false)
    }

    /// Emits the function body: resolve the heap pointer for the offset and
    /// store the byte swapped value through it.
    ///
    /// # Errors
    ///
    /// Propagates any error of the heap GEP, the byte swap or the store.
    fn emit_body<'ctx>(
        &self,
        context: &mut Context<'ctx, D>,
    ) -> anyhow::Result<Option<BasicValueEnum<'ctx>>> {
        let heap_offset = Self::paramater(context, 0).into_int_value();
        let word_size = context
            .xlen_type()
            .const_int(revive_common::BYTE_LENGTH_WORD as u64, false);
        let heap_pointer = context.build_heap_gep(heap_offset, word_size)?;

        let word = Self::paramater(context, 1);
        let swapped_word = context.build_byte_swap(word)?;

        // The store alignment is set to `BYTE_LENGTH_BYTE`.
        // NOTE(review): presumably because heap offsets are not word aligned - confirm.
        let store_instruction = context
            .builder()
            .build_store(heap_pointer.value, swapped_word)?;
        store_instruction
            .set_alignment(revive_common::BYTE_LENGTH_BYTE as u32)
            .expect("Alignment is valid");

        Ok(None)
    }
}
/// Forwards the `WriteLLVM` entry points to the `RuntimeFunction` implementation.
impl<D> WriteLLVM<D> for StoreWord
where
    D: Dependency + Clone,
{
    /// Declares the runtime function.
    fn declare(&mut self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<D>>::declare(self, context)
    }

    /// Emits the runtime function.
    fn into_llvm(self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<D>>::emit(&self, context)
    }
}
@@ -0,0 +1,136 @@
//! The LLVM pointer.
use inkwell::types::BasicType;
use crate::polkavm::context::address_space::AddressSpace;
use crate::polkavm::context::global::Global;
use crate::polkavm::context::Context;
use crate::polkavm::Dependency;
pub mod heap;
pub mod storage;
/// The LLVM pointer.
///
/// Bundles a raw LLVM pointer value with the pointee type and the address
/// space it is tracked under.
#[derive(Debug, Clone, Copy)]
pub struct Pointer<'ctx> {
/// The pointee type.
pub r#type: inkwell::types::BasicTypeEnum<'ctx>,
/// The address space.
pub address_space: AddressSpace,
/// The pointer value.
pub value: inkwell::values::PointerValue<'ctx>,
}
impl<'ctx> Pointer<'ctx> {
    /// Builds a pointer from its pointee type, address space and raw value.
    pub fn new<T>(
        r#type: T,
        address_space: AddressSpace,
        value: inkwell::values::PointerValue<'ctx>,
    ) -> Self
    where
        T: BasicType<'ctx>,
    {
        let r#type = r#type.as_basic_type_enum();
        Self {
            r#type,
            address_space,
            value,
        }
    }

    /// Wraps `value` as a stack pointer whose pointee is the context word type.
    pub fn new_stack_field<D>(
        context: &Context<'ctx, D>,
        value: inkwell::values::PointerValue<'ctx>,
    ) -> Self
    where
        D: Dependency + Clone,
    {
        Self::new(context.word_type(), AddressSpace::Stack, value)
    }

    /// Builds a pointer in `address_space` out of the integer `offset`.
    ///
    /// The offset is truncated to xlen and then converted into a pointer.
    ///
    /// # Panics
    ///
    /// Panics if `address_space` is the stack, or if truncating the offset or
    /// building the int-to-pointer conversion fails.
    pub fn new_with_offset<D, T>(
        context: &Context<'ctx, D>,
        address_space: AddressSpace,
        r#type: T,
        offset: inkwell::values::IntValue<'ctx>,
        name: &str,
    ) -> Self
    where
        D: Dependency + Clone,
        T: BasicType<'ctx>,
    {
        assert_ne!(
            address_space,
            AddressSpace::Stack,
            "Stack pointers cannot be addressed"
        );

        let xlen_offset = context.safe_truncate_int_to_xlen(offset).unwrap();
        let pointer_type = context.llvm().ptr_type(address_space.into());
        let pointer_value = context
            .builder()
            .build_int_to_ptr(xlen_offset, pointer_type, name)
            .unwrap();

        Self::new(r#type, address_space, pointer_value)
    }

    /// Returns the same pointer with the pointee type replaced by `r#type`.
    pub fn cast<T>(self, r#type: T) -> Self
    where
        T: BasicType<'ctx>,
    {
        Self::new(r#type, self.address_space, self.value)
    }

    /// Converts this pointer into an xlen sized integer value.
    ///
    /// # Panics
    ///
    /// Panics if the pointer-to-int instruction can not be built.
    pub fn to_int<D>(&self, context: &Context<'ctx, D>) -> inkwell::values::IntValue<'ctx>
    where
        D: Dependency + Clone,
    {
        let integer_type = context.xlen_type();
        context
            .builder()
            .build_ptr_to_int(self.value, integer_type, "ptr_to_xlen")
            .expect("we should be positioned")
    }

    /// Casts the pointer value into `address_space`, keeping the pointee type.
    ///
    /// # Errors
    ///
    /// Returns an error if the address space cast instruction can not be built.
    pub fn address_space_cast<D>(
        self,
        context: &Context<'ctx, D>,
        address_space: AddressSpace,
        name: &str,
    ) -> anyhow::Result<Self>
    where
        D: Dependency + Clone,
    {
        let target_type = context.llvm().ptr_type(address_space.into());
        let casted_value = context
            .builder()
            .build_address_space_cast(self.value, target_type, name)?;

        Ok(Self::new(self.r#type, address_space, casted_value))
    }
}
impl<'ctx> From<Global<'ctx>> for Pointer<'ctx> {
    /// Converts a global into a pointer to it, keeping the type of the global.
    ///
    /// The resulting pointer is tagged with the stack address space.
    fn from(global: Global<'ctx>) -> Self {
        let pointer_value = global.value.as_pointer_value();
        Self::new(global.r#type, AddressSpace::Stack, pointer_value)
    }
}
@@ -0,0 +1,135 @@
//! The revive storage pointer functions.
use inkwell::values::BasicValueEnum;
use crate::polkavm::context::runtime::RuntimeFunction;
use crate::polkavm::context::Context;
use crate::polkavm::Dependency;
use crate::polkavm::WriteLLVM;
/// Runtime function that loads a word sized value from a storage key.
pub struct LoadWord;

impl<D> RuntimeFunction<D> for LoadWord
where
    D: Dependency + Clone,
{
    const NAME: &'static str = "__revive_load_storage_word";

    /// The function takes the xlen sized transient flag and the word sized
    /// storage key; it returns the loaded word.
    fn r#type<'ctx>(context: &Context<'ctx, D>) -> inkwell::types::FunctionType<'ctx> {
        let flag_type = context.xlen_type();
        let key_type = context.word_type();
        context
            .word_type()
            .fn_type(&[flag_type.into(), key_type.into()], false)
    }

    /// Emits the function body: spill the key to the stack, zero the output
    /// buffer, call the `GET_STORAGE` import and return the buffer content.
    ///
    /// # Errors
    ///
    /// Propagates any error of the emitted stores and the final load.
    fn emit_body<'ctx>(
        &self,
        context: &mut Context<'ctx, D>,
    ) -> anyhow::Result<Option<BasicValueEnum<'ctx>>> {
        let is_transient = Self::paramater(context, 0);
        let key = Self::paramater(context, 1);

        // The runtime call exchanges key, value and value length via memory.
        let key_pointer = context.build_alloca_at_entry(context.word_type(), "key_pointer");
        let value_pointer = context.build_alloca_at_entry(context.word_type(), "value_pointer");
        let length_pointer = context.build_alloca_at_entry(context.xlen_type(), "length_pointer");

        context.builder().build_store(key_pointer.value, key)?;
        // Zero the output buffer up front so that a missing key reads as zero.
        context.build_store(value_pointer, context.word_const(0))?;
        let word_size = context
            .xlen_type()
            .const_int(revive_common::BYTE_LENGTH_WORD as u64, false);
        context.build_store(length_pointer, word_size)?;

        let key_address = key_pointer.to_int(context);
        // NOTE(review): the all-ones argument presumably is a key length sentinel - confirm
        // against the runtime API.
        let all_ones = context.xlen_type().const_all_ones();
        let value_address = value_pointer.to_int(context);
        let length_address = length_pointer.to_int(context);
        let arguments = [
            is_transient,
            key_address.into(),
            all_ones.into(),
            value_address.into(),
            length_address.into(),
        ];
        context.build_runtime_call(revive_runtime_api::polkavm_imports::GET_STORAGE, &arguments);

        // We do not need to check the return value: Solidity assumes infallible loads.
        // If a key doesn't exist the "zero" value is returned (ensured by above write).
        let storage_value = context.build_load(value_pointer, "storage_value")?;
        Ok(Some(storage_value))
    }
}
/// Forwards the `WriteLLVM` entry points to the `RuntimeFunction` implementation.
impl<D> WriteLLVM<D> for LoadWord
where
    D: Dependency + Clone,
{
    /// Declares the runtime function.
    fn declare(&mut self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<D>>::declare(self, context)
    }

    /// Emits the runtime function.
    fn into_llvm(self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<D>>::emit(&self, context)
    }
}
/// Store a word size value through a storage pointer.
pub struct StoreWord;
impl<D> RuntimeFunction<D> for StoreWord
where
D: Dependency + Clone,
{
const NAME: &'static str = "__revive_store_storage_word";
fn r#type<'ctx>(context: &Context<'ctx, D>) -> inkwell::types::FunctionType<'ctx> {
context.void_type().fn_type(
&[
context.xlen_type().into(),
context.word_type().into(),
context.word_type().into(),
],
false,
)
}
fn emit_body<'ctx>(
&self,
context: &mut Context<'ctx, D>,
) -> anyhow::Result<Option<BasicValueEnum<'ctx>>> {
let is_transient = Self::paramater(context, 0);
let key = Self::paramater(context, 1);
let value = Self::paramater(context, 2);
let key_pointer = context.build_alloca_at_entry(context.word_type(), "key_pointer");
let value_pointer = context.build_alloca_at_entry(context.word_type(), "value_pointer");
context.build_store(key_pointer, key)?;
context.build_store(value_pointer, value)?;
let arguments = [
is_transient,
key_pointer.to_int(context).into(),
context.xlen_type().const_all_ones().into(),
value_pointer.to_int(context).into(),
context.integer_const(crate::polkavm::XLEN, 32).into(),
];
context.build_runtime_call(revive_runtime_api::polkavm_imports::SET_STORAGE, &arguments);
Ok(None)
}
}
/// Forwards the `WriteLLVM` entry points to the `RuntimeFunction` implementation.
impl<D> WriteLLVM<D> for StoreWord
where
    D: Dependency + Clone,
{
    /// Declares the runtime function.
    fn declare(&mut self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<D>>::declare(self, context)
    }

    /// Emits the runtime function.
    fn into_llvm(self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<D>>::emit(&self, context)
    }
}