llvm-context: modularize compiler builtin functions (#234)

- Add the revive runtime function interface to minimize boilerplate
code.
- Outline heavily repeated code into dedicated functions to bring down
code size.
- The code size tests build with size optimizations.
- Function attributes are passed as slices.

This significantly brings down the code size for all OpenZeppelin wizard
contracts (using all possible features) compiled against OpenZeppelin
`v5.0.0` with size optimizations.

|contract|| `-Oz` main | `-Oz` PR || `-O3` main | `-O3` PR |
|-|-|-|-|-|-|-|
|erc1155.sol||100K|67K||114K|147K|
|erc20.sol||120K|90K||160K|191K|
|erc721.sol||128K|101K||178K|214K|
|governor.sol||226K|165K||293K|349K|
|rwa.sol||116K|85K||154K|185K|
|stable.sol||116K|86K||155K|192K|

On the flip side this introduces a heavy penalty for cycle optimized
builds. Setting the no-inline attributes for cycle optimized builds
helps a lot but heavily penalizes runtime speed (LLVM does not yet
inline everything properly - to be investigated later on).

Next steps:
- Modularize more functions
- Refactor the YUL function arguments to use pointers instead of values
- Afterwards check if LLVM still has trouble inlining properly on O3
or set the no-inline attribute if it does not penalize runtime
performance too badly.
This commit is contained in:
xermicus
2025-02-25 16:47:01 +01:00
committed by GitHub
parent 7ffe64ed7c
commit a07968205b
32 changed files with 1444 additions and 655 deletions
@@ -0,0 +1,147 @@
//! The revive compiler runtime functions.
use inkwell::values::BasicValue;
use crate::polkavm::context::function::Attribute;
use crate::polkavm::context::runtime::RuntimeFunction;
use crate::polkavm::context::Context;
use crate::polkavm::Dependency;
use crate::polkavm::WriteLLVM;
/// Runtime function that narrows a word sized value to a register (xlen)
/// sized value, trapping if the value does not fit.
///
/// Pointers are represented as opaque 256 bit integer values in EVM.
/// In practice, they should never exceed a register sized bit value.
/// However, we still protect against this possibility here: Heap index
/// offsets are generally untrusted and potentially represent valid
/// (but wrong) pointers when truncated.
///
/// The emitted function takes a single word sized argument and returns the
/// truncated xlen sized value.
pub struct WordToPointer;
impl<D> RuntimeFunction<D> for WordToPointer
where
    D: Dependency + Clone,
{
    /// Symbol name under which the function is emitted.
    const NAME: &'static str = "__revive_int_truncate";

    /// Attributes attached to the emitted function.
    ///
    /// NOTE(review): the body below can branch into a trap block; confirm
    /// that `WillReturn` is the intended attribute for such a function.
    const ATTRIBUTES: &'static [Attribute] = &[
        Attribute::WillReturn,
        Attribute::NoFree,
        Attribute::AlwaysInline,
    ];

    /// Signature: one word sized argument, xlen sized return value.
    fn r#type<'ctx>(context: &Context<'ctx, D>) -> inkwell::types::FunctionType<'ctx> {
        let word_type = context.word_type();
        context.xlen_type().fn_type(&[word_type.into()], false)
    }

    /// Emits the checked truncation.
    ///
    /// The argument is truncated to xlen and zero extended back to a word.
    /// If the round trip does not reproduce the argument, control flow is
    /// diverted into a block that calls the trap intrinsic. Otherwise the
    /// truncated value is handed back as the function result.
    fn emit_body<'ctx>(
        &self,
        context: &mut Context<'ctx, D>,
    ) -> anyhow::Result<Option<inkwell::values::BasicValueEnum<'ctx>>> {
        let word = Self::paramater(context, 0).into_int_value();
        let xlen_type = context.xlen_type();
        let word_type = context.word_type();

        // Truncate and extend again: any difference to the input means
        // that significant bits were lost by the truncation.
        let narrowed = context
            .builder()
            .build_int_truncate(word, xlen_type, "offset_truncated")?;
        let widened = context
            .builder()
            .build_int_z_extend(narrowed, word_type, "offset_extended")?;
        let lossy = context.builder().build_int_compare(
            inkwell::IntPredicate::NE,
            word,
            widened,
            "compare_truncated_extended",
        )?;

        let ok_block = context.append_basic_block("offset_pointer_ok");
        let overflow_block = context.append_basic_block("offset_pointer_overflow");
        context.build_conditional_branch(lossy, overflow_block, ok_block)?;

        // Overflow path: trap, nothing after the call is reachable.
        context.set_basic_block(overflow_block);
        context.build_call(context.intrinsics().trap, &[], "invalid_trap");
        context.build_unreachable();

        // Regular path: leave the builder positioned here for the caller.
        context.set_basic_block(ok_block);
        Ok(Some(narrowed.as_basic_value_enum()))
    }
}
impl<D> WriteLLVM<D> for WordToPointer
where
    D: Dependency + Clone,
{
    /// Forwards to the `RuntimeFunction` declaration logic.
    ///
    /// The call is fully qualified because both traits provide a method
    /// named `declare`.
    fn declare(&mut self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<D>>::declare(self, context)
    }

    /// Forwards to the `RuntimeFunction` emission logic.
    fn into_llvm(self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<D>>::emit(&self, context)
    }
}
/// The revive runtime exit function.
///
/// The emitted function takes xlen sized flags followed by a word sized
/// offset and a word sized length, and hands them to the runtime `RETURN`
/// import. It is declared `NoReturn` and its body ends in an unreachable
/// instruction.
pub struct Exit;
impl<D> RuntimeFunction<D> for Exit
where
    D: Dependency + Clone,
{
    /// Symbol name under which the function is emitted.
    const NAME: &'static str = "__revive_exit";

    /// Attributes attached to the emitted function.
    const ATTRIBUTES: &'static [Attribute] = &[
        Attribute::NoReturn,
        Attribute::NoFree,
        Attribute::AlwaysInline,
    ];

    /// Signature: `void(xlen flags, word offset, word length)`.
    fn r#type<'ctx>(context: &Context<'ctx, D>) -> inkwell::types::FunctionType<'ctx> {
        let flags_type = context.xlen_type();
        let word_type = context.word_type();
        context
            .void_type()
            .fn_type(&[flags_type.into(), word_type.into(), word_type.into()], false)
    }

    /// Emits the call to the runtime `RETURN` import.
    ///
    /// Offset and length are narrowed to xlen through
    /// `Context::safe_truncate_int_to_xlen`, a heap pointer is obtained via
    /// `Context::build_heap_gep` and passed on as an xlen sized integer
    /// together with the flags and the narrowed length. No value is
    /// returned since the body ends in an unreachable instruction.
    fn emit_body<'ctx>(
        &self,
        context: &mut Context<'ctx, D>,
    ) -> anyhow::Result<Option<inkwell::values::BasicValueEnum<'ctx>>> {
        let flags = Self::paramater(context, 0).into_int_value();
        let offset_word = Self::paramater(context, 1).into_int_value();
        let length_word = Self::paramater(context, 2).into_int_value();

        let offset = context.safe_truncate_int_to_xlen(offset_word)?;
        let length = context.safe_truncate_int_to_xlen(length_word)?;

        // The import receives the data location as a plain integer.
        let data = context.build_heap_gep(offset, length)?;
        let data_address = context.builder().build_ptr_to_int(
            data.value,
            context.xlen_type(),
            "return_data_ptr_to_int",
        )?;

        context.build_runtime_call(
            revive_runtime_api::polkavm_imports::RETURN,
            &[flags.into(), data_address.into(), length.into()],
        );
        context.build_unreachable();

        Ok(None)
    }
}
impl<D> WriteLLVM<D> for Exit
where
    D: Dependency + Clone,
{
    /// Forwards to the `RuntimeFunction` declaration logic.
    ///
    /// The call is fully qualified because both traits provide a method
    /// named `declare`.
    fn declare(&mut self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<D>>::declare(self, context)
    }

    /// Forwards to the `RuntimeFunction` emission logic.
    fn into_llvm(self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<D>>::emit(&self, context)
    }
}