llvm-context: modularize compiler builtin functions (#234)

- Add the revive runtime function interface to minimize boilerplate
code.
- Outline heavily repeated code into dedicated functions to bring down
code size.
- The code size tests now build optimized for size.
- Function attributes are passed as slices.

This significantly brings down the code size for all OpenZeppelin wizard
contracts (using all possible features) compiled against OpenZeppelin
`v5.0.0` with size optimizations.

|contract|| `-Oz` main | `-Oz` PR || `-O3` main | `-O3` PR |
|-|-|-|-|-|-|-|
|erc1155.sol||100K|67K||114K|147K|
|erc20.sol||120K|90K||160K|191K|
|erc721.sol||128K|101K||178K|214K|
|governor.sol||226K|165K||293K|349K|
|rwa.sol||116K|85K||154K|185K|
|stable.sol||116K|86K||155K|192K|

On the flip side this introduces a heavy penalty for cycle optimized
builds. Setting the no-inline attributes for cycle optimized builds
helps a lot but heavily penalizes runtime speed (LLVM does not yet
inline everything properly - to be investigated later on).

Next steps:
- Modularize more functions
- Refactor the YUL function arguments to use pointers instead of values
- Afterwards check if LLVM still has trouble inlining properly on O3
or set the no-inline attribute if it does not penalize runtime
performance too badly.
This commit is contained in:
xermicus
2025-02-25 16:47:01 +01:00
committed by GitHub
parent 7ffe64ed7c
commit a07968205b
32 changed files with 1444 additions and 655 deletions
@@ -0,0 +1,269 @@
//! Translates the arithmetic operations.
use inkwell::values::BasicValue;
use crate::polkavm::context::runtime::RuntimeFunction;
use crate::polkavm::context::Context;
use crate::polkavm::Dependency;
use crate::polkavm::WriteLLVM;
/// Implements the division operator according to the EVM specification.
pub struct Division;
impl<D> RuntimeFunction<D> for Division
where
D: Dependency + Clone,
{
const NAME: &'static str = "__revive_division";
fn r#type<'ctx>(context: &Context<'ctx, D>) -> inkwell::types::FunctionType<'ctx> {
context.word_type().fn_type(
&[context.word_type().into(), context.word_type().into()],
false,
)
}
fn emit_body<'ctx>(
&self,
context: &mut Context<'ctx, D>,
) -> anyhow::Result<Option<inkwell::values::BasicValueEnum<'ctx>>> {
let operand_1 = Self::paramater(context, 0).into_int_value();
let operand_2 = Self::paramater(context, 1).into_int_value();
wrapped_division(context, operand_2, || {
Ok(context
.builder()
.build_int_unsigned_div(operand_1, operand_2, "DIV")?)
})
.map(Into::into)
}
}
impl<D> WriteLLVM<D> for Division
where
D: Dependency + Clone,
{
fn declare(&mut self, context: &mut Context<D>) -> anyhow::Result<()> {
<Self as RuntimeFunction<_>>::declare(self, context)
}
fn into_llvm(self, context: &mut Context<D>) -> anyhow::Result<()> {
<Self as RuntimeFunction<_>>::emit(&self, context)
}
}
/// Implements the signed division operator according to the EVM specification.
///
/// The function is named `__revive_signed_division`. It takes the dividend
/// and the divisor as word sized integers and returns a word sized integer.
///
/// Two inputs are special-cased instead of being passed to `sdiv`:
/// - A divisor of zero yields zero.
/// - Dividing the minimum signed word (`-2^255`) by `-1` overflows;
///   the dividend is returned unchanged.
pub struct SignedDivision;

impl<D> RuntimeFunction<D> for SignedDivision
where
    D: Dependency + Clone,
{
    const NAME: &'static str = "__revive_signed_division";

    /// Signature: `(word, word) -> word`.
    fn r#type<'ctx>(context: &Context<'ctx, D>) -> inkwell::types::FunctionType<'ctx> {
        context.word_type().fn_type(
            &[context.word_type().into(), context.word_type().into()],
            false,
        )
    }

    /// Emits the signed division with the zero and overflow guards.
    fn emit_body<'ctx>(
        &self,
        context: &mut Context<'ctx, D>,
    ) -> anyhow::Result<Option<inkwell::values::BasicValueEnum<'ctx>>> {
        let operand_1 = Self::paramater(context, 0).into_int_value();
        let operand_2 = Self::paramater(context, 1).into_int_value();

        let block_calculate = context.append_basic_block("calculate");
        let block_overflow = context.append_basic_block("overflow");
        let block_select = context.append_basic_block("select_result");
        let block_origin = context.basic_block();

        // Dispatch on the divisor: zero selects the constant zero result,
        // `-1` needs the overflow check, anything else is divided directly.
        context.builder().build_switch(
            operand_2,
            block_calculate,
            &[
                (context.word_type().const_zero(), block_select),
                (context.word_type().const_all_ones(), block_overflow),
            ],
        )?;

        context.set_basic_block(block_calculate);
        let quotient = context
            .builder()
            .build_int_signed_div(operand_1, operand_2, "SDIV")?;
        context.build_unconditional_branch(block_select);

        context.set_basic_block(block_overflow);
        // The maximum signed word: `2^255 - 1`, i.e. all bits but the sign bit set.
        let max_int = context.builder().build_int_z_extend(
            context
                .integer_type(revive_common::BIT_LENGTH_WORD - 1)
                .const_all_ones(),
            context.word_type(),
            "max_int",
        )?;
        // The minimum signed word is the bitwise complement of the maximum:
        // `-2^255`, only the sign bit set. Negating the maximum instead would
        // give `-2^255 + 1`, which is a regular dividend and must not be
        // treated as the overflow case.
        let min_int = context.builder().build_not(max_int, "min_int")?;
        let is_operand_1_overflow = context.builder().build_int_compare(
            inkwell::IntPredicate::EQ,
            operand_1,
            min_int,
            "is_operand_1_overflow",
        )?;
        context.build_conditional_branch(is_operand_1_overflow, block_select, block_calculate)?;

        context.set_basic_block(block_select);
        let result = context.builder().build_phi(context.word_type(), "result")?;
        result.add_incoming(&[
            // Overflow: the dividend is the result.
            (&operand_1, block_overflow),
            // Division by zero: the result is zero.
            (&context.word_const(0), block_origin),
            (&quotient.as_basic_value_enum(), block_calculate),
        ]);

        Ok(Some(result.as_basic_value()))
    }
}

impl<D> WriteLLVM<D> for SignedDivision
where
    D: Dependency + Clone,
{
    /// Declares the function by delegating to the `RuntimeFunction` implementation.
    fn declare(&mut self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<_>>::declare(self, context)
    }

    /// Emits the function by delegating to the `RuntimeFunction` implementation.
    fn into_llvm(self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<_>>::emit(&self, context)
    }
}
/// Implements the remainder operator according to the EVM specification.
pub struct Remainder;
impl<D> RuntimeFunction<D> for Remainder
where
D: Dependency + Clone,
{
const NAME: &'static str = "__revive_remainder";
fn r#type<'ctx>(context: &Context<'ctx, D>) -> inkwell::types::FunctionType<'ctx> {
context.word_type().fn_type(
&[context.word_type().into(), context.word_type().into()],
false,
)
}
fn emit_body<'ctx>(
&self,
context: &mut Context<'ctx, D>,
) -> anyhow::Result<Option<inkwell::values::BasicValueEnum<'ctx>>> {
let operand_1 = Self::paramater(context, 0).into_int_value();
let operand_2 = Self::paramater(context, 1).into_int_value();
wrapped_division(context, operand_2, || {
Ok(context
.builder()
.build_int_unsigned_rem(operand_1, operand_2, "MOD")?)
})
.map(Into::into)
}
}
impl<D> WriteLLVM<D> for Remainder
where
D: Dependency + Clone,
{
fn declare(&mut self, context: &mut Context<D>) -> anyhow::Result<()> {
<Self as RuntimeFunction<_>>::declare(self, context)
}
fn into_llvm(self, context: &mut Context<D>) -> anyhow::Result<()> {
<Self as RuntimeFunction<_>>::emit(&self, context)
}
}
/// Implements the signed remainder operator according to the EVM specification.
pub struct SignedRemainder;
impl<D> RuntimeFunction<D> for SignedRemainder
where
D: Dependency + Clone,
{
const NAME: &'static str = "__revive_signed_remainder";
fn r#type<'ctx>(context: &Context<'ctx, D>) -> inkwell::types::FunctionType<'ctx> {
context.word_type().fn_type(
&[context.word_type().into(), context.word_type().into()],
false,
)
}
fn emit_body<'ctx>(
&self,
context: &mut Context<'ctx, D>,
) -> anyhow::Result<Option<inkwell::values::BasicValueEnum<'ctx>>> {
let operand_1 = Self::paramater(context, 0).into_int_value();
let operand_2 = Self::paramater(context, 1).into_int_value();
wrapped_division(context, operand_2, || {
Ok(context
.builder()
.build_int_signed_rem(operand_1, operand_2, "SMOD")?)
})
.map(Into::into)
}
}
impl<D> WriteLLVM<D> for SignedRemainder
where
D: Dependency + Clone,
{
fn declare(&mut self, context: &mut Context<D>) -> anyhow::Result<()> {
<Self as RuntimeFunction<_>>::declare(self, context)
}
fn into_llvm(self, context: &mut Context<D>) -> anyhow::Result<()> {
<Self as RuntimeFunction<_>>::emit(&self, context)
}
}
/// Guards a division-like operation against a zero denominator
/// (see also Ethereum YP Appendix H.2).
///
/// The closure `f` is called after the insertion point was moved into the
/// non-zero branch; it is expected to emit the operation and return its value.
///
/// The returned value is a phi selecting, at runtime, either zero
/// (denominator is zero) or the value produced by `f`.
///
/// # Panics
///
/// Panics if the bit width of `denominator` is not the word size.
fn wrapped_division<'ctx, D, F, T>(
    context: &Context<'ctx, D>,
    denominator: inkwell::values::IntValue<'ctx>,
    f: F,
) -> anyhow::Result<inkwell::values::BasicValueEnum<'ctx>>
where
    D: Dependency + Clone,
    F: FnOnce() -> anyhow::Result<T>,
    T: inkwell::values::IntMathValue<'ctx>,
{
    assert_eq!(
        denominator.get_type().get_bit_width(),
        revive_common::BIT_LENGTH_WORD as u32
    );

    let non_zero_block = context.append_basic_block("calculate");
    let merge_block = context.append_basic_block("select");
    let entry_block = context.basic_block();

    // A zero denominator skips the calculation entirely.
    let zero_case = (context.word_const(0), merge_block);
    context
        .builder()
        .build_switch(denominator, non_zero_block, &[zero_case])?;

    context.set_basic_block(non_zero_block);
    let computed = f()?.as_basic_value_enum();
    context.build_unconditional_branch(merge_block);

    context.set_basic_block(merge_block);
    let phi = context.builder().build_phi(context.word_type(), "result")?;
    let zero = context.word_const(0);
    phi.add_incoming(&[(&zero, entry_block), (&computed, non_zero_block)]);

    Ok(phi.as_basic_value())
}
@@ -145,7 +145,7 @@ where
crate::PolkaVMFunction::set_attributes(
context.llvm(),
entry,
vec![crate::PolkaVMAttribute::NoReturn],
&[crate::PolkaVMAttribute::NoReturn],
true,
);
@@ -1,118 +0,0 @@
//! The immutable data runtime function.
use crate::polkavm::context::address_space::AddressSpace;
use crate::polkavm::context::function::runtime;
use crate::polkavm::context::pointer::Pointer;
use crate::polkavm::context::Context;
use crate::polkavm::Dependency;
use crate::polkavm::WriteLLVM;
/// A function for requesting the immutable data from the runtime.
/// This is a special function that is only used by the front-end generated code.
///
/// The runtime API is only called while the immutable data size global is
/// non-zero. The global is reset to zero after the call, which turns
/// subsequent calls into no-ops.
///
/// After the runtime call, the number of bytes written is checked against the
/// expected size: writing fewer bytes than expected traps.
/// This should never fail; the length is known.
/// However, this is a one time assertion, hence worth it.
#[derive(Debug)]
pub struct ImmutableDataLoad;

impl<D> WriteLLVM<D> for ImmutableDataLoad
where
    D: Dependency + Clone,
{
    /// Declares the function with external linkage and the type `() -> void`.
    fn declare(&mut self, context: &mut Context<D>) -> anyhow::Result<()> {
        let function_type = context.void_type().fn_type(Default::default(), false);
        context.add_function(
            runtime::FUNCTION_LOAD_IMMUTABLE_DATA,
            function_type,
            0,
            Some(inkwell::module::Linkage::External),
        )?;

        Ok(())
    }

    /// Emits the function body.
    fn into_llvm(self, context: &mut Context<D>) -> anyhow::Result<()> {
        context.set_current_function(runtime::FUNCTION_LOAD_IMMUTABLE_DATA, None)?;
        let entry_block = context.current_function().borrow().entry_block();
        context.set_basic_block(entry_block);

        // Read the expected size; zero means there is nothing (more) to load.
        let size_pointer = context
            .get_global(revive_runtime_api::immutable_data::GLOBAL_IMMUTABLE_DATA_SIZE)?
            .value
            .as_pointer_value();
        let expected_size = context
            .build_load(
                Pointer::new(context.xlen_type(), AddressSpace::Stack, size_pointer),
                "immutable_data_size_load",
            )?
            .into_int_value();

        let load_block = context.append_basic_block("load_immutables_block");
        let return_block = context.current_function().borrow().return_block();
        let nothing_to_load = context.builder().build_int_compare(
            inkwell::IntPredicate::EQ,
            context.xlen_type().const_zero(),
            expected_size,
            "immutable_data_size_is_zero",
        )?;
        context.build_conditional_branch(nothing_to_load, return_block, load_block)?;

        // Request the data; the size global doubles as the in/out length argument.
        context.set_basic_block(load_block);
        let data_pointer = context
            .get_global(revive_runtime_api::immutable_data::GLOBAL_IMMUTABLE_DATA_POINTER)?
            .value
            .as_pointer_value();
        let data_address = context.builder().build_ptr_to_int(
            data_pointer,
            context.xlen_type(),
            "ptr_to_xlen",
        )?;
        let size_address = context.builder().build_ptr_to_int(
            size_pointer,
            context.xlen_type(),
            "ptr_to_xlen",
        )?;
        context.build_runtime_call(
            revive_runtime_api::polkavm_imports::GET_IMMUTABLE_DATA,
            &[data_address.into(), size_address.into()],
        );

        // Remember how much was written, then zero the size so that later
        // calls return early.
        let bytes_written = context
            .builder()
            .build_load(context.xlen_type(), size_pointer, "bytes_written")?
            .into_int_value();
        context
            .builder()
            .build_store(size_pointer, context.xlen_type().const_zero())?;

        // Trap if fewer bytes were written than expected.
        let overflow_block = context.append_basic_block("immutable_data_overflow");
        let is_overflow = context.builder().build_int_compare(
            inkwell::IntPredicate::UGT,
            expected_size,
            bytes_written,
            "is_overflow",
        )?;
        context.build_conditional_branch(is_overflow, overflow_block, return_block)?;

        context.set_basic_block(overflow_block);
        context.build_call(context.intrinsics().trap, &[], "invalid_trap");
        context.build_unreachable();

        context.set_basic_block(return_block);
        context.build_return(None);

        context.pop_debug_scope();

        Ok(())
    }
}
@@ -1,8 +1,9 @@
//! The front-end runtime functions.
pub mod arithmetics;
pub mod deploy_code;
pub mod entry;
pub mod immutable_data_load;
pub mod revive;
pub mod runtime_code;
/// The main entry function name.
@@ -13,6 +14,3 @@ pub const FUNCTION_DEPLOY_CODE: &str = "__deploy";
/// The runtime code function name.
pub const FUNCTION_RUNTIME_CODE: &str = "__runtime";
/// The immutable data load function name.
pub const FUNCTION_LOAD_IMMUTABLE_DATA: &str = "__immutable_data_load";
@@ -0,0 +1,147 @@
//! The revive compiler runtime functions.
use inkwell::values::BasicValue;
use crate::polkavm::context::function::Attribute;
use crate::polkavm::context::runtime::RuntimeFunction;
use crate::polkavm::context::Context;
use crate::polkavm::Dependency;
use crate::polkavm::WriteLLVM;
/// Converts a word sized value into a register sized (xlen) value, trapping
/// if the value does not fit.
///
/// Pointers are represented as opaque 256 bit integer values in EVM.
/// In practice, they should never exceed a register sized bit value.
/// However, we still protect against this possibility here: Heap index
/// offsets are generally untrusted and potentially represent valid
/// (but wrong) pointers when truncated.
pub struct WordToPointer;

impl<D> RuntimeFunction<D> for WordToPointer
where
    D: Dependency + Clone,
{
    const NAME: &'static str = "__revive_int_truncate";

    const ATTRIBUTES: &'static [Attribute] = &[
        Attribute::WillReturn,
        Attribute::NoFree,
        Attribute::AlwaysInline,
    ];

    /// Signature: `(word) -> xlen`.
    fn r#type<'ctx>(context: &Context<'ctx, D>) -> inkwell::types::FunctionType<'ctx> {
        let parameter = context.word_type();
        context.xlen_type().fn_type(&[parameter.into()], false)
    }

    /// Emits the truncation together with the round trip overflow check.
    fn emit_body<'ctx>(
        &self,
        context: &mut Context<'ctx, D>,
    ) -> anyhow::Result<Option<inkwell::values::BasicValueEnum<'ctx>>> {
        let word = Self::paramater(context, 0).into_int_value();

        // Truncate and extend again: the value fits if and only if the
        // round trip reproduces the input.
        let narrowed = context.builder().build_int_truncate(
            word,
            context.xlen_type(),
            "offset_truncated",
        )?;
        let widened = context.builder().build_int_z_extend(
            narrowed,
            context.word_type(),
            "offset_extended",
        )?;
        let does_not_fit = context.builder().build_int_compare(
            inkwell::IntPredicate::NE,
            word,
            widened,
            "compare_truncated_extended",
        )?;

        let fits_block = context.append_basic_block("offset_pointer_ok");
        let trap_block = context.append_basic_block("offset_pointer_overflow");
        context.build_conditional_branch(does_not_fit, trap_block, fits_block)?;

        context.set_basic_block(trap_block);
        context.build_call(context.intrinsics().trap, &[], "invalid_trap");
        context.build_unreachable();

        // Leave the insertion point in the success block for the caller.
        context.set_basic_block(fits_block);

        Ok(Some(narrowed.as_basic_value_enum()))
    }
}

impl<D> WriteLLVM<D> for WordToPointer
where
    D: Dependency + Clone,
{
    /// Declares the function by delegating to the `RuntimeFunction` implementation.
    fn declare(&mut self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<_>>::declare(self, context)
    }

    /// Emits the function by delegating to the `RuntimeFunction` implementation.
    fn into_llvm(self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<_>>::emit(&self, context)
    }
}
/// The revive runtime exit function.
///
/// The function is named `__revive_exit`. It takes the flags (xlen) followed
/// by the offset and the length (both words) of the data, and does not return:
/// it ends in the `RETURN` runtime call followed by an unreachable instruction.
pub struct Exit;

impl<D> RuntimeFunction<D> for Exit
where
    D: Dependency + Clone,
{
    const NAME: &'static str = "__revive_exit";

    const ATTRIBUTES: &'static [Attribute] = &[
        Attribute::NoReturn,
        Attribute::NoFree,
        Attribute::AlwaysInline,
    ];

    /// Signature: `(xlen, word, word) -> void`.
    fn r#type<'ctx>(context: &Context<'ctx, D>) -> inkwell::types::FunctionType<'ctx> {
        let xlen = context.xlen_type();
        let word = context.word_type();
        context
            .void_type()
            .fn_type(&[xlen.into(), word.into(), word.into()], false)
    }

    /// Emits the heap pointer calculation and the `RETURN` runtime call.
    fn emit_body<'ctx>(
        &self,
        context: &mut Context<'ctx, D>,
    ) -> anyhow::Result<Option<inkwell::values::BasicValueEnum<'ctx>>> {
        let flags = Self::paramater(context, 0).into_int_value();
        let data_offset = Self::paramater(context, 1).into_int_value();
        let data_length = Self::paramater(context, 2).into_int_value();

        // Offset and length arrive as words and are narrowed to xlen first.
        let offset_xlen = context.safe_truncate_int_to_xlen(data_offset)?;
        let length_xlen = context.safe_truncate_int_to_xlen(data_length)?;

        let data_pointer = context.build_heap_gep(offset_xlen, length_xlen)?;
        let data_address = context.builder().build_ptr_to_int(
            data_pointer.value,
            context.xlen_type(),
            "return_data_ptr_to_int",
        )?;

        context.build_runtime_call(
            revive_runtime_api::polkavm_imports::RETURN,
            &[flags.into(), data_address.into(), length_xlen.into()],
        );
        context.build_unreachable();

        // The function has no return value.
        Ok(None)
    }
}

impl<D> WriteLLVM<D> for Exit
where
    D: Dependency + Clone,
{
    /// Declares the function by delegating to the `RuntimeFunction` implementation.
    fn declare(&mut self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<_>>::declare(self, context)
    }

    /// Emits the function by delegating to the `RuntimeFunction` implementation.
    fn into_llvm(self, context: &mut Context<D>) -> anyhow::Result<()> {
        <Self as RuntimeFunction<_>>::emit(&self, context)
    }
}