Skip to content

Commit 9b3386b

Browse files
authored
perf: pass state variables by value in tco exec handlers (#2121)
- Pass `pc`, `instret`, and `instret_end`/`max_execution_cost`/`segment_check_insns` by value in execution handlers so that they are passed in registers.
- Add `likely`/`unlikely` hints for suspension/termination in `tco`.

[Benchmark comparison](https://github.com/axiom-crypto/openvm-reth-benchmark/actions/runs/17513217695#summary-49747838308)

Towards INT-4921
1 parent 4378963 commit 9b3386b

File tree

61 files changed

+1365
-698
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+1365
-698
lines changed

crates/sdk/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ where
411411
let (cost, final_state) = interpreter
412412
.execute_metered_cost(inputs, ctx)
413413
.map_err(VirtualMachineError::from)?;
414-
let instret = final_state.instret;
414+
let instret = final_state.instret();
415415

416416
let public_values = extract_public_values(
417417
self.executor.config.as_ref().num_public_values,

crates/vm/derive/src/tco.rs

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,19 @@ pub fn tco_impl(item: TokenStream) -> TokenStream {
3030
// Build the function call with all the generics
3131
let generic_args = build_generic_args(generics);
3232
let execute_call = if generic_args.is_empty() {
33-
quote! { #fn_name(pre_compute, exec_state) }
33+
quote! { #fn_name(pre_compute, &mut instret, &mut pc, arg, exec_state) }
3434
} else {
35-
quote! { #fn_name::<#(#generic_args),*>(pre_compute, exec_state) }
35+
quote! { #fn_name::<#(#generic_args),*>(pre_compute, &mut instret, &mut pc, arg, exec_state) }
3636
};
3737

3838
// Generate the TCO handler function
3939
let handler_fn = quote! {
4040
#[inline(never)]
4141
unsafe fn #handler_name #handler_generics (
4242
interpreter: &::openvm_circuit::arch::interpreter::InterpretedInstance<#f_type, #ctx_type>,
43+
mut instret: u64,
44+
mut pc: u32,
45+
arg: u64,
4346
exec_state: &mut ::openvm_circuit::arch::VmExecState<
4447
#f_type,
4548
::openvm_circuit::system::memory::online::GuestMemory,
@@ -50,28 +53,30 @@ pub fn tco_impl(item: TokenStream) -> TokenStream {
5053
{
5154
use ::openvm_circuit::arch::ExecutionError;
5255

53-
let pre_compute = interpreter.get_pre_compute(exec_state.vm_state.pc);
56+
let pre_compute = interpreter.get_pre_compute(pc);
5457
#execute_call;
5558

56-
if exec_state.exit_code.is_err() {
57-
// stop execution
59+
if ::core::intrinsics::unlikely(exec_state.exit_code.is_err()) {
60+
exec_state.set_instret_and_pc(instret, pc);
5861
return;
5962
}
60-
if #ctx_type::should_suspend(exec_state) {
63+
if ::core::intrinsics::unlikely(#ctx_type::should_suspend(instret, pc, arg, exec_state)) {
64+
exec_state.set_instret_and_pc(instret, pc);
6165
return;
6266
}
63-
// exec_state.pc should have been updated by execute_impl at this point
64-
let next_handler = interpreter.get_handler(exec_state.vm_state.pc);
65-
if next_handler.is_none() {
66-
exec_state.exit_code = Err(ExecutionError::PcOutOfBounds (exec_state.vm_state.pc));
67+
68+
let next_handler = interpreter.get_handler(pc);
69+
if ::core::intrinsics::unlikely(next_handler.is_none()) {
70+
exec_state.set_instret_and_pc(instret, pc);
71+
exec_state.exit_code = Err(ExecutionError::PcOutOfBounds(pc));
6772
return;
6873
}
6974
let next_handler = next_handler.unwrap_unchecked();
7075

7176
// NOTE: `become` is a keyword that requires Rust Nightly.
7277
// It is part of the explicit tail calls RFC: <https://github.com/rust-lang/rust/issues/112788>
7378
// which is still incomplete.
74-
become next_handler(interpreter, exec_state)
79+
become next_handler(interpreter, instret, pc, arg, exec_state)
7580
}
7681
};
7782

crates/vm/src/arch/execution.rs

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,22 +85,40 @@ pub enum StaticProgramError {
8585
ExecutorNotFound { opcode: VmOpcode },
8686
}
8787

88-
/// Function pointer for interpreter execution with function signature `(pre_compute, exec_state)`.
89-
/// The `pre_compute: &[u8]` is a pre-computed buffer of data corresponding to a single instruction.
90-
/// The contents of `pre_compute` are determined from the program code as specified by the
91-
/// [Executor] and [MeteredExecutor] traits.
92-
pub type ExecuteFunc<F, CTX> =
93-
unsafe fn(pre_compute: &[u8], exec_state: &mut VmExecState<F, GuestMemory, CTX>);
88+
/// Function pointer for interpreter execution with function signature `(pre_compute, instret, pc,
89+
/// arg, exec_state)`. The `pre_compute: &[u8]` is a pre-computed buffer of data
90+
/// corresponding to a single instruction. The contents of `pre_compute` are determined from the
91+
/// program code as specified by the [Executor] and [MeteredExecutor] traits.
92+
/// `arg` is a runtime constant that we want to keep in a register:
93+
/// - For pure execution it is `instret_end`
94+
/// - For metered cost execution it is the `max_execution_cost`
95+
/// - For metered execution it is `segment_check_insns`
96+
pub type ExecuteFunc<F, CTX> = unsafe fn(
97+
pre_compute: &[u8],
98+
instret: &mut u64,
99+
pc: &mut u32,
100+
arg: u64,
101+
exec_state: &mut VmExecState<F, GuestMemory, CTX>,
102+
);
94103

95104
/// Handler for tail call elimination. The `CTX` is assumed to contain pointers to the pre-computed
96105
/// buffer and the function handler table.
97106
///
98107
/// - `pre_compute_buf` is the starting pointer of the pre-computed buffer.
99108
/// - `handlers` is the starting pointer of the table of function pointers of `Handler` type. The
100109
/// pointer is typeless to avoid self-referential types.
110+
/// - `instret`, `pc`, and `arg` are passed by value as separate arguments for efficiency
111+
///
112+
/// `arg` is a runtime constant that we want to keep in a register:
113+
/// - For pure execution it is `instret_end`
114+
/// - For metered cost execution it is the `max_execution_cost`
115+
/// - For metered execution it is `segment_check_insns`
101116
#[cfg(feature = "tco")]
102117
pub type Handler<F, CTX> = unsafe fn(
103118
interpreter: &InterpretedInstance<F, CTX>,
119+
instret: u64,
120+
pc: u32,
121+
arg: u64,
104122
exec_state: &mut VmExecState<F, GuestMemory, CTX>,
105123
);
106124

crates/vm/src/arch/execution_mode/metered/ctx.rs

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,11 @@ impl<const PAGE_BITS: usize> MeteredCtx<PAGE_BITS> {
125125
}
126126

127127
#[inline(always)]
128-
pub fn check_and_segment(&mut self, instret: u64) {
128+
pub fn check_and_segment(&mut self, instret: u64, segment_check_insns: u64) {
129129
let threshold = self
130130
.segmentation_ctx
131131
.instret_last_segment_check
132-
.wrapping_add(self.segmentation_ctx.segment_check_insns);
132+
.wrapping_add(segment_check_insns);
133133
debug_assert!(
134134
threshold >= self.segmentation_ctx.instret_last_segment_check,
135135
"overflow in segment check threshold calculation"
@@ -195,23 +195,30 @@ impl<const PAGE_BITS: usize> ExecutionCtxTrait for MeteredCtx<PAGE_BITS> {
195195
}
196196

197197
#[inline(always)]
198-
fn should_suspend<F>(vm_state: &mut VmExecState<F, GuestMemory, Self>) -> bool {
198+
fn should_suspend<F>(
199+
instret: u64,
200+
_pc: u32,
201+
segment_check_insns: u64,
202+
exec_state: &mut VmExecState<F, GuestMemory, Self>,
203+
) -> bool {
199204
// E2 always runs until termination. Here we use the function as a hook called every
200205
// instruction.
201-
vm_state.ctx.check_and_segment(vm_state.instret);
206+
exec_state
207+
.ctx
208+
.check_and_segment(instret, segment_check_insns);
202209
false
203210
}
204211

205212
#[inline(always)]
206-
fn on_terminate<F>(vm_state: &mut VmExecState<F, GuestMemory, Self>) {
207-
vm_state
213+
fn on_terminate<F>(instret: u64, _pc: u32, exec_state: &mut VmExecState<F, GuestMemory, Self>) {
214+
exec_state
208215
.ctx
209216
.memory_ctx
210-
.lazy_update_boundary_heights(&mut vm_state.ctx.trace_heights);
211-
vm_state
217+
.lazy_update_boundary_heights(&mut exec_state.ctx.trace_heights);
218+
exec_state
212219
.ctx
213220
.segmentation_ctx
214-
.segment(vm_state.instret, &vm_state.ctx.trace_heights);
221+
.segment(instret, &exec_state.ctx.trace_heights);
215222
}
216223
}
217224

crates/vm/src/arch/execution_mode/metered_cost.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,14 @@ impl ExecutionCtxTrait for MeteredCostCtx {
123123
);
124124
}
125125

126-
fn should_suspend<F>(vm_state: &mut VmExecState<F, GuestMemory, Self>) -> bool {
127-
vm_state.ctx.cost > vm_state.ctx.max_execution_cost
126+
#[inline(always)]
127+
fn should_suspend<F>(
128+
_instret: u64,
129+
_pc: u32,
130+
max_execution_cost: u64,
131+
exec_state: &mut VmExecState<F, GuestMemory, Self>,
132+
) -> bool {
133+
exec_state.ctx.cost > max_execution_cost
128134
}
129135
}
130136

crates/vm/src/arch/execution_mode/mod.rs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,20 @@ pub use pure::ExecutionCtx;
1212

1313
pub trait ExecutionCtxTrait: Sized {
1414
fn on_memory_operation(&mut self, address_space: u32, ptr: u32, size: u32);
15-
fn should_suspend<F>(vm_state: &mut VmExecState<F, GuestMemory, Self>) -> bool;
16-
fn on_terminate<F>(_vm_state: &mut VmExecState<F, GuestMemory, Self>) {}
15+
16+
fn should_suspend<F>(
17+
instret: u64,
18+
pc: u32,
19+
_arg: u64,
20+
exec_state: &mut VmExecState<F, GuestMemory, Self>,
21+
) -> bool;
22+
23+
fn on_terminate<F>(
24+
_instret: u64,
25+
_pc: u32,
26+
_exec_state: &mut VmExecState<F, GuestMemory, Self>,
27+
) {
28+
}
1729
}
1830

1931
pub trait MeteredExecutionCtxTrait: ExecutionCtxTrait {

crates/vm/src/arch/execution_mode/pure.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use crate::{
44
};
55

66
pub struct ExecutionCtx {
7-
instret_end: u64,
7+
pub instret_end: u64,
88
}
99

1010
impl ExecutionCtx {
@@ -22,8 +22,14 @@ impl ExecutionCtx {
2222
impl ExecutionCtxTrait for ExecutionCtx {
2323
#[inline(always)]
2424
fn on_memory_operation(&mut self, _address_space: u32, _ptr: u32, _size: u32) {}
25+
2526
#[inline(always)]
26-
fn should_suspend<F>(vm_state: &mut VmExecState<F, GuestMemory, Self>) -> bool {
27-
vm_state.instret >= vm_state.ctx.instret_end
27+
fn should_suspend<F>(
28+
instret: u64,
29+
_pc: u32,
30+
instret_end: u64,
31+
_exec_state: &mut VmExecState<F, GuestMemory, Self>,
32+
) -> bool {
33+
instret >= instret_end
2834
}
2935
}

0 commit comments

Comments
 (0)