Skip to content

Commit ce7c037

Browse files
committed
perf: remove pc_base from pc_idx calc
1 parent 64c0571 commit ce7c037

File tree

4 files changed

+70
-89
lines changed

4 files changed

+70
-89
lines changed

crates/vm/derive/src/tco.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ pub fn tco_impl(item: TokenStream) -> TokenStream {
4848
)
4949
#where_clause
5050
{
51+
use ::openvm_circuit::arch::ExecutionError;
52+
5153
let pre_compute = interpreter.get_pre_compute(exec_state.vm_state.pc);
5254
#execute_call;
5355

@@ -61,7 +63,7 @@ pub fn tco_impl(item: TokenStream) -> TokenStream {
6163
// exec_state.pc should have been updated by execute_impl at this point
6264
let next_handler = interpreter.get_handler(exec_state.vm_state.pc);
6365
if next_handler.is_none() {
64-
exec_state.exit_code = Err(interpreter.pc_out_of_bounds_err(exec_state.vm_state.pc));
66+
exec_state.exit_code = Err(ExecutionError::PcOutOfBounds (exec_state.vm_state.pc));
6567
return;
6668
}
6769
let next_handler = next_handler.unwrap_unchecked();

crates/vm/src/arch/execution.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,8 @@ use crate::{
2828
pub enum ExecutionError {
2929
#[error("execution failed at pc {pc}, err: {msg}")]
3030
Fail { pc: u32, msg: &'static str },
31-
#[error("pc {pc} out of bounds for program of length {program_len}, with pc_base {pc_base}")]
32-
PcOutOfBounds {
33-
pc: u32,
34-
pc_base: u32,
35-
program_len: usize,
36-
},
31+
#[error("pc {0} out of bounds")]
32+
PcOutOfBounds(u32),
3733
#[error("unreachable instruction at pc {0}")]
3834
Unreachable(u32),
3935
#[error("at pc {pc}, opcode {opcode} was not enabled")]

crates/vm/src/arch/interpreter.rs

Lines changed: 50 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::{
22
alloc::{alloc, dealloc, handle_alloc_error, Layout},
33
borrow::{Borrow, BorrowMut},
4+
iter::repeat_n,
45
ptr::NonNull,
56
};
67

@@ -44,15 +45,16 @@ pub struct InterpretedInstance<'a, F, Ctx> {
4445
#[allow(dead_code)]
4546
pre_compute_buf: AlignedBuf,
4647
/// Instruction table of function pointers and pointers to the pre-computed buffer. Indexed by
47-
/// `pc_index = (pc - pc_base) / DEFAULT_PC_STEP`.
48+
/// `pc_index = pc / DEFAULT_PC_STEP`.
49+
/// SAFETY: The first `pc_base / DEFAULT_PC_STEP` entries will be unreachable. We do this to
50+
/// avoid needing to subtract `pc_base` during runtime.
4851
pre_compute_insns: Vec<PreComputeInstruction<'a, F, Ctx>>,
4952
#[cfg(feature = "tco")]
5053
pre_compute_max_size: usize,
5154
/// Handler function pointers for tail call optimization.
5255
#[cfg(feature = "tco")]
5356
handlers: Vec<Handler<F, Ctx>>,
5457

55-
pc_base: u32,
5658
pc_start: u32,
5759

5860
init_memory: SparseMemoryImage,
@@ -84,22 +86,14 @@ macro_rules! run {
8486
#[cfg(not(feature = "tco"))]
8587
unsafe {
8688
tracing::debug!("execute_trampoline");
87-
execute_trampoline(
88-
$interpreter.pc_base,
89-
&mut $exec_state,
90-
&$interpreter.pre_compute_insns,
91-
);
89+
execute_trampoline(&mut $exec_state, &$interpreter.pre_compute_insns);
9290
}
9391
#[cfg(feature = "tco")]
9492
{
9593
tracing::debug!("execute_tco");
96-
let handler = $interpreter.get_handler($exec_state.pc).ok_or(
97-
ExecutionError::PcOutOfBounds {
98-
pc: $exec_state.pc,
99-
pc_base: $interpreter.pc_base,
100-
program_len: $interpreter.handlers.len(),
101-
},
102-
)?;
94+
let handler = $interpreter
95+
.get_handler($exec_state.pc)
96+
.ok_or(ExecutionError::PcOutOfBounds($exec_state.pc))?;
10397
// SAFETY:
10498
// - handler is generated by Executor, MeteredExecutor traits
10599
// - it is the responsibility of each Executor to ensure handler is safe given a
@@ -151,27 +145,25 @@ where
151145
{
152146
let program = &exe.program;
153147
let pre_compute_max_size = get_pre_compute_max_size(program, inventory);
154-
let mut pre_compute_buf = alloc_pre_compute_buf(program.len(), pre_compute_max_size);
148+
let mut pre_compute_buf = alloc_pre_compute_buf(program, pre_compute_max_size);
155149
let mut split_pre_compute_buf =
156150
split_pre_compute_buf(program, &mut pre_compute_buf, pre_compute_max_size);
157151
let pre_compute_insns = get_pre_compute_instructions::<F, Ctx, E>(
158152
program,
159153
inventory,
160154
&mut split_pre_compute_buf,
161155
)?;
162-
let pc_base = program.pc_base;
163156
let pc_start = exe.pc_start;
164157
let init_memory = exe.init_memory.clone();
165158
#[cfg(feature = "tco")]
166-
let handlers = program
167-
.instructions_and_debug_infos
168-
.iter()
159+
let handlers = repeat_n(&None, get_pc_index(program.pc_base))
160+
.chain(program.instructions_and_debug_infos.iter())
169161
.zip_eq(split_pre_compute_buf.iter_mut())
170162
.enumerate()
171163
.map(
172164
|(pc_idx, (inst_opt, pre_compute))| -> Result<Handler<F, Ctx>, StaticProgramError> {
173165
if let Some((inst, _)) = inst_opt {
174-
let pc = pc_base + pc_idx as u32 * DEFAULT_PC_STEP;
166+
let pc = pc_idx as u32 * DEFAULT_PC_STEP;
175167
if get_system_opcode_handler::<F, Ctx>(inst, pre_compute).is_some() {
176168
Ok(terminate_execute_e12_tco_handler)
177169
} else {
@@ -191,7 +183,6 @@ where
191183
system_config: inventory.config().clone(),
192184
pre_compute_buf,
193185
pre_compute_insns,
194-
pc_base,
195186
pc_start,
196187
init_memory,
197188
#[cfg(feature = "tco")]
@@ -209,7 +200,7 @@ where
209200
#[cfg(feature = "tco")]
210201
#[inline(always)]
211202
pub fn get_pre_compute(&self, pc: u32) -> &[u8] {
212-
let pc_idx = get_pc_index(self.pc_base, pc);
203+
let pc_idx = get_pc_index(pc);
213204
// SAFETY:
214205
// - we assume that pc is in bounds
215206
// - pre_compute_buf is allocated for pre_compute_max_size * padded_program_len bytes
//   (padded_program_len = pc_base / DEFAULT_PC_STEP + program_len), with each
@@ -228,18 +219,10 @@ where
228219
}
229220
}
230221

231-
pub fn pc_out_of_bounds_err(&self, pc: u32) -> ExecutionError {
232-
ExecutionError::PcOutOfBounds {
233-
pc,
234-
pc_base: self.pc_base,
235-
program_len: self.pre_compute_insns.len(),
236-
}
237-
}
238-
239222
#[cfg(feature = "tco")]
240223
#[inline(always)]
241224
pub fn get_handler(&self, pc: u32) -> Option<Handler<F, Ctx>> {
242-
let pc_idx = get_pc_index(self.pc_base, pc);
225+
let pc_idx = get_pc_index(pc);
243226
self.handlers.get(pc_idx).copied()
244227
}
245228
}
@@ -261,7 +244,7 @@ where
261244
{
262245
let program = &exe.program;
263246
let pre_compute_max_size = get_metered_pre_compute_max_size(program, inventory);
264-
let mut pre_compute_buf = alloc_pre_compute_buf(program.len(), pre_compute_max_size);
247+
let mut pre_compute_buf = alloc_pre_compute_buf(program, pre_compute_max_size);
265248
let mut split_pre_compute_buf =
266249
split_pre_compute_buf(program, &mut pre_compute_buf, pre_compute_max_size);
267250
let pre_compute_insns = get_metered_pre_compute_instructions::<F, Ctx, E>(
@@ -271,19 +254,17 @@ where
271254
&mut split_pre_compute_buf,
272255
)?;
273256

274-
let pc_base = program.pc_base;
275257
let pc_start = exe.pc_start;
276258
let init_memory = exe.init_memory.clone();
277259
#[cfg(feature = "tco")]
278-
let handlers = program
279-
.instructions_and_debug_infos
280-
.iter()
260+
let handlers = repeat_n(&None, get_pc_index(program.pc_base))
261+
.chain(program.instructions_and_debug_infos.iter())
281262
.zip_eq(split_pre_compute_buf.iter_mut())
282263
.enumerate()
283264
.map(
284265
|(pc_idx, (inst_opt, pre_compute))| -> Result<Handler<F, Ctx>, StaticProgramError> {
285266
if let Some((inst, _)) = inst_opt {
286-
let pc = pc_base + pc_idx as u32 * DEFAULT_PC_STEP;
267+
let pc = pc_idx as u32 * DEFAULT_PC_STEP;
287268
if get_system_opcode_handler::<F, Ctx>(inst, pre_compute).is_some() {
288269
Ok(terminate_execute_e12_tco_handler)
289270
} else {
@@ -305,7 +286,6 @@ where
305286
system_config: inventory.config().clone(),
306287
pre_compute_buf,
307288
pre_compute_insns,
308-
pc_base,
309289
pc_start,
310290
init_memory,
311291
#[cfg(feature = "tco")]
@@ -448,8 +428,10 @@ where
448428
}
449429
}
450430

451-
fn alloc_pre_compute_buf(program_len: usize, pre_compute_max_size: usize) -> AlignedBuf {
452-
let buf_len = program_len * pre_compute_max_size;
431+
fn alloc_pre_compute_buf<F>(program: &Program<F>, pre_compute_max_size: usize) -> AlignedBuf {
432+
let base_idx = get_pc_index(program.pc_base);
433+
let padded_program_len = base_idx + program.instructions_and_debug_infos.len();
434+
let buf_len = padded_program_len * pre_compute_max_size;
453435
AlignedBuf::uninit(buf_len, pre_compute_max_size)
454436
}
455437

@@ -458,8 +440,9 @@ fn split_pre_compute_buf<'a, F>(
458440
pre_compute_buf: &'a mut AlignedBuf,
459441
pre_compute_max_size: usize,
460442
) -> Vec<&'a mut [u8]> {
461-
let program_len = program.instructions_and_debug_infos.len();
462-
let buf_len = program_len * pre_compute_max_size;
443+
let base_idx = get_pc_index(program.pc_base);
444+
let padded_program_len = base_idx + program.instructions_and_debug_infos.len();
445+
let buf_len = padded_program_len * pre_compute_max_size;
463446
// SAFETY:
464447
// - pre_compute_buf.ptr was allocated with exactly buf_len bytes
465448
// - lifetime 'a ensures the returned slices don't outlive the AlignedBuf
@@ -475,7 +458,6 @@ fn split_pre_compute_buf<'a, F>(
475458
/// The `fn_ptrs` entries must point to pre-computed buffers that outlive this function.
476459
#[inline(always)]
477460
unsafe fn execute_trampoline<F: PrimeField32, Ctx: ExecutionCtxTrait>(
478-
pc_base: u32,
479461
vm_state: &mut VmExecState<F, GuestMemory, Ctx>,
480462
fn_ptrs: &[PreComputeInstruction<F, Ctx>],
481463
) {
@@ -487,16 +469,12 @@ unsafe fn execute_trampoline<F: PrimeField32, Ctx: ExecutionCtxTrait>(
487469
if Ctx::should_suspend(vm_state) {
488470
break;
489471
}
490-
let pc_index = get_pc_index(pc_base, vm_state.pc);
472+
let pc_index = get_pc_index(vm_state.pc);
491473
if let Some(inst) = fn_ptrs.get(pc_index) {
492474
// SAFETY: pre_compute assumed to live long enough
493475
unsafe { (inst.handler)(inst.pre_compute, vm_state) };
494476
} else {
495-
vm_state.exit_code = Err(ExecutionError::PcOutOfBounds {
496-
pc: vm_state.pc,
497-
pc_base,
498-
program_len: fn_ptrs.len(),
499-
});
477+
vm_state.exit_code = Err(ExecutionError::PcOutOfBounds(vm_state.pc));
500478
}
501479
}
502480
if vm_state
@@ -509,8 +487,8 @@ unsafe fn execute_trampoline<F: PrimeField32, Ctx: ExecutionCtxTrait>(
509487
}
510488

511489
#[inline(always)]
512-
pub fn get_pc_index(pc_base: u32, pc: u32) -> usize {
513-
((pc - pc_base) / DEFAULT_PC_STEP) as usize
490+
pub fn get_pc_index(pc: u32) -> usize {
491+
(pc / DEFAULT_PC_STEP) as usize
514492
}
515493

516494
/// Bytes allocated according to the given Layout
@@ -647,15 +625,19 @@ where
647625
Ctx: ExecutionCtxTrait,
648626
E: Executor<F>,
649627
{
650-
program
651-
.instructions_and_debug_infos
652-
.iter()
628+
let unreachable_handler: ExecuteFunc<F, Ctx> = |_, vm_state| {
629+
vm_state.exit_code = Err(ExecutionError::Unreachable(vm_state.pc));
630+
};
631+
632+
repeat_n(&None, get_pc_index(program.pc_base))
633+
.chain(program.instructions_and_debug_infos.iter())
653634
.zip_eq(pre_compute.iter_mut())
654635
.enumerate()
655636
.map(|(i, (inst_opt, buf))| {
656-
// SAFETY: we cast to raw pointer and then borrow to remove the lifetime. This is safe
657-
// only in the current context because `buf` comes from `pre_compute_buf` which will
658-
// outlive the returned `PreComputeInstruction`s.
637+
// SAFETY: we cast to raw pointer and then borrow to remove the lifetime. This
638+
// is safe only in the current context because `buf` comes
639+
// from `pre_compute_buf` which will outlive the returned
640+
// `PreComputeInstruction`s.
659641
let buf: &mut [u8] = unsafe { &mut *(*buf as *mut [u8]) };
660642
let pre_inst = if let Some((inst, _)) = inst_opt {
661643
tracing::trace!("get_pre_compute_instruction {inst:?}");
@@ -679,9 +661,7 @@ where
679661
} else {
680662
// Dead instruction at this pc
681663
PreComputeInstruction {
682-
handler: |_, vm_state| {
683-
vm_state.exit_code = Err(ExecutionError::Unreachable(vm_state.pc));
684-
},
664+
handler: unreachable_handler,
685665
pre_compute: buf,
686666
}
687667
};
@@ -701,15 +681,18 @@ where
701681
Ctx: MeteredExecutionCtxTrait,
702682
E: MeteredExecutor<F>,
703683
{
704-
program
705-
.instructions_and_debug_infos
706-
.iter()
684+
let unreachable_handler: ExecuteFunc<F, Ctx> = |_, vm_state| {
685+
vm_state.exit_code = Err(ExecutionError::Unreachable(vm_state.pc));
686+
};
687+
repeat_n(&None, get_pc_index(program.pc_base))
688+
.chain(program.instructions_and_debug_infos.iter())
707689
.zip_eq(pre_compute.iter_mut())
708690
.enumerate()
709691
.map(|(i, (inst_opt, buf))| {
710-
// SAFETY: we cast to raw pointer and then borrow to remove the lifetime. This is safe
711-
// only in the current context because `buf` comes from `pre_compute_buf` which will
712-
// outlive the returned `PreComputeInstruction`s.
692+
// SAFETY: we cast to raw pointer and then borrow to remove the lifetime. This
693+
// is safe only in the current context because `buf` comes
694+
// from `pre_compute_buf` which will outlive the returned
695+
// `PreComputeInstruction`s.
713696
let buf: &mut [u8] = unsafe { &mut *(*buf as *mut [u8]) };
714697
let pre_inst = if let Some((inst, _)) = inst_opt {
715698
tracing::trace!("get_metered_pre_compute_instruction {inst:?}");
@@ -738,9 +721,7 @@ where
738721
}
739722
} else {
740723
PreComputeInstruction {
741-
handler: |_, vm_state| {
742-
vm_state.exit_code = Err(ExecutionError::Unreachable(vm_state.pc));
743-
},
724+
handler: unreachable_handler,
744725
pre_compute: buf,
745726
}
746727
};

0 commit comments

Comments
 (0)