Skip to content

Commit e066de4

Browse files
committed
perf: remove pc_base from pc_idx calc
1 parent 64c0571 commit e066de4

File tree

4 files changed

+62
-79
lines changed

4 files changed

+62
-79
lines changed

crates/vm/derive/src/tco.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ pub fn tco_impl(item: TokenStream) -> TokenStream {
6161
// exec_state.pc should have been updated by execute_impl at this point
6262
let next_handler = interpreter.get_handler(exec_state.vm_state.pc);
6363
if next_handler.is_none() {
64-
exec_state.exit_code = Err(interpreter.pc_out_of_bounds_err(exec_state.vm_state.pc));
64+
exec_state.exit_code = Err(ExecutionError::PcOutOfBounds(exec_state.vm_state.pc));
6565
return;
6666
}
6767
let next_handler = next_handler.unwrap_unchecked();

crates/vm/src/arch/execution.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,8 @@ use crate::{
2828
pub enum ExecutionError {
2929
#[error("execution failed at pc {pc}, err: {msg}")]
3030
Fail { pc: u32, msg: &'static str },
31-
#[error("pc {pc} out of bounds for program of length {program_len}, with pc_base {pc_base}")]
32-
PcOutOfBounds {
33-
pc: u32,
34-
pc_base: u32,
35-
program_len: usize,
36-
},
31+
#[error("pc {0} out of bounds")]
32+
PcOutOfBounds(u32),
3733
#[error("unreachable instruction at pc {0}")]
3834
Unreachable(u32),
3935
#[error("at pc {pc}, opcode {opcode} was not enabled")]

crates/vm/src/arch/interpreter.rs

Lines changed: 44 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::{
22
alloc::{alloc, dealloc, handle_alloc_error, Layout},
33
borrow::{Borrow, BorrowMut},
4+
iter::repeat_n,
45
ptr::NonNull,
56
};
67

@@ -44,15 +45,16 @@ pub struct InterpretedInstance<'a, F, Ctx> {
4445
#[allow(dead_code)]
4546
pre_compute_buf: AlignedBuf,
4647
/// Instruction table of function pointers and pointers to the pre-computed buffer. Indexed by
47-
/// `pc_index = (pc - pc_base) / DEFAULT_PC_STEP`.
48+
/// `pc_index = pc / DEFAULT_PC_STEP`.
49+
/// SAFETY: The first `pc_base / DEFAULT_PC_STEP` entries will be unreachable. We do this to
50+
/// avoid needing to subtract `pc_base` during runtime.
4851
pre_compute_insns: Vec<PreComputeInstruction<'a, F, Ctx>>,
4952
#[cfg(feature = "tco")]
5053
pre_compute_max_size: usize,
5154
/// Handler function pointers for tail call optimization.
5255
#[cfg(feature = "tco")]
5356
handlers: Vec<Handler<F, Ctx>>,
5457

55-
pc_base: u32,
5658
pc_start: u32,
5759

5860
init_memory: SparseMemoryImage,
@@ -84,11 +86,7 @@ macro_rules! run {
8486
#[cfg(not(feature = "tco"))]
8587
unsafe {
8688
tracing::debug!("execute_trampoline");
87-
execute_trampoline(
88-
$interpreter.pc_base,
89-
&mut $exec_state,
90-
&$interpreter.pre_compute_insns,
91-
);
89+
execute_trampoline(&mut $exec_state, &$interpreter.pre_compute_insns);
9290
}
9391
#[cfg(feature = "tco")]
9492
{
@@ -151,21 +149,19 @@ where
151149
{
152150
let program = &exe.program;
153151
let pre_compute_max_size = get_pre_compute_max_size(program, inventory);
154-
let mut pre_compute_buf = alloc_pre_compute_buf(program.len(), pre_compute_max_size);
152+
let mut pre_compute_buf = alloc_pre_compute_buf(program, pre_compute_max_size);
155153
let mut split_pre_compute_buf =
156154
split_pre_compute_buf(program, &mut pre_compute_buf, pre_compute_max_size);
157155
let pre_compute_insns = get_pre_compute_instructions::<F, Ctx, E>(
158156
program,
159157
inventory,
160158
&mut split_pre_compute_buf,
161159
)?;
162-
let pc_base = program.pc_base;
163160
let pc_start = exe.pc_start;
164161
let init_memory = exe.init_memory.clone();
165162
#[cfg(feature = "tco")]
166-
let handlers = program
167-
.instructions_and_debug_infos
168-
.iter()
163+
let handlers = repeat_n(&None, get_pc_index(program.pc_base))
164+
.chain(program.instructions_and_debug_infos.iter())
169165
.zip_eq(split_pre_compute_buf.iter_mut())
170166
.enumerate()
171167
.map(
@@ -191,7 +187,6 @@ where
191187
system_config: inventory.config().clone(),
192188
pre_compute_buf,
193189
pre_compute_insns,
194-
pc_base,
195190
pc_start,
196191
init_memory,
197192
#[cfg(feature = "tco")]
@@ -209,7 +204,7 @@ where
209204
#[cfg(feature = "tco")]
210205
#[inline(always)]
211206
pub fn get_pre_compute(&self, pc: u32) -> &[u8] {
212-
let pc_idx = get_pc_index(self.pc_base, pc);
207+
let pc_idx = get_pc_index(pc);
213208
// SAFETY:
214209
// - we assume that pc is in bounds
215210
// - pre_compute_buf is allocated for pre_compute_max_size * (pc_base / DEFAULT_PC_STEP + program_len) bytes, with each
@@ -228,14 +223,6 @@ where
228223
}
229224
}
230225

231-
pub fn pc_out_of_bounds_err(&self, pc: u32) -> ExecutionError {
232-
ExecutionError::PcOutOfBounds {
233-
pc,
234-
pc_base: self.pc_base,
235-
program_len: self.pre_compute_insns.len(),
236-
}
237-
}
238-
239226
#[cfg(feature = "tco")]
240227
#[inline(always)]
241228
pub fn get_handler(&self, pc: u32) -> Option<Handler<F, Ctx>> {
@@ -261,7 +248,7 @@ where
261248
{
262249
let program = &exe.program;
263250
let pre_compute_max_size = get_metered_pre_compute_max_size(program, inventory);
264-
let mut pre_compute_buf = alloc_pre_compute_buf(program.len(), pre_compute_max_size);
251+
let mut pre_compute_buf = alloc_pre_compute_buf(program, pre_compute_max_size);
265252
let mut split_pre_compute_buf =
266253
split_pre_compute_buf(program, &mut pre_compute_buf, pre_compute_max_size);
267254
let pre_compute_insns = get_metered_pre_compute_instructions::<F, Ctx, E>(
@@ -271,13 +258,11 @@ where
271258
&mut split_pre_compute_buf,
272259
)?;
273260

274-
let pc_base = program.pc_base;
275261
let pc_start = exe.pc_start;
276262
let init_memory = exe.init_memory.clone();
277263
#[cfg(feature = "tco")]
278-
let handlers = program
279-
.instructions_and_debug_infos
280-
.iter()
264+
let handlers = repeat_n(&None, get_pc_index(program.pc_base))
265+
.chain(program.instructions_and_debug_infos.iter())
281266
.zip_eq(split_pre_compute_buf.iter_mut())
282267
.enumerate()
283268
.map(
@@ -305,7 +290,6 @@ where
305290
system_config: inventory.config().clone(),
306291
pre_compute_buf,
307292
pre_compute_insns,
308-
pc_base,
309293
pc_start,
310294
init_memory,
311295
#[cfg(feature = "tco")]
@@ -448,8 +432,10 @@ where
448432
}
449433
}
450434

451-
fn alloc_pre_compute_buf(program_len: usize, pre_compute_max_size: usize) -> AlignedBuf {
452-
let buf_len = program_len * pre_compute_max_size;
435+
fn alloc_pre_compute_buf<F>(program: &Program<F>, pre_compute_max_size: usize) -> AlignedBuf {
436+
let base_idx = get_pc_index(program.pc_base);
437+
let padded_program_len = base_idx + program.instructions_and_debug_infos.len();
438+
let buf_len = padded_program_len * pre_compute_max_size;
453439
AlignedBuf::uninit(buf_len, pre_compute_max_size)
454440
}
455441

@@ -458,8 +444,9 @@ fn split_pre_compute_buf<'a, F>(
458444
pre_compute_buf: &'a mut AlignedBuf,
459445
pre_compute_max_size: usize,
460446
) -> Vec<&'a mut [u8]> {
461-
let program_len = program.instructions_and_debug_infos.len();
462-
let buf_len = program_len * pre_compute_max_size;
447+
let base_idx = get_pc_index(program.pc_base);
448+
let padded_program_len = base_idx + program.instructions_and_debug_infos.len();
449+
let buf_len = padded_program_len * pre_compute_max_size;
463450
// SAFETY:
464451
// - pre_compute_buf.ptr was allocated with exactly buf_len bytes
465452
// - lifetime 'a ensures the returned slices don't outlive the AlignedBuf
@@ -475,7 +462,6 @@ fn split_pre_compute_buf<'a, F>(
475462
/// The `fn_ptrs` pointer to pre-computed buffers that outlive this function.
476463
#[inline(always)]
477464
unsafe fn execute_trampoline<F: PrimeField32, Ctx: ExecutionCtxTrait>(
478-
pc_base: u32,
479465
vm_state: &mut VmExecState<F, GuestMemory, Ctx>,
480466
fn_ptrs: &[PreComputeInstruction<F, Ctx>],
481467
) {
@@ -487,16 +473,12 @@ unsafe fn execute_trampoline<F: PrimeField32, Ctx: ExecutionCtxTrait>(
487473
if Ctx::should_suspend(vm_state) {
488474
break;
489475
}
490-
let pc_index = get_pc_index(pc_base, vm_state.pc);
476+
let pc_index = get_pc_index(vm_state.pc);
491477
if let Some(inst) = fn_ptrs.get(pc_index) {
492478
// SAFETY: pre_compute assumed to live long enough
493479
unsafe { (inst.handler)(inst.pre_compute, vm_state) };
494480
} else {
495-
vm_state.exit_code = Err(ExecutionError::PcOutOfBounds {
496-
pc: vm_state.pc,
497-
pc_base,
498-
program_len: fn_ptrs.len(),
499-
});
481+
vm_state.exit_code = Err(ExecutionError::PcOutOfBounds(vm_state.pc));
500482
}
501483
}
502484
if vm_state
@@ -509,8 +491,8 @@ unsafe fn execute_trampoline<F: PrimeField32, Ctx: ExecutionCtxTrait>(
509491
}
510492

511493
#[inline(always)]
512-
pub fn get_pc_index(pc_base: u32, pc: u32) -> usize {
513-
((pc - pc_base) / DEFAULT_PC_STEP) as usize
494+
pub fn get_pc_index(pc: u32) -> usize {
495+
(pc / DEFAULT_PC_STEP) as usize
514496
}
515497

516498
/// Bytes allocated according to the given Layout
@@ -647,15 +629,19 @@ where
647629
Ctx: ExecutionCtxTrait,
648630
E: Executor<F>,
649631
{
650-
program
651-
.instructions_and_debug_infos
652-
.iter()
632+
let unreachable_handler: ExecuteFunc<F, Ctx> = |_, vm_state| {
633+
vm_state.exit_code = Err(ExecutionError::Unreachable(vm_state.pc));
634+
};
635+
636+
repeat_n(&None, get_pc_index(program.pc_base))
637+
.chain(program.instructions_and_debug_infos.iter())
653638
.zip_eq(pre_compute.iter_mut())
654639
.enumerate()
655640
.map(|(i, (inst_opt, buf))| {
656-
// SAFETY: we cast to raw pointer and then borrow to remove the lifetime. This is safe
657-
// only in the current context because `buf` comes from `pre_compute_buf` which will
658-
// outlive the returned `PreComputeInstruction`s.
641+
// SAFETY: we cast to raw pointer and then borrow to remove the lifetime. This
642+
// is safe only in the current context because `buf` comes
643+
// from `pre_compute_buf` which will outlive the returned
644+
// `PreComputeInstruction`s.
659645
let buf: &mut [u8] = unsafe { &mut *(*buf as *mut [u8]) };
660646
let pre_inst = if let Some((inst, _)) = inst_opt {
661647
tracing::trace!("get_pre_compute_instruction {inst:?}");
@@ -679,9 +665,7 @@ where
679665
} else {
680666
// Dead instruction at this pc
681667
PreComputeInstruction {
682-
handler: |_, vm_state| {
683-
vm_state.exit_code = Err(ExecutionError::Unreachable(vm_state.pc));
684-
},
668+
handler: unreachable_handler,
685669
pre_compute: buf,
686670
}
687671
};
@@ -701,15 +685,18 @@ where
701685
Ctx: MeteredExecutionCtxTrait,
702686
E: MeteredExecutor<F>,
703687
{
704-
program
705-
.instructions_and_debug_infos
706-
.iter()
688+
let unreachable_handler: ExecuteFunc<F, Ctx> = |_, vm_state| {
689+
vm_state.exit_code = Err(ExecutionError::Unreachable(vm_state.pc));
690+
};
691+
repeat_n(&None, get_pc_index(program.pc_base))
692+
.chain(program.instructions_and_debug_infos.iter())
707693
.zip_eq(pre_compute.iter_mut())
708694
.enumerate()
709695
.map(|(i, (inst_opt, buf))| {
710-
// SAFETY: we cast to raw pointer and then borrow to remove the lifetime. This is safe
711-
// only in the current context because `buf` comes from `pre_compute_buf` which will
712-
// outlive the returned `PreComputeInstruction`s.
696+
// SAFETY: we cast to raw pointer and then borrow to remove the lifetime. This
697+
// is safe only in the current context because `buf` comes
698+
// from `pre_compute_buf` which will outlive the returned
699+
// `PreComputeInstruction`s.
713700
let buf: &mut [u8] = unsafe { &mut *(*buf as *mut [u8]) };
714701
let pre_inst = if let Some((inst, _)) = inst_opt {
715702
tracing::trace!("get_metered_pre_compute_instruction {inst:?}");
@@ -738,9 +725,7 @@ where
738725
}
739726
} else {
740727
PreComputeInstruction {
741-
handler: |_, vm_state| {
742-
vm_state.exit_code = Err(ExecutionError::Unreachable(vm_state.pc));
743-
},
728+
handler: unreachable_handler,
744729
pre_compute: buf,
745730
}
746731
};

crates/vm/src/arch/interpreter_preflight.rs

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::sync::Arc;
1+
use std::{iter::repeat_n, sync::Arc};
22

33
use openvm_instructions::{instruction::Instruction, program::Program, LocalOpcode, SystemOpcode};
44
use openvm_stark_backend::{
@@ -36,6 +36,7 @@ pub struct PreflightInterpretedInstance<F, E> {
3636
}
3737

3838
#[repr(C)]
39+
#[derive(Clone)]
3940
pub struct PcEntry<F> {
4041
// NOTE[jpw]: revisit storing only smaller `precompute` for better cache locality. Currently
4142
// VmOpcode is usize so align=8 and there are 7 u32 operands so we store ExecutorId(u32) after
@@ -60,7 +61,10 @@ impl<F: Field, E> PreflightInterpretedInstance<F, E> {
6061
return Err(StaticProgramError::TooManyExecutors);
6162
}
6263
let len = program.instructions_and_debug_infos.len();
63-
let mut pc_handler = Vec::with_capacity(len);
64+
let pc_base = program.pc_base;
65+
let base_idx = get_pc_index(pc_base);
66+
let mut pc_handler = Vec::with_capacity(base_idx + len);
67+
pc_handler.extend(repeat_n(PcEntry::undefined(), base_idx));
6468
for insn_and_debug_info in &program.instructions_and_debug_infos {
6569
if let Some((insn, _)) = insn_and_debug_info {
6670
let insn = insn.clone();
@@ -86,9 +90,9 @@ impl<F: Field, E> PreflightInterpretedInstance<F, E> {
8690
}
8791
Ok(Self {
8892
inventory,
89-
execution_frequencies: vec![0u32; len],
93+
execution_frequencies: vec![0u32; base_idx + len],
94+
pc_base,
9095
pc_handler,
91-
pc_base: program.pc_base,
9296
executor_idx_to_air_idx,
9397
})
9498
}
@@ -101,9 +105,11 @@ impl<F: Field, E> PreflightInterpretedInstance<F, E> {
101105
where
102106
E: Send + Sync,
103107
{
108+
let base_idx = get_pc_index(self.pc_base);
104109
self.pc_handler
105110
.par_iter()
106111
.enumerate()
112+
.skip(base_idx)
107113
.filter(|(_, entry)| entry.is_some())
108114
.map(|(i, _)| self.execution_frequencies[i])
109115
.collect()
@@ -157,15 +163,11 @@ impl<F: PrimeField32, E> PreflightInterpretedInstance<F, E> {
157163
E: PreflightExecutor<F, RA>,
158164
{
159165
let pc = state.pc;
160-
let pc_idx = get_pc_index(self.pc_base, pc);
161-
let pc_entry =
162-
self.pc_handler
163-
.get(pc_idx)
164-
.ok_or_else(|| ExecutionError::PcOutOfBounds {
165-
pc,
166-
pc_base: self.pc_base,
167-
program_len: self.pc_handler.len(),
168-
})?;
166+
let pc_idx = get_pc_index(pc);
167+
let pc_entry = self
168+
.pc_handler
169+
.get(pc_idx)
170+
.ok_or_else(|| ExecutionError::PcOutOfBounds(pc))?;
169171
// SAFETY: `execution_frequencies` has the same length as `pc_handler` so `get_pc_entry`
170172
// already does the bounds check
171173
unsafe {

0 commit comments

Comments
 (0)