Skip to content

Commit fb3d997

Browse files
committed
feat: rv32im tco without become keyword
`become` keyword was causing some corruption and not passing references properly between calls.
1 parent 13cc6f2 commit fb3d997

File tree

20 files changed

+397
-108
lines changed

20 files changed

+397
-108
lines changed

crates/vm/derive/src/tco.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,19 +49,25 @@ pub fn tco_impl(item: TokenStream) -> TokenStream {
4949
) -> Result<(), ::openvm_circuit::arch::ExecutionError>
5050
#where_clause
5151
{
52-
let pre_compute = interpreter.get_pre_compute(exec_state.pc);
52+
let pre_compute = interpreter.get_pre_compute(exec_state.vm_state.pc);
5353
#execute_call;
5454

5555
if std::hint::unlikely(exec_state.exit_code.is_err()) {
5656
return Err(::openvm_circuit::arch::ExecutionError::ExecStateError);
5757
}
5858
if std::hint::unlikely(exec_state.exit_code.as_ref().unwrap().is_some()) {
59+
#ctx_type::on_terminate(exec_state);
5960
// terminate
6061
return Ok(());
6162
}
63+
if #ctx_type::should_suspend(exec_state) {
64+
return Ok(());
65+
}
6266
// exec_state.pc should have been updated by execute_impl at this point
63-
let next_handler = interpreter.get_handler(exec_state.pc)?;
64-
become next_handler(interpreter, exec_state)
67+
let next_handler = interpreter.get_handler(exec_state.vm_state.pc)?;
68+
69+
// The `become` keyword has a bug that is not re-passing the `interpreter`, `exec_state` references properly. But llvm seems to almost always guarantee tail call elimination when the function signature is the same as the current function.
70+
next_handler(interpreter, exec_state)
6571
}
6672
};
6773

crates/vm/src/arch/interpreter.rs

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ where
139139
// unwrap because get_pre_compute_instructions would have errored
140140
// already on DisabledOperation
141141
let executor = inventory.get_executor(inst.opcode).unwrap();
142-
executor.handler(pc, inst, *pre_compute)
142+
executor.handler(pc, inst, pre_compute)
143143
}
144144
} else {
145145
Ok(unreachable_tco_handler)
@@ -177,6 +177,9 @@ where
177177
// instruction getting pre_compute_max_size bytes
178178
// - self.pre_compute_buf.ptr is non-null
179179
// - initialization of the contents of the slice is the responsibility of each Executor
180+
debug_assert!(
181+
(pc_idx + 1) * self.pre_compute_max_size <= self.pre_compute_buf.layout.size()
182+
);
180183
unsafe {
181184
let ptr = self
182185
.pre_compute_buf
@@ -190,7 +193,7 @@ where
190193
#[inline(always)]
191194
pub fn get_handler(&self, pc: u32) -> Result<Handler<F, Ctx>, ExecutionError> {
192195
let pc_idx = get_pc_index(self.pc_base, pc);
193-
if std::hint::unlikely(pc_idx > self.handlers.len()) {
196+
if std::hint::unlikely(pc_idx >= self.handlers.len()) {
194197
return Err(ExecutionError::PcOutOfBounds {
195198
pc,
196199
pc_base: self.pc_base,
@@ -287,13 +290,38 @@ where
287290
) -> Result<VmState<F, GuestMemory>, ExecutionError> {
288291
let ctx = ExecutionCtx::new(num_insns);
289292
let mut exec_state = VmExecState::new(from_state, ctx);
290-
// Start execution
291-
execute_with_metrics!(
292-
"execute_e1",
293-
self.pc_base,
294-
&mut exec_state,
295-
&self.pre_compute_insns
296-
);
293+
294+
#[cfg(feature = "metrics")]
295+
let start = std::time::Instant::now();
296+
#[cfg(feature = "metrics")]
297+
let start_instret = exec_state.instret;
298+
299+
#[cfg(not(feature = "tco"))]
300+
unsafe {
301+
execute_trampoline(self.pc_base, &mut exec_state, &self.pre_compute_insns);
302+
}
303+
#[cfg(feature = "tco")]
304+
unsafe {
305+
let handler = self.get_handler(exec_state.pc)?;
306+
let res = handler(self, &mut exec_state);
307+
if let Err(err) = res {
308+
match err {
309+
ExecutionError::ExecStateError => {}
310+
_ => {
311+
return Err(err);
312+
}
313+
}
314+
}
315+
}
316+
317+
#[cfg(feature = "metrics")]
318+
{
319+
let elapsed = start.elapsed();
320+
let insns = exec_state.instret - start_instret;
321+
tracing::info!("instructions_executed={insns}");
322+
metrics::counter!("execute_e1_insns").absolute(insns);
323+
metrics::gauge!("execute_e1_insn_mi/s").set(insns as f64 / elapsed.as_micros() as f64);
324+
}
297325
if num_insns.is_some() {
298326
check_exit_code(exec_state.exit_code)?;
299327
} else {
@@ -410,15 +438,10 @@ fn split_pre_compute_buf<'a, F>(
410438
) -> Vec<&'a mut [u8]> {
411439
let program_len = program.instructions_and_debug_infos.len();
412440
let buf_len = program_len * pre_compute_max_size;
413-
let mut pre_compute_buf_ptr =
414-
unsafe { std::slice::from_raw_parts_mut(pre_compute_buf.ptr, buf_len) };
415-
let mut split_pre_compute_buf = Vec::with_capacity(program_len);
416-
for _ in 0..program_len {
417-
let (first, last) = pre_compute_buf_ptr.split_at_mut(pre_compute_max_size);
418-
pre_compute_buf_ptr = last;
419-
split_pre_compute_buf.push(first);
420-
}
421-
split_pre_compute_buf
441+
let pre_compute_buf = unsafe { std::slice::from_raw_parts_mut(pre_compute_buf.ptr, buf_len) };
442+
pre_compute_buf
443+
.chunks_exact_mut(pre_compute_max_size)
444+
.collect()
422445
}
423446

424447
/// Executes using function pointers with the trampoline (loop) approach.

crates/vm/src/utils/stark_utils.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ where
114114
let exe = exe.into();
115115
let input = input.into();
116116
let metered_ctx = vm.build_metered_ctx();
117+
// TEMP: for testing
118+
vm.interpreter(&exe)?.execute(input.clone(), None)?;
117119
let (segments, _) = vm
118120
.metered_interpreter(&exe)?
119121
.execute_metered(input.clone(), metered_ctx)?;

extensions/rv32im/circuit/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,10 @@ openvm-circuit = { workspace = true, features = ["test-utils"] }
3333
test-case.workspace = true
3434

3535
[features]
36-
default = ["parallel", "jemalloc"]
36+
default = ["parallel", "jemalloc", "tco"]
3737
parallel = ["openvm-circuit/parallel"]
3838
test-utils = ["openvm-circuit/test-utils", "dep:openvm-stark-sdk"]
39+
tco = ["openvm-circuit/tco"]
3940
# performance features:
4041
mimalloc = ["openvm-circuit/mimalloc"]
4142
jemalloc = ["openvm-circuit/jemalloc"]

extensions/rv32im/circuit/src/auipc/execution.rs

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,7 @@ use std::{
33
mem::size_of,
44
};
55

6-
use openvm_circuit::{
7-
arch::{
8-
E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait,
9-
MeteredExecutor, StaticProgramError, VmExecState,
10-
},
11-
system::memory::online::GuestMemory,
12-
};
6+
use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
137
use openvm_circuit_primitives_derive::AlignedBytesBorrow;
148
use openvm_instructions::{
159
instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_REGISTER_AS,
@@ -66,6 +60,21 @@ where
6660
self.pre_compute_impl(pc, inst, data)?;
6761
Ok(execute_e1_impl)
6862
}
63+
64+
#[cfg(feature = "tco")]
65+
fn handler<Ctx>(
66+
&self,
67+
pc: u32,
68+
inst: &Instruction<F>,
69+
data: &mut [u8],
70+
) -> Result<Handler<F, Ctx>, StaticProgramError>
71+
where
72+
Ctx: ExecutionCtxTrait,
73+
{
74+
let data: &mut AuiPcPreCompute = data.borrow_mut();
75+
self.pre_compute_impl(pc, inst, data)?;
76+
Ok(execute_e1_tco_handler)
77+
}
6978
}
7079

7180
impl<F, A> MeteredExecutor<F> for Rv32AuipcExecutor<A>
@@ -105,6 +114,7 @@ unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
105114
vm_state.instret += 1;
106115
}
107116

117+
#[create_tco_handler]
108118
unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
109119
pre_compute: &[u8],
110120
vm_state: &mut VmExecState<F, GuestMemory, CTX>,

extensions/rv32im/circuit/src/base_alu/execution.rs

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,7 @@ use std::{
33
mem::size_of,
44
};
55

6-
use openvm_circuit::{
7-
arch::{
8-
E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait,
9-
MeteredExecutor, StaticProgramError, VmExecState,
10-
},
11-
system::memory::online::GuestMemory,
12-
};
6+
use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
137
use openvm_circuit_primitives_derive::AlignedBytesBorrow;
148
use openvm_instructions::{
159
instruction::Instruction,
@@ -102,6 +96,38 @@ where
10296
};
10397
Ok(fn_ptr)
10498
}
99+
100+
#[cfg(feature = "tco")]
101+
fn handler<Ctx>(
102+
&self,
103+
pc: u32,
104+
inst: &Instruction<F>,
105+
data: &mut [u8],
106+
) -> Result<Handler<F, Ctx>, StaticProgramError>
107+
where
108+
Ctx: ExecutionCtxTrait,
109+
{
110+
let data: &mut BaseAluPreCompute = data.borrow_mut();
111+
let is_imm = self.pre_compute_impl(pc, inst, data)?;
112+
let opcode = inst.opcode;
113+
114+
let fn_ptr = match (
115+
is_imm,
116+
BaseAluOpcode::from_usize(opcode.local_opcode_idx(self.offset)),
117+
) {
118+
(true, BaseAluOpcode::ADD) => execute_e1_tco_handler::<_, _, true, AddOp>,
119+
(false, BaseAluOpcode::ADD) => execute_e1_tco_handler::<_, _, false, AddOp>,
120+
(true, BaseAluOpcode::SUB) => execute_e1_tco_handler::<_, _, true, SubOp>,
121+
(false, BaseAluOpcode::SUB) => execute_e1_tco_handler::<_, _, false, SubOp>,
122+
(true, BaseAluOpcode::XOR) => execute_e1_tco_handler::<_, _, true, XorOp>,
123+
(false, BaseAluOpcode::XOR) => execute_e1_tco_handler::<_, _, false, XorOp>,
124+
(true, BaseAluOpcode::OR) => execute_e1_tco_handler::<_, _, true, OrOp>,
125+
(false, BaseAluOpcode::OR) => execute_e1_tco_handler::<_, _, false, OrOp>,
126+
(true, BaseAluOpcode::AND) => execute_e1_tco_handler::<_, _, true, AndOp>,
127+
(false, BaseAluOpcode::AND) => execute_e1_tco_handler::<_, _, false, AndOp>,
128+
};
129+
Ok(fn_ptr)
130+
}
105131
}
106132

107133
impl<F, A, const LIMB_BITS: usize> MeteredExecutor<F>
@@ -174,6 +200,7 @@ unsafe fn execute_e12_impl<
174200
vm_state.instret += 1;
175201
}
176202

203+
#[create_tco_handler]
177204
#[inline(always)]
178205
unsafe fn execute_e1_impl<
179206
F: PrimeField32,

extensions/rv32im/circuit/src/branch_eq/execution.rs

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,7 @@ use std::{
33
mem::size_of,
44
};
55

6-
use openvm_circuit::{
7-
arch::{
8-
E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait,
9-
MeteredExecutor, StaticProgramError, VmExecState,
10-
},
11-
system::memory::online::GuestMemory,
12-
};
6+
use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
137
use openvm_circuit_primitives_derive::AlignedBytesBorrow;
148
use openvm_instructions::{
159
instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_REGISTER_AS, LocalOpcode,
@@ -84,6 +78,26 @@ where
8478
};
8579
Ok(fn_ptr)
8680
}
81+
82+
#[cfg(feature = "tco")]
83+
fn handler<Ctx>(
84+
&self,
85+
pc: u32,
86+
inst: &Instruction<F>,
87+
data: &mut [u8],
88+
) -> Result<Handler<F, Ctx>, StaticProgramError>
89+
where
90+
Ctx: ExecutionCtxTrait,
91+
{
92+
let data: &mut BranchEqualPreCompute = data.borrow_mut();
93+
let is_bne = self.pre_compute_impl(pc, inst, data)?;
94+
let fn_ptr = if is_bne {
95+
execute_e1_tco_handler::<_, _, true>
96+
} else {
97+
execute_e1_tco_handler::<_, _, false>
98+
};
99+
Ok(fn_ptr)
100+
}
87101
}
88102

89103
impl<F, A, const NUM_LIMBS: usize> MeteredExecutor<F> for BranchEqualExecutor<A, NUM_LIMBS>
@@ -131,6 +145,7 @@ unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait, const IS_NE:
131145
vm_state.instret += 1;
132146
}
133147

148+
#[create_tco_handler]
134149
unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait, const IS_NE: bool>(
135150
pre_compute: &[u8],
136151
vm_state: &mut VmExecState<F, GuestMemory, CTX>,

extensions/rv32im/circuit/src/branch_lt/execution.rs

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,7 @@ use std::{
33
mem::size_of,
44
};
55

6-
use openvm_circuit::{
7-
arch::{
8-
E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait,
9-
MeteredExecutor, StaticProgramError, VmExecState,
10-
},
11-
system::memory::online::GuestMemory,
12-
};
6+
use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
137
use openvm_circuit_primitives_derive::AlignedBytesBorrow;
148
use openvm_instructions::{
159
instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_REGISTER_AS, LocalOpcode,
@@ -86,6 +80,27 @@ where
8680
};
8781
Ok(fn_ptr)
8882
}
83+
84+
#[cfg(feature = "tco")]
85+
fn handler<Ctx>(
86+
&self,
87+
pc: u32,
88+
inst: &Instruction<F>,
89+
data: &mut [u8],
90+
) -> Result<Handler<F, Ctx>, StaticProgramError>
91+
where
92+
Ctx: ExecutionCtxTrait,
93+
{
94+
let data: &mut BranchLePreCompute = data.borrow_mut();
95+
let local_opcode = self.pre_compute_impl(pc, inst, data)?;
96+
let fn_ptr = match local_opcode {
97+
BranchLessThanOpcode::BLT => execute_e1_tco_handler::<_, _, BltOp>,
98+
BranchLessThanOpcode::BLTU => execute_e1_tco_handler::<_, _, BltuOp>,
99+
BranchLessThanOpcode::BGE => execute_e1_tco_handler::<_, _, BgeOp>,
100+
BranchLessThanOpcode::BGEU => execute_e1_tco_handler::<_, _, BgeuOp>,
101+
};
102+
Ok(fn_ptr)
103+
}
89104
}
90105

91106
impl<F, A, const NUM_LIMBS: usize, const LIMB_BITS: usize> MeteredExecutor<F>
@@ -136,6 +151,7 @@ unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait, OP: BranchLe
136151
vm_state.instret += 1;
137152
}
138153

154+
#[create_tco_handler]
139155
unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait, OP: BranchLessThanOp>(
140156
pre_compute: &[u8],
141157
vm_state: &mut VmExecState<F, GuestMemory, CTX>,

0 commit comments

Comments
 (0)