Skip to content

Commit b2b7dc7

Browse files
committed
update
1 parent ff7974c commit b2b7dc7

File tree

2 files changed

+28
-13
lines changed

2 files changed

+28
-13
lines changed

crates/luars/src/lua_vm/dispatcher/loop_instructions.rs

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,8 @@ pub fn exec_forprep(vm: &mut LuaVM, instr: u32) -> LuaResult<DispatchAction> {
103103
/// R[A]+=R[A+2];
104104
/// if R[A] <?= R[A+1] then { pc-=Bx; R[A+3]=R[A] }
105105
///
106-
/// ULTRA-OPTIMIZED: Direct bit-mask type checking with combined check
106+
/// ULTRA-OPTIMIZED V2: Cache frame pointer + direct bit-mask type checking
107+
/// - Eliminate Vec::len() call by using last_mut() directly
107108
/// - Single type check for all 3 values (branchless fast path)
108109
/// - Zero function calls in hot path
109110
#[inline(always)]
@@ -112,9 +113,11 @@ pub fn exec_forloop(vm: &mut LuaVM, instr: u32) -> LuaResult<DispatchAction> {
112113
let bx = Instruction::get_bx(instr) as usize;
113114

114115
// OPTIMIZATION: Single unsafe block + direct bit-mask type checking
116+
// Use last_mut() to avoid len() call
115117
unsafe {
116-
let frame_ptr = vm.frames.as_mut_ptr().add(vm.frames.len() - 1);
117-
let reg_base = vm.register_stack.as_mut_ptr().add((*frame_ptr).base_ptr + a);
118+
let frame_ptr = vm.frames.last_mut().unwrap_unchecked() as *mut LuaCallFrame;
119+
let base_ptr = (*frame_ptr).base_ptr;
120+
let reg_base = vm.register_stack.as_mut_ptr().add(base_ptr + a);
118121

119122
// Load all 3 values
120123
let idx = *reg_base;

crates/luars/src/lua_vm/mod.rs

Lines changed: 22 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -240,33 +240,45 @@ impl LuaVM {
240240
/// Main execution loop - interprets bytecode instructions
241241
/// Returns the final return value from the chunk
242242
fn run(&mut self) -> LuaResult<LuaValue> {
243+
// OPTIMIZATION: Cache chunk pointer across instructions to avoid RefCell::borrow() overhead
244+
// This is safe because chunk doesn't change during function execution
245+
let mut cached_chunk_ptr: Option<*const Chunk> = None;
246+
let mut cached_func_ptr: Option<*const RefCell<LuaFunction>> = None;
247+
243248
loop {
244249
// Check if we have any frames to execute
245250
if self.frames.is_empty() {
246251
// Execution finished
247252
return Ok(self.return_values.first().copied().unwrap_or(LuaValue::nil()));
248253
}
249254

250-
// Get current frame and chunk
255+
// Get current frame
251256
let frame = self.current_frame();
252257
let func_ptr = match frame.get_function_ptr() {
253258
Some(ptr) => ptr,
254259
None => return Err(LuaError::RuntimeError("Not a Lua function".to_string())),
255260
};
256261

257-
// Safety: func_ptr is valid as long as the function exists in object_pool
258-
let func = unsafe { &*func_ptr };
259-
let func_ref = func.borrow();
260-
let chunk = &func_ref.chunk;
262+
// OPTIMIZATION: Cache chunk pointer to avoid repeated RefCell::borrow()
263+
// Only update cache when function changes
264+
let chunk_ptr = if Some(func_ptr) == cached_func_ptr {
265+
unsafe { cached_chunk_ptr.unwrap_unchecked() }
266+
} else {
267+
// Function changed (call/return), update cache
268+
let func = unsafe { &*func_ptr };
269+
let func_ref = func.borrow();
270+
let chunk_ptr = Rc::as_ptr(&func_ref.chunk);
271+
drop(func_ref);
272+
cached_func_ptr = Some(func_ptr);
273+
cached_chunk_ptr = Some(chunk_ptr);
274+
chunk_ptr
275+
};
261276

262-
// OPTIMIZATION: Use unsafe for unchecked instruction fetch (hot path)
263-
// Safety: PC bounds are checked by bytecode compiler and instruction execution
277+
// OPTIMIZATION: Use cached chunk pointer directly (zero overhead)
278+
let chunk = unsafe { &*chunk_ptr };
264279
let pc = frame.pc;
265280
let instr = unsafe { *chunk.code.get_unchecked(pc) };
266281

267-
// Drop borrows before executing instruction
268-
drop(func_ref);
269-
270282
// Increment PC before dispatching (standard for most instructions)
271283
// Some instructions (JMP, FORLOOP, etc.) will override this
272284
self.current_frame_mut().pc += 1;

0 commit comments

Comments
 (0)