Skip to content

Commit 48ec4dc

Browse files
committed
optimize
1 parent af8f788 commit 48ec4dc

File tree

2 files changed

+29
-53
lines changed

2 files changed

+29
-53
lines changed

crates/luars/src/lua_vm/execute/arithmetic_instructions.rs

Lines changed: 27 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ pub fn exec_unm(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> Lua
351351
// ============ Arithmetic Immediate Instructions ============
352352

353353
/// ADDI: R[A] = R[B] + sC
354-
/// OPTIMIZED: Matches Lua C's setivalue behavior
354+
/// OPTIMIZED: Minimal branches, inline integer path
355355
#[inline(always)]
356356
pub fn exec_addi(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
357357
let a = Instruction::get_a(instr) as usize;
@@ -365,22 +365,11 @@ pub fn exec_addi(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
365365

366366
if left.primary == TAG_INTEGER {
367367
let result = (left.secondary as i64).wrapping_add(sc as i64);
368-
*reg_base.add(a) = LuaValue {
369-
primary: TAG_INTEGER,
370-
secondary: result as u64,
371-
};
368+
// Write tag and payload with two direct field stores (mirrors Lua's setivalue; not an atomic operation)
369+
let dest = reg_base.add(a);
370+
(*dest).primary = TAG_INTEGER;
371+
(*dest).secondary = result as u64;
372372
(*frame_ptr).pc += 1; // Skip MMBINI
373-
374-
// OPTIMIZATION: Check if next instruction is backward JMP (loop)
375-
let next_instr = (*frame_ptr).code_ptr.add((*frame_ptr).pc).read();
376-
if (next_instr & 0x7F) == 56 {
377-
// JMP opcode
378-
let sj = ((next_instr >> 7) & 0x1FFFFFF) as i32 - 16777215;
379-
if sj < 0 {
380-
// Backward jump = loop
381-
(*frame_ptr).pc = ((*frame_ptr).pc as i32 + 1 + sj) as usize;
382-
}
383-
}
384373
return;
385374
}
386375

@@ -484,7 +473,7 @@ pub fn exec_subk(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
484473
}
485474

486475
/// MULK: R[A] = R[B] * K[C]
487-
/// OPTIMIZED: Uses cached constants_ptr for direct constant access
476+
/// OPTIMIZED: Direct field writes, minimal branching
488477
#[inline(always)]
489478
pub fn exec_mulk(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
490479
let a = Instruction::get_a(instr) as usize;
@@ -498,47 +487,37 @@ pub fn exec_mulk(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
498487
// FAST PATH: Direct constant access via cached pointer
499488
let constant = *(*frame_ptr).constants_ptr.add(c);
500489

501-
// Float * Float fast path (most common in benchmarks)
490+
// Integer * Integer fast path FIRST (most common in benchmarks with integer loops)
491+
if left.primary == TAG_INTEGER && constant.primary == TAG_INTEGER {
492+
let result = (left.secondary as i64).wrapping_mul(constant.secondary as i64);
493+
let dest = vm.register_stack.as_mut_ptr().add(base_ptr + a);
494+
(*dest).primary = TAG_INTEGER;
495+
(*dest).secondary = result as u64;
496+
(*frame_ptr).pc += 1;
497+
return;
498+
}
499+
500+
// Float * Float fast path
502501
if left.primary == TAG_FLOAT && constant.primary == TAG_FLOAT {
503502
let result = f64::from_bits(left.secondary) * f64::from_bits(constant.secondary);
504-
*vm.register_stack.as_mut_ptr().add(base_ptr + a) = LuaValue {
505-
primary: TAG_FLOAT,
506-
secondary: result.to_bits(),
507-
};
503+
let dest = vm.register_stack.as_mut_ptr().add(base_ptr + a);
504+
(*dest).primary = TAG_FLOAT;
505+
(*dest).secondary = result.to_bits();
508506
(*frame_ptr).pc += 1;
509507
return;
510508
}
511509

512-
// Integer * Integer
513-
if left.primary == TAG_INTEGER && constant.primary == TAG_INTEGER {
514-
let result = (left.secondary as i64).wrapping_mul(constant.secondary as i64);
515-
*vm.register_stack.as_mut_ptr().add(base_ptr + a) = LuaValue {
516-
primary: TAG_INTEGER,
517-
secondary: result as u64,
518-
};
510+
// Mixed types: Integer * Float or Float * Integer
511+
if left.primary == TAG_INTEGER && constant.primary == TAG_FLOAT {
512+
let result = (left.secondary as i64) as f64 * f64::from_bits(constant.secondary);
513+
*vm.register_stack.as_mut_ptr().add(base_ptr + a) = LuaValue::float(result);
519514
(*frame_ptr).pc += 1;
520515
return;
521516
}
522517

523-
// Mixed types
524-
if left.primary == TAG_FLOAT || constant.primary == TAG_FLOAT {
525-
let l = if left.primary == TAG_FLOAT {
526-
f64::from_bits(left.secondary)
527-
} else if left.primary == TAG_INTEGER {
528-
left.secondary as i64 as f64
529-
} else {
530-
return;
531-
};
532-
533-
let r = if constant.primary == TAG_FLOAT {
534-
f64::from_bits(constant.secondary)
535-
} else if constant.primary == TAG_INTEGER {
536-
constant.secondary as i64 as f64
537-
} else {
538-
return;
539-
};
540-
541-
*vm.register_stack.as_mut_ptr().add(base_ptr + a) = LuaValue::float(l * r);
518+
if left.primary == TAG_FLOAT && constant.primary == TAG_INTEGER {
519+
let result = f64::from_bits(left.secondary) * (constant.secondary as i64) as f64;
520+
*vm.register_stack.as_mut_ptr().add(base_ptr + a) = LuaValue::float(result);
542521
(*frame_ptr).pc += 1;
543522
}
544523
}

crates/luars/src/lua_vm/execute/loop_instructions.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ pub fn exec_forprep(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) ->
131131
/// R[A]+=R[A+2];
132132
/// if R[A] <?= R[A+1] then { pc-=Bx; R[A+3]=R[A] }
133133
///
134-
/// ULTRA-OPTIMIZED: Minimized memory access, branch prediction friendly
134+
/// ULTRA-OPTIMIZED: Mirrors Lua's chgivalue - the integer fast path updates only the `secondary` field
135135
#[inline(always)]
136136
pub fn exec_forloop(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
137137
let a = Instruction::get_a(instr) as usize;
@@ -145,18 +145,15 @@ pub fn exec_forloop(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) ->
145145
let counter = (*reg_base.add(1)).secondary as i64;
146146

147147
// Fast path: integer loop with counter > 0
148-
// Check counter first (most common exit condition)
149148
if counter > 0 {
150-
// Only read other values if we're continuing
151149
let idx_i = (*reg_base).secondary as i64;
152150
let step_i = (*reg_base.add(2)).secondary as i64;
153151
let new_idx = idx_i.wrapping_add(step_i);
154152

155-
// Write back - minimize writes
153+
// Use chgivalue pattern - only update secondary field, type tags stay the same
156154
(*reg_base).secondary = new_idx as u64;
157155
(*reg_base.add(1)).secondary = (counter - 1) as u64;
158156
(*reg_base.add(3)).secondary = new_idx as u64;
159-
// Note: type tags stay TAG_INTEGER, no need to rewrite primary
160157

161158
(*frame_ptr).pc -= bx;
162159
return Ok(());

0 commit comments

Comments
 (0)