Skip to content

Commit 867b73f

Browse files
committed
optimize loop
1 parent 3adb7dc commit 867b73f

File tree

1 file changed

+42
-27
lines changed

1 file changed

+42
-27
lines changed

crates/luars/src/lua_vm/execute/loop_instructions.rs

Lines changed: 42 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,7 @@ pub fn exec_forprep(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) ->
131131
/// R[A]+=R[A+2];
132132
/// if R[A] <?= R[A+1] then { pc-=Bx; R[A+3]=R[A] }
133133
///
134-
/// ULTRA-OPTIMIZED: Matches Lua's chgivalue - only update secondary field for integers
134+
/// ULTRA-OPTIMIZED: Only check step type (like Lua C), use chgivalue pattern
135135
#[inline(always)]
136136
pub fn exec_forloop(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
137137
let a = Instruction::get_a(instr) as usize;
@@ -141,37 +141,52 @@ pub fn exec_forloop(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) ->
141141
let base_ptr = (*frame_ptr).base_ptr;
142142
let reg_base = vm.register_stack.as_mut_ptr().add(base_ptr + a);
143143

144-
// Check types first to distinguish integer vs float loop
145-
let idx = *reg_base;
146-
let counter_or_limit = *reg_base.add(1);
144+
// Only check step type - like Lua C's ttisinteger(s2v(ra + 2))
147145
let step = *reg_base.add(2);
148-
149-
let idx_tag = idx.primary & TYPE_MASK;
150-
let limit_tag = counter_or_limit.primary & TYPE_MASK;
151-
let step_tag = step.primary & TYPE_MASK;
152-
153-
// Fast path: pure integer loop (all three values are integers)
154-
if idx_tag == TAG_INTEGER && limit_tag == TAG_INTEGER && step_tag == TAG_INTEGER {
155-
// Read counter - R[A+1] stores remaining iteration count for integer loops
156-
let counter = counter_or_limit.secondary as i64;
157-
158-
if counter > 0 {
159-
let idx_i = idx.secondary as i64;
146+
147+
if step.primary == TAG_INTEGER {
148+
// Integer loop - step is integer, so idx and counter must be too (set by FORPREP)
149+
let count = (*reg_base.add(1)).secondary; // counter as u64
150+
151+
if count > 0 {
152+
let idx = (*reg_base).secondary as i64;
160153
let step_i = step.secondary as i64;
161-
let new_idx = idx_i.wrapping_add(step_i);
162-
163-
// Use chgivalue pattern - only update secondary field, type tags stay the same
164-
(*reg_base).secondary = new_idx as u64;
165-
(*reg_base.add(1)).secondary = (counter - 1) as u64;
166-
(*reg_base.add(3)).secondary = new_idx as u64;
167-
154+
let new_idx = idx.wrapping_add(step_i);
155+
156+
// chgivalue pattern - only update secondary (value), primary (type) stays same
157+
(*reg_base.add(1)).secondary = count - 1; // counter--
158+
(*reg_base).secondary = new_idx as u64; // idx += step
159+
(*reg_base.add(3)).secondary = new_idx as u64; // control = idx
160+
168161
(*frame_ptr).pc -= bx;
169162
}
170-
// counter == 0 means loop ended, just fall through
163+
// count == 0: loop ended, fall through
171164
return Ok(());
172165
}
173166

174-
// Slow path: float loop (at least one value is float)
167+
// Float loop - slower path
168+
exec_forloop_float(vm, reg_base, bx, frame_ptr)
169+
}
170+
}
171+
172+
/// Float loop - separate cold function
173+
#[cold]
174+
#[inline(never)]
175+
fn exec_forloop_float(
176+
vm: &mut LuaVM,
177+
reg_base: *mut LuaValue,
178+
bx: usize,
179+
frame_ptr: *mut LuaCallFrame,
180+
) -> LuaResult<()> {
181+
unsafe {
182+
let idx = *reg_base;
183+
let limit = *reg_base.add(1);
184+
let step = *reg_base.add(2);
185+
186+
let idx_tag = idx.primary & TYPE_MASK;
187+
let limit_tag = limit.primary & TYPE_MASK;
188+
let step_tag = step.primary & TYPE_MASK;
189+
175190
let idx_f = if idx_tag == TAG_FLOAT {
176191
f64::from_bits(idx.secondary)
177192
} else if idx_tag == TAG_INTEGER {
@@ -181,9 +196,9 @@ pub fn exec_forloop(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) ->
181196
};
182197

183198
let limit_f = if limit_tag == TAG_FLOAT {
184-
f64::from_bits(counter_or_limit.secondary)
199+
f64::from_bits(limit.secondary)
185200
} else if limit_tag == TAG_INTEGER {
186-
counter_or_limit.secondary as i64 as f64
201+
limit.secondary as i64 as f64
187202
} else {
188203
return Err(vm.error("'for' limit must be a number".to_string()));
189204
};

0 commit comments

Comments
 (0)