Skip to content

Commit ff7974c

Browse files
committed
optimize for
1 parent b6673f3 commit ff7974c

File tree

1 file changed

+74
-69
lines changed

1 file changed

+74
-69
lines changed

crates/luars/src/lua_vm/dispatcher/loop_instructions.rs

Lines changed: 74 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use super::DispatchAction;
44
/// These instructions handle for loops (numeric and generic iterators).
55
use crate::{
66
LuaValue,
7-
lua_value::LuaValueKind,
7+
lua_value::{LuaValueKind, TAG_INTEGER, TAG_FLOAT, TYPE_MASK},
88
lua_vm::{Instruction, LuaCallFrame, LuaError, LuaResult, LuaVM},
99
};
1010

@@ -102,86 +102,91 @@ pub fn exec_forprep(vm: &mut LuaVM, instr: u32) -> LuaResult<DispatchAction> {
102102
/// FORLOOP A Bx
103103
/// R[A]+=R[A+2];
104104
/// if R[A] <?= R[A+1] then { pc-=Bx; R[A+3]=R[A] }
105+
///
106+
/// ULTRA-OPTIMIZED: Direct bit-mask type checking with combined check
107+
/// - Single combined type check for all 3 values (one type-check branch on the fast path, not branchless)
108+
/// - No accessor or frame-lookup calls in hot path (only the `LuaValue::integer` constructors remain)
105109
#[inline(always)]
106110
pub fn exec_forloop(vm: &mut LuaVM, instr: u32) -> LuaResult<DispatchAction> {
107111
let a = Instruction::get_a(instr) as usize;
108112
let bx = Instruction::get_bx(instr) as usize;
109113

110-
let base_ptr = vm.current_frame().base_ptr;
111-
112-
// OPTIMIZATION: Use unsafe for unchecked register access (hot path)
113-
// Safety: FORPREP guarantees these registers exist and are initialized
114-
let (idx, counter_or_limit, step) = unsafe {
115-
let reg_base = vm.register_stack.as_ptr().add(base_ptr + a);
116-
(*reg_base, *reg_base.add(1), *reg_base.add(2))
117-
};
118-
119-
// Check if this is an integer loop (step is integer)
120-
if let Some(step_i) = step.as_integer() {
121-
// Integer loop: R[A+1] is a counter
122-
// OPTIMIZATION: Use match instead of ok_or_else to avoid closure
123-
let count = match counter_or_limit.as_integer() {
124-
Some(c) => c,
125-
None => return Err(LuaError::RuntimeError("'for' counter must be a number".to_string())),
126-
};
127-
128-
if count > 0 {
129-
// Update internal index
130-
let idx_i = match idx.as_integer() {
131-
Some(i) => i,
132-
None => return Err(LuaError::RuntimeError("'for' index must be a number".to_string())),
133-
};
134-
let new_idx = idx_i.wrapping_add(step_i);
114+
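// OPTIMIZATION: Single unsafe block + direct bit-mask type checking. SAFETY: assumes `vm.frames` is non-empty and that FORPREP left the loop registers starting at R[A] allocated and initialized — TODO confirm R[A+3] is covered too.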
115+
unsafe {
116+
let frame_ptr = vm.frames.as_mut_ptr().add(vm.frames.len() - 1);
117+
let reg_base = vm.register_stack.as_mut_ptr().add((*frame_ptr).base_ptr + a);
118+
119+
// Load all 3 values
120+
let idx = *reg_base;
121+
let counter_or_limit = *reg_base.add(1);
122+
let step = *reg_base.add(2);
123+
124+
// OPTIMIZATION: Combined type check - single OR operation to detect if all are integers
125+
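// If all 3 are TAG_INTEGER, then (a|b|c) & TYPE_MASK == TAG_INTEGER. NOTE(review): the fast path relies on the converse, which holds only if no other tag's bits (including a zero tag) are a subset of TAG_INTEGER's — confirm against the tag encoding in lua_value.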
126+
let combined_tags = (idx.primary | counter_or_limit.primary | step.primary) & TYPE_MASK;
127+
128+
// Fast path: All integers (single branch!)
129+
if combined_tags == TAG_INTEGER {
130+
let count = counter_or_limit.secondary as i64;
135131

136-
// OPTIMIZATION: Use unsafe for unchecked writes (hot path)
137-
// Safety: Same registers we just read from, still valid
138-
unsafe {
139-
let reg_base = vm.register_stack.as_mut_ptr().add(base_ptr + a);
132+
if count > 0 {
133+
let idx_i = idx.secondary as i64;
134+
let step_i = step.secondary as i64;
135+
let new_idx = idx_i.wrapping_add(step_i);
136+
137+
// Update registers
140138
*reg_base = LuaValue::integer(new_idx);
141139
*reg_base.add(1) = LuaValue::integer(count - 1);
142140
*reg_base.add(3) = LuaValue::integer(new_idx);
143-
}
144141

145-
// OPTIMIZATION: Direct PC manipulation
146-
let pc = vm.current_frame().pc;
147-
vm.current_frame_mut().pc = pc - bx;
148-
}
149-
// If count <= 0, exit loop (don't jump)
150-
} else {
151-
// Float loop: R[A+1] is limit, use traditional comparison
152-
let limit_f = match counter_or_limit.as_number() {
153-
Some(l) => l,
154-
None => return Err(LuaError::RuntimeError("'for' limit must be a number".to_string())),
155-
};
156-
let idx_f = match idx.as_number() {
157-
Some(i) => i,
158-
None => return Err(LuaError::RuntimeError("'for' index must be a number".to_string())),
159-
};
160-
let step_f = match step.as_number() {
161-
Some(s) => s,
162-
None => return Err(LuaError::RuntimeError("'for' step must be a number".to_string())),
163-
};
164-
165-
// Add step to index
166-
let new_idx_f = idx_f + step_f;
167-
168-
// Check condition
169-
let should_continue = if step_f > 0.0 {
170-
new_idx_f <= limit_f
171-
} else {
172-
new_idx_f >= limit_f
173-
};
174-
175-
if should_continue {
176-
// OPTIMIZATION: Unsafe writes for float path too
177-
unsafe {
178-
let reg_base = vm.register_stack.as_mut_ptr().add(base_ptr + a);
179-
*reg_base = LuaValue::number(new_idx_f);
180-
*reg_base.add(3) = LuaValue::number(new_idx_f);
142+
(*frame_ptr).pc -= bx;
181143
}
144+
}
145+
// Slow path: at least one non-integer
146+
else {
147+
let step_tag = step.primary & TYPE_MASK;
148+
let counter_tag = counter_or_limit.primary & TYPE_MASK;
149+
let idx_tag = idx.primary & TYPE_MASK;
182150

183-
let pc = vm.current_frame().pc;
184-
vm.current_frame_mut().pc = pc - bx;
151+
// Check if all are numbers (integer or float)
152+
if (step_tag == TAG_FLOAT || step_tag == TAG_INTEGER) &&
153+
(counter_tag == TAG_FLOAT || counter_tag == TAG_INTEGER) &&
154+
(idx_tag == TAG_FLOAT || idx_tag == TAG_INTEGER) {
155+
156+
// Convert to float
157+
let idx_f = if idx_tag == TAG_FLOAT {
158+
f64::from_bits(idx.secondary)
159+
} else {
160+
idx.secondary as i64 as f64
161+
};
162+
163+
let limit_f = if counter_tag == TAG_FLOAT {
164+
f64::from_bits(counter_or_limit.secondary)
165+
} else {
166+
counter_or_limit.secondary as i64 as f64
167+
};
168+
169+
let step_f = if step_tag == TAG_FLOAT {
170+
f64::from_bits(step.secondary)
171+
} else {
172+
step.secondary as i64 as f64
173+
};
174+
175+
let new_idx_f = idx_f + step_f;
176+
let should_continue = if step_f > 0.0 {
177+
new_idx_f <= limit_f
178+
} else {
179+
new_idx_f >= limit_f
180+
};
181+
182+
if should_continue {
183+
*reg_base = LuaValue::number(new_idx_f);
184+
*reg_base.add(3) = LuaValue::number(new_idx_f);
185+
(*frame_ptr).pc -= bx;
186+
}
187+
} else {
188+
return Err(LuaError::RuntimeError("'for' values must be numbers".to_string()));
189+
}
185190
}
186191
}
187192

0 commit comments

Comments
 (0)