Skip to content

Commit d3973e1

Browse files
committed
optimize for
1 parent 0b78b20 commit d3973e1

File tree

2 files changed

+159
-126
lines changed

2 files changed

+159
-126
lines changed

crates/luars/src/lua_vm/dispatcher/loop_instructions.rs

Lines changed: 157 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -9,89 +9,113 @@ use crate::{
99

1010
/// FORPREP A Bx
1111
/// Prepare numeric for loop: R[A]-=R[A+2]; R[A+3]=R[A]; if (skip) pc+=Bx+1
12+
/// OPTIMIZED: Uses frame_ptr directly, no i128, unsafe register access
1213
#[inline(always)]
13-
pub fn exec_forprep(vm: &mut LuaVM, instr: u32) -> LuaResult<()> {
14+
pub fn exec_forprep(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
1415
let a = Instruction::get_a(instr) as usize;
1516
let bx = Instruction::get_bx(instr) as usize;
1617

17-
let frame = vm.current_frame();
18-
let base_ptr = frame.base_ptr;
18+
unsafe {
19+
let base_ptr = (*frame_ptr).base_ptr;
20+
let reg_base = vm.register_stack.as_mut_ptr().add(base_ptr + a);
21+
22+
let init = *reg_base;
23+
let limit = *reg_base.add(1);
24+
let step = *reg_base.add(2);
1925

20-
let init = vm.register_stack[base_ptr + a];
21-
let limit = vm.register_stack[base_ptr + a + 1];
22-
let step = vm.register_stack[base_ptr + a + 2];
26+
// Check for integer loop using type tags
27+
let init_tag = init.primary & TYPE_MASK;
28+
let limit_tag = limit.primary & TYPE_MASK;
29+
let step_tag = step.primary & TYPE_MASK;
2330

24-
// Check for integer loop
25-
if let (Some(init_i), Some(limit_i), Some(step_i)) =
26-
(init.as_integer(), limit.as_integer(), step.as_integer())
27-
{
28-
if step_i == 0 {
29-
return Err(vm.error("'for' step is zero".to_string()));
30-
}
31+
if init_tag == TAG_INTEGER && limit_tag == TAG_INTEGER && step_tag == TAG_INTEGER {
32+
let init_i = init.secondary as i64;
33+
let limit_i = limit.secondary as i64;
34+
let step_i = step.secondary as i64;
3135

32-
// Set control variable (R[A+3] = init)
33-
vm.register_stack[base_ptr + a + 3] = LuaValue::integer(init_i);
36+
if step_i == 0 {
37+
return Err(vm.error("'for' step is zero".to_string()));
38+
}
39+
40+
// Set control variable (R[A+3] = init)
41+
*reg_base.add(3) = LuaValue::integer(init_i);
42+
43+
// Calculate loop count using u64 arithmetic (avoid i128!)
44+
// Lua 5.4 style: wrapping unsigned subtraction yields the exact distance without overflow
45+
let count: u64 = if step_i > 0 {
46+
// Ascending loop
47+
if limit_i < init_i {
48+
0
49+
} else {
50+
// (limit - init) / step, using unsigned division
51+
let diff = (limit_i as u64).wrapping_sub(init_i as u64);
52+
diff / (step_i as u64)
53+
}
54+
} else {
55+
// Descending loop
56+
if init_i < limit_i {
57+
0
58+
} else {
59+
// (init - limit) / (-step)
60+
let diff = (init_i as u64).wrapping_sub(limit_i as u64);
61+
let neg_step = (-(step_i as i64)) as u64;
62+
diff / neg_step
63+
}
64+
};
3465

35-
// Calculate loop count (Lua 5.4 uses counter for integer loops)
36-
let count = if step_i > 0 {
37-
// Ascending: count = (limit - init) / step
38-
if limit_i < init_i {
39-
0 // skip loop
66+
if count == 0 {
67+
// Skip the entire loop body and FORLOOP
68+
(*frame_ptr).pc += bx;
4069
} else {
41-
let diff = (limit_i as i128) - (init_i as i128);
42-
(diff / (step_i as i128)) as u64
70+
// Store count in R[A+1] (replacing limit)
71+
*reg_base.add(1) = LuaValue::integer(count as i64);
4372
}
4473
} else {
45-
// Descending: count = (init - limit) / (-(step+1)+1)
46-
if init_i < limit_i {
47-
0 // skip loop
74+
// Float loop - convert to f64
75+
let init_f = if init_tag == TAG_INTEGER {
76+
init.secondary as i64 as f64
77+
} else if init_tag == TAG_FLOAT {
78+
f64::from_bits(init.secondary)
4879
} else {
49-
let diff = (init_i as i128) - (limit_i as i128);
50-
let divisor = -((step_i + 1) as i128) + 1;
51-
(diff / divisor) as u64
52-
}
53-
};
80+
return Err(vm.error("'for' initial value must be a number".to_string()));
81+
};
5482

55-
if count == 0 {
56-
// Skip the entire loop body and FORLOOP
57-
vm.current_frame_mut().pc = vm.current_frame().pc + bx;
58-
} else {
59-
// Store count in R[A+1] (replacing limit)
60-
vm.register_stack[base_ptr + a + 1] = LuaValue::integer(count as i64);
61-
// R[A] keeps init value (will be updated by FORLOOP)
62-
// Don't modify R[A] here!
63-
}
64-
} else {
65-
// Float loop
66-
let Some(init_f) = init.as_number() else {
67-
return Err(vm.error("'for' initial value must be a number".to_string()));
68-
};
69-
let Some(limit_f) = limit.as_number() else {
70-
return Err(vm.error("'for' limit must be a number".to_string()));
71-
};
72-
let Some(step_f) = step.as_number() else {
73-
return Err(vm.error("'for' step must be a number".to_string()));
74-
};
75-
76-
if step_f == 0.0 {
77-
return Err(vm.error("'for' step is zero".to_string()));
78-
}
83+
let limit_f = if limit_tag == TAG_INTEGER {
84+
limit.secondary as i64 as f64
85+
} else if limit_tag == TAG_FLOAT {
86+
f64::from_bits(limit.secondary)
87+
} else {
88+
return Err(vm.error("'for' limit must be a number".to_string()));
89+
};
7990

80-
// Set control variable
81-
vm.register_stack[base_ptr + a + 3] = LuaValue::number(init_f);
91+
let step_f = if step_tag == TAG_INTEGER {
92+
step.secondary as i64 as f64
93+
} else if step_tag == TAG_FLOAT {
94+
f64::from_bits(step.secondary)
95+
} else {
96+
return Err(vm.error("'for' step must be a number".to_string()));
97+
};
8298

83-
// Check if we should skip
84-
let should_skip = if step_f > 0.0 {
85-
init_f > limit_f
86-
} else {
87-
init_f < limit_f
88-
};
99+
if step_f == 0.0 {
100+
return Err(vm.error("'for' step is zero".to_string()));
101+
}
89102

90-
if should_skip {
91-
vm.current_frame_mut().pc = vm.current_frame().pc + bx;
92-
} else {
93-
// Prepare internal index
94-
vm.register_stack[base_ptr + a] = LuaValue::number(init_f - step_f);
103+
// Set control variable
104+
*reg_base.add(3) = LuaValue::number(init_f);
105+
106+
// Check if we should skip
107+
let should_skip = if step_f > 0.0 {
108+
init_f > limit_f
109+
} else {
110+
init_f < limit_f
111+
};
112+
113+
if should_skip {
114+
(*frame_ptr).pc += bx;
115+
} else {
116+
// Prepare internal index
117+
*reg_base = LuaValue::number(init_f - step_f);
118+
}
95119
}
96120
}
97121

@@ -102,7 +126,7 @@ pub fn exec_forprep(vm: &mut LuaVM, instr: u32) -> LuaResult<()> {
102126
/// R[A]+=R[A+2];
103127
/// if R[A] <?= R[A+1] then { pc-=Bx; R[A+3]=R[A] }
104128
///
105-
/// ULTRA-OPTIMIZED: Uses pre-fetched frame_ptr + direct bit-mask type checking
129+
/// ULTRA-OPTIMIZED: Minimized memory access, branch prediction friendly
106130
#[inline(always)]
107131
pub fn exec_forloop(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
108132
let a = Instruction::get_a(instr) as usize;
@@ -112,71 +136,80 @@ pub fn exec_forloop(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) ->
112136
let base_ptr = (*frame_ptr).base_ptr;
113137
let reg_base = vm.register_stack.as_mut_ptr().add(base_ptr + a);
114138

115-
let idx = *reg_base;
116-
let counter_or_limit = *reg_base.add(1);
117-
let step = *reg_base.add(2);
118-
119-
let combined_tags = (idx.primary | counter_or_limit.primary | step.primary) & TYPE_MASK;
139+
// Read counter first - this is the hot path check
140+
let counter = (*reg_base.add(1)).secondary as i64;
141+
142+
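// Fast path: counter > 0, assumed to be an integer loop. NOTE(review): no type tag is checked before this branch, so a float limit whose bits read as a positive i64 also lands here - confirm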
143+
// Check counter first (counter > 0 is the common case: the loop continues)
144+
if counter > 0 {
145+
// Only read other values if we're continuing
146+
let idx_i = (*reg_base).secondary as i64;
147+
let step_i = (*reg_base.add(2)).secondary as i64;
148+
let new_idx = idx_i.wrapping_add(step_i);
149+
150+
// Write back - minimize writes
151+
(*reg_base).secondary = new_idx as u64;
152+
(*reg_base.add(1)).secondary = (counter - 1) as u64;
153+
(*reg_base.add(3)).secondary = new_idx as u64;
154+
// Note: primary (type tag) is not rewritten; this assumes R[A], R[A+1] and R[A+3] are all still TAG_INTEGER - TODO confirm the loop body cannot retag R[A+3]
155+
156+
(*frame_ptr).pc -= bx;
157+
return Ok(());
158+
}
120159

121-
// Fast path: All integers
160+
// Check if this is actually an integer loop (counter == 0 means loop ended)
161+
let idx = *reg_base;
162+
let combined_tags = (idx.primary | (*reg_base.add(1)).primary | (*reg_base.add(2)).primary) & TYPE_MASK;
163+
122164
if combined_tags == TAG_INTEGER {
123-
let count = counter_or_limit.secondary as i64;
124-
125-
if count > 0 {
126-
let idx_i = idx.secondary as i64;
127-
let step_i = step.secondary as i64;
128-
let new_idx = idx_i.wrapping_add(step_i);
129-
130-
*reg_base = LuaValue::integer(new_idx);
131-
*reg_base.add(1) = LuaValue::integer(count - 1);
132-
*reg_base.add(3) = LuaValue::integer(new_idx);
133-
134-
(*frame_ptr).pc -= bx;
135-
}
165+
// Integer loop ended (counter == 0)
166+
return Ok(());
136167
}
137-
// Slow path: at least one non-integer
138-
else {
139-
let step_tag = step.primary & TYPE_MASK;
140-
let counter_tag = counter_or_limit.primary & TYPE_MASK;
141-
let idx_tag = idx.primary & TYPE_MASK;
142-
143-
if (step_tag == TAG_FLOAT || step_tag == TAG_INTEGER)
144-
&& (counter_tag == TAG_FLOAT || counter_tag == TAG_INTEGER)
145-
&& (idx_tag == TAG_FLOAT || idx_tag == TAG_INTEGER)
146-
{
147-
let idx_f = if idx_tag == TAG_FLOAT {
148-
f64::from_bits(idx.secondary)
149-
} else {
150-
idx.secondary as i64 as f64
151-
};
152168

153-
let limit_f = if counter_tag == TAG_FLOAT {
154-
f64::from_bits(counter_or_limit.secondary)
155-
} else {
156-
counter_or_limit.secondary as i64 as f64
157-
};
169+
// Slow path: float loop
170+
let counter_or_limit = *reg_base.add(1);
171+
let step = *reg_base.add(2);
172+
173+
let step_tag = step.primary & TYPE_MASK;
174+
let limit_tag = counter_or_limit.primary & TYPE_MASK;
175+
let idx_tag = idx.primary & TYPE_MASK;
176+
177+
if (step_tag == TAG_FLOAT || step_tag == TAG_INTEGER)
178+
&& (limit_tag == TAG_FLOAT || limit_tag == TAG_INTEGER)
179+
&& (idx_tag == TAG_FLOAT || idx_tag == TAG_INTEGER)
180+
{
181+
let idx_f = if idx_tag == TAG_FLOAT {
182+
f64::from_bits(idx.secondary)
183+
} else {
184+
idx.secondary as i64 as f64
185+
};
158186

159-
let step_f = if step_tag == TAG_FLOAT {
160-
f64::from_bits(step.secondary)
161-
} else {
162-
step.secondary as i64 as f64
163-
};
187+
let limit_f = if limit_tag == TAG_FLOAT {
188+
f64::from_bits(counter_or_limit.secondary)
189+
} else {
190+
counter_or_limit.secondary as i64 as f64
191+
};
164192

165-
let new_idx_f = idx_f + step_f;
166-
let should_continue = if step_f > 0.0 {
167-
new_idx_f <= limit_f
168-
} else {
169-
new_idx_f >= limit_f
170-
};
193+
let step_f = if step_tag == TAG_FLOAT {
194+
f64::from_bits(step.secondary)
195+
} else {
196+
step.secondary as i64 as f64
197+
};
171198

172-
if should_continue {
173-
*reg_base = LuaValue::number(new_idx_f);
174-
*reg_base.add(3) = LuaValue::number(new_idx_f);
175-
(*frame_ptr).pc -= bx;
176-
}
199+
let new_idx_f = idx_f + step_f;
200+
let should_continue = if step_f > 0.0 {
201+
new_idx_f <= limit_f
177202
} else {
178-
return Err(vm.error("'for' values must be numbers".to_string()));
203+
new_idx_f >= limit_f
204+
};
205+
206+
if should_continue {
207+
*reg_base = LuaValue::number(new_idx_f);
208+
*reg_base.add(3) = LuaValue::number(new_idx_f);
209+
(*frame_ptr).pc -= bx;
179210
}
211+
} else {
212+
return Err(vm.error("'for' values must be numbers".to_string()));
180213
}
181214
}
182215

crates/luars/src/lua_vm/dispatcher/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,8 @@ pub fn dispatch_instruction(
132132
OpCode::SetList => exec_setlist(vm, instr),
133133
OpCode::Tbc => exec_tbc(vm, instr),
134134

135-
// Loop operations - use frame_ptr for FORLOOP hot path
136-
OpCode::ForPrep => exec_forprep(vm, instr),
135+
// Loop operations - use frame_ptr for hot paths
136+
OpCode::ForPrep => exec_forprep(vm, instr, frame_ptr),
137137
OpCode::ForLoop => exec_forloop(vm, instr, frame_ptr),
138138
OpCode::TForPrep => exec_tforprep(vm, instr),
139139
OpCode::TForCall => exec_tforcall(vm, instr),

0 commit comments

Comments
 (0)