Skip to content

Commit 30fa5d7

Browse files
committed
Fix for performance
1 parent b25e3aa commit 30fa5d7

File tree

3 files changed

+160
-135
lines changed

3 files changed

+160
-135
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
/target
2+
lua_src/lua-5.4.6

crates/luars/src/lua_vm/execute/control_instructions.rs

Lines changed: 96 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -276,112 +276,128 @@ pub fn exec_eq(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaR
276276

277277
/// LT A B k
278278
/// if ((R[A] < R[B]) ~= k) then pc++
279-
/// ULTRA-OPTIMIZED: Inline integer/float comparison, minimal type checks
279+
/// Direct integer fast path (as in C Lua), then float/float, mixed numeric and string/string fast paths; anything else is handed to `exec_lt_metamethod`.
280280
#[inline(always)]
281281
pub fn exec_lt(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
282282
let a = Instruction::get_a(instr) as usize;
283283
let b = Instruction::get_b(instr) as usize;
284284
let k = Instruction::get_k(instr);
285285

286-
let base_ptr = unsafe { (*frame_ptr).base_ptr };
287-
288-
// OPTIMIZATION: Use unsafe for unchecked register access (hot path)
289-
let (left, right) = unsafe {
286+
unsafe {
287+
let base_ptr = (*frame_ptr).base_ptr;
290288
let reg_base = vm.register_stack.as_ptr().add(base_ptr);
291-
(*reg_base.add(a), *reg_base.add(b))
292-
};
289+
let left = *reg_base.add(a);
290+
let right = *reg_base.add(b);
291+
292+
use crate::lua_value::{TAG_INTEGER, TAG_FLOAT, TYPE_MASK};
293+
let left_tag = left.primary & TYPE_MASK;
294+
let right_tag = right.primary & TYPE_MASK;
295+
296+
// Fast path: both integers (most common case in loops)
297+
if left_tag == TAG_INTEGER && right_tag == TAG_INTEGER {
298+
let is_less = (left.secondary as i64) < (right.secondary as i64);
299+
if is_less != k {
300+
(*frame_ptr).pc += 1;
301+
}
302+
return Ok(());
303+
}
293304

294-
// OPTIMIZATION: Direct type tag comparison (inline integer/float checks)
295-
use crate::lua_value::TYPE_MASK;
296-
let left_tag = left.primary & TYPE_MASK;
297-
let right_tag = right.primary & TYPE_MASK;
305+
// Fast path: both floats
306+
if left_tag == TAG_FLOAT && right_tag == TAG_FLOAT {
307+
let is_less = f64::from_bits(left.secondary) < f64::from_bits(right.secondary);
308+
if is_less != k {
309+
(*frame_ptr).pc += 1;
310+
}
311+
return Ok(());
312+
}
298313

299-
// Combined type check for fast paths (single branch!)
300-
// Note: Shift TAG values right by 48 bits to get small values (0-15) for combining
301-
let left_tag_small = left_tag >> 48;
302-
let right_tag_small = right_tag >> 48;
303-
let combined_tags = (left_tag_small << 4) | right_tag_small;
304-
305-
// Small tag values after >> 48: TAG_INTEGER=3, TAG_FLOAT=4, TAG_STRING=5
306-
const INT_INT: u64 = (3 << 4) | 3; // 0x33
307-
const FLOAT_FLOAT: u64 = (4 << 4) | 4; // 0x44
308-
const INT_FLOAT: u64 = (3 << 4) | 4; // 0x34
309-
const FLOAT_INT: u64 = (4 << 4) | 3; // 0x43
310-
const STRING_STRING: u64 = (5 << 4) | 5; // 0x55
314+
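// Mixed numeric types (one integer, one float): both sides are compared as f64. NOTE(review): `i64 as f64` rounds for magnitudes above 2^53, so the result can differ from an exact mathematical comparison - confirm this is acceptable.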
315+
if (left_tag == TAG_INTEGER || left_tag == TAG_FLOAT)
316+
&& (right_tag == TAG_INTEGER || right_tag == TAG_FLOAT) {
317+
let left_f = if left_tag == TAG_INTEGER {
318+
(left.secondary as i64) as f64
319+
} else {
320+
f64::from_bits(left.secondary)
321+
};
322+
let right_f = if right_tag == TAG_INTEGER {
323+
(right.secondary as i64) as f64
324+
} else {
325+
f64::from_bits(right.secondary)
326+
};
327+
let is_less = left_f < right_f;
328+
if is_less != k {
329+
(*frame_ptr).pc += 1;
330+
}
331+
return Ok(());
332+
}
311333

312-
let is_less = if combined_tags == INT_INT {
313-
// Fast integer path - single branch!
314-
(left.secondary as i64) < (right.secondary as i64)
315-
} else if combined_tags == FLOAT_FLOAT {
316-
// Fast float path
317-
f64::from_bits(left.secondary) < f64::from_bits(right.secondary)
318-
} else if combined_tags == INT_FLOAT {
319-
// Mixed: integer < float
320-
((left.secondary as i64) as f64) < f64::from_bits(right.secondary)
321-
} else if combined_tags == FLOAT_INT {
322-
// Mixed: float < integer
323-
f64::from_bits(left.secondary) < ((right.secondary as i64) as f64)
324-
} else if combined_tags == STRING_STRING {
325334
// String comparison
326-
left < right
327-
} else {
328-
// Try __lt metamethod
329-
let mm_key = vm.create_string("__lt");
330-
let mut found_metamethod = false;
335+
use crate::lua_value::TAG_STRING;
336+
if left_tag == TAG_STRING && right_tag == TAG_STRING {
337+
let is_less = left < right;
338+
if is_less != k {
339+
(*frame_ptr).pc += 1;
340+
}
341+
return Ok(());
342+
}
331343

332-
if let Some(mt) = vm.table_get_metatable(&left) {
344+
// Slow path: metamethod
345+
exec_lt_metamethod(vm, left, right, k, frame_ptr)
346+
}
347+
}
348+
349+
/// Slow path for LT metamethod lookup
350+
#[cold]
351+
#[inline(never)]
352+
fn exec_lt_metamethod(
353+
vm: &mut LuaVM,
354+
left: crate::LuaValue,
355+
right: crate::LuaValue,
356+
k: bool,
357+
frame_ptr: *mut LuaCallFrame,
358+
) -> LuaResult<()> {
359+
let mm_key = vm.create_string("__lt");
360+
let mut found_metamethod = false;
361+
362+
if let Some(mt) = vm.table_get_metatable(&left) {
363+
if let Some(metamethod) = vm.table_get_with_meta(&mt, &mm_key) {
364+
if !metamethod.is_nil() {
365+
if let Some(result) = vm.call_metamethod(&metamethod, &[left, right])? {
366+
let is_less_result = !result.is_nil() && result.as_bool().unwrap_or(true);
367+
if is_less_result != k {
368+
unsafe { (*frame_ptr).pc += 1; }
369+
}
370+
return Ok(());
371+
}
372+
found_metamethod = true;
373+
}
374+
}
375+
}
376+
377+
if !found_metamethod {
378+
if let Some(mt) = vm.table_get_metatable(&right) {
333379
if let Some(metamethod) = vm.table_get_with_meta(&mt, &mm_key) {
334380
if !metamethod.is_nil() {
335381
if let Some(result) = vm.call_metamethod(&metamethod, &[left, right])? {
336382
let is_less_result = !result.is_nil() && result.as_bool().unwrap_or(true);
337383
if is_less_result != k {
338-
unsafe {
339-
(*frame_ptr).pc += 1;
340-
}
384+
unsafe { (*frame_ptr).pc += 1; }
341385
}
342386
return Ok(());
343387
}
344388
found_metamethod = true;
345389
}
346390
}
347391
}
348-
349-
if !found_metamethod {
350-
if let Some(mt) = vm.table_get_metatable(&right) {
351-
if let Some(metamethod) = vm.table_get_with_meta(&mt, &mm_key) {
352-
if !metamethod.is_nil() {
353-
if let Some(result) = vm.call_metamethod(&metamethod, &[left, right])? {
354-
let is_less_result =
355-
!result.is_nil() && result.as_bool().unwrap_or(true);
356-
if is_less_result != k {
357-
unsafe {
358-
(*frame_ptr).pc += 1;
359-
}
360-
}
361-
return Ok(());
362-
}
363-
found_metamethod = true;
364-
}
365-
}
366-
}
367-
}
368-
369-
if !found_metamethod {
370-
return Err(vm.error(format!(
371-
"attempt to compare {} with {}",
372-
left.type_name(),
373-
right.type_name()
374-
)));
375-
}
376-
return Ok(());
377-
};
378-
379-
if is_less != k {
380-
unsafe {
381-
(*frame_ptr).pc += 1;
382-
}
383392
}
384393

394+
if !found_metamethod {
395+
return Err(vm.error(format!(
396+
"attempt to compare {} with {}",
397+
left.type_name(),
398+
right.type_name()
399+
)));
400+
}
385401
Ok(())
386402
}
387403

crates/luars/src/lua_vm/execute/loop_instructions.rs

Lines changed: 63 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ pub fn exec_forprep(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) ->
131131
/// R[A]+=R[A+2];
132132
/// if R[A] <?= R[A+1] then { pc-=Bx; R[A+3]=R[A] }
133133
///
134-
/// ULTRA-OPTIMIZED: Matches Lua's chgivalue - only update secondary field for integers
134+
/// ULTRA-OPTIMIZED: Check counter FIRST (most common path), minimize reads
135135
#[inline(always)]
136136
pub fn exec_forloop(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
137137
let a = Instruction::get_a(instr) as usize;
@@ -141,72 +141,80 @@ pub fn exec_forloop(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) ->
141141
let base_ptr = (*frame_ptr).base_ptr;
142142
let reg_base = vm.register_stack.as_mut_ptr().add(base_ptr + a);
143143

144-
// Check types first to distinguish integer vs float loop
144+
// Read counter first - this is the hot path check
145+
// For integer loops, R[A+1] stores remaining iteration count
146+
let counter = (*reg_base.add(1)).secondary as i64;
147+
148+
// Fast path: integer loop with counter > 0
149+
// Check counter first (most common exit condition)
150+
if counter > 0 {
151+
// Only read other values if we're continuing
152+
let idx_i = (*reg_base).secondary as i64;
153+
let step_i = (*reg_base.add(2)).secondary as i64;
154+
let new_idx = idx_i.wrapping_add(step_i);
155+
156+
// Write back - minimize writes, type tags stay TAG_INTEGER
157+
(*reg_base).secondary = new_idx as u64;
158+
(*reg_base.add(1)).secondary = (counter - 1) as u64;
159+
(*reg_base.add(3)).secondary = new_idx as u64;
160+
161+
(*frame_ptr).pc -= bx;
162+
return Ok(());
163+
}
164+
165+
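// Reached when the raw R[A+1] bits are <= 0 as i64: check whether this is an integer loop that has ended (counter == 0). NOTE(review): the OR of the three tags equals TAG_INTEGER for any mix of tags whose bits are a subset of TAG_INTEGER's - confirm no non-integer tag qualifies.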
145166
let idx = *reg_base;
167+
let combined_tags = (idx.primary | (*reg_base.add(1)).primary | (*reg_base.add(2)).primary) & TYPE_MASK;
168+
169+
if combined_tags == TAG_INTEGER {
170+
// Integer loop ended (counter == 0)
171+
return Ok(());
172+
}
173+
174+
// Slow path: float loop
146175
let counter_or_limit = *reg_base.add(1);
147176
let step = *reg_base.add(2);
148177

149-
let idx_tag = idx.primary & TYPE_MASK;
150-
let limit_tag = counter_or_limit.primary & TYPE_MASK;
151178
let step_tag = step.primary & TYPE_MASK;
179+
let limit_tag = counter_or_limit.primary & TYPE_MASK;
180+
let idx_tag = idx.primary & TYPE_MASK;
152181

153-
// Fast path: pure integer loop (all three values are integers)
154-
if idx_tag == TAG_INTEGER && limit_tag == TAG_INTEGER && step_tag == TAG_INTEGER {
155-
// Read counter - R[A+1] stores remaining iteration count for integer loops
156-
let counter = counter_or_limit.secondary as i64;
182+
if (step_tag == TAG_FLOAT || step_tag == TAG_INTEGER)
183+
&& (limit_tag == TAG_FLOAT || limit_tag == TAG_INTEGER)
184+
&& (idx_tag == TAG_FLOAT || idx_tag == TAG_INTEGER)
185+
{
186+
let idx_f = if idx_tag == TAG_FLOAT {
187+
f64::from_bits(idx.secondary)
188+
} else {
189+
idx.secondary as i64 as f64
190+
};
191+
192+
let limit_f = if limit_tag == TAG_FLOAT {
193+
f64::from_bits(counter_or_limit.secondary)
194+
} else {
195+
counter_or_limit.secondary as i64 as f64
196+
};
157197

158-
if counter > 0 {
159-
let idx_i = idx.secondary as i64;
160-
let step_i = step.secondary as i64;
161-
let new_idx = idx_i.wrapping_add(step_i);
198+
let step_f = if step_tag == TAG_FLOAT {
199+
f64::from_bits(step.secondary)
200+
} else {
201+
step.secondary as i64 as f64
202+
};
162203

163-
// Use chgivalue pattern - only update secondary field, type tags stay the same
164-
(*reg_base).secondary = new_idx as u64;
165-
(*reg_base.add(1)).secondary = (counter - 1) as u64;
166-
(*reg_base.add(3)).secondary = new_idx as u64;
204+
let new_idx_f = idx_f + step_f;
205+
let should_continue = if step_f > 0.0 {
206+
new_idx_f <= limit_f
207+
} else {
208+
new_idx_f >= limit_f
209+
};
167210

211+
if should_continue {
212+
*reg_base = LuaValue::number(new_idx_f);
213+
*reg_base.add(3) = LuaValue::number(new_idx_f);
168214
(*frame_ptr).pc -= bx;
169215
}
170-
// counter == 0 means loop ended, just fall through
171-
return Ok(());
172-
}
173-
174-
// Slow path: float loop (at least one value is float)
175-
let idx_f = if idx_tag == TAG_FLOAT {
176-
f64::from_bits(idx.secondary)
177-
} else if idx_tag == TAG_INTEGER {
178-
idx.secondary as i64 as f64
179-
} else {
180-
return Err(vm.error("'for' index must be a number".to_string()));
181-
};
182-
183-
let limit_f = if limit_tag == TAG_FLOAT {
184-
f64::from_bits(counter_or_limit.secondary)
185-
} else if limit_tag == TAG_INTEGER {
186-
counter_or_limit.secondary as i64 as f64
187-
} else {
188-
return Err(vm.error("'for' limit must be a number".to_string()));
189-
};
190-
191-
let step_f = if step_tag == TAG_FLOAT {
192-
f64::from_bits(step.secondary)
193-
} else if step_tag == TAG_INTEGER {
194-
step.secondary as i64 as f64
195216
} else {
196-
return Err(vm.error("'for' step must be a number".to_string()));
197-
};
198-
199-
let new_idx_f = idx_f + step_f;
200-
let should_continue = if step_f > 0.0 {
201-
new_idx_f <= limit_f
202-
} else {
203-
new_idx_f >= limit_f
204-
};
205-
206-
if should_continue {
207-
*reg_base = LuaValue::number(new_idx_f);
208-
*reg_base.add(3) = LuaValue::number(new_idx_f);
209-
(*frame_ptr).pc -= bx;
217+
return Err(vm.error("'for' values must be numbers".to_string()));
210218
}
211219
}
212220

0 commit comments

Comments
 (0)