Skip to content

Commit 3adb7dc

Browse files
committed
fix performance
1 parent 8506b1a commit 3adb7dc

File tree

3 files changed

+68
-30
lines changed

3 files changed

+68
-30
lines changed

crates/luars/src/lua_vm/execute/arithmetic_instructions.rs

Lines changed: 68 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,35 @@ use crate::{
77
lua_vm::{Instruction, LuaCallFrame, LuaResult, LuaVM},
88
};
99

10+
/// Slow path for ADD (float/float and mixed integer/float operands) - split into a separate `#[cold]`, never-inlined function to keep it out of the hot integer path
11+
#[cold]
12+
#[inline(never)]
13+
fn exec_add_slow(
14+
reg_base: *mut LuaValue,
15+
a: usize,
16+
left: LuaValue,
17+
right: LuaValue,
18+
frame_ptr: *mut LuaCallFrame,
19+
) {
20+
unsafe {
21+
let left_tag = left.primary & TYPE_MASK;
22+
let right_tag = right.primary & TYPE_MASK;
23+
24+
let result = if left_tag == TAG_FLOAT && right_tag == TAG_FLOAT {
25+
LuaValue::number(f64::from_bits(left.secondary) + f64::from_bits(right.secondary))
26+
} else if left_tag == TAG_INTEGER && right_tag == TAG_FLOAT {
27+
LuaValue::number((left.secondary as i64) as f64 + f64::from_bits(right.secondary))
28+
} else if left_tag == TAG_FLOAT && right_tag == TAG_INTEGER {
29+
LuaValue::number(f64::from_bits(left.secondary) + (right.secondary as i64) as f64)
30+
} else {
31+
return;
32+
};
33+
34+
*reg_base.add(a) = result;
35+
(*frame_ptr).pc += 1;
36+
}
37+
}
38+
1039
/// ADD: R[A] = R[B] + R[C]
1140
/// OPTIMIZED: Matches Lua C's setivalue behavior - always write both fields
1241
#[inline(always)]
@@ -21,7 +50,7 @@ pub fn exec_add(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
2150
let left = *reg_base.add(b);
2251
let right = *reg_base.add(c);
2352

24-
// Combined type check - if result is 0, both are integers
53+
// Combined type check - if result is TAG_INTEGER, both are integers
2554
let combined_tags = (left.primary | right.primary) & TYPE_MASK;
2655

2756
if combined_tags == TAG_INTEGER {
@@ -35,16 +64,31 @@ pub fn exec_add(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
3564
return;
3665
}
3766

38-
// Slow path: Float operations
67+
// Slow path: float and mixed integer/float operands - handled in a separate cold function
68+
exec_add_slow(reg_base, a, left, right, frame_ptr);
69+
}
70+
}
71+
72+
/// Slow path for SUB (float/float and mixed integer/float operands)
73+
#[cold]
74+
#[inline(never)]
75+
fn exec_sub_slow(
76+
reg_base: *mut LuaValue,
77+
a: usize,
78+
left: LuaValue,
79+
right: LuaValue,
80+
frame_ptr: *mut LuaCallFrame,
81+
) {
82+
unsafe {
3983
let left_tag = left.primary & TYPE_MASK;
4084
let right_tag = right.primary & TYPE_MASK;
4185

4286
let result = if left_tag == TAG_FLOAT && right_tag == TAG_FLOAT {
43-
LuaValue::number(f64::from_bits(left.secondary) + f64::from_bits(right.secondary))
87+
LuaValue::number(f64::from_bits(left.secondary) - f64::from_bits(right.secondary))
4488
} else if left_tag == TAG_INTEGER && right_tag == TAG_FLOAT {
45-
LuaValue::number((left.secondary as i64) as f64 + f64::from_bits(right.secondary))
89+
LuaValue::number((left.secondary as i64) as f64 - f64::from_bits(right.secondary))
4690
} else if left_tag == TAG_FLOAT && right_tag == TAG_INTEGER {
47-
LuaValue::number(f64::from_bits(left.secondary) + (right.secondary as i64) as f64)
91+
LuaValue::number(f64::from_bits(left.secondary) - (right.secondary as i64) as f64)
4892
} else {
4993
return;
5094
};
@@ -80,15 +124,30 @@ pub fn exec_sub(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
80124
return;
81125
}
82126

127+
exec_sub_slow(reg_base, a, left, right, frame_ptr);
128+
}
129+
}
130+
131+
/// Slow path for MUL (float/float and mixed integer/float operands)
132+
#[cold]
133+
#[inline(never)]
134+
fn exec_mul_slow(
135+
reg_base: *mut LuaValue,
136+
a: usize,
137+
left: LuaValue,
138+
right: LuaValue,
139+
frame_ptr: *mut LuaCallFrame,
140+
) {
141+
unsafe {
83142
let left_tag = left.primary & TYPE_MASK;
84143
let right_tag = right.primary & TYPE_MASK;
85144

86145
let result = if left_tag == TAG_FLOAT && right_tag == TAG_FLOAT {
87-
LuaValue::number(f64::from_bits(left.secondary) - f64::from_bits(right.secondary))
146+
LuaValue::number(f64::from_bits(left.secondary) * f64::from_bits(right.secondary))
88147
} else if left_tag == TAG_INTEGER && right_tag == TAG_FLOAT {
89-
LuaValue::number((left.secondary as i64) as f64 - f64::from_bits(right.secondary))
148+
LuaValue::number((left.secondary as i64) as f64 * f64::from_bits(right.secondary))
90149
} else if left_tag == TAG_FLOAT && right_tag == TAG_INTEGER {
91-
LuaValue::number(f64::from_bits(left.secondary) - (right.secondary as i64) as f64)
150+
LuaValue::number(f64::from_bits(left.secondary) * (right.secondary as i64) as f64)
92151
} else {
93152
return;
94153
};
@@ -124,27 +183,12 @@ pub fn exec_mul(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
124183
return;
125184
}
126185

127-
let left_tag = left.primary & TYPE_MASK;
128-
let right_tag = right.primary & TYPE_MASK;
129-
130-
let result = if left_tag == TAG_FLOAT && right_tag == TAG_FLOAT {
131-
LuaValue::number(f64::from_bits(left.secondary) * f64::from_bits(right.secondary))
132-
} else if left_tag == TAG_INTEGER && right_tag == TAG_FLOAT {
133-
LuaValue::number((left.secondary as i64) as f64 * f64::from_bits(right.secondary))
134-
} else if left_tag == TAG_FLOAT && right_tag == TAG_INTEGER {
135-
LuaValue::number(f64::from_bits(left.secondary) * (right.secondary as i64) as f64)
136-
} else {
137-
return;
138-
};
139-
140-
*reg_base.add(a) = result;
141-
(*frame_ptr).pc += 1;
186+
exec_mul_slow(reg_base, a, left, right, frame_ptr);
142187
}
143188
}
144189

145190
/// DIV: R[A] = R[B] / R[C]
146191
/// Division always returns float in Lua
147-
#[inline(always)]
148192
pub fn exec_div(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
149193
let a = Instruction::get_a(instr) as usize;
150194
let b = Instruction::get_b(instr) as usize;
@@ -181,7 +225,6 @@ pub fn exec_div(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
181225
}
182226

183227
/// IDIV: R[A] = R[B] // R[C] (floor division)
184-
#[inline(always)]
185228
pub fn exec_idiv(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
186229
let a = Instruction::get_a(instr) as usize;
187230
let b = Instruction::get_b(instr) as usize;
@@ -231,7 +274,6 @@ pub fn exec_idiv(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
231274
}
232275

233276
/// MOD: R[A] = R[B] % R[C]
234-
#[inline(always)]
235277
pub fn exec_mod(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
236278
let a = Instruction::get_a(instr) as usize;
237279
let b = Instruction::get_b(instr) as usize;
@@ -282,7 +324,6 @@ pub fn exec_mod(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
282324
}
283325

284326
/// POW: R[A] = R[B] ^ R[C]
285-
#[inline(always)]
286327
pub fn exec_pow(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
287328
let a = Instruction::get_a(instr) as usize;
288329
let b = Instruction::get_b(instr) as usize;
@@ -308,7 +349,6 @@ pub fn exec_pow(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
308349
}
309350

310351
/// UNM: R[A] = -R[B] (unary minus)
311-
#[inline(always)]
312352
pub fn exec_unm(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
313353
let a = Instruction::get_a(instr) as usize;
314354
let b = Instruction::get_b(instr) as usize;

crates/luars/src/lua_vm/execute/control_instructions.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ use crate::lua_vm::{Instruction, LuaCallFrame, LuaError, LuaResult, LuaVM};
77

88
/// RETURN A B C k
99
/// return R[A], ... ,R[A+B-2]
10-
#[inline(always)]
1110
pub fn exec_return(
1211
vm: &mut LuaVM,
1312
instr: u32,

crates/luars/src/lua_vm/execute/load_instructions.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ use crate::lua_vm::{Instruction, LuaCallFrame, LuaVM};
1010
///
1111
/// This instruction moves vararg arguments to a safe location after max_stack_size,
1212
/// so they won't be overwritten by local variable operations.
13-
#[inline(always)]
1413
pub fn exec_varargprep(vm: &mut LuaVM, instr: u32, _frame_ptr: *mut LuaCallFrame) {
1514
let a = Instruction::get_a(instr) as usize; // number of fixed params
1615

0 commit comments

Comments
 (0)