@@ -7,6 +7,35 @@ use crate::{
77 lua_vm:: { Instruction , LuaCallFrame , LuaResult , LuaVM } ,
88} ;
99
10+ /// Slow path for ADD - separate function to hint branch predictor
11+ #[ cold]
12+ #[ inline( never) ]
13+ fn exec_add_slow (
14+ reg_base : * mut LuaValue ,
15+ a : usize ,
16+ left : LuaValue ,
17+ right : LuaValue ,
18+ frame_ptr : * mut LuaCallFrame ,
19+ ) {
20+ unsafe {
21+ let left_tag = left. primary & TYPE_MASK ;
22+ let right_tag = right. primary & TYPE_MASK ;
23+
24+ let result = if left_tag == TAG_FLOAT && right_tag == TAG_FLOAT {
25+ LuaValue :: number ( f64:: from_bits ( left. secondary ) + f64:: from_bits ( right. secondary ) )
26+ } else if left_tag == TAG_INTEGER && right_tag == TAG_FLOAT {
27+ LuaValue :: number ( ( left. secondary as i64 ) as f64 + f64:: from_bits ( right. secondary ) )
28+ } else if left_tag == TAG_FLOAT && right_tag == TAG_INTEGER {
29+ LuaValue :: number ( f64:: from_bits ( left. secondary ) + ( right. secondary as i64 ) as f64 )
30+ } else {
31+ return ;
32+ } ;
33+
34+ * reg_base. add ( a) = result;
35+ ( * frame_ptr) . pc += 1 ;
36+ }
37+ }
38+
1039/// ADD: R[A] = R[B] + R[C]
1140/// OPTIMIZED: Matches Lua C's setivalue behavior - always write both fields
1241#[ inline( always) ]
@@ -21,7 +50,7 @@ pub fn exec_add(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
2150 let left = * reg_base. add ( b) ;
2251 let right = * reg_base. add ( c) ;
2352
24- // Combined type check - if result is 0 , both are integers
53+ // Combined type check - if result is TAG_INTEGER , both are integers
2554 let combined_tags = ( left. primary | right. primary ) & TYPE_MASK ;
2655
2756 if combined_tags == TAG_INTEGER {
@@ -35,16 +64,31 @@ pub fn exec_add(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
3564 return ;
3665 }
3766
38- // Slow path: Float operations
67+ // Slow path: Float operations - in separate cold function
68+ exec_add_slow ( reg_base, a, left, right, frame_ptr) ;
69+ }
70+ }
71+
72+ /// Slow path for SUB
73+ #[ cold]
74+ #[ inline( never) ]
75+ fn exec_sub_slow (
76+ reg_base : * mut LuaValue ,
77+ a : usize ,
78+ left : LuaValue ,
79+ right : LuaValue ,
80+ frame_ptr : * mut LuaCallFrame ,
81+ ) {
82+ unsafe {
3983 let left_tag = left. primary & TYPE_MASK ;
4084 let right_tag = right. primary & TYPE_MASK ;
4185
4286 let result = if left_tag == TAG_FLOAT && right_tag == TAG_FLOAT {
43- LuaValue :: number ( f64:: from_bits ( left. secondary ) + f64:: from_bits ( right. secondary ) )
87+ LuaValue :: number ( f64:: from_bits ( left. secondary ) - f64:: from_bits ( right. secondary ) )
4488 } else if left_tag == TAG_INTEGER && right_tag == TAG_FLOAT {
45- LuaValue :: number ( ( left. secondary as i64 ) as f64 + f64:: from_bits ( right. secondary ) )
89+ LuaValue :: number ( ( left. secondary as i64 ) as f64 - f64:: from_bits ( right. secondary ) )
4690 } else if left_tag == TAG_FLOAT && right_tag == TAG_INTEGER {
47- LuaValue :: number ( f64:: from_bits ( left. secondary ) + ( right. secondary as i64 ) as f64 )
91+ LuaValue :: number ( f64:: from_bits ( left. secondary ) - ( right. secondary as i64 ) as f64 )
4892 } else {
4993 return ;
5094 } ;
@@ -80,15 +124,30 @@ pub fn exec_sub(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
80124 return ;
81125 }
82126
127+ exec_sub_slow ( reg_base, a, left, right, frame_ptr) ;
128+ }
129+ }
130+
131+ /// Slow path for MUL
132+ #[ cold]
133+ #[ inline( never) ]
134+ fn exec_mul_slow (
135+ reg_base : * mut LuaValue ,
136+ a : usize ,
137+ left : LuaValue ,
138+ right : LuaValue ,
139+ frame_ptr : * mut LuaCallFrame ,
140+ ) {
141+ unsafe {
83142 let left_tag = left. primary & TYPE_MASK ;
84143 let right_tag = right. primary & TYPE_MASK ;
85144
86145 let result = if left_tag == TAG_FLOAT && right_tag == TAG_FLOAT {
87- LuaValue :: number ( f64:: from_bits ( left. secondary ) - f64:: from_bits ( right. secondary ) )
146+ LuaValue :: number ( f64:: from_bits ( left. secondary ) * f64:: from_bits ( right. secondary ) )
88147 } else if left_tag == TAG_INTEGER && right_tag == TAG_FLOAT {
89- LuaValue :: number ( ( left. secondary as i64 ) as f64 - f64:: from_bits ( right. secondary ) )
148+ LuaValue :: number ( ( left. secondary as i64 ) as f64 * f64:: from_bits ( right. secondary ) )
90149 } else if left_tag == TAG_FLOAT && right_tag == TAG_INTEGER {
91- LuaValue :: number ( f64:: from_bits ( left. secondary ) - ( right. secondary as i64 ) as f64 )
150+ LuaValue :: number ( f64:: from_bits ( left. secondary ) * ( right. secondary as i64 ) as f64 )
92151 } else {
93152 return ;
94153 } ;
@@ -124,27 +183,12 @@ pub fn exec_mul(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
124183 return ;
125184 }
126185
127- let left_tag = left. primary & TYPE_MASK ;
128- let right_tag = right. primary & TYPE_MASK ;
129-
130- let result = if left_tag == TAG_FLOAT && right_tag == TAG_FLOAT {
131- LuaValue :: number ( f64:: from_bits ( left. secondary ) * f64:: from_bits ( right. secondary ) )
132- } else if left_tag == TAG_INTEGER && right_tag == TAG_FLOAT {
133- LuaValue :: number ( ( left. secondary as i64 ) as f64 * f64:: from_bits ( right. secondary ) )
134- } else if left_tag == TAG_FLOAT && right_tag == TAG_INTEGER {
135- LuaValue :: number ( f64:: from_bits ( left. secondary ) * ( right. secondary as i64 ) as f64 )
136- } else {
137- return ;
138- } ;
139-
140- * reg_base. add ( a) = result;
141- ( * frame_ptr) . pc += 1 ;
186+ exec_mul_slow ( reg_base, a, left, right, frame_ptr) ;
142187 }
143188}
144189
145190/// DIV: R[A] = R[B] / R[C]
146191/// Division always returns float in Lua
147- #[ inline( always) ]
148192pub fn exec_div ( vm : & mut LuaVM , instr : u32 , frame_ptr : * mut LuaCallFrame ) {
149193 let a = Instruction :: get_a ( instr) as usize ;
150194 let b = Instruction :: get_b ( instr) as usize ;
@@ -181,7 +225,6 @@ pub fn exec_div(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
181225}
182226
183227/// IDIV: R[A] = R[B] // R[C] (floor division)
184- #[ inline( always) ]
185228pub fn exec_idiv ( vm : & mut LuaVM , instr : u32 , frame_ptr : * mut LuaCallFrame ) {
186229 let a = Instruction :: get_a ( instr) as usize ;
187230 let b = Instruction :: get_b ( instr) as usize ;
@@ -231,7 +274,6 @@ pub fn exec_idiv(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
231274}
232275
233276/// MOD: R[A] = R[B] % R[C]
234- #[ inline( always) ]
235277pub fn exec_mod ( vm : & mut LuaVM , instr : u32 , frame_ptr : * mut LuaCallFrame ) {
236278 let a = Instruction :: get_a ( instr) as usize ;
237279 let b = Instruction :: get_b ( instr) as usize ;
@@ -282,7 +324,6 @@ pub fn exec_mod(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
282324}
283325
284326/// POW: R[A] = R[B] ^ R[C]
285- #[ inline( always) ]
286327pub fn exec_pow ( vm : & mut LuaVM , instr : u32 , frame_ptr : * mut LuaCallFrame ) {
287328 let a = Instruction :: get_a ( instr) as usize ;
288329 let b = Instruction :: get_b ( instr) as usize ;
@@ -308,7 +349,6 @@ pub fn exec_pow(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
308349}
309350
310351/// UNM: R[A] = -R[B] (unary minus)
311- #[ inline( always) ]
312352pub fn exec_unm ( vm : & mut LuaVM , instr : u32 , frame_ptr : * mut LuaCallFrame ) -> LuaResult < ( ) > {
313353 let a = Instruction :: get_a ( instr) as usize ;
314354 let b = Instruction :: get_b ( instr) as usize ;
0 commit comments