Skip to content

Commit 0a6e706

Browse files
committed
optimize code
1 parent e9bb684 commit 0a6e706

File tree

5 files changed

+94
-149
lines changed

5 files changed

+94
-149
lines changed

crates/luars/src/lua_vm/dispatcher/arithmetic_instructions.rs

Lines changed: 59 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -4,153 +4,122 @@
44
use crate::{
55
LuaValue,
66
lua_value::{TAG_FLOAT, TAG_INTEGER, TYPE_MASK},
7-
lua_vm::{Instruction, LuaResult, LuaVM},
7+
lua_vm::{Instruction, LuaCallFrame, LuaResult, LuaVM},
88
};
99

1010
/// ADD: R[A] = R[B] + R[C]
11-
/// ULTRA-OPTIMIZED: Combined type check (branchless for integer fast path)
11+
/// ULTRA-OPTIMIZED: Uses pre-fetched frame_ptr to avoid Vec lookups
1212
#[inline(always)]
13-
pub fn exec_add(vm: &mut LuaVM, instr: u32) -> LuaResult<()> {
13+
pub fn exec_add(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
1414
let a = Instruction::get_a(instr) as usize;
1515
let b = Instruction::get_b(instr) as usize;
1616
let c = Instruction::get_c(instr) as usize;
1717

1818
unsafe {
19-
let base_ptr = (*vm.frames.last().unwrap_unchecked()).base_ptr;
19+
let base_ptr = (*frame_ptr).base_ptr;
2020
let reg_base = vm.register_stack.as_ptr().add(base_ptr);
2121
let left = *reg_base.add(b);
2222
let right = *reg_base.add(c);
2323

24-
// OPTIMIZATION: Combined type check (same as FORLOOP)
25-
// If both are integers, (left.primary | right.primary) & TYPE_MASK == TAG_INTEGER
24+
// OPTIMIZATION: Combined type check
2625
let combined_tags = (left.primary | right.primary) & TYPE_MASK;
2726

2827
// Fast path: Both integers (single branch!)
2928
if combined_tags == TAG_INTEGER {
3029
let result =
3130
LuaValue::integer((left.secondary as i64).wrapping_add(right.secondary as i64));
3231
*vm.register_stack.as_mut_ptr().add(base_ptr + a) = result;
33-
vm.current_frame_mut().pc += 1;
32+
(*frame_ptr).pc += 1;
3433
return Ok(());
3534
}
3635

3736
// Slow path: Check individual types
3837
let left_tag = left.primary & TYPE_MASK;
3938
let right_tag = right.primary & TYPE_MASK;
4039

41-
// Both floats
4240
let result = if left_tag == TAG_FLOAT && right_tag == TAG_FLOAT {
4341
LuaValue::number(f64::from_bits(left.secondary) + f64::from_bits(right.secondary))
44-
}
45-
// Mixed: integer + float
46-
else if left_tag == TAG_INTEGER && right_tag == TAG_FLOAT {
42+
} else if left_tag == TAG_INTEGER && right_tag == TAG_FLOAT {
4743
LuaValue::number((left.secondary as i64) as f64 + f64::from_bits(right.secondary))
48-
}
49-
// Mixed: float + integer
50-
else if left_tag == TAG_FLOAT && right_tag == TAG_INTEGER {
44+
} else if left_tag == TAG_FLOAT && right_tag == TAG_INTEGER {
5145
LuaValue::number(f64::from_bits(left.secondary) + (right.secondary as i64) as f64)
5246
} else {
53-
return add_error(left, right);
47+
return Ok(());
5448
};
5549

5650
*vm.register_stack.as_mut_ptr().add(base_ptr + a) = result;
57-
vm.current_frame_mut().pc += 1;
51+
(*frame_ptr).pc += 1;
5852
Ok(())
5953
}
6054
}
6155

62-
#[cold]
63-
#[inline(never)]
64-
fn add_error(_left: LuaValue, _right: LuaValue) -> LuaResult<()> {
65-
// Don't throw error - let MMBIN handle metamethod
66-
Ok(())
67-
}
68-
6956
/// SUB: R[A] = R[B] - R[C]
70-
/// ULTRA-OPTIMIZED: Combined type check (branchless for integer fast path)
57+
/// ULTRA-OPTIMIZED: Uses pre-fetched frame_ptr
7158
#[inline(always)]
72-
pub fn exec_sub(vm: &mut LuaVM, instr: u32) -> LuaResult<()> {
59+
pub fn exec_sub(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
7360
let a = Instruction::get_a(instr) as usize;
7461
let b = Instruction::get_b(instr) as usize;
7562
let c = Instruction::get_c(instr) as usize;
7663

7764
unsafe {
78-
let base_ptr = (*vm.frames.last().unwrap_unchecked()).base_ptr;
65+
let base_ptr = (*frame_ptr).base_ptr;
7966
let reg_base = vm.register_stack.as_ptr().add(base_ptr);
8067
let left = *reg_base.add(b);
8168
let right = *reg_base.add(c);
8269

83-
// OPTIMIZATION: Combined type check
8470
let combined_tags = (left.primary | right.primary) & TYPE_MASK;
8571

86-
// Fast path: Both integers (single branch!)
8772
if combined_tags == TAG_INTEGER {
8873
let result =
8974
LuaValue::integer((left.secondary as i64).wrapping_sub(right.secondary as i64));
9075
*vm.register_stack.as_mut_ptr().add(base_ptr + a) = result;
91-
vm.current_frame_mut().pc += 1;
76+
(*frame_ptr).pc += 1;
9277
return Ok(());
9378
}
9479

95-
// Slow path: Check individual types
9680
let left_tag = left.primary & TYPE_MASK;
9781
let right_tag = right.primary & TYPE_MASK;
9882

99-
// Both floats
10083
let result = if left_tag == TAG_FLOAT && right_tag == TAG_FLOAT {
10184
LuaValue::number(f64::from_bits(left.secondary) - f64::from_bits(right.secondary))
102-
}
103-
// Mixed: integer - float
104-
else if left_tag == TAG_INTEGER && right_tag == TAG_FLOAT {
85+
} else if left_tag == TAG_INTEGER && right_tag == TAG_FLOAT {
10586
LuaValue::number((left.secondary as i64) as f64 - f64::from_bits(right.secondary))
106-
}
107-
// Mixed: float - integer
108-
else if left_tag == TAG_FLOAT && right_tag == TAG_INTEGER {
87+
} else if left_tag == TAG_FLOAT && right_tag == TAG_INTEGER {
10988
LuaValue::number(f64::from_bits(left.secondary) - (right.secondary as i64) as f64)
11089
} else {
111-
return sub_error(left, right);
90+
return Ok(());
11291
};
11392

11493
*vm.register_stack.as_mut_ptr().add(base_ptr + a) = result;
115-
vm.current_frame_mut().pc += 1;
94+
(*frame_ptr).pc += 1;
11695
Ok(())
11796
}
11897
}
11998

120-
#[cold]
121-
#[inline(never)]
122-
fn sub_error(_left: LuaValue, _right: LuaValue) -> LuaResult<()> {
123-
// Don't throw error - let MMBIN handle metamethod
124-
Ok(())
125-
}
126-
12799
/// MUL: R[A] = R[B] * R[C]
128-
/// ULTRA-OPTIMIZED: Combined type check
100+
/// ULTRA-OPTIMIZED: Uses pre-fetched frame_ptr
129101
#[inline(always)]
130-
pub fn exec_mul(vm: &mut LuaVM, instr: u32) -> LuaResult<()> {
102+
pub fn exec_mul(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
131103
let a = Instruction::get_a(instr) as usize;
132104
let b = Instruction::get_b(instr) as usize;
133105
let c = Instruction::get_c(instr) as usize;
134106

135107
unsafe {
136-
let base_ptr = (*vm.frames.last().unwrap_unchecked()).base_ptr;
108+
let base_ptr = (*frame_ptr).base_ptr;
137109
let reg_base = vm.register_stack.as_ptr().add(base_ptr);
138110
let left = *reg_base.add(b);
139111
let right = *reg_base.add(c);
140112

141-
// OPTIMIZATION: Combined type check
142113
let combined_tags = (left.primary | right.primary) & TYPE_MASK;
143114

144-
// Fast path: Both integers
145115
if combined_tags == TAG_INTEGER {
146116
let result =
147117
LuaValue::integer((left.secondary as i64).wrapping_mul(right.secondary as i64));
148118
*vm.register_stack.as_mut_ptr().add(base_ptr + a) = result;
149-
vm.current_frame_mut().pc += 1;
119+
(*frame_ptr).pc += 1;
150120
return Ok(());
151121
}
152122

153-
// Slow path
154123
let left_tag = left.primary & TYPE_MASK;
155124
let right_tag = right.primary & TYPE_MASK;
156125

@@ -161,78 +130,67 @@ pub fn exec_mul(vm: &mut LuaVM, instr: u32) -> LuaResult<()> {
161130
} else if left_tag == TAG_FLOAT && right_tag == TAG_INTEGER {
162131
LuaValue::number(f64::from_bits(left.secondary) * (right.secondary as i64) as f64)
163132
} else {
164-
return mul_error(left, right);
133+
return Ok(());
165134
};
166135

167136
*vm.register_stack.as_mut_ptr().add(base_ptr + a) = result;
168-
vm.current_frame_mut().pc += 1;
137+
(*frame_ptr).pc += 1;
169138
Ok(())
170139
}
171140
}
172141

173-
#[cold]
174-
#[inline(never)]
175-
fn mul_error(_left: LuaValue, _right: LuaValue) -> LuaResult<()> {
176-
// Don't throw error - let MMBIN handle metamethod
177-
Ok(())
178-
}
179-
180142
/// DIV: R[A] = R[B] / R[C]
181143
/// Division always returns float in Lua
182144
#[inline(always)]
183-
pub fn exec_div(vm: &mut LuaVM, instr: u32) -> LuaResult<()> {
145+
pub fn exec_div(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
184146
let a = Instruction::get_a(instr) as usize;
185147
let b = Instruction::get_b(instr) as usize;
186148
let c = Instruction::get_c(instr) as usize;
187149

188150
unsafe {
189-
let base_ptr = (*vm.frames.last().unwrap_unchecked()).base_ptr;
151+
let base_ptr = (*frame_ptr).base_ptr;
190152
let reg_base = vm.register_stack.as_ptr().add(base_ptr);
191153
let left = *reg_base.add(b);
192154
let right = *reg_base.add(c);
193155

194-
// Fast type check
195156
let left_tag = left.primary & TYPE_MASK;
196157
let right_tag = right.primary & TYPE_MASK;
197158

198-
// Convert to float and divide
199159
let l_float = if left_tag == TAG_INTEGER {
200160
(left.secondary as i64) as f64
201161
} else if left_tag == TAG_FLOAT {
202162
f64::from_bits(left.secondary)
203163
} else {
204-
return Ok(()); // Let MMBIN handle
164+
return Ok(());
205165
};
206166

207167
let r_float = if right_tag == TAG_INTEGER {
208168
(right.secondary as i64) as f64
209169
} else if right_tag == TAG_FLOAT {
210170
f64::from_bits(right.secondary)
211171
} else {
212-
return Ok(()); // Let MMBIN handle
172+
return Ok(());
213173
};
214174

215-
let result = LuaValue::number(l_float / r_float);
216-
*vm.register_stack.as_mut_ptr().add(base_ptr + a) = result;
217-
vm.current_frame_mut().pc += 1; // Skip MMBIN
175+
*vm.register_stack.as_mut_ptr().add(base_ptr + a) = LuaValue::number(l_float / r_float);
176+
(*frame_ptr).pc += 1;
218177
Ok(())
219178
}
220179
}
221180

222181
/// IDIV: R[A] = R[B] // R[C] (floor division)
223182
#[inline(always)]
224-
pub fn exec_idiv(vm: &mut LuaVM, instr: u32) -> LuaResult<()> {
183+
pub fn exec_idiv(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
225184
let a = Instruction::get_a(instr) as usize;
226185
let b = Instruction::get_b(instr) as usize;
227186
let c = Instruction::get_c(instr) as usize;
228187

229188
unsafe {
230-
let base_ptr = (*vm.frames.last().unwrap_unchecked()).base_ptr;
189+
let base_ptr = (*frame_ptr).base_ptr;
231190
let reg_base = vm.register_stack.as_ptr().add(base_ptr);
232191
let left = *reg_base.add(b);
233192
let right = *reg_base.add(c);
234193

235-
// Combined type check for integer fast path
236194
let combined_tags = (left.primary | right.primary) & TYPE_MASK;
237195

238196
let result = if combined_tags == TAG_INTEGER {
@@ -266,25 +224,24 @@ pub fn exec_idiv(vm: &mut LuaVM, instr: u32) -> LuaResult<()> {
266224
};
267225

268226
*vm.register_stack.as_mut_ptr().add(base_ptr + a) = result;
269-
vm.current_frame_mut().pc += 1;
227+
(*frame_ptr).pc += 1;
270228
Ok(())
271229
}
272230
}
273231

274232
/// MOD: R[A] = R[B] % R[C]
275233
#[inline(always)]
276-
pub fn exec_mod(vm: &mut LuaVM, instr: u32) -> LuaResult<()> {
234+
pub fn exec_mod(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
277235
let a = Instruction::get_a(instr) as usize;
278236
let b = Instruction::get_b(instr) as usize;
279237
let c = Instruction::get_c(instr) as usize;
280238

281239
unsafe {
282-
let base_ptr = (*vm.frames.last().unwrap_unchecked()).base_ptr;
240+
let base_ptr = (*frame_ptr).base_ptr;
283241
let reg_base = vm.register_stack.as_ptr().add(base_ptr);
284242
let left = *reg_base.add(b);
285243
let right = *reg_base.add(c);
286244

287-
// Combined type check for integer fast path
288245
let combined_tags = (left.primary | right.primary) & TYPE_MASK;
289246

290247
let result = if combined_tags == TAG_INTEGER {
@@ -314,43 +271,41 @@ pub fn exec_mod(vm: &mut LuaVM, instr: u32) -> LuaResult<()> {
314271
return Ok(());
315272
};
316273

317-
// Lua uses floored division modulo: a % b = a - floor(a/b) * b
318274
let result = l_float - (l_float / r_float).floor() * r_float;
319275
LuaValue::number(result)
320276
};
321277

322278
*vm.register_stack.as_mut_ptr().add(base_ptr + a) = result;
323-
vm.current_frame_mut().pc += 1;
279+
(*frame_ptr).pc += 1;
324280
Ok(())
325281
}
326282
}
327283

328284
/// POW: R[A] = R[B] ^ R[C]
329-
pub fn exec_pow(vm: &mut LuaVM, instr: u32) -> LuaResult<()> {
285+
#[inline(always)]
286+
pub fn exec_pow(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
330287
let a = Instruction::get_a(instr) as usize;
331288
let b = Instruction::get_b(instr) as usize;
332289
let c = Instruction::get_c(instr) as usize;
333290

334-
let frame = vm.current_frame();
335-
let base_ptr = frame.base_ptr;
336-
337-
let left = vm.register_stack[base_ptr + b];
338-
let right = vm.register_stack[base_ptr + c];
291+
unsafe {
292+
let base_ptr = (*frame_ptr).base_ptr;
293+
let left = *vm.register_stack.as_ptr().add(base_ptr + b);
294+
let right = *vm.register_stack.as_ptr().add(base_ptr + c);
339295

340-
// Power always uses float
341-
let l_float = match left.as_number() {
342-
Some(n) => n,
343-
None => return Ok(()), // Let MMBIN handle
344-
};
345-
let r_float = match right.as_number() {
346-
Some(n) => n,
347-
None => return Ok(()), // Let MMBIN handle
348-
};
296+
let l_float = match left.as_number() {
297+
Some(n) => n,
298+
None => return Ok(()),
299+
};
300+
let r_float = match right.as_number() {
301+
Some(n) => n,
302+
None => return Ok(()),
303+
};
349304

350-
let result = LuaValue::number(l_float.powf(r_float));
351-
vm.register_stack[base_ptr + a] = result;
352-
vm.current_frame_mut().pc += 1;
353-
Ok(())
305+
*vm.register_stack.as_mut_ptr().add(base_ptr + a) = LuaValue::number(l_float.powf(r_float));
306+
(*frame_ptr).pc += 1;
307+
Ok(())
308+
}
354309
}
355310

356311
/// UNM: R[A] = -R[B] (unary minus)
@@ -399,33 +354,30 @@ pub fn exec_unm(vm: &mut LuaVM, instr: u32) -> LuaResult<()> {
399354

400355
/// ADDI: R[A] = R[B] + sC
401356
#[inline(always)]
402-
pub fn exec_addi(vm: &mut LuaVM, instr: u32) -> LuaResult<()> {
357+
pub fn exec_addi(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
403358
let a = Instruction::get_a(instr) as usize;
404359
let b = Instruction::get_b(instr) as usize;
405-
let sc = Instruction::get_sc(instr); // Signed immediate value
360+
let sc = Instruction::get_sc(instr);
406361

407362
unsafe {
408-
let base_ptr = (*vm.frames.last().unwrap_unchecked()).base_ptr;
363+
let base_ptr = (*frame_ptr).base_ptr;
409364
let left = *vm.register_stack.as_ptr().add(base_ptr + b);
410365

411366
if left.primary == TAG_INTEGER {
412-
// Integer fast path
413367
let l = left.secondary as i64;
414368
*vm.register_stack.as_mut_ptr().add(base_ptr + a) =
415369
LuaValue::integer(l.wrapping_add(sc as i64));
416-
vm.current_frame_mut().pc += 1;
370+
(*frame_ptr).pc += 1;
417371
return Ok(());
418372
}
419373

420374
if left.primary == TAG_FLOAT {
421-
// Float fast path
422375
let l = f64::from_bits(left.secondary);
423376
*vm.register_stack.as_mut_ptr().add(base_ptr + a) = LuaValue::float(l + sc as f64);
424-
vm.current_frame_mut().pc += 1;
377+
(*frame_ptr).pc += 1;
425378
return Ok(());
426379
}
427380

428-
// Not a number, fallthrough to MMBINI
429381
Ok(())
430382
}
431383
}

0 commit comments

Comments
 (0)