Skip to content

Commit 26a3809

Browse files
committed
Optimize some function calls
1 parent 48ec4dc commit 26a3809

File tree

9 files changed

+171
-146
lines changed

9 files changed

+171
-146
lines changed

crates/luars/src/lib_registry.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ pub fn create_standard_registry() -> LibraryRegistry {
204204

205205
/// Helper to get function arguments from VM registers
206206
pub fn get_args(vm: &LuaVM) -> Vec<LuaValue> {
207-
let frame = vm.frames.last().unwrap();
207+
let frame = vm.current_frame();
208208
let base_ptr = frame.base_ptr;
209209
let top = frame.top;
210210

@@ -216,7 +216,7 @@ pub fn get_args(vm: &LuaVM) -> Vec<LuaValue> {
216216
/// 1 based index
217217
#[inline(always)]
218218
pub fn get_arg(vm: &LuaVM, index: usize) -> Option<LuaValue> {
219-
let frame = vm.frames.last().unwrap();
219+
let frame = vm.current_frame();
220220
let base_ptr = frame.base_ptr;
221221
let top = frame.top;
222222

@@ -250,7 +250,7 @@ pub fn require_arg(vm: &mut LuaVM, index: usize, func_name: &str) -> LuaResult<L
250250
/// Helper to get argument count
251251
#[inline(always)]
252252
pub fn arg_count(vm: &LuaVM) -> usize {
253-
let frame = vm.frames.last().unwrap();
253+
let frame = vm.current_frame();
254254
// Subtract 1 for the function itself
255255
frame.top.saturating_sub(1)
256256
}

crates/luars/src/lua_async.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ pub fn create_async_wrapper(func_name: String) -> impl Fn(&mut LuaVM) -> LuaResu
182182
})?;
183183

184184
// 收集参数
185-
let frame = vm.frames.last().unwrap();
185+
let frame = &vm.frames[vm.frame_count - 1];
186186
let base = frame.base_ptr;
187187
let top = frame.top;
188188
let mut args = Vec::new();

crates/luars/src/lua_value/lua_thread.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,12 @@ pub struct LuaThread {
77
/// Coroutine status
88
pub status: CoroutineStatus,
99

10-
/// Independent call stack for this coroutine (Box for pointer stability)
11-
pub frames: Vec<Box<crate::lua_vm::LuaCallFrame>>,
10+
/// Independent call stack for this coroutine
11+
/// Using Vec<LuaCallFrame> directly (no Box) for efficiency
12+
pub frames: Vec<crate::lua_vm::LuaCallFrame>,
13+
14+
/// Current frame count (tracks active frames in the pre-allocated Vec)
15+
pub frame_count: usize,
1216

1317
/// Independent register stack for this coroutine
1418
pub register_stack: Vec<LuaValue>,

crates/luars/src/lua_vm/execute/control_instructions.rs

Lines changed: 56 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -870,15 +870,11 @@ fn exec_call_lua_function(
870870
call_metamethod_self: LuaValue,
871871
frame_ptr_ptr: &mut *mut LuaCallFrame, // Use passed frame_ptr!
872872
) -> LuaResult<()> {
873-
// Get function ID and lookup in ObjectPool
874-
let Some(func_id) = func.as_function_id() else {
875-
return Err(vm.error("Invalid function".to_string()));
876-
};
873+
// Get function ID - FAST PATH: assume valid function
874+
let func_id = unsafe { func.as_function_id().unwrap_unchecked() };
877875

878-
// Extract chunk info from ObjectPool
879-
let Some(func_ref) = vm.object_pool.get_function(func_id) else {
880-
return Err(vm.error("Invalid function ID".to_string()));
881-
};
876+
// Extract chunk info from ObjectPool - use unchecked for hot path
877+
let func_ref = unsafe { vm.object_pool.get_function_unchecked(func_id) };
882878

883879
let (max_stack_size, is_vararg, code_ptr, constants_ptr) = (
884880
func_ref.chunk.max_stack_size,
@@ -907,38 +903,25 @@ fn exec_call_lua_function(
907903
// Simple case: just ensure capacity and push frame
908904
let required_capacity = new_base + max_stack_size;
909905

910-
// Inline capacity check - avoid function call overhead
906+
// Ensure capacity - single branch
911907
if vm.register_stack.len() < required_capacity {
912-
vm.register_stack
913-
.reserve(required_capacity - vm.register_stack.len());
914-
// Only resize what's needed, don't initialize everything
915-
unsafe {
916-
vm.register_stack.set_len(required_capacity);
917-
// Initialize only slots beyond arguments
918-
let reg_ptr = vm.register_stack.as_mut_ptr();
919-
let nil_val = LuaValue::nil();
920-
for i in arg_count..max_stack_size {
921-
std::ptr::write(reg_ptr.add(new_base + i), nil_val);
922-
}
923-
}
924-
} else if arg_count < max_stack_size {
925-
// Stack is big enough, just initialize locals beyond args
908+
vm.register_stack.resize(required_capacity, LuaValue::nil());
909+
}
910+
911+
// Initialize locals beyond arguments (rare for simple functions)
912+
// Only if there are more locals than arguments
913+
if arg_count < max_stack_size {
926914
unsafe {
927-
let reg_ptr = vm.register_stack.as_mut_ptr();
915+
let reg_ptr = vm.register_stack.as_mut_ptr().add(new_base);
928916
let nil_val = LuaValue::nil();
929917
for i in arg_count..max_stack_size {
930-
*reg_ptr.add(new_base + i) = nil_val;
918+
*reg_ptr.add(i) = nil_val;
931919
}
932920
}
933921
}
934922

935-
// Create and push new frame
936-
// nresults: return_count as i16, use LUA_MULTRET for usize::MAX
937-
let nresults = if return_count == usize::MAX {
938-
-1i16
939-
} else {
940-
return_count as i16
941-
};
923+
// Create and push new frame - inline nresults calculation
924+
let nresults = if c == 0 { -1i16 } else { (c - 1) as i16 };
942925
let new_frame = LuaCallFrame::new_lua_function(
943926
func,
944927
code_ptr,
@@ -1153,7 +1136,7 @@ pub fn exec_tailcall(
11531136

11541137
// Extract all frame information we'll need BEFORE taking mutable references
11551138
let (base, return_count, result_reg, _function_value, _pc) = {
1156-
let frame = vm.frames.last().unwrap();
1139+
let frame = &vm.frames[vm.frame_count - 1];
11571140
(
11581141
frame.base_ptr,
11591142
frame.get_num_results(),
@@ -1315,7 +1298,7 @@ pub fn exec_tailcall(
13151298

13161299
/// RETURN0
13171300
/// return (no values)
1318-
/// OPTIMIZED: Use frame_ptr directly
1301+
/// OPTIMIZED: Use frame_ptr directly, calculate caller ptr before pop
13191302
#[inline(always)]
13201303
pub fn exec_return0(
13211304
vm: &mut LuaVM,
@@ -1336,17 +1319,24 @@ pub fn exec_return0(
13361319
vm.close_upvalues_from(base_ptr);
13371320
}
13381321

1339-
vm.pop_frame_discard();
1322+
// OPTIMIZED: Calculate caller frame pointer BEFORE pop
1323+
let has_caller = vm.frame_count > 1;
1324+
let caller_ptr = if has_caller {
1325+
unsafe { vm.frames.as_mut_ptr().add(vm.frame_count - 2) }
1326+
} else {
1327+
std::ptr::null_mut()
1328+
};
13401329

1341-
vm.return_values.clear();
1330+
// Pop frame - just decrement counter
1331+
vm.frame_count -= 1;
13421332

13431333
// FAST PATH: Check if we have a caller frame
1344-
if !vm.frames_is_empty() {
1345-
// Update frame_ptr to point to caller frame
1346-
*frame_ptr_ptr = vm.current_frame_ptr();
1334+
if has_caller {
1335+
// Update frame_ptr (already computed)
1336+
*frame_ptr_ptr = caller_ptr;
13471337

13481338
// Get caller's base_ptr
1349-
let caller_base = unsafe { (**frame_ptr_ptr).base_ptr };
1339+
let caller_base = unsafe { (*caller_ptr).base_ptr };
13501340

13511341
// Fill expected return values with nil
13521342
if num_results != usize::MAX && num_results > 0 {
@@ -1362,17 +1352,19 @@ pub fn exec_return0(
13621352

13631353
// Update caller's top
13641354
unsafe {
1365-
(**frame_ptr_ptr).top = result_reg;
1355+
(*caller_ptr).top = result_reg;
13661356
}
13671357
Ok(())
13681358
} else {
1359+
// No caller - exit VM, clear return_values (empty return)
1360+
vm.return_values.clear();
13691361
Err(LuaError::Exit)
13701362
}
13711363
}
13721364

13731365
/// RETURN1 A
13741366
/// return R[A]
1375-
/// OPTIMIZED: Fast path for single-value return (most common case)
1367+
/// OPTIMIZED: Ultra-fast path for single-value return (most common case)
13761368
#[inline(always)]
13771369
pub fn exec_return1(
13781370
vm: &mut LuaVM,
@@ -1389,47 +1381,45 @@ pub fn exec_return1(
13891381
)
13901382
};
13911383

1384+
// Get return value BEFORE any other operations
1385+
let return_value = unsafe { *vm.register_stack.get_unchecked(base_ptr + a) };
1386+
13921387
// Only close upvalues if there are any open (rare for simple functions)
13931388
if !vm.open_upvalues.is_empty() {
13941389
vm.close_upvalues_from(base_ptr);
13951390
}
13961391

1397-
// Get return value before popping frame
1398-
let return_value = if base_ptr + a < vm.register_stack.len() {
1399-
unsafe { *vm.register_stack.get_unchecked(base_ptr + a) }
1392+
// OPTIMIZED: Calculate caller frame pointer BEFORE pop (avoid recalculation)
1393+
// frame_count - 1 is current, frame_count - 2 is caller
1394+
let has_caller = vm.frame_count > 1;
1395+
let caller_ptr = if has_caller {
1396+
unsafe { vm.frames.as_mut_ptr().add(vm.frame_count - 2) }
14001397
} else {
1401-
LuaValue::nil()
1398+
std::ptr::null_mut()
14021399
};
14031400

1404-
// Pop frame - we already have all info we need from frame_ptr
1405-
vm.pop_frame_discard();
1406-
1407-
// CRITICAL: Always set return_values for call_function_internal compatibility
1408-
vm.return_values.clear();
1409-
vm.return_values.push(return_value);
1401+
// Pop frame - just decrement counter
1402+
vm.frame_count -= 1;
14101403

14111404
// Check if there's a caller frame
1412-
if !vm.frames_is_empty() {
1413-
// Update frame_ptr to point to caller frame
1414-
*frame_ptr_ptr = vm.current_frame_ptr();
1415-
1416-
// Get caller's base_ptr
1417-
let caller_base = unsafe { (**frame_ptr_ptr).base_ptr };
1418-
let dest_pos = caller_base + result_reg;
1405+
if has_caller {
1406+
// Update frame_ptr to caller (already computed above)
1407+
*frame_ptr_ptr = caller_ptr;
14191408

1420-
// Write to caller's result register
1421-
if dest_pos < vm.register_stack.len() {
1422-
vm.register_stack[dest_pos] = return_value;
1423-
}
1424-
1425-
// Update top
1409+
// Get caller's base_ptr and write return value directly
1410+
let caller_base = unsafe { (*caller_ptr).base_ptr };
14261411
unsafe {
1427-
(**frame_ptr_ptr).top = result_reg + 1;
1412+
*vm.register_stack.get_unchecked_mut(caller_base + result_reg) = return_value;
1413+
// Update top
1414+
(*caller_ptr).top = result_reg + 1;
14281415
}
14291416

14301417
Ok(())
14311418
} else {
14321419
// No caller - exit VM (only happens at script end)
1420+
// Only update return_values when exiting - this is what call_function_internal reads
1421+
vm.return_values.clear();
1422+
vm.return_values.push(return_value);
14331423
Err(LuaError::Exit)
14341424
}
14351425
}

crates/luars/src/lua_vm/execute/load_instructions.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,7 @@ use crate::lua_vm::{Instruction, LuaCallFrame, LuaVM};
1414
pub fn exec_varargprep(vm: &mut LuaVM, instr: u32, _frame_ptr: *mut LuaCallFrame) {
1515
let a = Instruction::get_a(instr) as usize; // number of fixed params
1616

17-
let frame_idx = vm.frames.len() - 1;
18-
let frame = &vm.frames[frame_idx];
17+
let frame = vm.current_frame();
1918
let base_ptr = frame.base_ptr;
2019
let top = frame.top;
2120

@@ -49,10 +48,10 @@ pub fn exec_varargprep(vm: &mut LuaVM, instr: u32, _frame_ptr: *mut LuaCallFrame
4948
}
5049

5150
// Set vararg info in frame
52-
vm.frames[frame_idx].set_vararg(vararg_dest, vararg_count);
51+
vm.current_frame_mut().set_vararg(vararg_dest, vararg_count);
5352
} else {
5453
// No varargs passed
55-
vm.frames[frame_idx].set_vararg(base_ptr + max_stack_size, 0);
54+
vm.current_frame_mut().set_vararg(base_ptr + max_stack_size, 0);
5655
}
5756

5857
// Initialize local variables (registers from 0 to max_stack_size) with nil

crates/luars/src/lua_vm/execute/upvalue_instructions.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@ pub fn exec_getupval(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
2020
let func_ref = unsafe { vm.object_pool.get_function_unchecked(func_id) };
2121
let upvalue_id = unsafe { *func_ref.upvalues.get_unchecked(b) };
2222

23-
// Get upvalue value
24-
let value = vm.read_upvalue(upvalue_id);
23+
// OPTIMIZED: Use unchecked read for hot path
24+
// SAFETY: upvalue_id is from a valid function closure
25+
let value = unsafe { vm.read_upvalue_unchecked(upvalue_id) };
2526
unsafe {
2627
*vm.register_stack.get_unchecked_mut(base_ptr + a) = value;
2728
}

crates/luars/src/lua_vm/lua_call_frame.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use crate::LuaValue;
2828
/// - 2 bytes: vararg_count (u16)
2929
/// - 1 byte: callstatus
3030
/// - 3 bytes: padding
31+
#[derive(Clone)]
3132
pub struct LuaCallFrame {
3233
pub function_value: LuaValue, // 16 bytes
3334
pub code_ptr: *const u32, // 8 bytes - 直接指向指令数组
@@ -52,6 +53,25 @@ pub const CIST_TAIL: u8 = 1 << 3; // 尾调用
5253
#[allow(dead_code)]
5354
pub const LUA_MULTRET: i16 = -1;
5455

56+
impl Default for LuaCallFrame {
57+
#[inline(always)]
58+
fn default() -> Self {
59+
LuaCallFrame {
60+
function_value: LuaValue::nil(),
61+
code_ptr: std::ptr::null(),
62+
constants_ptr: std::ptr::null(),
63+
base_ptr: 0,
64+
top: 0,
65+
pc: 0,
66+
result_reg: 0,
67+
vararg_start: 0,
68+
nresults: 0,
69+
vararg_count: 0,
70+
callstatus: 0,
71+
}
72+
}
73+
}
74+
5575
impl LuaCallFrame {
5676
#[inline(always)]
5777
pub fn new_lua_function(

0 commit comments

Comments
 (0)