Skip to content

Commit d0fd4a0

Browse files
committed
optimize pcall and method call
1 parent e6977d6 commit d0fd4a0

File tree

2 files changed

+329
-17
lines changed

2 files changed

+329
-17
lines changed

crates/luars/src/lua_vm/mod.rs

Lines changed: 313 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ mod lua_call_frame;
55
mod lua_error;
66
mod opcode;
77

8-
use crate::gc::{GC, GcFunction, TableId, ThreadId, UpvalueId};
8+
use crate::gc::{FunctionId, GC, GcFunction, TableId, ThreadId, UpvalueId};
99
#[cfg(feature = "async")]
1010
use crate::lua_async::AsyncExecutor;
1111
use crate::lua_value::{
@@ -1146,11 +1146,164 @@ impl LuaVM {
11461146
}
11471147
}
11481148

1149-
// Slow path for Lua functions and general cases
1149+
// Fast path for Lua functions with 1-2 args (common case)
1150+
if let Some(func_id) = func.as_function_id() {
1151+
match args.len() {
1152+
1 => return self.call_lua_metamethod_1(func_id, args[0]),
1153+
2 => return self.call_lua_metamethod_2(func_id, args[0], args[1]),
1154+
_ => {}
1155+
}
1156+
}
1157+
1158+
// Slow path for general cases
11501159
let result = self.call_function_internal(func.clone(), args.to_vec())?;
11511160
Ok(result.get(0).cloned())
11521161
}
11531162

1163+
/// ULTRA-OPTIMIZED: Call Lua function metamethod with 2 args
1164+
/// Used by __index, __eq, __lt, __le, etc.
1165+
/// Zero Vec allocation - copies args directly to stack
1166+
#[inline]
1167+
fn call_lua_metamethod_2(
1168+
&mut self,
1169+
func_id: FunctionId,
1170+
arg1: LuaValue,
1171+
arg2: LuaValue,
1172+
) -> LuaResult<Option<LuaValue>> {
1173+
let (max_stack_size, code_ptr, constants_ptr, upvalues_ptr) = {
1174+
let Some(func_ref) = self.object_pool.get_function(func_id) else {
1175+
return Err(self.error("Invalid function".to_string()));
1176+
};
1177+
let size = func_ref.chunk.max_stack_size.max(2);
1178+
(
1179+
size,
1180+
func_ref.chunk.code.as_ptr(),
1181+
func_ref.chunk.constants.as_ptr(),
1182+
func_ref.upvalues.as_ptr(),
1183+
)
1184+
};
1185+
1186+
let new_base = if self.frame_count > 0 {
1187+
let current_frame = &self.frames[self.frame_count - 1];
1188+
(current_frame.base_ptr as usize) + 256
1189+
} else {
1190+
0
1191+
};
1192+
1193+
self.ensure_stack_capacity(new_base + max_stack_size);
1194+
1195+
// Set up args directly - no Vec allocation
1196+
unsafe {
1197+
let dst = self.register_stack.as_mut_ptr().add(new_base);
1198+
*dst = arg1;
1199+
*dst.add(1) = arg2;
1200+
// Initialize remaining with nil
1201+
let nil_val = LuaValue::nil();
1202+
for i in 2..max_stack_size {
1203+
*dst.add(i) = nil_val;
1204+
}
1205+
}
1206+
1207+
// Push boundary + Lua frame
1208+
let boundary_frame = LuaCallFrame::new_c_function(new_base, 0);
1209+
self.push_frame(boundary_frame);
1210+
1211+
let new_frame = LuaCallFrame::new_lua_function(
1212+
func_id,
1213+
code_ptr,
1214+
constants_ptr,
1215+
upvalues_ptr,
1216+
new_base,
1217+
max_stack_size,
1218+
0,
1219+
-1,
1220+
);
1221+
self.push_frame(new_frame);
1222+
1223+
let exec_result = execute::luavm_execute(self);
1224+
1225+
match exec_result {
1226+
Ok(_) | Err(LuaError::Exit) => {
1227+
self.pop_frame_discard();
1228+
Ok(self.return_values.first().cloned())
1229+
}
1230+
Err(LuaError::Yield) => Err(LuaError::Yield),
1231+
Err(e) => {
1232+
self.pop_frame_discard();
1233+
Err(e)
1234+
}
1235+
}
1236+
}
1237+
1238+
/// ULTRA-OPTIMIZED: Call Lua function metamethod with 1 arg
1239+
/// Used by __len, __unm, __bnot, __tostring
1240+
#[inline]
1241+
fn call_lua_metamethod_1(
1242+
&mut self,
1243+
func_id: FunctionId,
1244+
arg1: LuaValue,
1245+
) -> LuaResult<Option<LuaValue>> {
1246+
let (max_stack_size, code_ptr, constants_ptr, upvalues_ptr) = {
1247+
let Some(func_ref) = self.object_pool.get_function(func_id) else {
1248+
return Err(self.error("Invalid function".to_string()));
1249+
};
1250+
let size = func_ref.chunk.max_stack_size.max(1);
1251+
(
1252+
size,
1253+
func_ref.chunk.code.as_ptr(),
1254+
func_ref.chunk.constants.as_ptr(),
1255+
func_ref.upvalues.as_ptr(),
1256+
)
1257+
};
1258+
1259+
let new_base = if self.frame_count > 0 {
1260+
let current_frame = &self.frames[self.frame_count - 1];
1261+
(current_frame.base_ptr as usize) + 256
1262+
} else {
1263+
0
1264+
};
1265+
1266+
self.ensure_stack_capacity(new_base + max_stack_size);
1267+
1268+
unsafe {
1269+
let dst = self.register_stack.as_mut_ptr().add(new_base);
1270+
*dst = arg1;
1271+
let nil_val = LuaValue::nil();
1272+
for i in 1..max_stack_size {
1273+
*dst.add(i) = nil_val;
1274+
}
1275+
}
1276+
1277+
let boundary_frame = LuaCallFrame::new_c_function(new_base, 0);
1278+
self.push_frame(boundary_frame);
1279+
1280+
let new_frame = LuaCallFrame::new_lua_function(
1281+
func_id,
1282+
code_ptr,
1283+
constants_ptr,
1284+
upvalues_ptr,
1285+
new_base,
1286+
max_stack_size,
1287+
0,
1288+
-1,
1289+
);
1290+
self.push_frame(new_frame);
1291+
1292+
let exec_result = execute::luavm_execute(self);
1293+
1294+
match exec_result {
1295+
Ok(_) | Err(LuaError::Exit) => {
1296+
self.pop_frame_discard();
1297+
Ok(self.return_values.first().cloned())
1298+
}
1299+
Err(LuaError::Yield) => Err(LuaError::Yield),
1300+
Err(e) => {
1301+
self.pop_frame_discard();
1302+
Err(e)
1303+
}
1304+
}
1305+
}
1306+
11541307
/// Fast path for calling CFunction metamethods with 2 arguments
11551308
/// Used by __index, __newindex, etc. Avoids Vec allocation.
11561309
/// Returns the first return value.
@@ -2431,6 +2584,164 @@ impl LuaVM {
24312584
}
24322585
}
24332586

2587+
/// ULTRA-OPTIMIZED pcall: protected call for both CFunction and Lua function callees
2588+
/// Works directly on the stack without any Vec allocations
2589+
/// Args are read from caller's stack and results are written directly to return_values
2590+
/// Returns: (success, result_count) where results are in self.return_values
2591+
#[inline]
2592+
pub fn protected_call_stack_based(
2593+
&mut self,
2594+
func: LuaValue,
2595+
arg_base: usize, // Absolute stack index of the first argument (lua_pcall passes its base_ptr + 2)
2596+
arg_count: usize, // Number of arguments
2597+
) -> LuaResult<(bool, usize)> {
2598+
// Save current state
2599+
let initial_frame_count = self.frame_count;
2600+
2601+
// Call function directly without Vec allocation
2602+
let result = self.call_function_stack_based(func, arg_base, arg_count);
2603+
2604+
match result {
2605+
Ok(result_count) => Ok((true, result_count)),
2606+
Err(LuaError::Yield) => Err(LuaError::Yield),
2607+
Err(_) => {
2608+
// Error path: clean up and return error message
2609+
self.open_upvalues.clear();
2610+
while self.frame_count > initial_frame_count {
2611+
self.pop_frame_discard();
2612+
}
2613+
let msg = std::mem::take(&mut self.error_message);
2614+
let error_str = self.create_string(&msg);
2615+
self.return_values.clear();
2616+
self.return_values.push(error_str);
2617+
Ok((false, 1))
2618+
}
2619+
}
2620+
}
2621+
2622+
/// Internal helper that calls function using stack-based arguments
2623+
/// Avoids Vec allocation for the common case
2624+
/// Results are placed in self.return_values, returns count
2625+
#[inline]
2626+
fn call_function_stack_based(
2627+
&mut self,
2628+
func: LuaValue,
2629+
arg_base: usize,
2630+
arg_count: usize,
2631+
) -> LuaResult<usize> {
2632+
match func.kind() {
2633+
LuaValueKind::CFunction => {
2634+
let cfunc = func.as_cfunction().unwrap();
2635+
2636+
// Calculate new base for the call frame
2637+
let new_base = if self.frame_count > 0 {
2638+
let current_frame = &self.frames[self.frame_count - 1];
2639+
(current_frame.base_ptr as usize) + 256
2640+
} else {
2641+
0
2642+
};
2643+
2644+
let stack_size = arg_count + 1;
2645+
self.ensure_stack_capacity(new_base + stack_size);
2646+
2647+
// Copy args from caller's stack to new frame
2648+
unsafe {
2649+
let src = self.register_stack.as_ptr().add(arg_base);
2650+
let dst = self.register_stack.as_mut_ptr().add(new_base);
2651+
*dst = func; // func at slot 0
2652+
std::ptr::copy_nonoverlapping(src, dst.add(1), arg_count);
2653+
}
2654+
2655+
let temp_frame = LuaCallFrame::new_c_function(new_base, stack_size);
2656+
self.push_frame(temp_frame);
2657+
2658+
match cfunc(self) {
2659+
Ok(r) => {
2660+
self.pop_frame_discard();
2661+
self.return_values = r.all_values();
2662+
Ok(self.return_values.len())
2663+
}
2664+
Err(LuaError::Yield) => Err(LuaError::Yield),
2665+
Err(e) => {
2666+
self.pop_frame_discard();
2667+
Err(e)
2668+
}
2669+
}
2670+
}
2671+
LuaValueKind::Function => {
2672+
let Some(func_id) = func.as_function_id() else {
2673+
return Err(self.error("Invalid function reference".to_string()));
2674+
};
2675+
2676+
let (max_stack_size, code_ptr, constants_ptr, upvalues_ptr) = {
2677+
let Some(func_ref) = self.object_pool.get_function(func_id) else {
2678+
return Err(self.error("Invalid function".to_string()));
2679+
};
2680+
let size = func_ref.chunk.max_stack_size.max(1);
2681+
(
2682+
size,
2683+
func_ref.chunk.code.as_ptr(),
2684+
func_ref.chunk.constants.as_ptr(),
2685+
func_ref.upvalues.as_ptr(),
2686+
)
2687+
};
2688+
2689+
let new_base = if self.frame_count > 0 {
2690+
let current_frame = &self.frames[self.frame_count - 1];
2691+
(current_frame.base_ptr as usize) + 256
2692+
} else {
2693+
0
2694+
};
2695+
2696+
self.ensure_stack_capacity(new_base + max_stack_size);
2697+
2698+
// Copy args and initialize remaining slots
2699+
unsafe {
2700+
let src = self.register_stack.as_ptr().add(arg_base);
2701+
let dst = self.register_stack.as_mut_ptr().add(new_base);
2702+
let copy_count = arg_count.min(max_stack_size);
2703+
std::ptr::copy_nonoverlapping(src, dst, copy_count);
2704+
// Initialize remaining with nil
2705+
let nil_val = LuaValue::nil();
2706+
for i in copy_count..max_stack_size {
2707+
*dst.add(i) = nil_val;
2708+
}
2709+
}
2710+
2711+
// Push boundary frame and Lua function frame
2712+
let boundary_frame = LuaCallFrame::new_c_function(new_base, 0);
2713+
self.push_frame(boundary_frame);
2714+
2715+
let new_frame = LuaCallFrame::new_lua_function(
2716+
func_id,
2717+
code_ptr,
2718+
constants_ptr,
2719+
upvalues_ptr,
2720+
new_base,
2721+
max_stack_size,
2722+
0,
2723+
-1,
2724+
);
2725+
self.push_frame(new_frame);
2726+
2727+
let exec_result = execute::luavm_execute(self);
2728+
2729+
match exec_result {
2730+
Ok(_) | Err(LuaError::Exit) => {
2731+
self.pop_frame_discard();
2732+
Ok(self.return_values.len())
2733+
}
2734+
Err(LuaError::Yield) => Err(LuaError::Yield),
2735+
Err(e) => {
2736+
self.pop_frame_discard();
2737+
Err(e)
2738+
}
2739+
}
2740+
}
2741+
_ => Err(self.error("attempt to call a non-function value".to_string())),
2742+
}
2743+
}
2744+
24342745
/// Protected call with error handler (xpcall semantics)
24352746
/// The error handler is registered and will be called by error() when an error occurs
24362747
/// Note: Yields are NOT caught by xpcall - they propagate through

crates/luars/src/stdlib/basic.rs

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -355,10 +355,9 @@ fn lua_next(vm: &mut LuaVM) -> LuaResult<MultiValue> {
355355
}
356356

357357
/// pcall(f [, arg1, ...]) - Protected call
358-
/// OPTIMIZED: Avoid Vec allocations on success path
358+
/// ULTRA-OPTIMIZED: no argument Vec is built; only the returned value list is allocated
359+
/// Uses stack-based argument passing and direct return value handling
359360
fn lua_pcall(vm: &mut LuaVM) -> LuaResult<MultiValue> {
360-
// pcall(f, arg1, arg2, ...) -> status, result or error
361-
362361
// Get frame info to read args directly
363362
let frame = vm.current_frame();
364363
let base_ptr = frame.base_ptr as usize;
@@ -371,22 +370,24 @@ fn lua_pcall(vm: &mut LuaVM) -> LuaResult<MultiValue> {
371370
return Err(vm.error("pcall() requires argument 1".to_string()));
372371
};
373372

374-
// Collect remaining args (2..top) into a small vec
375-
// Most pcalls have 0-3 args, so this is fast
373+
// Args start at base_ptr + 2; count is top - 2 when top > 2, otherwise 0
374+
let arg_base = base_ptr + 2;
376375
let arg_count = if top > 2 { top - 2 } else { 0 };
377-
let args: Vec<LuaValue> = if arg_count > 0 {
378-
(2..top).map(|i| vm.register_stack[base_ptr + i]).collect()
379-
} else {
380-
Vec::new()
381-
};
382376

383-
// Use protected_call from VM
384-
let (success, results) = vm.protected_call(func, args)?;
377+
// Use stack-based protected call - no Vec allocations!
378+
let (success, result_count) = vm.protected_call_stack_based(func, arg_base, arg_count)?;
385379

386-
// Return status and results - preallocate with capacity
387-
let mut return_values = Vec::with_capacity(1 + results.len());
380+
// Build return: status first, then results from vm.return_values
381+
// We need to copy since return_values will be reused
382+
let mut return_values = Vec::with_capacity(1 + result_count);
388383
return_values.push(LuaValue::boolean(success));
389-
return_values.extend(results);
384+
385+
// Copy results out of vm.return_values (the VM buffer itself is left intact)
386+
for i in 0..result_count {
387+
if i < vm.return_values.len() {
388+
return_values.push(vm.return_values[i]);
389+
}
390+
}
390391

391392
Ok(MultiValue::multiple(return_values))
392393
}

0 commit comments

Comments
 (0)