Skip to content

Commit 7217a95

Browse files
committed
optimize return
1 parent 147886a commit 7217a95

File tree

2 files changed

+259
-136
lines changed

2 files changed

+259
-136
lines changed

crates/luars/src/lua_vm/execute/control_instructions.rs

Lines changed: 83 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -859,12 +859,6 @@ fn exec_call_lua_function(
859859
call_metamethod_self: LuaValue,
860860
frame_ptr_ptr: &mut *mut LuaCallFrame, // Use passed frame_ptr!
861861
) -> LuaResult<()> {
862-
// Safepoint GC check: run GC at function call boundaries
863-
// This is much cheaper than checking on every table operation
864-
if vm.gc_debt_local > 1024 * 1024 {
865-
vm.check_gc_slow_pub();
866-
}
867-
868862
// Get function ID - FAST PATH: assume valid function
869863
let func_id = unsafe { func.as_function_id().unwrap_unchecked() };
870864

@@ -1306,144 +1300,113 @@ pub fn exec_tailcall(
13061300

13071301
/// RETURN0
13081302
/// return (no values)
1309-
/// OPTIMIZED: Use frame_ptr directly, calculate caller ptr before pop
1303+
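/// ULTRA-OPTIMIZED: minimal work for the common case (Lua->Lua call). NOTE(review): open upvalues are no longer closed here - confirm they are closed elsewhere before this frame's registers are reused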
1304+
/// NOTE: Currently inlined in main loop for performance, but kept for potential future use
13101305
#[inline(always)]
1306+
#[allow(dead_code)]
13111307
pub fn exec_return0(
13121308
vm: &mut LuaVM,
13131309
_instr: u32,
13141310
frame_ptr_ptr: &mut *mut LuaCallFrame,
13151311
) -> LuaResult<()> {
1316-
// FAST PATH: Use passed frame_ptr directly - get all info BEFORE popping
1317-
let (base_ptr, result_reg, num_results) = unsafe {
1318-
(
1319-
(**frame_ptr_ptr).base_ptr as usize,
1320-
(**frame_ptr_ptr).get_result_reg(),
1321-
(**frame_ptr_ptr).get_num_results(),
1322-
)
1323-
};
1324-
1325-
// Only close upvalues if there are any
1326-
if !vm.open_upvalues.is_empty() {
1327-
vm.close_upvalues_from(base_ptr);
1328-
}
1329-
1330-
// OPTIMIZED: Calculate caller frame pointer and check if Lua BEFORE pop
1331-
let has_caller = vm.frame_count > 1;
1332-
let (caller_ptr, caller_is_lua) = if has_caller {
1333-
let ptr = unsafe { vm.frames.as_mut_ptr().add(vm.frame_count - 2) };
1334-
let is_lua = unsafe { (*ptr).is_lua() };
1335-
(ptr, is_lua)
1336-
} else {
1337-
(std::ptr::null_mut(), false)
1338-
};
1339-
1340-
// Pop frame - just decrement counter
1341-
vm.frame_count -= 1;
1342-
1343-
// FAST PATH: Lua caller (most common case - Lua calling Lua)
1344-
if has_caller && caller_is_lua {
1345-
// Update frame_ptr (already computed)
1346-
*frame_ptr_ptr = caller_ptr;
1347-
1348-
// Get caller's base_ptr
1349-
let caller_base = unsafe { (*caller_ptr).base_ptr } as usize;
1350-
1351-
// Fill expected return values with nil
1352-
if num_results != usize::MAX && num_results > 0 {
1353-
let dest_base = caller_base + result_reg;
1354-
unsafe {
1355-
let reg_ptr = vm.register_stack.as_mut_ptr();
1356-
let nil_val = LuaValue::nil();
1357-
for i in 0..num_results {
1358-
*reg_ptr.add(dest_base + i) = nil_val;
1312+
// Like Lua C: check if we have a Lua caller (most common case)
1313+
if vm.frame_count > 1 {
1314+
// FAST PATH: Calculate caller frame pointer BEFORE pop
1315+
let caller_ptr = unsafe { vm.frames.as_mut_ptr().add(vm.frame_count - 2) };
1316+
1317+
// Pop frame - just decrement counter (like Lua C: L->ci = ci->previous)
1318+
vm.frame_count -= 1;
1319+
1320+
// Check if caller is Lua function
1321+
if unsafe { (*caller_ptr).is_lua() } {
1322+
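// Read result_reg / num_results from the returning frame while frame_ptr still points at it (it is switched to the caller just below)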
1323+
let (result_reg, num_results) = unsafe {
1324+
((**frame_ptr_ptr).get_result_reg(), (**frame_ptr_ptr).get_num_results())
1325+
};
1326+
1327+
// Update frame_ptr to caller
1328+
*frame_ptr_ptr = caller_ptr;
1329+
1330+
// Only fill nil if caller expects results
1331+
// Like Lua C: for (nres = ci->nresults; l_unlikely(nres > 0); nres--)
1332+
if num_results > 0 && num_results != usize::MAX {
1333+
let caller_base = unsafe { (*caller_ptr).base_ptr } as usize;
1334+
let dest_base = caller_base + result_reg;
1335+
unsafe {
1336+
let reg_ptr = vm.register_stack.as_mut_ptr();
1337+
let nil_val = LuaValue::nil();
1338+
for i in 0..num_results {
1339+
*reg_ptr.add(dest_base + i) = nil_val;
1340+
}
13591341
}
13601342
}
1343+
1344+
return Ok(());
1345+
} else {
1346+
// Caller is not a Lua frame (C function caller): leave return_values empty and signal Exit
1347+
*frame_ptr_ptr = caller_ptr;
1348+
vm.return_values.clear();
1349+
return Err(LuaError::Exit);
13611350
}
1362-
1363-
// Update caller's top
1364-
unsafe {
1365-
(*caller_ptr).top = result_reg as u32;
1366-
}
1367-
Ok(())
1368-
} else if has_caller {
1369-
// C function caller (pcall/xpcall/metamethods via call_function_internal)
1370-
// Write to return_values for call_function_internal to read
1371-
*frame_ptr_ptr = caller_ptr;
1372-
vm.return_values.clear();
1373-
Err(LuaError::Exit)
1374-
} else {
1375-
// No caller - exit VM, clear return_values (empty return)
1376-
vm.return_values.clear();
1377-
Err(LuaError::Exit)
13781351
}
1352+
1353+
// No caller - exit VM
1354+
vm.frame_count -= 1;
1355+
vm.return_values.clear();
1356+
Err(LuaError::Exit)
13791357
}
13801358

13811359
/// RETURN1 A
13821360
/// return R[A]
1383-
/// OPTIMIZED: Ultra-fast path for single-value return (most common case)
1361+
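/// ULTRA-OPTIMIZED: minimal work for single-value return (most common case). NOTE(review): open upvalues are no longer closed here - confirm they are closed elsewhere before this frame's registers are reused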
1362+
/// NOTE: Currently inlined in main loop for performance, but kept for potential future use
13841363
#[inline(always)]
1364+
#[allow(dead_code)]
13851365
pub fn exec_return1(
13861366
vm: &mut LuaVM,
13871367
instr: u32,
13881368
frame_ptr_ptr: &mut *mut LuaCallFrame,
13891369
) -> LuaResult<()> {
13901370
let a = Instruction::get_a(instr) as usize;
13911371

1392-
// FAST PATH: Use passed frame_ptr directly - get all info we need
1393-
let (base_ptr, result_reg) = unsafe {
1394-
(
1395-
(**frame_ptr_ptr).base_ptr as usize,
1396-
(**frame_ptr_ptr).get_result_reg(),
1397-
)
1398-
};
1399-
1400-
// Get return value BEFORE any other operations
1372+
// Get base_ptr and return value FIRST
1373+
let base_ptr = unsafe { (**frame_ptr_ptr).base_ptr } as usize;
14011374
let return_value = unsafe { *vm.register_stack.get_unchecked(base_ptr + a) };
14021375

1403-
// Only close upvalues if there are any open (rare for simple functions)
1404-
if !vm.open_upvalues.is_empty() {
1405-
vm.close_upvalues_from(base_ptr);
1406-
}
1407-
1408-
// OPTIMIZED: Calculate caller frame pointer and check if Lua BEFORE pop
1409-
let has_caller = vm.frame_count > 1;
1410-
let (caller_ptr, caller_is_lua) = if has_caller {
1411-
let ptr = unsafe { vm.frames.as_mut_ptr().add(vm.frame_count - 2) };
1412-
let is_lua = unsafe { (*ptr).is_lua() };
1413-
(ptr, is_lua)
1414-
} else {
1415-
(std::ptr::null_mut(), false)
1416-
};
1417-
1418-
// Pop frame - just decrement counter
1419-
vm.frame_count -= 1;
1420-
1421-
// FAST PATH: Lua caller (most common - Lua calling Lua)
1422-
if has_caller && caller_is_lua {
1423-
// Update frame_ptr to caller (already computed above)
1424-
*frame_ptr_ptr = caller_ptr;
1425-
1426-
// Get caller's base_ptr and write return value directly
1427-
let caller_base = unsafe { (*caller_ptr).base_ptr } as usize;
1428-
unsafe {
1429-
*vm.register_stack
1430-
.get_unchecked_mut(caller_base + result_reg) = return_value;
1431-
// Update top
1432-
(*caller_ptr).top = (result_reg + 1) as u32;
1376+
// Like Lua C: check if we have a Lua caller (most common case)
1377+
if vm.frame_count > 1 {
1378+
// FAST PATH: Calculate caller frame pointer BEFORE pop
1379+
let caller_ptr = unsafe { vm.frames.as_mut_ptr().add(vm.frame_count - 2) };
1380+
1381+
// Pop frame - just decrement counter
1382+
vm.frame_count -= 1;
1383+
1384+
// Check if caller is Lua function
1385+
if unsafe { (*caller_ptr).is_lua() } {
1386+
let result_reg = unsafe { (**frame_ptr_ptr).get_result_reg() };
1387+
1388+
// Update frame_ptr to caller
1389+
*frame_ptr_ptr = caller_ptr;
1390+
1391+
// Write return value directly to caller's register
1392+
let caller_base = unsafe { (*caller_ptr).base_ptr } as usize;
1393+
unsafe {
1394+
*vm.register_stack.get_unchecked_mut(caller_base + result_reg) = return_value;
1395+
}
1396+
1397+
return Ok(());
1398+
} else {
1399+
// Caller is not a Lua frame (C function caller): pass the single result via return_values and signal Exit
1400+
*frame_ptr_ptr = caller_ptr;
1401+
vm.return_values.clear();
1402+
vm.return_values.push(return_value);
1403+
return Err(LuaError::Exit);
14331404
}
1434-
1435-
Ok(())
1436-
} else if has_caller {
1437-
// C function caller (pcall/xpcall/metamethods via call_function_internal)
1438-
// Write to return_values for call_function_internal to read
1439-
*frame_ptr_ptr = caller_ptr;
1440-
vm.return_values.clear();
1441-
vm.return_values.push(return_value);
1442-
Err(LuaError::Exit)
1443-
} else {
1444-
// No caller - exit VM (only happens at script end)
1445-
vm.return_values.clear();
1446-
vm.return_values.push(return_value);
1447-
Err(LuaError::Exit)
14481405
}
1406+
1407+
// No caller - exit VM
1408+
vm.frame_count -= 1;
1409+
vm.return_values.clear();
1410+
vm.return_values.push(return_value);
1411+
Err(LuaError::Exit)
14491412
}

0 commit comments

Comments
 (0)