@@ -870,15 +870,11 @@ fn exec_call_lua_function(
870870 call_metamethod_self : LuaValue ,
871871 frame_ptr_ptr : & mut * mut LuaCallFrame , // Use passed frame_ptr!
872872) -> LuaResult < ( ) > {
873- // Get function ID and lookup in ObjectPool
874- let Some ( func_id) = func. as_function_id ( ) else {
875- return Err ( vm. error ( "Invalid function" . to_string ( ) ) ) ;
876- } ;
873+ // Get function ID - FAST PATH: unchecked; caller must guarantee `func` is a function value
874+ let func_id = unsafe { func. as_function_id ( ) . unwrap_unchecked ( ) } ;
877875
878- // Extract chunk info from ObjectPool
879- let Some ( func_ref) = vm. object_pool . get_function ( func_id) else {
880- return Err ( vm. error ( "Invalid function ID" . to_string ( ) ) ) ;
881- } ;
876+ // Extract chunk info from ObjectPool - use unchecked for hot path
877+ let func_ref = unsafe { vm. object_pool . get_function_unchecked ( func_id) } ;
882878
883879 let ( max_stack_size, is_vararg, code_ptr, constants_ptr) = (
884880 func_ref. chunk . max_stack_size ,
@@ -907,38 +903,25 @@ fn exec_call_lua_function(
907903 // Simple case: just ensure capacity and push frame
908904 let required_capacity = new_base + max_stack_size;
909905
910- // Inline capacity check - avoid function call overhead
906+ // Ensure capacity - single branch
911907 if vm. register_stack . len ( ) < required_capacity {
912- vm. register_stack
913- . reserve ( required_capacity - vm. register_stack . len ( ) ) ;
914- // Only resize what's needed, don't initialize everything
915- unsafe {
916- vm. register_stack . set_len ( required_capacity) ;
917- // Initialize only slots beyond arguments
918- let reg_ptr = vm. register_stack . as_mut_ptr ( ) ;
919- let nil_val = LuaValue :: nil ( ) ;
920- for i in arg_count..max_stack_size {
921- std:: ptr:: write ( reg_ptr. add ( new_base + i) , nil_val) ;
922- }
923- }
924- } else if arg_count < max_stack_size {
925- // Stack is big enough, just initialize locals beyond args
908+ vm. register_stack . resize ( required_capacity, LuaValue :: nil ( ) ) ;
909+ }
910+
911+ // Initialize locals beyond arguments (rare for simple functions)
912+ // Only if there are more locals than arguments
913+ if arg_count < max_stack_size {
926914 unsafe {
927- let reg_ptr = vm. register_stack . as_mut_ptr ( ) ;
915+ let reg_ptr = vm. register_stack . as_mut_ptr ( ) . add ( new_base ) ;
928916 let nil_val = LuaValue :: nil ( ) ;
929917 for i in arg_count..max_stack_size {
930- * reg_ptr. add ( new_base + i) = nil_val;
918+ * reg_ptr. add ( i) = nil_val;
931919 }
932920 }
933921 }
934922
935- // Create and push new frame
936- // nresults: return_count as i16, use LUA_MULTRET for usize::MAX
937- let nresults = if return_count == usize:: MAX {
938- -1i16
939- } else {
940- return_count as i16
941- } ;
923+ // Create and push new frame - inline nresults calculation
924+ let nresults = if c == 0 { -1i16 } else { ( c - 1 ) as i16 } ;
942925 let new_frame = LuaCallFrame :: new_lua_function (
943926 func,
944927 code_ptr,
@@ -1153,7 +1136,7 @@ pub fn exec_tailcall(
11531136
11541137 // Extract all frame information we'll need BEFORE taking mutable references
11551138 let ( base, return_count, result_reg, _function_value, _pc) = {
1156- let frame = vm. frames . last ( ) . unwrap ( ) ;
1139+ let frame = & vm. frames [ vm . frame_count - 1 ] ;
11571140 (
11581141 frame. base_ptr ,
11591142 frame. get_num_results ( ) ,
@@ -1315,7 +1298,7 @@ pub fn exec_tailcall(
13151298
13161299/// RETURN0
13171300/// return (no values)
1318- /// OPTIMIZED: Use frame_ptr directly
1301+ /// OPTIMIZED: Use frame_ptr directly, calculate caller ptr before pop
13191302#[ inline( always) ]
13201303pub fn exec_return0 (
13211304 vm : & mut LuaVM ,
@@ -1336,17 +1319,24 @@ pub fn exec_return0(
13361319 vm. close_upvalues_from ( base_ptr) ;
13371320 }
13381321
1339- vm. pop_frame_discard ( ) ;
1322+ // OPTIMIZED: Calculate caller frame pointer BEFORE pop
1323+ let has_caller = vm. frame_count > 1 ;
1324+ let caller_ptr = if has_caller {
1325+ unsafe { vm. frames . as_mut_ptr ( ) . add ( vm. frame_count - 2 ) }
1326+ } else {
1327+ std:: ptr:: null_mut ( )
1328+ } ;
13401329
1341- vm. return_values . clear ( ) ;
1330+ // Pop frame - just decrement counter
1331+ vm. frame_count -= 1 ;
13421332
13431333 // FAST PATH: Check if we have a caller frame
1344- if !vm . frames_is_empty ( ) {
1345- // Update frame_ptr to point to caller frame
1346- * frame_ptr_ptr = vm . current_frame_ptr ( ) ;
1334+ if has_caller {
1335+ // Update frame_ptr (already computed)
1336+ * frame_ptr_ptr = caller_ptr ;
13471337
13481338 // Get caller's base_ptr
1349- let caller_base = unsafe { ( * * frame_ptr_ptr ) . base_ptr } ;
1339+ let caller_base = unsafe { ( * caller_ptr ) . base_ptr } ;
13501340
13511341 // Fill expected return values with nil
13521342 if num_results != usize:: MAX && num_results > 0 {
@@ -1362,17 +1352,19 @@ pub fn exec_return0(
13621352
13631353 // Update caller's top
13641354 unsafe {
1365- ( * * frame_ptr_ptr ) . top = result_reg;
1355+ ( * caller_ptr ) . top = result_reg;
13661356 }
13671357 Ok ( ( ) )
13681358 } else {
1359+ // No caller - exit VM, clear return_values (empty return)
1360+ vm. return_values . clear ( ) ;
13691361 Err ( LuaError :: Exit )
13701362 }
13711363}
13721364
13731365/// RETURN1 A
13741366/// return R[A]
1375- /// OPTIMIZED: Fast path for single-value return (most common case)
1367+ /// OPTIMIZED: Ultra-fast path for single-value return (most common case)
13761368#[ inline( always) ]
13771369pub fn exec_return1 (
13781370 vm : & mut LuaVM ,
@@ -1389,47 +1381,45 @@ pub fn exec_return1(
13891381 )
13901382 } ;
13911383
1384+ // Get return value BEFORE any other operations
1385+ let return_value = unsafe { * vm. register_stack . get_unchecked ( base_ptr + a) } ;
1386+
13921387 // Only close upvalues if there are any open (rare for simple functions)
13931388 if !vm. open_upvalues . is_empty ( ) {
13941389 vm. close_upvalues_from ( base_ptr) ;
13951390 }
13961391
1397- // Get return value before popping frame
1398- let return_value = if base_ptr + a < vm. register_stack . len ( ) {
1399- unsafe { * vm. register_stack . get_unchecked ( base_ptr + a) }
1392+ // OPTIMIZED: Calculate caller frame pointer BEFORE pop (avoid recalculation)
1393+ // frame_count - 1 is current, frame_count - 2 is caller
1394+ let has_caller = vm. frame_count > 1 ;
1395+ let caller_ptr = if has_caller {
1396+ unsafe { vm. frames . as_mut_ptr ( ) . add ( vm. frame_count - 2 ) }
14001397 } else {
1401- LuaValue :: nil ( )
1398+ std :: ptr :: null_mut ( )
14021399 } ;
14031400
1404- // Pop frame - we already have all info we need from frame_ptr
1405- vm. pop_frame_discard ( ) ;
1406-
1407- // CRITICAL: Always set return_values for call_function_internal compatibility
1408- vm. return_values . clear ( ) ;
1409- vm. return_values . push ( return_value) ;
1401+ // Pop frame - just decrement counter
1402+ vm. frame_count -= 1 ;
14101403
14111404 // Check if there's a caller frame
1412- if !vm. frames_is_empty ( ) {
1413- // Update frame_ptr to point to caller frame
1414- * frame_ptr_ptr = vm. current_frame_ptr ( ) ;
1415-
1416- // Get caller's base_ptr
1417- let caller_base = unsafe { ( * * frame_ptr_ptr) . base_ptr } ;
1418- let dest_pos = caller_base + result_reg;
1405+ if has_caller {
1406+ // Update frame_ptr to caller (already computed above)
1407+ * frame_ptr_ptr = caller_ptr;
14191408
1420- // Write to caller's result register
1421- if dest_pos < vm. register_stack . len ( ) {
1422- vm. register_stack [ dest_pos] = return_value;
1423- }
1424-
1425- // Update top
1409+ // Get caller's base_ptr and write return value directly
1410+ let caller_base = unsafe { ( * caller_ptr) . base_ptr } ;
14261411 unsafe {
1427- ( * * frame_ptr_ptr) . top = result_reg + 1 ;
1412+ * vm. register_stack . get_unchecked_mut ( caller_base + result_reg) = return_value;
1413+ // Update top
1414+ ( * caller_ptr) . top = result_reg + 1 ;
14281415 }
14291416
14301417 Ok ( ( ) )
14311418 } else {
14321419 // No caller - exit VM (only happens at script end)
1420+ // Only update return_values when exiting - this is what call_function_internal reads
1421+ vm. return_values . clear ( ) ;
1422+ vm. return_values . push ( return_value) ;
14331423 Err ( LuaError :: Exit )
14341424 }
14351425}
0 commit comments