Skip to content

Commit 524a14f

Browse files
committed
optimize
1 parent bb03baa commit 524a14f

File tree

3 files changed

+132
-111
lines changed

3 files changed

+132
-111
lines changed

crates/luars/src/lua_vm/execute/arithmetic_instructions.rs

Lines changed: 49 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1056,50 +1056,70 @@ pub fn exec_not(vm: &mut LuaVM, instr: u32, base_ptr: usize) {
10561056
}
10571057

10581058
/// LEN: R[A] = #R[B]
1059+
/// OPTIMIZED: Fast path for tables without __len metamethod
10591060
#[inline(always)]
10601061
pub fn exec_len(vm: &mut LuaVM, instr: u32, base_ptr: usize) -> LuaResult<()> {
10611062
let a = Instruction::get_a(instr) as usize;
10621063
let b = Instruction::get_b(instr) as usize;
10631064

10641065
let value = vm.register_stack[base_ptr + b];
10651066

1066-
// Check for __len metamethod first (for tables)
1067-
if value.is_table() {
1068-
// Use pre-cached __len StringId
1069-
let mm_key = LuaValue::string(vm.object_pool.tm_len);
1070-
if let Some(mt) = vm.table_get_metatable(&value) {
1071-
if let Some(metamethod) = vm.table_get_with_meta(&mt, &mm_key) {
1072-
if !metamethod.is_nil() {
1073-
let result = vm
1074-
.call_metamethod(&metamethod, &[value])?
1075-
.unwrap_or(LuaValue::nil());
1076-
vm.register_stack[base_ptr + a] = result;
1077-
return Ok(());
1067+
// Fast path: direct table/string length
1068+
if let Some(table_id) = value.as_table_id() {
1069+
// Check for __len metamethod first
1070+
let (len, has_len_mm) = {
1071+
if let Some(table) = vm.object_pool.get_table(table_id) {
1072+
let mt = table.get_metatable();
1073+
if let Some(mt_val) = mt {
1074+
// Has metatable, check for __len
1075+
if let Some(mt_id) = mt_val.as_table_id() {
1076+
let mm_key = LuaValue::string(vm.object_pool.tm_len);
1077+
if let Some(mt_table) = vm.object_pool.get_table(mt_id) {
1078+
if let Some(len_mm) = mt_table.raw_get(&mm_key) {
1079+
if !len_mm.is_nil() {
1080+
(0, Some(len_mm))
1081+
} else {
1082+
(table.len() as i64, None)
1083+
}
1084+
} else {
1085+
(table.len() as i64, None)
1086+
}
1087+
} else {
1088+
(table.len() as i64, None)
1089+
}
1090+
} else {
1091+
(table.len() as i64, None)
1092+
}
1093+
} else {
1094+
// No metatable, just get length
1095+
(table.len() as i64, None)
10781096
}
1097+
} else {
1098+
(0, None)
10791099
}
1080-
}
1081-
}
1100+
};
10821101

1083-
// Use ObjectPool for table/string length
1084-
let len = if let Some(table_id) = value.as_table_id() {
1085-
if let Some(table) = vm.object_pool.get_table(table_id) {
1086-
table.len() as i64
1102+
if let Some(metamethod) = has_len_mm {
1103+
// Call __len metamethod
1104+
let result = vm
1105+
.call_metamethod(&metamethod, &[value])?
1106+
.unwrap_or(LuaValue::nil());
1107+
vm.register_stack[base_ptr + a] = result;
10871108
} else {
1088-
0
1109+
vm.register_stack[base_ptr + a] = LuaValue::integer(len);
10891110
}
1090-
} else if let Some(string_id) = value.as_string_id() {
1111+
return Ok(());
1112+
}
1113+
1114+
// String length - no metamethod for strings
1115+
if let Some(string_id) = value.as_string_id() {
10911116
if let Some(s) = vm.object_pool.get_string(string_id) {
1092-
s.as_str().len() as i64
1093-
} else {
1094-
0
1117+
vm.register_stack[base_ptr + a] = LuaValue::integer(s.as_str().len() as i64);
1118+
return Ok(());
10951119
}
1096-
} else {
1097-
return Err(vm.error(format!("attempt to get length of {}", value.type_name())));
1098-
};
1120+
}
10991121

1100-
let result = LuaValue::integer(len);
1101-
vm.register_stack[base_ptr + a] = result;
1102-
Ok(())
1122+
Err(vm.error(format!("attempt to get length of {}", value.type_name())))
11031123
}
11041124

11051125
/// MmBin: Metamethod binary operation (register, register)

crates/luars/src/lua_vm/mod.rs

Lines changed: 34 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1134,26 +1134,21 @@ impl LuaVM {
11341134
/// Fast path for calling CFunction metamethods with 2 arguments
11351135
/// Used by __index, __newindex, etc. Avoids Vec allocation.
11361136
/// Returns the first return value.
1137+
/// OPTIMIZED: Skip expensive get_function lookup by using a fixed offset from current base
11371138
#[inline(always)]
11381139
pub fn call_cfunc_metamethod_2(
11391140
&mut self,
11401141
cfunc: crate::lua_value::CFunction,
11411142
arg1: LuaValue,
11421143
arg2: LuaValue,
11431144
) -> LuaResult<Option<LuaValue>> {
1144-
// Calculate new base position - use current frame's top area
1145+
// Fast path: use a fixed offset from current base (256 slots is enough for most cases)
1146+
// This avoids the expensive object_pool.get_function lookup
11451147
let new_base = if self.frame_count > 0 {
11461148
let current_frame = &self.frames[self.frame_count - 1];
1147-
let caller_base = current_frame.base_ptr as usize;
1148-
let caller_max_stack = if let Some(func_id) = current_frame.get_function_id() {
1149-
self.object_pool
1150-
.get_function(func_id)
1151-
.map(|f| f.chunk.max_stack_size)
1152-
.unwrap_or(256)
1153-
} else {
1154-
256
1155-
};
1156-
caller_base + caller_max_stack
1149+
// Place the nested call's frame at a fixed 256-slot offset above the caller's base_ptr
1150+
// NOTE: this avoids overwriting the caller's stack only while the caller uses at most 256 slots
1151+
(current_frame.base_ptr as usize) + 256
11571152
} else {
11581153
0
11591154
};
@@ -1162,9 +1157,12 @@ impl LuaVM {
11621157
self.ensure_stack_capacity(new_base + stack_size);
11631158

11641159
// Set up arguments directly (no Vec allocation)
1165-
self.register_stack[new_base] = LuaValue::cfunction(cfunc);
1166-
self.register_stack[new_base + 1] = arg1;
1167-
self.register_stack[new_base + 2] = arg2;
1160+
unsafe {
1161+
let base = self.register_stack.as_mut_ptr().add(new_base);
1162+
*base = LuaValue::cfunction(cfunc);
1163+
*base.add(1) = arg1;
1164+
*base.add(2) = arg2;
1165+
}
11681166

11691167
// Create C function frame
11701168
let temp_frame = LuaCallFrame::new_c_function(new_base, stack_size);
@@ -1188,6 +1186,7 @@ impl LuaVM {
11881186

11891187
/// Fast path for calling CFunction metamethods with 1 argument
11901188
/// Used by __len, __unm, __bnot, etc. Avoids Vec allocation.
1189+
/// OPTIMIZED: Skip expensive get_function lookup
11911190
#[inline(always)]
11921191
pub fn call_cfunc_metamethod_1(
11931192
&mut self,
@@ -1196,25 +1195,19 @@ impl LuaVM {
11961195
) -> LuaResult<Option<LuaValue>> {
11971196
let new_base = if self.frame_count > 0 {
11981197
let current_frame = &self.frames[self.frame_count - 1];
1199-
let caller_base = current_frame.base_ptr as usize;
1200-
let caller_max_stack = if let Some(func_id) = current_frame.get_function_id() {
1201-
self.object_pool
1202-
.get_function(func_id)
1203-
.map(|f| f.chunk.max_stack_size)
1204-
.unwrap_or(256)
1205-
} else {
1206-
256
1207-
};
1208-
caller_base + caller_max_stack
1198+
(current_frame.base_ptr as usize) + 256
12091199
} else {
12101200
0
12111201
};
12121202

12131203
let stack_size = 2; // func + 1 arg
12141204
self.ensure_stack_capacity(new_base + stack_size);
12151205

1216-
self.register_stack[new_base] = LuaValue::cfunction(cfunc);
1217-
self.register_stack[new_base + 1] = arg1;
1206+
unsafe {
1207+
let base = self.register_stack.as_mut_ptr().add(new_base);
1208+
*base = LuaValue::cfunction(cfunc);
1209+
*base.add(1) = arg1;
1210+
}
12181211

12191212
let temp_frame = LuaCallFrame::new_c_function(new_base, stack_size);
12201213
self.push_frame(temp_frame);
@@ -1236,6 +1229,7 @@ impl LuaVM {
12361229

12371230
/// Fast path for calling CFunction metamethods with 3 arguments
12381231
/// Used by __newindex. Avoids Vec allocation.
1232+
/// OPTIMIZED: Skip expensive get_function lookup
12391233
#[inline(always)]
12401234
pub fn call_cfunc_metamethod_3(
12411235
&mut self,
@@ -1246,27 +1240,21 @@ impl LuaVM {
12461240
) -> LuaResult<Option<LuaValue>> {
12471241
let new_base = if self.frame_count > 0 {
12481242
let current_frame = &self.frames[self.frame_count - 1];
1249-
let caller_base = current_frame.base_ptr as usize;
1250-
let caller_max_stack = if let Some(func_id) = current_frame.get_function_id() {
1251-
self.object_pool
1252-
.get_function(func_id)
1253-
.map(|f| f.chunk.max_stack_size)
1254-
.unwrap_or(256)
1255-
} else {
1256-
256
1257-
};
1258-
caller_base + caller_max_stack
1243+
(current_frame.base_ptr as usize) + 256
12591244
} else {
12601245
0
12611246
};
12621247

12631248
let stack_size = 4; // func + 3 args
12641249
self.ensure_stack_capacity(new_base + stack_size);
12651250

1266-
self.register_stack[new_base] = LuaValue::cfunction(cfunc);
1267-
self.register_stack[new_base + 1] = arg1;
1268-
self.register_stack[new_base + 2] = arg2;
1269-
self.register_stack[new_base + 3] = arg3;
1251+
unsafe {
1252+
let base = self.register_stack.as_mut_ptr().add(new_base);
1253+
*base = LuaValue::cfunction(cfunc);
1254+
*base.add(1) = arg1;
1255+
*base.add(2) = arg2;
1256+
*base.add(3) = arg3;
1257+
}
12701258

12711259
let temp_frame = LuaCallFrame::new_c_function(new_base, stack_size);
12721260
self.push_frame(temp_frame);
@@ -2408,8 +2396,8 @@ impl LuaVM {
24082396
self.pop_frame_discard();
24092397
}
24102398

2411-
// Return error - the actual message is stored in vm.error_message
2412-
let msg = self.error_message.clone();
2399+
// Return error - take the message to avoid allocation
2400+
let msg = std::mem::take(&mut self.error_message);
24132401
let error_str = self.create_string(&msg);
24142402

24152403
Ok((false, vec![error_str]))
@@ -2480,19 +2468,10 @@ impl LuaVM {
24802468
LuaValueKind::CFunction => {
24812469
let cfunc = func.as_cfunction().unwrap();
24822470

2483-
// Calculate new base position
2471+
// OPTIMIZED: Use fixed offset instead of expensive get_function lookup
24842472
let new_base = if self.frame_count > 0 {
24852473
let current_frame = &self.frames[self.frame_count - 1];
2486-
let caller_base = current_frame.base_ptr as usize;
2487-
let caller_max_stack = if let Some(func_id) = current_frame.get_function_id() {
2488-
self.object_pool
2489-
.get_function(func_id)
2490-
.map(|f| f.chunk.max_stack_size)
2491-
.unwrap_or(256)
2492-
} else {
2493-
256
2494-
};
2495-
caller_base + caller_max_stack
2474+
(current_frame.base_ptr as usize) + 256
24962475
} else {
24972476
0
24982477
};
@@ -2543,20 +2522,10 @@ impl LuaVM {
25432522
)
25442523
};
25452524

2546-
// Calculate new base
2525+
// OPTIMIZED: Use fixed offset instead of expensive get_function lookup
25472526
let new_base = if self.frame_count > 0 {
25482527
let current_frame = &self.frames[self.frame_count - 1];
2549-
let caller_base = current_frame.base_ptr as usize;
2550-
let caller_max_stack =
2551-
if let Some(caller_func_id) = current_frame.get_function_id() {
2552-
self.object_pool
2553-
.get_function(caller_func_id)
2554-
.map(|f| f.chunk.max_stack_size)
2555-
.unwrap_or(256)
2556-
} else {
2557-
256
2558-
};
2559-
caller_base + caller_max_stack
2528+
(current_frame.base_ptr as usize) + 256
25602529
} else {
25612530
0
25622531
};

crates/luars/src/stdlib/basic.rs

Lines changed: 49 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -355,42 +355,74 @@ fn lua_next(vm: &mut LuaVM) -> LuaResult<MultiValue> {
355355
}
356356

357357
/// pcall(f [, arg1, ...]) - Protected call
358+
/// OPTIMIZED: Avoid Vec allocations on success path
358359
fn lua_pcall(vm: &mut LuaVM) -> LuaResult<MultiValue> {
359360
// pcall(f, arg1, arg2, ...) -> status, result or error
360361

361-
// Get the function to call (argument 1)
362-
let func = require_arg(vm, 1, "pcall")?;
362+
// Get frame info to read args directly
363+
let frame = vm.current_frame();
364+
let base_ptr = frame.base_ptr as usize;
365+
let top = frame.top as usize;
366+
367+
// Arg 1 is the function (at base_ptr + 1)
368+
let func = if top > 1 {
369+
vm.register_stack[base_ptr + 1]
370+
} else {
371+
return Err(vm.error("pcall() requires argument 1".to_string()));
372+
};
363373

364-
// Get all arguments after the function
365-
let all_args = get_args(vm);
366-
let args: Vec<LuaValue> = if all_args.len() > 1 {
367-
all_args[1..].to_vec()
374+
// Collect remaining args (2..top) into a small vec
375+
// Most pcalls have 0-3 args, so this is fast
376+
let arg_count = if top > 2 { top - 2 } else { 0 };
377+
let args: Vec<LuaValue> = if arg_count > 0 {
378+
(2..top)
379+
.map(|i| vm.register_stack[base_ptr + i])
380+
.collect()
368381
} else {
369382
Vec::new()
370383
};
371384

372385
// Use protected_call from VM
373386
let (success, results) = vm.protected_call(func, args)?;
374387

375-
// Return status and results
376-
let mut return_values = vec![LuaValue::boolean(success)];
388+
// Return status and results - preallocate with capacity
389+
let mut return_values = Vec::with_capacity(1 + results.len());
390+
return_values.push(LuaValue::boolean(success));
377391
return_values.extend(results);
378392

379393
Ok(MultiValue::multiple(return_values))
380394
}
381395

382396
/// xpcall(f, msgh [, arg1, ...]) - Protected call with error handler
397+
/// OPTIMIZED: Avoid Vec allocations
383398
fn lua_xpcall(vm: &mut LuaVM) -> LuaResult<MultiValue> {
384399
// xpcall(f, msgh, arg1, arg2, ...) -> status, result or error
385-
// Get the function to call (argument 1)
386-
let func = require_arg(vm, 1, "xpcall")?;
387-
// Get the error handler (argument 2)
388-
let err_handler = require_arg(vm, 2, "xpcall")?;
389-
390-
// Get all arguments after the function and error handler
391-
let all_args = get_args(vm);
392-
let args: Vec<LuaValue> = if all_args.len() > 2 {
393-
all_args[3..].to_vec()
400+
401+
// Get frame info to read args directly
402+
let frame = vm.current_frame();
403+
let base_ptr = frame.base_ptr as usize;
404+
let top = frame.top as usize;
405+
406+
// Arg 1 is the function (at base_ptr + 1)
407+
let func = if top > 1 {
408+
vm.register_stack[base_ptr + 1]
409+
} else {
410+
return Err(vm.error("xpcall() requires argument 1".to_string()));
411+
};
412+
413+
// Arg 2 is the error handler (at base_ptr + 2)
414+
let err_handler = if top > 2 {
415+
vm.register_stack[base_ptr + 2]
416+
} else {
417+
return Err(vm.error("xpcall() requires argument 2".to_string()));
418+
};
419+
420+
// Collect remaining args (3..top) into a small vec
421+
let arg_count = if top > 3 { top - 3 } else { 0 };
422+
let args: Vec<LuaValue> = if arg_count > 0 {
423+
(3..top)
424+
.map(|i| vm.register_stack[base_ptr + i])
425+
.collect()
394426
} else {
395427
Vec::new()
396428
};

0 commit comments

Comments
 (0)