Skip to content

Commit af8f788

Browse files
committed
optimize table operation
1 parent b3542ae commit af8f788

File tree

6 files changed

+112
-75
lines changed

6 files changed

+112
-75
lines changed

crates/luars/src/compiler/expr.rs

Lines changed: 34 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2757,36 +2757,36 @@ fn compile_table_expr_to(
27572757
}
27582758
}
27592759

2760-
// Helper function to encode table size (Lua's int2fb encoding)
2761-
fn int2fb(x: usize) -> u32 {
2762-
if x < 8 {
2763-
x as u32
2760+
// Helper function to compute ceil(log2(x)) + 1 for hash size encoding
2761+
// This matches Lua's encoding: rb = (hsize != 0) ? luaO_ceillog2(hsize) + 1 : 0
2762+
fn ceillog2_plus1(x: usize) -> u32 {
2763+
if x == 0 {
2764+
0
2765+
} else if x == 1 {
2766+
1
27642767
} else {
2765-
let mut e = 0;
2766-
let mut x = x - 1;
2767-
while x >= 16 {
2768-
x = (x + 1) >> 1;
2769-
e += 1;
2770-
}
2771-
if x < 8 {
2772-
((e + 1) << 3 | x) as u32
2773-
} else {
2774-
((e + 2) << 3 | (x - 8)) as u32
2775-
}
2768+
// ceil(log2(x)) = number of bits needed to represent x-1, which is floor(log2(x-1)) + 1
2769+
// For x > 1: ceil(log2(x)) = usize::BITS - (x - 1).leading_zeros()
2770+
let bits = usize::BITS - (x - 1).leading_zeros();
2771+
bits + 1 // +1 as per Lua encoding
27762772
}
27772773
}
27782774

27792775
// Create table with size hints
2780-
// NEWTABLE A B C: B = hash size (encoded), C = array size (encoded)
2781-
let b_param = int2fb(hash_count);
2782-
let c_param = int2fb(array_count);
2776+
// NEWTABLE A B C k: B = ceil(log2(hash_size)) + 1 (0 if no hash part), C = array_size % 256
2777+
// EXTRAARG contains array_size / 256 when k=1
2778+
const MAXARG_C: usize = 255;
2779+
let b_param = ceillog2_plus1(hash_count);
2780+
let extra = array_count / (MAXARG_C + 1); // higher bits of array size
2781+
let c_param = (array_count % (MAXARG_C + 1)) as u32; // lower bits of array size
2782+
let k = if extra > 0 { 1 } else { 0 };
27832783
emit(
27842784
c,
2785-
Instruction::encode_abc(OpCode::NewTable, reg, b_param, c_param),
2785+
Instruction::encode_abck(OpCode::NewTable, reg, b_param, c_param, k),
27862786
);
27872787

2788-
// EXTRAARG instruction (always 0 for now, used for extended parameters)
2789-
emit(c, Instruction::create_ax(OpCode::ExtraArg, 0));
2788+
// EXTRAARG instruction for extended array size
2789+
emit(c, Instruction::create_ax(OpCode::ExtraArg, extra as u32));
27902790

27912791
if fields.is_empty() {
27922792
return Ok(reg);
@@ -2913,8 +2913,8 @@ fn compile_table_expr_to(
29132913
// key is a numeric literal - try SETI optimization
29142914
if !number_token.is_float() {
29152915
let int_value = number_token.get_int_value();
2916-
// SETI: B field is sB (signed byte), range -128 to 127
2917-
if int_value >= -128 && int_value <= 127 {
2916+
// SETI: B field is unsigned byte, range 0-255
2917+
if int_value >= 0 && int_value <= 255 {
29182918
// Try to compile value as constant first (for RK optimization)
29192919
let (value_operand, use_constant) =
29202920
if let Some(value_expr) = field.get_value_expr() {
@@ -2929,9 +2929,8 @@ fn compile_table_expr_to(
29292929
(r, false)
29302930
};
29312931

2932-
// Use SETI: R(A)[sB] := RK(C) where sB is signed byte
2933-
// Encode sB: add OFFSET_SB (128)
2934-
let encoded_b = (int_value + 128) as u32;
2932+
// Use SETI: R(A)[B] := RK(C) where B is unsigned byte
2933+
let encoded_b = int_value as u32;
29352934
emit(
29362935
c,
29372936
Instruction::create_abck(
@@ -2964,8 +2963,8 @@ fn compile_table_expr_to(
29642963
LuaIndexKey::Expr(key_expr) => {
29652964
// key is an expression - try to evaluate as constant integer for SETI
29662965
if let Some(int_val) = try_eval_const_int(&key_expr) {
2967-
// SETI: B field is sB (signed byte), range -128 to 127
2968-
if int_val >= -128 && int_val <= 127 {
2966+
// SETI: B field is unsigned byte, range 0-255
2967+
if int_val >= 0 && int_val <= 255 {
29692968
// Use SETI for small integer keys
29702969
let (value_operand, use_constant) =
29712970
if let Some(value_expr) = field.get_value_expr() {
@@ -2980,8 +2979,8 @@ fn compile_table_expr_to(
29802979
(r, false)
29812980
};
29822981

2983-
// Encode sB: add OFFSET_SB (128)
2984-
let encoded_b = (int_val + 128) as u32;
2982+
// B is unsigned byte
2983+
let encoded_b = int_val as u32;
29852984
emit(
29862985
c,
29872986
Instruction::create_abck(
@@ -3128,13 +3127,12 @@ pub fn compile_var_expr(c: &mut Compiler, var: &LuaVarExpr, value_reg: u32) -> R
31283127

31293128
match index_key {
31303129
LuaIndexKey::Integer(number_token) => {
3131-
// Optimized: table[integer] = value -> SETI A sB C k
3132-
// B field is sB (signed byte), range -128 to 127
3130+
// Optimized: table[integer] = value -> SETI A B C k
3131+
// B field is unsigned byte, range 0-255
31333132
let int_value = number_token.get_int_value();
3134-
if int_value >= -128 && int_value <= 127 {
3135-
// Use SETI: R(A)[sB] := RK(C)
3136-
// Encode sB: add OFFSET_SB (128) to get 0-255 range
3137-
let encoded_b = (int_value + 128) as u32;
3133+
if int_value >= 0 && int_value <= 255 {
3134+
// Use SETI: R(A)[B] := RK(C)
3135+
let encoded_b = int_value as u32;
31383136
emit(
31393137
c,
31403138
Instruction::encode_abc(OpCode::SetI, table_reg, encoded_b, value_reg),

crates/luars/src/lua_value/lua_table.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,8 @@ impl LuaTable {
240240

241241
/// Fast integer key access - O(1) for array part
242242
/// Ultra-optimized hot path for ipairs iterations
243+
/// Note: This only checks the array part for performance.
244+
/// Use get_int_full() if the value might be in the hash part.
243245
#[inline(always)]
244246
pub fn get_int(&self, key: i64) -> Option<LuaValue> {
245247
if key > 0 {
@@ -257,6 +259,18 @@ impl LuaTable {
257259
None
258260
}
259261

262+
/// Integer key access that also checks hash part
263+
/// Used by GETI so that integer keys stored in the hash part are still found
264+
#[inline]
265+
pub fn get_int_full(&self, key: i64) -> Option<LuaValue> {
266+
// First try array part (fast path)
267+
if let Some(val) = self.get_int(key) {
268+
return Some(val);
269+
}
270+
// Fall back to hash part
271+
self.get_from_hash(&LuaValue::integer(key))
272+
}
273+
260274
/// Optimized string key access using &str - avoids LuaValue allocation
261275
/// This is a hot path for table access with string literals
262276
#[inline(always)]

crates/luars/src/lua_vm/execute/control_instructions.rs

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -319,17 +319,22 @@ pub fn exec_lt(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaR
319319
};
320320

321321
// OPTIMIZATION: Direct type tag comparison (inline integer/float checks)
322-
use crate::lua_value::{TAG_FLOAT, TAG_INTEGER, TAG_STRING, TYPE_MASK};
322+
use crate::lua_value::TYPE_MASK;
323323
let left_tag = left.primary & TYPE_MASK;
324324
let right_tag = right.primary & TYPE_MASK;
325325

326326
// Combined type check for fast paths (single branch!)
327-
let combined_tags = (left_tag << 16) | right_tag;
328-
const INT_INT: u64 = (TAG_INTEGER << 16) | TAG_INTEGER;
329-
const FLOAT_FLOAT: u64 = (TAG_FLOAT << 16) | TAG_FLOAT;
330-
const INT_FLOAT: u64 = (TAG_INTEGER << 16) | TAG_FLOAT;
331-
const FLOAT_INT: u64 = (TAG_FLOAT << 16) | TAG_INTEGER;
332-
const STRING_STRING: u64 = (TAG_STRING << 16) | TAG_STRING;
327+
// Note: Shift TAG values right by 48 bits to get small values (0-15) for combining
328+
let left_tag_small = left_tag >> 48;
329+
let right_tag_small = right_tag >> 48;
330+
let combined_tags = (left_tag_small << 4) | right_tag_small;
331+
332+
// Small tag values after >> 48 (hard-coded; keep in sync with TAG_INTEGER/TAG_FLOAT/TAG_STRING in crate::lua_value): TAG_INTEGER=3, TAG_FLOAT=4, TAG_STRING=5
333+
const INT_INT: u64 = (3 << 4) | 3; // 0x33
334+
const FLOAT_FLOAT: u64 = (4 << 4) | 4; // 0x44
335+
const INT_FLOAT: u64 = (3 << 4) | 4; // 0x34
336+
const FLOAT_INT: u64 = (4 << 4) | 3; // 0x43
337+
const STRING_STRING: u64 = (5 << 4) | 5; // 0x55
333338

334339
let is_less = if combined_tags == INT_INT {
335340
// Fast integer path - single branch!
@@ -425,17 +430,22 @@ pub fn exec_le(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaR
425430
};
426431

427432
// OPTIMIZATION: Direct type tag comparison with combined_tags (like LT)
428-
use crate::lua_value::{TAG_FLOAT, TAG_INTEGER, TAG_STRING, TYPE_MASK};
433+
use crate::lua_value::TYPE_MASK;
429434
let left_tag = left.primary & TYPE_MASK;
430435
let right_tag = right.primary & TYPE_MASK;
431436

432437
// Combined type check for fast paths (single branch!)
433-
let combined_tags = (left_tag << 16) | right_tag;
434-
const INT_INT: u64 = (TAG_INTEGER << 16) | TAG_INTEGER;
435-
const FLOAT_FLOAT: u64 = (TAG_FLOAT << 16) | TAG_FLOAT;
436-
const INT_FLOAT: u64 = (TAG_INTEGER << 16) | TAG_FLOAT;
437-
const FLOAT_INT: u64 = (TAG_FLOAT << 16) | TAG_INTEGER;
438-
const STRING_STRING: u64 = (TAG_STRING << 16) | TAG_STRING;
438+
// Note: Shift TAG values right by 48 bits to get small values (0-15) for combining
439+
let left_tag_small = left_tag >> 48;
440+
let right_tag_small = right_tag >> 48;
441+
let combined_tags = (left_tag_small << 4) | right_tag_small;
442+
443+
// Small tag values after >> 48 (hard-coded; keep in sync with TAG_INTEGER/TAG_FLOAT/TAG_STRING in crate::lua_value): TAG_INTEGER=3, TAG_FLOAT=4, TAG_STRING=5
444+
const INT_INT: u64 = (3 << 4) | 3; // 0x33
445+
const FLOAT_FLOAT: u64 = (4 << 4) | 4; // 0x44
446+
const INT_FLOAT: u64 = (3 << 4) | 4; // 0x34
447+
const FLOAT_INT: u64 = (4 << 4) | 3; // 0x43
448+
const STRING_STRING: u64 = (5 << 4) | 5; // 0x55
439449

440450
let is_less_or_equal = if combined_tags == INT_INT {
441451
// Fast integer path - single branch!

crates/luars/src/lua_vm/execute/table_instructions.rs

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,43 +6,54 @@ use crate::lua_vm::{Instruction, LuaCallFrame, LuaResult, LuaVM};
66

77
/// NEWTABLE A B C k
88
/// R[A] := {} (size = B,C)
9+
/// B = ceil(log2(hash_size)) + 1 (0 means no hash part)
10+
/// C = array_size % 256
11+
/// k = 1 means the EXTRAARG that follows holds array_size / 256 (EXTRAARG is always present and is always skipped)
912
/// OPTIMIZED: Fast path for common empty/small table case
1013
#[inline(always)]
1114
pub fn exec_newtable(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) {
1215
let a = Instruction::get_a(instr) as usize;
13-
let b = Instruction::get_b(instr);
14-
let c = Instruction::get_c(instr);
16+
let b = Instruction::get_b(instr); // log2(hash_size) + 1
17+
let c = Instruction::get_c(instr); // array_size % 256
18+
let k = Instruction::get_k(instr); // true if EXTRAARG has high bits of array_size
19+
20+
// Decode hash size: if b > 0, hash_size = 2^(b-1)
21+
let hash_size = if b > 0 {
22+
1usize << (b - 1)
23+
} else {
24+
0
25+
};
1526

1627
let (base_ptr, func_value) = unsafe {
1728
(*frame_ptr).pc += 1; // Skip EXTRAARG
1829
((*frame_ptr).base_ptr, (*frame_ptr).function_value)
1930
};
2031

21-
// Calculate array size hint
22-
let array_size = if b > 0 {
23-
// FAST PATH: Small table, no EXTRAARG needed
24-
(b - 1) as usize
25-
} else {
32+
// Calculate array size - C is low bits, EXTRAARG has high bits when k=1
33+
let array_size = if k {
2634
// Need to read EXTRAARG for large arrays
2735
let pc = unsafe { (*frame_ptr).pc - 1 }; // We already incremented pc
2836
// Use new ID-based API to get function and read EXTRAARG
2937
if let Some(func_id) = func_value.as_function_id() {
3038
if let Some(func_ref) = vm.object_pool.get_function(func_id) {
3139
if pc < func_ref.chunk.code.len() {
32-
Instruction::get_ax(func_ref.chunk.code[pc]) as usize
40+
let extra = Instruction::get_ax(func_ref.chunk.code[pc]) as usize;
41+
extra * 256 + c as usize // MAXARG_C + 1 = 256
3342
} else {
34-
0
43+
c as usize
3544
}
3645
} else {
37-
0
46+
c as usize
3847
}
3948
} else {
40-
0
49+
c as usize
4150
}
51+
} else {
52+
c as usize
4253
};
4354

44-
// Create new table with size hints
45-
let table = vm.create_table(array_size, c as usize);
55+
// Create new table with size hints (array_size, hash_size)
56+
let table = vm.create_table(array_size, hash_size);
4657

4758
// Store in register - use unchecked for speed
4859
unsafe {
@@ -170,13 +181,13 @@ pub fn exec_settable(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -
170181
}
171182

172183
/// GETI A B C
173-
/// R[A] := R[B][sC:integer]
174-
/// OPTIMIZED: Direct integer access using get_int() without creating LuaValue key
184+
/// R[A] := R[B][C:integer]
185+
/// OPTIMIZED: Integer access via get_int_full(): the array part is tried first with the raw integer; a LuaValue key is only built for the hash-part fallback
175186
#[inline(always)]
176187
pub fn exec_geti(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
177188
let a = Instruction::get_a(instr) as usize;
178189
let b = Instruction::get_b(instr) as usize;
179-
let c = Instruction::get_sc(instr) as i64; // sC is the signed integer index
190+
let c = Instruction::get_c(instr) as i64; // C is unsigned integer index
180191

181192
let base_ptr = unsafe { (*frame_ptr).base_ptr };
182193
let table = unsafe { *vm.register_stack.get_unchecked(base_ptr + b) };
@@ -186,8 +197,9 @@ pub fn exec_geti(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> Lu
186197
// SAFETY: table_id is valid because it came from as_table_id()
187198
let lua_table = unsafe { vm.object_pool.get_table_unchecked(table_id) };
188199

189-
// OPTIMIZATION: Use get_int directly - C is already the integer index!
190-
if let Some(val) = lua_table.get_int(c) {
200+
// Use get_int_full to check both array and hash parts
201+
// This is necessary because integer keys may be stored in hash if array wasn't pre-allocated
202+
if let Some(val) = lua_table.get_int_full(c) {
191203
unsafe { *vm.register_stack.get_unchecked_mut(base_ptr + a) = val };
192204
return Ok(());
193205
}
@@ -215,7 +227,7 @@ pub fn exec_geti(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> Lu
215227
#[inline(always)]
216228
pub fn exec_seti(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
217229
let a = Instruction::get_a(instr) as usize;
218-
let b = Instruction::get_sb(instr) as i64; // B is already the integer key
230+
let b = Instruction::get_b(instr) as i64; // B is unsigned integer key
219231
let c = Instruction::get_c(instr) as usize;
220232
let k = Instruction::get_k(instr);
221233

crates/luars/src/lua_vm/opcode/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,11 @@ impl Instruction {
397397
Self::create_abc(op, a, b, c)
398398
}
399399

400+
#[inline(always)]
401+
pub fn encode_abck(op: OpCode, a: u32, b: u32, c: u32, k: u32) -> u32 {
402+
Self::create_abck(op, a, b, c, k != 0)
403+
}
404+
400405
#[inline(always)]
401406
pub fn encode_abx(op: OpCode, a: u32, bx: u32) -> u32 {
402407
Self::create_abx(op, a, bx)

crates/luars_interpreter/src/bin/bytecode_dump.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -165,15 +165,13 @@ fn dump_chunk(chunk: &Chunk, name: &str, depth: usize) {
165165
}
166166
OpCode::Len => format!("LEN {} {}", a, b),
167167
OpCode::GetI => {
168-
// GETI A B sC: R[A] := R[B][sC]
169-
let sc = Instruction::get_sc(instr);
170-
format!("GetI {} {} {}", a, b, sc)
168+
// GETI A B C: R[A] := R[B][C] - C is unsigned integer index
169+
format!("GetI {} {} {}", a, b, c)
171170
}
172171
OpCode::SetI => {
173-
// SETI A sB C/k: R[A][sB] := RK(C)
174-
let sb = Instruction::get_sb(instr);
172+
// SETI A B C/k: R[A][B] := RK(C) - B is unsigned integer index
175173
let k_str = if k { "k" } else { "" };
176-
format!("SetI {} {} {}{}", a, sb, c, k_str)
174+
format!("SetI {} {} {}{}", a, b, c, k_str)
177175
}
178176
OpCode::EqK => {
179177
// EQK A B k: if ((R[A] == K[B]) ~= k) then pc++

0 commit comments

Comments
 (0)