Skip to content

Commit e45e43e

Browse files
committed
update
1 parent 30fa5d7 commit e45e43e

File tree

7 files changed

+271
-144
lines changed

7 files changed

+271
-144
lines changed

crates/luars/src/gc/object_pool.rs

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -504,11 +504,10 @@ impl ObjectPoolV2 {
504504
#[inline]
505505
pub fn create_string(&mut self, s: &str) -> StringId {
506506
let len = s.len();
507+
let hash = Self::hash_string(s);
507508

508509
// Intern short strings for deduplication
509510
if len <= self.max_intern_length {
510-
let hash = Self::hash_string(s);
511-
512511
// Use closure to compare string content (handles hash collisions correctly)
513512
let compare = |id: StringId| -> bool {
514513
self.strings
@@ -523,10 +522,10 @@ impl ObjectPoolV2 {
523522
return existing_id;
524523
}
525524
Err(insert_idx) => {
526-
// Not found, create new interned string
525+
// Not found, create new interned string with pre-computed hash
527526
let gc_string = GcString {
528527
header: GcHeader::default(),
529-
data: LuaString::new(s.to_string()),
528+
data: LuaString::with_hash(s.to_string(), hash),
530529
};
531530
let id = StringId(self.strings.alloc(gc_string));
532531
self.string_intern.insert(hash, id, insert_idx);
@@ -538,10 +537,10 @@ impl ObjectPoolV2 {
538537
}
539538
}
540539
} else {
541-
// Long strings are not interned
540+
// Long strings are not interned, but still use pre-computed hash
542541
let gc_string = GcString {
543542
header: GcHeader::default(),
544-
data: LuaString::new(s.to_string()),
543+
data: LuaString::with_hash(s.to_string(), hash),
545544
};
546545
StringId(self.strings.alloc(gc_string))
547546
}
@@ -551,10 +550,9 @@ impl ObjectPoolV2 {
551550
#[inline]
552551
pub fn create_string_owned(&mut self, s: String) -> StringId {
553552
let len = s.len();
553+
let hash = Self::hash_string(&s);
554554

555555
if len <= self.max_intern_length {
556-
let hash = Self::hash_string(&s);
557-
558556
// Use closure to compare string content
559557
let compare = |id: StringId| -> bool {
560558
self.strings
@@ -569,10 +567,10 @@ impl ObjectPoolV2 {
569567
return existing_id;
570568
}
571569
Err(insert_idx) => {
572-
// Not found, create new interned string with owned data
570+
// Not found, create new interned string with owned data and pre-computed hash
573571
let gc_string = GcString {
574572
header: GcHeader::default(),
575-
data: LuaString::new(s),
573+
data: LuaString::with_hash(s, hash),
576574
};
577575
let id = StringId(self.strings.alloc(gc_string));
578576
self.string_intern.insert(hash, id, insert_idx);
@@ -584,9 +582,10 @@ impl ObjectPoolV2 {
584582
}
585583
}
586584
} else {
585+
// Long strings use pre-computed hash
587586
let gc_string = GcString {
588587
header: GcHeader::default(),
589-
data: LuaString::new(s),
588+
data: LuaString::with_hash(s, hash),
590589
};
591590
StringId(self.strings.alloc(gc_string))
592591
}

crates/luars/src/lua_value/mod.rs

Lines changed: 85 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,30 +44,103 @@ pub use lua_value::{
4444
};
4545

4646
/// Multi-return values from Lua functions
47+
/// OPTIMIZED: Use inline storage for the common cases of zero, one, or two values
4748
#[derive(Debug, Clone)]
4849
pub struct MultiValue {
49-
pub values: Option<Vec<LuaValue>>,
50+
// Inline storage for 0-2 values (no heap allocation when there are at most two values)
51+
pub inline: [LuaValue; 2],
52+
// Count of values stored inline (0, 1, or 2)
53+
pub inline_count: u8,
54+
// Only used when > 2 values
55+
pub overflow: Option<Vec<LuaValue>>,
5056
}
5157

5258
impl MultiValue {
59+
#[inline(always)]
5360
pub fn empty() -> Self {
54-
MultiValue { values: None }
61+
MultiValue {
62+
inline: [LuaValue::nil(), LuaValue::nil()],
63+
inline_count: 0,
64+
overflow: None,
65+
}
5566
}
5667

68+
#[inline(always)]
5769
pub fn single(value: LuaValue) -> Self {
5870
MultiValue {
59-
values: Some(vec![value]),
71+
inline: [value, LuaValue::nil()],
72+
inline_count: 1,
73+
overflow: None,
74+
}
75+
}
76+
77+
#[inline(always)]
78+
pub fn two(v1: LuaValue, v2: LuaValue) -> Self {
79+
MultiValue {
80+
inline: [v1, v2],
81+
inline_count: 2,
82+
overflow: None,
6083
}
6184
}
6285

6386
pub fn multiple(values: Vec<LuaValue>) -> Self {
64-
MultiValue {
65-
values: Some(values),
87+
let len = values.len();
88+
if len == 0 {
89+
Self::empty()
90+
} else if len == 1 {
91+
Self::single(values.into_iter().next().unwrap())
92+
} else if len == 2 {
93+
let mut iter = values.into_iter();
94+
Self::two(iter.next().unwrap(), iter.next().unwrap())
95+
} else {
96+
MultiValue {
97+
inline: [LuaValue::nil(), LuaValue::nil()],
98+
inline_count: 0,
99+
overflow: Some(values),
100+
}
66101
}
67102
}
68103

104+
#[inline(always)]
69105
pub fn all_values(self) -> Vec<LuaValue> {
70-
self.values.unwrap_or_default()
106+
if let Some(v) = self.overflow {
107+
v
108+
} else {
109+
match self.inline_count {
110+
0 => Vec::new(),
111+
1 => vec![self.inline[0]],
112+
2 => vec![self.inline[0], self.inline[1]],
113+
_ => Vec::new(),
114+
}
115+
}
116+
}
117+
118+
/// Get count of return values (optimized, no allocation)
119+
#[inline(always)]
120+
pub fn len(&self) -> usize {
121+
if let Some(ref v) = self.overflow {
122+
v.len()
123+
} else {
124+
self.inline_count as usize
125+
}
126+
}
127+
128+
/// Check if empty: true only when no values are stored inline and no overflow vector is present
129+
#[inline(always)]
130+
pub fn is_empty(&self) -> bool {
131+
self.inline_count == 0 && self.overflow.is_none()
132+
}
133+
134+
/// Get first value (common case, optimized)
135+
#[inline(always)]
136+
pub fn first(&self) -> Option<LuaValue> {
137+
if let Some(ref v) = self.overflow {
138+
v.first().copied()
139+
} else if self.inline_count > 0 {
140+
Some(self.inline[0])
141+
} else {
142+
None
143+
}
71144
}
72145
}
73146

@@ -90,6 +163,12 @@ impl LuaString {
90163
LuaString { data: s, hash }
91164
}
92165

166+
/// Create LuaString with pre-computed hash (avoids double hashing)
167+
#[inline]
168+
pub fn with_hash(s: String, hash: u64) -> Self {
169+
LuaString { data: s, hash }
170+
}
171+
93172
pub fn as_str(&self) -> &str {
94173
&self.data
95174
}

crates/luars/src/lua_vm/execute/control_instructions.rs

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1082,21 +1082,34 @@ fn exec_call_cfunction(
10821082
*frame_ptr_ptr = vm.current_frame_ptr();
10831083
}
10841084

1085-
// Copy return values
1086-
let values = result.all_values();
1085+
// OPTIMIZED: Copy return values without heap allocation for common cases
1086+
let result_len = result.len();
10871087
let num_returns = if return_count == usize::MAX {
1088-
values.len()
1088+
result_len
10891089
} else {
1090-
return_count.min(values.len())
1090+
return_count.min(result_len)
10911091
};
10921092

1093-
if num_returns > 0 {
1094-
unsafe {
1095-
std::ptr::copy_nonoverlapping(
1096-
values.as_ptr(),
1097-
vm.register_stack.as_mut_ptr().add(call_base),
1098-
num_returns,
1099-
);
1093+
// Fast path: copy inline values directly (no Vec allocation)
1094+
if result.overflow.is_none() {
1095+
// Values are stored inline
1096+
if num_returns > 0 {
1097+
vm.register_stack[call_base] = result.inline[0];
1098+
}
1099+
if num_returns > 1 {
1100+
vm.register_stack[call_base + 1] = result.inline[1];
1101+
}
1102+
} else {
1103+
// Slow path: overflow to Vec
1104+
let values = result.all_values();
1105+
if num_returns > 0 {
1106+
unsafe {
1107+
std::ptr::copy_nonoverlapping(
1108+
values.as_ptr(),
1109+
vm.register_stack.as_mut_ptr().add(call_base),
1110+
num_returns,
1111+
);
1112+
}
11001113
}
11011114
}
11021115

crates/luars/src/lua_vm/execute/loop_instructions.rs

Lines changed: 55 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ pub fn exec_forprep(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) ->
131131
/// R[A]+=R[A+2];
132132
/// if R[A] <?= R[A+1] then { pc-=Bx; R[A+3]=R[A] }
133133
///
134-
/// ULTRA-OPTIMIZED: Check counter FIRST (most common path), minimize reads
134+
/// ULTRA-OPTIMIZED: Matches Lua's chgivalue - only update secondary field for integers
135135
#[inline(always)]
136136
pub fn exec_forloop(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> {
137137
let a = Instruction::get_a(instr) as usize;
@@ -141,80 +141,72 @@ pub fn exec_forloop(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) ->
141141
let base_ptr = (*frame_ptr).base_ptr;
142142
let reg_base = vm.register_stack.as_mut_ptr().add(base_ptr + a);
143143

144-
// Read counter first - this is the hot path check
145-
// For integer loops, R[A+1] stores remaining iteration count
146-
let counter = (*reg_base.add(1)).secondary as i64;
147-
148-
// Fast path: integer loop with counter > 0
149-
// Check counter first (most common exit condition)
150-
if counter > 0 {
151-
// Only read other values if we're continuing
152-
let idx_i = (*reg_base).secondary as i64;
153-
let step_i = (*reg_base.add(2)).secondary as i64;
154-
let new_idx = idx_i.wrapping_add(step_i);
155-
156-
// Write back - minimize writes, type tags stay TAG_INTEGER
157-
(*reg_base).secondary = new_idx as u64;
158-
(*reg_base.add(1)).secondary = (counter - 1) as u64;
159-
(*reg_base.add(3)).secondary = new_idx as u64;
160-
161-
(*frame_ptr).pc -= bx;
162-
return Ok(());
163-
}
164-
165-
// Check if this is actually an integer loop (counter == 0 means loop ended)
144+
// Check types first to distinguish integer vs float loop
166145
let idx = *reg_base;
167-
let combined_tags = (idx.primary | (*reg_base.add(1)).primary | (*reg_base.add(2)).primary) & TYPE_MASK;
168-
169-
if combined_tags == TAG_INTEGER {
170-
// Integer loop ended (counter == 0)
171-
return Ok(());
172-
}
173-
174-
// Slow path: float loop
175146
let counter_or_limit = *reg_base.add(1);
176147
let step = *reg_base.add(2);
177148

178-
let step_tag = step.primary & TYPE_MASK;
179-
let limit_tag = counter_or_limit.primary & TYPE_MASK;
180149
let idx_tag = idx.primary & TYPE_MASK;
150+
let limit_tag = counter_or_limit.primary & TYPE_MASK;
151+
let step_tag = step.primary & TYPE_MASK;
181152

182-
if (step_tag == TAG_FLOAT || step_tag == TAG_INTEGER)
183-
&& (limit_tag == TAG_FLOAT || limit_tag == TAG_INTEGER)
184-
&& (idx_tag == TAG_FLOAT || idx_tag == TAG_INTEGER)
185-
{
186-
let idx_f = if idx_tag == TAG_FLOAT {
187-
f64::from_bits(idx.secondary)
188-
} else {
189-
idx.secondary as i64 as f64
190-
};
191-
192-
let limit_f = if limit_tag == TAG_FLOAT {
193-
f64::from_bits(counter_or_limit.secondary)
194-
} else {
195-
counter_or_limit.secondary as i64 as f64
196-
};
153+
// Fast path: pure integer loop (all three values are integers)
154+
if idx_tag == TAG_INTEGER && limit_tag == TAG_INTEGER && step_tag == TAG_INTEGER {
155+
// Read counter - R[A+1] stores remaining iteration count for integer loops
156+
let counter = counter_or_limit.secondary as i64;
197157

198-
let step_f = if step_tag == TAG_FLOAT {
199-
f64::from_bits(step.secondary)
200-
} else {
201-
step.secondary as i64 as f64
202-
};
158+
if counter > 0 {
159+
let idx_i = idx.secondary as i64;
160+
let step_i = step.secondary as i64;
161+
let new_idx = idx_i.wrapping_add(step_i);
203162

204-
let new_idx_f = idx_f + step_f;
205-
let should_continue = if step_f > 0.0 {
206-
new_idx_f <= limit_f
207-
} else {
208-
new_idx_f >= limit_f
209-
};
163+
// Use chgivalue pattern - only update secondary field, type tags stay the same
164+
(*reg_base).secondary = new_idx as u64;
165+
(*reg_base.add(1)).secondary = (counter - 1) as u64;
166+
(*reg_base.add(3)).secondary = new_idx as u64;
210167

211-
if should_continue {
212-
*reg_base = LuaValue::number(new_idx_f);
213-
*reg_base.add(3) = LuaValue::number(new_idx_f);
214168
(*frame_ptr).pc -= bx;
215169
}
170+
// counter is not positive: the loop has ended, so return without jumping back
171+
return Ok(());
172+
}
173+
174+
// Slow path: at least one value is not an integer (floats are handled here; non-numeric values are rejected below)
175+
let idx_f = if idx_tag == TAG_FLOAT {
176+
f64::from_bits(idx.secondary)
177+
} else if idx_tag == TAG_INTEGER {
178+
idx.secondary as i64 as f64
179+
} else {
180+
return Err(vm.error("'for' index must be a number".to_string()));
181+
};
182+
183+
let limit_f = if limit_tag == TAG_FLOAT {
184+
f64::from_bits(counter_or_limit.secondary)
185+
} else if limit_tag == TAG_INTEGER {
186+
counter_or_limit.secondary as i64 as f64
187+
} else {
188+
return Err(vm.error("'for' limit must be a number".to_string()));
189+
};
190+
191+
let step_f = if step_tag == TAG_FLOAT {
192+
f64::from_bits(step.secondary)
193+
} else if step_tag == TAG_INTEGER {
194+
step.secondary as i64 as f64
216195
} else {
217-
return Err(vm.error("'for' values must be numbers".to_string()));
196+
return Err(vm.error("'for' step must be a number".to_string()));
197+
};
198+
199+
let new_idx_f = idx_f + step_f;
200+
let should_continue = if step_f > 0.0 {
201+
new_idx_f <= limit_f
202+
} else {
203+
new_idx_f >= limit_f
204+
};
205+
206+
if should_continue {
207+
*reg_base = LuaValue::number(new_idx_f);
208+
*reg_base.add(3) = LuaValue::number(new_idx_f);
209+
(*frame_ptr).pc -= bx;
218210
}
219211
}
220212

0 commit comments

Comments
 (0)