diff --git a/PERFORMANCE_REPORT.md b/PERFORMANCE_REPORT.md index f2f5a4d3..fb01cccc 100644 --- a/PERFORMANCE_REPORT.md +++ b/PERFORMANCE_REPORT.md @@ -1,232 +1,262 @@ -# Lua-RS Performance Report - -> **Last Updated**: November 30, 2025 -> **Test Environment**: Windows 11, AMD Ryzen 7 5800X, Rust 1.89.0 -> **Lua-RS Version**: main -> **Native Lua Version**: Lua 5.4.6 - -## Executive Summary - -Lua-RS has achieved **production-ready correctness** with **302/302 tests passing (100%)**. The interpreter delivers **40-100%+ of native Lua 5.4 performance** across most operations, with excellent performance in arithmetic and control flow operations. - -### Key Performance Highlights - -🏆 **Excellent Performance (>90% of native)**: -- **Integer addition**: **~220 M ops/sec** - Near native performance -- **Float multiplication**: **~210 M ops/sec** - Near native performance -- **Local variable access**: **~220 M ops/sec** - Extremely fast -- **Nested loops**: **~210 M ops/sec** - Excellent optimization -- **String length**: **~150 M ops/sec** - Faster than native! -- **Table access**: **~115 M ops/sec** - Solid performance -- **String equality**: **~82 M ops/sec** - Fast comparison - -🎯 **Good Performance (>50% of native)**: -- **While loop**: ~125 M ops/sec -- **If-else control**: ~93 M ops/sec -- **Upvalue access**: ~95 M ops/sec -- **Table insertion**: ~50 M ops/sec -- **Simple function call**: ~24 M calls/sec -- **Bitwise operations**: ~80 M ops/sec -- **Integer division**: ~190 M ops/sec - -📊 **Areas for Optimization**: -- **ipairs/pairs iteration**: ~13-15 K iters/sec (vs ~120 K for numeric for) -- **Vararg to table**: ~0.06 M ops/sec (GC overhead) -- **Object creation**: ~40-160 K ops/sec (allocation overhead) - ---- - -## Latest Comprehensive Benchmark Results (November 30, 2025) - -### Core Operations (10M iterations) -| Operation | Performance | Notes | -|-----------|-------------|-------| -| Integer addition | **219 M ops/sec** | Near native | -| Float multiplication | **200 M ops/sec** | Near native | -| Mixed operations | **111 M ops/sec** | Good | -| Local var access | **219 M ops/sec** | Excellent | -| Global var access | **43 M ops/sec** | 5x slower than local | -| Upvalue access | **96 M ops/sec** | Good | - -### Control Flow (10M iterations) -| Operation | Performance | Notes | -|-----------|-------------|-------| -| If-else | **93 M ops/sec** | Good | -| While loop | **121 M ops/sec** | Excellent | -| Repeat-until | **110 M ops/sec** | Good | -| Nested loops | **218 M ops/sec** | Excellent | -| Numeric for | **122 K iters/sec** | Fast | - -### Functions & Closures (1M iterations) -| Operation | Performance | Notes | -|-----------|-------------|-------| -| Simple function call | **22 M calls/sec** | Good | -| Recursive fib(25) | **0.010s** | Acceptable | -| Vararg function | **1.5 M calls/sec** | OK | -| Closure creation | **6.8 M ops/sec** | Good | -| Upvalue read/write | **22 M ops/sec** | Excellent | -| Nested closures | **18 M ops/sec** | Good | - -### Multiple Returns (1M iterations) -| Operation | Performance | Notes | -|-----------|-------------|-------| -| Single return | **34 M ops/sec** | Excellent | -| Triple return | **15 M ops/sec** | Good | -| 10 returns | **4.8 M ops/sec** | OK | -| select('#') | **4.4 M ops/sec** | OK | -| table.pack | **4 M ops/sec** | OK | -| table.unpack | **8.9 M ops/sec** | Good | - -### Tables (1M iterations unless noted) -| Operation | Performance | Notes | -|-----------|-------------|-------| -| Table insertion | **51 M inserts/sec** | 
Excellent | -| Table access | **117 M accesses/sec** | Excellent | -| Hash table (100k) | **0.022s** | Fast | -| # operator | **44 M ops/sec** | Excellent | -| table.insert (end) | **25.7 M ops/sec** | Excellent | -| table.insert (mid) | **8.8 M ops/sec** | Good | -| table.remove | **16.3 M ops/sec** | Good | -| table.concat (1k) | **26 K ops/sec** | OK | -| table.sort (random) | **6.6 K ops/sec** | OK | - -### Iterators (100K iterations × 1000 items) -| Operation | Performance | Notes | -|-----------|-------------|-------| -| Numeric for | **122 K iters/sec** | Fast (baseline) | -| ipairs | **14.8 K iters/sec** | 8x slower than for | -| pairs (array) | **12.7 K iters/sec** | Iterator overhead | -| pairs (hash) | **14 K iters/sec** | Similar | -| next() | **14.9 K iters/sec** | Similar | -| Custom iterator | **11.2 K iters/sec** | Overhead | - -### Strings (100K iterations) -| Operation | Performance | Notes | -|-----------|-------------|-------| -| Concatenation | **2.7 M ops/sec** | Good | -| String length | **185 M ops/sec** | Excellent | -| string.upper | **8.5 M ops/sec** | Good | -| string.lower | **7.9 M ops/sec** | Good | -| string.sub | **7.1 M ops/sec** | Good | -| string.find | **5.1 M ops/sec** | Good | -| string.format | **3.4 M ops/sec** | Good | -| string.match | **1.5 M ops/sec** | OK | -| string.gsub | **1.1 M ops/sec** | OK | -| String equality | **82 M ops/sec** | Excellent | - -### Math Library (5M iterations) -| Operation | Performance | Notes | -|-----------|-------------|-------| -| Integer mul/add/mod | **103 M ops/sec** | Excellent | -| Float mul/add/div | **77 M ops/sec** | Good | -| math.sqrt | **22 M ops/sec** | Good | -| math.sin | **20 M ops/sec** | Good | -| math.floor/ceil | **11 M ops/sec** | OK | -| math.abs | **20 M ops/sec** | Good | -| math.random | **11 M ops/sec** | Good | -| Bitwise ops | **82 M ops/sec** | Excellent | -| Integer division | **170 M ops/sec** | Excellent | -| Power (^2) | **43 M ops/sec** | Good | - -### Metatables & OOP (500K/100K iterations) -| Operation | Performance | Notes | -|-----------|-------------|-------| -| __index (function) | **6 M ops/sec** | Good | -| __index (table) | **19 M ops/sec** | Good | -| __newindex | **7.2 M ops/sec** | Good | -| __call | **13 M ops/sec** | Good | -| __len | **7.3 M ops/sec** | Good | -| rawget | **15.4 M ops/sec** | Good | -| Object creation | **41 K ops/sec** | Allocation overhead | -| Method call | **4.5 M calls/sec** | Good | -| Property access | **56 M ops/sec** | Excellent | - -### Coroutines (100K iterations) -| Operation | Performance | Notes | -|-----------|-------------|-------| -| Create/resume/yield | **27 K cycles/sec** | OK | -| Repeated yield | **5.6 M yields/sec** | Good | -| coroutine.wrap | **22 K ops/sec** | OK | -| coroutine.status | **13 M ops/sec** | Excellent | - -### Error Handling (100K iterations) -| Operation | Performance | Notes | -|-----------|-------------|-------| -| pcall (success) | **4.3 M ops/sec** | Good | -| pcall (error) | **3.6 M ops/sec** | Good | -| xpcall (error) | **1.8 M ops/sec** | OK | -| Direct call | **41 M ops/sec** | Baseline | -| assert (success) | **16 M ops/sec** | Good | - ---- - -## Running Benchmarks - -### Run All Benchmarks -```bash -# Using PowerShell script (compares with native Lua) -.\run_benchmarks.ps1 - -# Run with lua-rs only -.\target\release\lua.exe .\benchmarks\run_all.lua -``` - -### Individual Benchmarks -```bash -.\target\release\lua.exe .\benchmarks\bench_arithmetic.lua -.\target\release\lua.exe 
.\benchmarks\bench_tables.lua
-.\target\release\lua.exe .\benchmarks\bench_strings.lua
-# ... etc
-```
-
-### Benchmark Files (16 total)
-- **Core**: bench_arithmetic, bench_control_flow, bench_locals
-- **Functions**: bench_functions, bench_closures, bench_multiret
-- **Tables**: bench_tables, bench_table_lib, bench_iterators
-- **Strings**: bench_strings, bench_string_lib
-- **Math**: bench_math
-- **Advanced**: bench_metatables, bench_oop, bench_coroutines, bench_errors
-
----
-
-## Performance History
-
-### November 30, 2025 - Comprehensive Benchmarks & Optimizations
-- Added 11 new benchmark files (16 total)
-- Fixed floating-point for loop bug
-- Optimized `call_function_internal` - reduced code by ~300 lines
-- All 302 tests passing
-- Total benchmark runtime: ~120 seconds
-
-### November 29, 2025 - While Loop Optimization
-- Optimized while/repeat loop bytecode generation
-- While loop at **85% of native**
-- Nested loops at **97% of native**
-
-### November 24, 2025 - CallFrame Optimization
-- Implemented code pointer caching in CallFrame
-- Eliminated HashMap lookups in hot paths
-- Major improvements across all benchmarks
-
----
-
-## Architecture Notes
-
-### Performance Characteristics
-- **Local variables are ~5x faster** than global variables
-- **Numeric for is ~8-9x faster** than ipairs/pairs
-- **Property access** is very fast (~56 M ops/sec)
-- **Function calls** are efficient (~22 M calls/sec)
-- **Bitwise operations** are very fast (~82 M ops/sec)
-
-### Known Performance Bottlenecks
-1. **ipairs/pairs iteration**: Iterator protocol overhead
-2. **Object creation**: Allocation and setmetatable overhead
-3. **Vararg to table**: Extra allocation and copying
-4. **Complex pattern matching**: Regex-like overhead
-
-### Optimization Opportunities
-1. Iterator fast-path for ipairs/pairs
-2. Object pooling for common patterns
-3. Inlining for small functions
-4. Better GC tuning for allocation-heavy code
+# Lua-RS vs Native Lua Performance Comparison Report
+
+> **Last Updated**: 2025-12-01
+> **Branch**: gc
+> **Purpose**: Identify performance bottlenecks and provide a reference for follow-up optimization
+
+## Overview
+
+| Metric | Native Lua | Lua-RS | Ratio |
+|------|-----------|--------|------|
+| **Total execution time** | 11.80s | 80.18s | **6.8x slower** |
+
+---
+
+## 🔴 Severe Performance Issues (>5x gap) - Priority: High
+
+### 1. OOP Method Calls (~30x slower)
+| Test case | Lua | Lua-RS | Gap |
+|--------|-----|--------|------|
+| Method call (colon) | 10,638 K/s | 350 K/s | **30x** |
+| Method call (dot) | 11,628 K/s | 399 K/s | **29x** |
+| Inherited method call | 10,000 K/s | 392 K/s | **25x** |
+| Closure method call | 38,462 K/s | 466 K/s | **82x** |
+| Prototype chain (3 levels) | 22,727 K/s | 429 K/s | **53x** |
+
+**Root cause analysis**:
+- Method calls go through `__index` metamethod lookup
+- Every call performs multiple table lookups
+- Possibly unnecessary closure creation or value copying
+
+**Optimization directions**:
+- Add a fast path for `__index` metamethod lookup
+- Cache method lookup results (a sketch follows section 3 below)
+- Reduce per-call overhead
+
+---
+
+### 2. Iterators (~50-60x slower)
+| Test case | Lua | Lua-RS | Gap |
+|--------|-----|--------|------|
+| Custom stateless iter | 24.88 K/s | 0.44 K/s | **57x** |
+| Multi-value iterator | 16.92 K/s | 0.42 K/s | **40x** |
+| Closure iterator (100) | 256.41 K/s | 4.03 K/s | **64x** |
+
+**Root cause analysis**:
+- Inefficient `TFORCALL` instruction implementation
+- High per-call overhead when invoking the iterator function
+- Slow multi-return-value handling
+
+**Optimization directions**:
+- Optimize the `TFORCALL`/`TFORLOOP` instructions
+- Reduce the cost of calling the iterator function
+- Optimize multi-return-value handling
+
+---
+
+### 3. Coroutine Creation (~9x slower)
+| Test case | Lua | Lua-RS | Gap |
+|--------|-----|--------|------|
+| Create/resume/yield | 487.80 K/s | 56.13 K/s | **8.7x** |
+| coroutine.wrap | 2,000 K/s | 73.75 K/s | **27x** |
+
+**Root cause analysis**:
+- Coroutine creation allocates too much memory
+- Coroutine state management is expensive
+
+**Optimization directions**:
+- Slim down the coroutine state structure
+- Reuse coroutine stack space
+
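+Most of the method-call gap above comes from redoing the same `__index` lookup on every call. Below is a minimal, self-contained sketch of the inline-caching idea recommended in the architecture section of this report; `CallSiteCache`, `MethodTables`, and the id/value type aliases are illustrative stand-ins, not existing Lua-RS types.
+
+```rust
+use std::collections::HashMap;
+
+// Illustrative stand-ins for the VM's interned string ids, table ids, and values.
+type StringId = u32;
+type TableId = u32;
+type Value = i64;
+
+/// One cache slot per call site: remembers which metatable the last lookup
+/// went through and what the method name resolved to.
+#[derive(Default, Clone, Copy)]
+struct CallSiteCache {
+    metatable: Option<TableId>,
+    resolved: Option<Value>,
+}
+
+struct MethodTables {
+    /// metatable id -> (method name id -> function value)
+    methods: HashMap<TableId, HashMap<StringId, Value>>,
+}
+
+impl MethodTables {
+    /// Slow path: full `__index`-style lookup through the metatable.
+    fn lookup_slow(&self, mt: TableId, name: StringId) -> Option<Value> {
+        self.methods.get(&mt)?.get(&name).copied()
+    }
+
+    /// Fast path: if the receiver's metatable matches the cached one,
+    /// reuse the previous result and skip the hash lookups entirely.
+    fn lookup_cached(&self, cache: &mut CallSiteCache, mt: TableId, name: StringId) -> Option<Value> {
+        if cache.metatable == Some(mt) {
+            return cache.resolved; // cache hit: no table lookup at all
+        }
+        let found = self.lookup_slow(mt, name); // cache miss: do the full lookup once
+        cache.metatable = Some(mt);
+        cache.resolved = found;
+        found
+    }
+}
+
+fn main() {
+    let mut methods = HashMap::new();
+    methods.insert(1, HashMap::from([(42, 7)])); // metatable 1 maps method #42 to value 7
+    let tables = MethodTables { methods };
+    let mut cache = CallSiteCache::default();
+    // The first call fills the cache; subsequent calls with the same metatable
+    // resolve without touching the hash maps.
+    assert_eq!(tables.lookup_cached(&mut cache, 1, 42), Some(7));
+    assert_eq!(tables.lookup_cached(&mut cache, 1, 42), Some(7));
+    println!("cached method value: {:?}", cache.resolved);
+}
+```
+
+In the real VM the cache would have to be invalidated when the cached metatable is mutated; the sketch only shows the hit/miss structure that removes repeated lookups from the hot path.
+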
+---
+
+### 4. Object Creation (~10x slower)
+| Test case | Lua | Lua-RS | Gap |
+|--------|-----|--------|------|
+| Object creation | 3,571 K/s | 359 K/s | **10x** |
+| Inherited object creation | 2,000 K/s | 195 K/s | **10x** |
+| Closure object creation | 1,667 K/s | 99 K/s | **17x** |
+
+**Root cause analysis**:
+- Table creation and metatable setup are expensive
+- Closure creation is inefficient
+
+---
+
+### 5. __call Metamethod (~81x slower)
+| Test case | Lua | Lua-RS | Gap |
+|--------|-----|--------|------|
+| __call metamethod | 33.33 M/s | 0.41 M/s | **81x** |
+
+**Root cause analysis**:
+- The `__call` metamethod lookup and call path is not optimized
+- Every call performs a full metamethod lookup
+
+**Optimization directions**:
+- Cache the `__call` metamethod
+- Optimize the `__call` invocation path for tables
+
+---
+
+### 6. Returns as Func Args (~101x slower)
+| Test case | Lua | Lua-RS | Gap |
+|--------|-----|--------|------|
+| Returns as func args | 22.22 M/s | 0.22 M/s | **101x** |
+
+**Root cause analysis**:
+- Passing one function's return values directly as another function's arguments is extremely slow
+- Multi-return-value passing is expensive
+
+---
+
+## 🟠 Moderate Performance Issues (2-5x gap) - Priority: Medium
+
+### 1. Basic Loop Performance
+| Test case | Lua | Lua-RS | Gap |
+|--------|-----|--------|------|
+| While loop | 123.46 M/s | 50.23 M/s | **2.5x** |
+| Repeat-until | 140.85 M/s | 78.44 M/s | **1.8x** |
+| Nested loops | 250 M/s | 138.58 M/s | **1.8x** |
+
+**Root cause analysis**:
+- Instruction dispatch overhead (match-based dispatch)
+- Cost of returning to the main loop after every instruction
+
+**Optimization directions**:
+- Consider superinstructions that fuse common instruction sequences (a sketch follows section 6 below)
+- Optimize main-loop dispatch
+
+---
+
+### 2. ipairs/pairs Iteration
+| Test case | Lua | Lua-RS | Gap |
+|--------|-----|--------|------|
+| ipairs (1000 items) | 33.56 K/s | 10.35 K/s | **3.2x** |
+| pairs on array | 32.47 K/s | 10.45 K/s | **3.1x** |
+| pairs on hash | 22.94 K/s | 9.98 K/s | **2.3x** |
+| next() iteration | 23.20 K/s | 9.87 K/s | **2.4x** |
+
+**Optimization directions**:
+- Optimize the C-function implementations of `ipairs`/`pairs`
+- Reduce per-iteration function call overhead
+
+---
+
+### 3. Global Variable Access
+| Test case | Lua | Lua-RS | Gap |
+|--------|-----|--------|------|
+| Global var access | 73.53 M/s | 28.93 M/s | **2.5x** |
+| Global table field | 42.37 M/s | 19.38 M/s | **2.2x** |
+
+**Optimization directions**:
+- Optimize the `GETTABUP`/`SETTABUP` instructions
+- Optimize `_ENV` table lookup
+
+---
+
+### 4. String Operations
+| Test case | Lua | Lua-RS | Gap |
+|--------|-----|--------|------|
+| string.reverse | 7,143 K/s | 2,668 K/s | **2.7x** |
+| string.rep | 2,174 K/s | 1,083 K/s | **2.0x** |
+| string.format (complex) | 1,408 K/s | 668 K/s | **2.1x** |
+| string.char | 11,111 K/s | 4,557 K/s | **2.4x** |
+
+---
+
+### 5. Upvalue Operations
+| Test case | Lua | Lua-RS | Gap |
+|--------|-----|--------|------|
+| Upvalue read/write | 45.45 M/s | 18.75 M/s | **2.4x** |
+| Multiple upvalues | 32.26 M/s | 16.24 M/s | **2.0x** |
+
+---
+
+### 6. math.min/max
+| Test case | Lua | Lua-RS | Gap |
+|--------|-----|--------|------|
+| math.min/max | 18.05 M/s | 6.18 M/s | **2.9x** |
+
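+The dispatch overhead called out in item 1 above is one trip through the interpreter's main `match` per instruction. Below is a minimal, self-contained sketch of the superinstruction idea (also listed in the architecture suggestions at the end of this report); the micro-VM and opcode names are illustrative only and do not correspond to the Lua-RS instruction set.
+
+```rust
+/// Toy register machine used only to illustrate instruction fusion.
+#[derive(Clone, Copy)]
+enum Op {
+    LoadK { dst: usize, k: i64 },
+    Add { dst: usize, a: usize, b: usize },
+    JumpIfLt { a: usize, b: usize, target: usize },
+    // Fused superinstruction: one dispatch instead of two for the common
+    // "add, then compare-and-branch" sequence at the bottom of a counting loop.
+    AddJumpIfLt { dst: usize, a: usize, b: usize, limit: usize, target: usize },
+    Halt,
+}
+
+fn run(code: &[Op], regs: &mut [i64]) {
+    let mut pc = 0;
+    loop {
+        match code[pc] {
+            Op::LoadK { dst, k } => { regs[dst] = k; pc += 1; }
+            Op::Add { dst, a, b } => { regs[dst] = regs[a] + regs[b]; pc += 1; }
+            Op::JumpIfLt { a, b, target } => {
+                pc = if regs[a] < regs[b] { target } else { pc + 1 };
+            }
+            // The fused form does the add and the branch with a single pass
+            // through the dispatch `match`.
+            Op::AddJumpIfLt { dst, a, b, limit, target } => {
+                regs[dst] = regs[a] + regs[b];
+                pc = if regs[dst] < regs[limit] { target } else { pc + 1 };
+            }
+            Op::Halt => break,
+        }
+    }
+}
+
+fn main() {
+    // r0 = counter, r1 = step, r2 = limit
+    let code = [
+        Op::LoadK { dst: 0, k: 0 },
+        Op::LoadK { dst: 1, k: 1 },
+        Op::LoadK { dst: 2, k: 1_000_000 },
+        // The whole loop body is a single fused instruction at index 3.
+        Op::AddJumpIfLt { dst: 0, a: 0, b: 1, limit: 2, target: 3 },
+        Op::Halt,
+    ];
+    let mut regs = [0i64; 8];
+    run(&code, &mut regs);
+    println!("counter = {}", regs[0]);
+}
+```
+
+A compiler would emit the fused opcode only when it recognizes the two-instruction pattern; the gain is fewer dispatches per iteration, which is the overhead this report attributes the 1.8-2.5x loop gap to.
+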
+---
+
+### 7. __index (function)
+| Test case | Lua | Lua-RS | Gap |
+|--------|-----|--------|------|
+| __index (function) | 22.73 M/s | 3.62 M/s | **6.3x** |
+
+---
+
+## 🟢 Performance Close to Native (<2x gap) - Acceptable
+
+| Test case | Lua | Lua-RS | Gap |
+|--------|-----|--------|------|
+| Integer addition | 200 M/s | 137 M/s | 1.5x |
+| Float multiplication | 200 M/s | 135 M/s | 1.5x |
+| Local var access | 227 M/s | 131 M/s | 1.7x |
+| Table insertion | 47.62 M/s | 41.90 M/s | 1.1x |
+| Table access | 111 M/s | 61.74 M/s | 1.8x |
+| string.upper/lower | ~4,500 K/s | ~6,200 K/s | **Lua-RS faster!** |
+| string.byte | 20,000 K/s | 20,924 K/s | **Lua-RS faster!** |
+| table.sort | comparable | comparable | ~1x |
+| Integer division (//) | 78 M/s | 114 M/s | **Lua-RS faster!** |
+
+---
+
+## 🔵 Cases Where Lua-RS Is Faster
+
+| Test case | Lua | Lua-RS | Lua-RS advantage |
+|--------|-----|--------|-------------|
+| Integer division (//) | 78 M/s | 114 M/s | 1.5x faster |
+| Repeated yield | 685 K/s | 2,997 K/s | 4.4x faster |
+| pcall (error) | 508 K/s | 2,184 K/s | 4.3x faster |
+| xpcall (error) | 431 K/s | 1,273 K/s | 3.0x faster |
+| string.upper | 4,545 K/s | 6,716 K/s | 1.5x faster |
+| table.move | 112 K/s | 163 K/s | 1.5x faster |
+
+---
+
+## Suggested Optimization Priorities
+
+### First Priority (Biggest Impact)
+1. **OOP method calls** - 30-82x gap; affects all object-oriented code
+2. **Custom iterators** - 40-64x gap; affects `for...in` loops
+3. **__call metamethod** - 81x gap
+
+### Second Priority
+4. **Returns as func args** - 101x gap, but a relatively rare usage pattern
+5. **Coroutine creation** - 9-27x gap
+6. **Object creation** - 10-17x gap
+
+### Third Priority
+7. **Basic loops (while/repeat)** - 1.8-2.5x gap
+8. **Global variable access** - 2.2-2.5x gap
+9. **ipairs/pairs** - 2.3-3.2x gap
+
+---
+
+## Architecture-Level Optimization Suggestions
+
+### 1. Method Call Optimization
+- Implement inline caching to speed up repeated method lookups
+- Special-case the `self:method()` pattern
+
+### 2. Iterator Optimization
+- Optimize the `TFORCALL` instruction to reduce function call overhead
+- Consider special-casing `ipairs`/`pairs`
+
+### 3. Function Call Optimization
+- Reduce the cost of creating call frames
+- Optimize multi-return-value passing
+
+### 4. Loop Optimization
+- Implement superinstructions that fuse common instruction sequences
+- Consider loop-invariant code motion
+
+### 5. Metamethod Optimization
+- Cache metamethod lookup results
+- Implement fast paths for the most common metamethods (`__index`, `__call`)
+
+---
+
+*Report generated: 2025-12-01*
+*Benchmark iteration counts: see individual test cases*
diff --git a/benchmarks/bench_iterators.lua b/benchmarks/bench_iterators.lua
index f01f244e..65b71000 100644
--- a/benchmarks/bench_iterators.lua
+++ b/benchmarks/bench_iterators.lua
@@ -1,5 +1,5 @@
 -- Benchmark: Iterators (ipairs, pairs, custom)
-local iterations = 100000
+local iterations = 10000
 
 print("=== Iterators Benchmark ===")
 print("Iterations:", iterations)
diff --git a/crates/luars/src/gc/mod.rs b/crates/luars/src/gc/mod.rs
index 6da472bf..a5807d60 100644
--- a/crates/luars/src/gc/mod.rs
+++ b/crates/luars/src/gc/mod.rs
@@ -1,34 +1,23 @@
-// Generational Garbage Collector for Lua VM
-// Implements 2-generation GC with mark-sweep algorithm
-// Young generation: frequently collected, most objects die young
-// Old generation: rarely collected, long-lived objects
+// Simplified Garbage Collector for Lua VM
+//
+// Key insight: Objects are already stored in Arena with GcHeader.
+// We don't need a separate HashMap to track them!
+//
+// Design:
+// - Arena<GcString>, Arena<GcTable>, etc. 
store all objects +// - GcHeader.marked is used for mark-sweep +// - GC directly iterates over Arena, no extra tracking needed +// - Lua 5.4 style debt mechanism for triggering GC + mod object_pool; -// Use ObjectPoolV2 as the main ObjectPool use crate::lua_value::LuaValue; pub use object_pool::{ Arena, FunctionId, GcFunction, GcHeader, GcString, GcTable, GcThread, GcUpvalue, ObjectPoolV2 as ObjectPool, StringId, TableId, ThreadId, UpvalueId, UpvalueState, UserdataId, }; -use std::collections::{HashMap, HashSet}; - -/// GC Generation -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum Generation { - Young, - Old, -} - -/// Object metadata for GC tracking -#[derive(Debug, Clone)] -pub struct GcObject { - pub obj_id: u32, // The actual object ID (StringId, TableId, etc.) - pub generation: Generation, - pub age: u8, - pub marked: bool, - pub obj_type: GcObjectType, -} +// Re-export for compatibility #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum GcObjectType { String, @@ -36,86 +25,24 @@ pub enum GcObjectType { Function, } -impl GcObject { - pub fn new(obj_id: u32, obj_type: GcObjectType) -> Self { - GcObject { - obj_id, - generation: Generation::Young, - age: 0, - marked: false, - obj_type, - } - } - - pub fn promote(&mut self) { - self.generation = Generation::Old; - self.age = 0; - } - - pub fn mark(&mut self) { - self.marked = true; - } - - pub fn unmark(&mut self) { - self.marked = false; - } - - pub fn increment_age(&mut self) -> bool { - if self.generation == Generation::Young { - self.age += 1; - self.age >= 3 // Promote after 3 minor GCs - } else { - false - } - } -} - -/// Garbage collector state (Lua 5.4 style: generational with GCdebt) +/// Simplified GC state - no HashMap tracking! pub struct GC { - // Object tracking for generational GC - key is (type, id) - objects: HashMap<(GcObjectType, u32), GcObject>, - next_gc_id: usize, // Internal GC tracking ID - // Lua 5.4 GC debt mechanism - // GC runs when: GCdebt > 0 - // totalbytes = actual_bytes - GCdebt - pub(crate) gc_debt: isize, // Bytes allocated not yet compensated by collector - pub(crate) total_bytes: usize, // Number of bytes currently allocated - GCdebt - gc_estimate: usize, // Estimate of non-garbage memory - - // GC parameters (Lua 5.4 style) - gc_pause: usize, // Pause parameter (default 200 = 200%) - - // GC mode - gc_kind: GCKind, // KGC_INC or KGC_GEN - - // Shrink optimization - avoid frequent shrinking - shrink_cooldown: u32, // Shrink 冷却计数器 - shrink_threshold: u32, // Shrink 阈值 - - // Generational GC state - allocations_since_minor_gc: usize, - minor_gc_count: usize, - - // Incremental collection throttling - // To avoid overhead of collecting roots every check, only run GC every N checks + pub(crate) gc_debt: isize, + pub(crate) total_bytes: usize, + + // GC parameters + gc_pause: usize, // Pause parameter (default 200 = 200%) + // gc_step_mul: usize, // Step multiplier + + // Collection throttling check_counter: u32, - check_interval: u32, // Run GC every N checks (default: 10) - - // Finalization support (__gc metamethod) - finalize_queue: Vec<(GcObjectType, u32)>, // Objects awaiting finalization - + check_interval: u32, + // Statistics - collection_count: usize, stats: GCStats, } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum GCKind { - Incremental, // KGC_INC - Generational, // KGC_GEN -} - #[derive(Debug, Clone, Default)] pub struct GCStats { pub bytes_allocated: usize, @@ -132,34 +59,18 @@ pub struct GCStats { impl GC { pub fn new() -> Self { GC { - objects: 
HashMap::new(), - next_gc_id: 1, - gc_debt: -(200 * 1024), // Start with negative debt (can allocate 200KB) + gc_debt: -(200 * 1024), // Start with 200KB credit total_bytes: 0, - gc_estimate: 0, - gc_pause: 200, // 200% pause (LUAI_GCPAUSE) - gc_kind: GCKind::Generational, // Default to generational (like Lua 5.4) - shrink_cooldown: 0, - shrink_threshold: 10, // 每 10 次 GC 才 shrink 一次 - allocations_since_minor_gc: 0, - minor_gc_count: 0, + gc_pause: 200, check_counter: 0, - check_interval: 10000, // Run GC every 10000 checks when debt > 0 (massive overhead reduction) - finalize_queue: Vec::new(), // __gc finalizer queue - collection_count: 0, + check_interval: 10000, stats: GCStats::default(), } } - /// Register a new object for GC tracking - /// ULTRA-OPTIMIZED: Zero-cost tracking, just increment debt + /// Record allocation - just update debt, no HashMap insertion! #[inline(always)] pub fn register_object(&mut self, _obj_id: u32, obj_type: GcObjectType) { - // Minimal tracking: just update debt and allocation count - // Objects discovered during GC marking from roots - self.allocations_since_minor_gc += 1; - - // Inline size calculation and debt update let size = match obj_type { GcObjectType::String => 64, GcObjectType::Table => 256, @@ -169,711 +80,301 @@ impl GC { self.gc_debt += size as isize; } - /// Register object with full tracking (for when we need to track specific objects) - #[inline] + /// Compatibility alias + #[inline(always)] pub fn register_object_tracked(&mut self, obj_id: u32, obj_type: GcObjectType) -> usize { - let gc_id = self.next_gc_id; - self.next_gc_id += 1; - - let obj = GcObject::new(obj_id, obj_type); - self.objects.insert((obj_type, obj_id), obj); - self.allocations_since_minor_gc += 1; - - let size = match obj_type { - GcObjectType::String => 64, - GcObjectType::Table => 256, - GcObjectType::Function => 128, - }; - self.record_allocation(size); + self.register_object(obj_id, obj_type); + obj_id as usize + } - gc_id + /// Record deallocation + #[inline(always)] + pub fn record_allocation(&mut self, size: usize) { + self.total_bytes += size; + self.gc_debt += size as isize; } - /// Unregister an object (when explicitly deleted) - pub fn unregister_object(&mut self, obj_id: u32, obj_type: GcObjectType) { - if let Some(obj) = self.objects.remove(&(obj_type, obj_id)) { - let size = match obj.obj_type { - GcObjectType::String => 64, - GcObjectType::Table => 256, - GcObjectType::Function => 128, - }; - self.record_deallocation(size); - } + #[inline(always)] + pub fn record_deallocation(&mut self, size: usize) { + self.total_bytes = self.total_bytes.saturating_sub(size); } - /// Check if GC should run (Lua 5.4 style: check GCdebt) - /// GC runs when GCdebt > 0 - #[inline] + /// Check if GC should run + #[inline(always)] pub fn should_collect(&self) -> bool { self.gc_debt > 0 } - /// Increment check counter for throttling GC collection - #[inline] + #[inline(always)] pub fn increment_check_counter(&mut self) { self.check_counter += 1; } - /// Check if we should actually run GC collection (throttled) - /// This reduces overhead by not collecting roots on every check - #[inline] + #[inline(always)] pub fn should_run_collection(&self) -> bool { self.check_counter >= self.check_interval } - /// Reset check counter after running GC - #[inline] - fn reset_check_counter(&mut self) { - self.check_counter = 0; - } - - /// Perform one step of GC work (like luaC_step in Lua 5.4) - pub fn step(&mut self, roots: &[LuaValue], object_pool: &mut ObjectPool) { + /// Perform GC step + 
pub fn step(&mut self, roots: &[LuaValue], pool: &mut ObjectPool) { if !self.should_collect() { return; } - - // Reset throttling counter - self.reset_check_counter(); - - // Determine which collection to run - match self.gc_kind { - GCKind::Generational => { - // Simple generational: check if we should do minor or major - if self.minor_gc_count >= 10 { - // Every 10 minor GCs, do a major - self.major_collect_internal(roots, object_pool); - } else { - self.minor_collect_internal(roots, object_pool); - } - } - GCKind::Incremental => { - // For incremental, just do a full collection for now - // (Lua 5.4 has complex state machine, we simplify) - self.major_collect_internal(roots, object_pool); - } - } - - // Reset debt based on estimate - self.set_debt(); - } - - /// Set GC debt based on current memory and pause parameter - fn set_debt(&mut self) { - let estimate = self.gc_estimate.max(1024); - // debt = -(estimate * pause / 100) - let pause_bytes = (estimate * self.gc_pause) / 100; - self.gc_debt = -(pause_bytes as isize); - } - - /// Perform garbage collection (chooses minor or major) - /// Takes root set (stack, globals, etc.) and marks reachable objects - pub fn collect(&mut self, roots: &[LuaValue], object_pool: &mut ObjectPool) -> usize { - // For public API, determine which to run based on state - if self.minor_gc_count >= 10 { - self.major_collect_internal(roots, object_pool) - } else { - self.minor_collect_internal(roots, object_pool) - } + + self.check_counter = 0; + self.collect(roots, pool); } - /// Minor GC - collect young generation only - fn minor_collect_internal( - &mut self, - roots: &[LuaValue], - object_pool: &mut ObjectPool, - ) -> usize { - self.collection_count += 1; - self.stats.minor_collections += 1; - - let reachable = self.mark(roots, object_pool); - - let mut collected = 0; - let mut promoted = 0; - let mut survivors = Vec::new(); - - // Get all young generation objects - let young_objs: Vec<(GcObjectType, u32)> = self - .objects - .iter() - .filter(|(_, obj)| obj.generation == Generation::Young) - .map(|(key, _)| *key) - .collect(); - - for key @ (obj_type, obj_id) in young_objs { - if let Some(mut obj) = self.objects.remove(&key) { - if reachable.contains(&key) { - // Object survived - obj.unmark(); - - if obj.increment_age() { - obj.promote(); - promoted += 1; - } - - survivors.push((key, obj)); - } else { - // Object is unreachable - check for __gc finalizer - if self.check_finalizer(obj_type, obj_id, object_pool) { - // Has finalizer, keep alive for one more cycle - // (Finalization happens externally by VM) - survivors.push((key, obj)); - continue; - } - - // Collect garbage - remove from object pool! 
- collected += 1; - match obj_type { - GcObjectType::String => { - let string_id = StringId(obj_id); - object_pool.remove_string(string_id); - } - GcObjectType::Table => { - let table_id = TableId(obj_id); - object_pool.remove_table(table_id); - } - GcObjectType::Function => { - let func_id = FunctionId(obj_id); - object_pool.remove_function(func_id); - } - } - - let size = match obj_type { - GcObjectType::String => 64, - GcObjectType::Table => 256, - GcObjectType::Function => 128, - }; - self.record_deallocation(size); - } - } - } + /// Main collection - mark and sweep directly on Arena + pub fn collect(&mut self, roots: &[LuaValue], pool: &mut ObjectPool) -> usize { + self.stats.collection_count += 1; + self.stats.major_collections += 1; - // Re-insert survivors - for (key, obj) in survivors { - self.objects.insert(key, obj); - } + // Phase 1: Clear all marks + self.clear_marks(pool); - // Clean up weak tables after GC - let all_tables: Vec<_> = self - .objects - .iter() - .filter(|((obj_type, _), _)| *obj_type == GcObjectType::Table) - .map(|((_, obj_id), _)| TableId(*obj_id)) - .collect(); + // Phase 2: Mark from roots + self.mark_roots(roots, pool); - for table_id in all_tables { - if let Some(weak_mode) = self.get_weak_mode(table_id, object_pool) { - self.clear_weak_entries(table_id, &weak_mode, object_pool); - } - } + // Phase 3: Sweep (free unmarked objects) + let collected = self.sweep(pool); + // Update debt + let alive_estimate = pool.tables.len() * 256 + + pool.functions.len() * 128 + + pool.strings.len() * 64; + self.gc_debt = -((alive_estimate * self.gc_pause / 100) as isize); + self.stats.objects_collected += collected; - self.stats.promoted_objects += promoted; - self.update_generation_sizes(); - - self.allocations_since_minor_gc = 0; - self.minor_gc_count += 1; - - // Shrink only if: - // 1. Collected many objects (>1000) - // 2. Cooldown expired - if collected > 1000 && self.shrink_cooldown == 0 { - object_pool.shrink_to_fit(); - self.shrink_cooldown = self.shrink_threshold; - } else if self.shrink_cooldown > 0 { - self.shrink_cooldown -= 1; - } - collected } - /// Major GC - collect all generations - fn major_collect_internal( - &mut self, - roots: &[LuaValue], - object_pool: &mut ObjectPool, - ) -> usize { - self.collection_count += 1; - self.stats.major_collections += 1; - - let reachable = self.mark(roots, object_pool); - - let mut collected = 0; - let keys: Vec<(GcObjectType, u32)> = self.objects.keys().copied().collect(); - - for key @ (obj_type, obj_id) in keys { - if !reachable.contains(&key) { - // Check for __gc finalizer before collecting - if self.check_finalizer(obj_type, obj_id, object_pool) { - // Has finalizer, keep alive for one more cycle - continue; - } - - self.objects.remove(&key); - collected += 1; - - // Remove from object pool! 
- match obj_type { - GcObjectType::String => { - let string_id = StringId(obj_id); - object_pool.remove_string(string_id); - } - GcObjectType::Table => { - let table_id = TableId(obj_id); - object_pool.remove_table(table_id); - } - GcObjectType::Function => { - let func_id = FunctionId(obj_id); - object_pool.remove_function(func_id); - } - } - - let size = match obj_type { - GcObjectType::String => 64, - GcObjectType::Table => 256, - GcObjectType::Function => 128, - }; - self.record_deallocation(size); + /// Clear all marks in all arenas (skip fixed objects - they stay marked) + fn clear_marks(&self, pool: &mut ObjectPool) { + for (_, table) in pool.tables.iter_mut() { + if !table.header.fixed { + table.header.marked = false; } } - - // Unmark all survivors - for obj in self.objects.values_mut() { - obj.unmark(); - } - - // Clean up weak tables after GC - let all_tables: Vec<_> = self - .objects - .iter() - .filter(|((obj_type, _), _)| *obj_type == GcObjectType::Table) - .map(|((_, obj_id), _)| TableId(*obj_id)) - .collect(); - - for table_id in all_tables { - if let Some(weak_mode) = self.get_weak_mode(table_id, object_pool) { - self.clear_weak_entries(table_id, &weak_mode, object_pool); + for (_, func) in pool.functions.iter_mut() { + if !func.header.fixed { + func.header.marked = false; } } - - self.stats.objects_collected += collected; - self.update_generation_sizes(); - - self.minor_gc_count = 0; - self.allocations_since_minor_gc = 0; - self.adjust_threshold(); - - // Major GC 后总是 shrink,但重置冷却期 - object_pool.shrink_to_fit(); - self.shrink_cooldown = self.shrink_threshold; - - collected - } - - /// Mark phase: traverse object graph from roots - fn mark( - &mut self, - roots: &[LuaValue], - object_pool: &ObjectPool, - ) -> HashSet<(GcObjectType, u32)> { - let mut marked = HashSet::new(); - let mut worklist: Vec = roots.to_vec(); - - while let Some(value) = worklist.pop() { - // Get object key (type, id) - let key = match value.kind() { - crate::lua_value::LuaValueKind::String => { - value.as_string_id().map(|id| (GcObjectType::String, id.0)) - } - crate::lua_value::LuaValueKind::Table => { - value.as_table_id().map(|id| (GcObjectType::Table, id.0)) - } - crate::lua_value::LuaValueKind::Function => value - .as_function_id() - .map(|id| (GcObjectType::Function, id.0)), - _ => None, - }; - - if let Some(key) = key { - if marked.contains(&key) { - continue; - } - - marked.insert(key); - - // Mark the object - if let Some(obj) = self.objects.get_mut(&key) { - obj.mark(); - } - - // Mark children - match key.0 { - GcObjectType::Table => { - if let Some(table) = object_pool.get_table(TableId(key.1)) { - self.mark_table(table, &mut worklist); - } - } - GcObjectType::Function => { - if let Some(func) = object_pool.get_function(FunctionId(key.1)) { - self.mark_function(func, object_pool, &mut worklist); - } - } - _ => {} - } + for (_, upval) in pool.upvalues.iter_mut() { + if !upval.header.fixed { + upval.header.marked = false; } } - - marked - } - - /// Mark table contents - fn mark_table(&self, table: &crate::LuaTable, worklist: &mut Vec) { - // Mark both keys and values - for (key, val) in table.iter_all() { - worklist.push(key); - worklist.push(val); - } - } - - /// Mark function upvalues - fn mark_function( - &self, - func: &GcFunction, - object_pool: &ObjectPool, - worklist: &mut Vec, - ) { - for upval_id in &func.upvalues { - // Only mark closed upvalues (open ones are on the stack already) - if let Some(upval) = object_pool.get_upvalue(*upval_id) { - if let UpvalueState::Closed(val) = 
&upval.state { - worklist.push(*val); - } + for (_, thread) in pool.threads.iter_mut() { + if !thread.header.fixed { + thread.header.marked = false; } } - } - - /// Write barrier (forward): called when an old object points to a new object - /// Lua 5.4: luaC_barrier - marks the object to be revisited in next GC cycle - #[inline(always)] - pub fn barrier_forward(&mut self, obj_type: GcObjectType, obj_id: u32) { - // In generational GC, when an old object gets a reference to a new object, - // we need to mark the old object as "touched" so it will be revisited - if let Some(obj) = self.objects.get_mut(&(obj_type, obj_id)) { - // Mark as touched (will be scanned in next minor collection) - obj.mark(); - } - } - - /// Write barrier (backward): called when a value is stored in a table - /// Lua 5.4: luaC_barrierback - marks the value to keep it alive - /// OPTIMIZATION: Only call this for collectable values (table, string, function) - #[inline(always)] - pub fn barrier_back(&mut self, value: &LuaValue) { - // In generational GC, when storing a value in a table, - // ensure the value stays alive by marking it if needed - let key = match value.kind() { - crate::lua_value::LuaValueKind::String => { - value.as_string_id().map(|id| (GcObjectType::String, id.0)) - } - crate::lua_value::LuaValueKind::Table => { - value.as_table_id().map(|id| (GcObjectType::Table, id.0)) - } - crate::lua_value::LuaValueKind::Function => value - .as_function_id() - .map(|id| (GcObjectType::Function, id.0)), - _ => None, - }; - - if let Some(key) = key { - if let Some(obj) = self.objects.get_mut(&key) { - obj.mark(); + for (_, string) in pool.strings.iter_mut() { + if !string.header.fixed { + string.header.marked = false; } } + // Note: userdata uses Rc internally, no GcHeader } - /// Check if a value is collectable (needs GC barrier) - #[inline(always)] - pub fn is_collectable(value: &LuaValue) -> bool { - matches!( - value.kind(), - crate::lua_value::LuaValueKind::String - | crate::lua_value::LuaValueKind::Table - | crate::lua_value::LuaValueKind::Function - ) - } - - /// Update generation size statistics - fn update_generation_sizes(&mut self) { - let (young, old) = self - .objects - .values() - .fold((0, 0), |(y, o), obj| match obj.generation { - Generation::Young => (y + 1, o), - Generation::Old => (y, o + 1), - }); - - self.stats.young_gen_size = young; - self.stats.old_gen_size = old; - } - - /// Adjust GC threshold based on current usage - fn adjust_threshold(&mut self) { - // Update estimate - let alive_bytes: usize = self.objects.len() * 128; // Average size - self.gc_estimate = alive_bytes; - } - - /// Record allocation (Lua 5.4 style: increase GCdebt) - pub fn record_allocation(&mut self, size: usize) { - self.total_bytes += size; - self.gc_debt += size as isize; - self.stats.bytes_allocated = self.total_bytes; - } - - /// Record deallocation (Lua 5.4 style: decrease totalbytes) - pub fn record_deallocation(&mut self, size: usize) { - self.total_bytes = self.total_bytes.saturating_sub(size); - self.stats.bytes_allocated = self.total_bytes; - } - - /// Get statistics - pub fn stats(&self) -> GCStats { - self.stats.clone() - } - - /// Tune GC thresholds based on current state - pub fn tune_thresholds(&mut self) { - // Adjust debt based on current memory - self.adjust_threshold(); - } - - /// Check if object has __gc metamethod that needs to be called - /// This should be called during sweep phase before removing object - pub fn check_finalizer( - &mut self, - obj_type: GcObjectType, - obj_id: u32, - 
object_pool: &ObjectPool, - ) -> bool { - if obj_type != GcObjectType::Table { - return false; // Only tables can have metatables with __gc - } + /// Mark phase - traverse from roots + /// Uses a worklist algorithm to avoid recursion and handle borrowing correctly + fn mark_roots(&self, roots: &[LuaValue], pool: &mut ObjectPool) { + let mut worklist: Vec = roots.to_vec(); - let table_id = TableId(obj_id); - if let Some(table) = object_pool.get_table(table_id) { - if let Some(meta_value) = table.get_metatable() { - if let Some(meta_id) = meta_value.as_table_id() { - if let Some(meta_table) = object_pool.get_table(meta_id) { - // Check for __gc key in metatable - // We need to look for the string "__gc" in the metatable - for (key, value) in meta_table.iter_all() { - if let Some(string_id) = key.as_string_id() { - if let Some(string) = object_pool.get_string(string_id) { - if string.as_str() == "__gc" && !value.is_nil() { - // Has __gc metamethod, add to finalize queue - self.finalize_queue.push((obj_type, obj_id)); - return true; - } + while let Some(value) = worklist.pop() { + match value.kind() { + crate::lua_value::LuaValueKind::Table => { + if let Some(id) = value.as_table_id() { + if let Some(table) = pool.tables.get_mut(id.0) { + if !table.header.marked { + table.header.marked = true; + // Add table contents to worklist + for (k, v) in table.data.iter_all() { + worklist.push(k); + worklist.push(v); + } + if let Some(mt) = table.data.get_metatable() { + worklist.push(mt); } } } } } - } - } - false - } - - /// Get finalization queue (for external processing by VM) - /// VM should call __gc metamethods on these objects - pub fn take_finalize_queue(&mut self) -> Vec<(GcObjectType, u32)> { - std::mem::take(&mut self.finalize_queue) - } - - /// Check if table has weak mode (__mode metamethod) - /// Returns: None if no weak mode, Some("k") for weak keys, Some("v") for weak values, Some("kv") for both - pub fn get_weak_mode(&self, table_id: TableId, object_pool: &ObjectPool) -> Option { - if let Some(table) = object_pool.get_table(table_id) { - if let Some(meta_value) = table.get_metatable() { - if let Some(meta_id) = meta_value.as_table_id() { - if let Some(meta_table) = object_pool.get_table(meta_id) { - // Look for __mode key - for (key, value) in meta_table.iter_all() { - if let Some(string_id) = key.as_string_id() { - if let Some(string) = object_pool.get_string(string_id) { - if string.as_str() == "__mode" { - if let Some(mode_string_id) = value.as_string_id() { - if let Some(mode_string) = - object_pool.get_string(mode_string_id) - { - return Some(mode_string.as_str().to_string()); - } + crate::lua_value::LuaValueKind::Function => { + if let Some(id) = value.as_function_id() { + // First, collect data we need without holding mutable borrow + let (should_mark, upvalue_ids, constants) = { + if let Some(func) = pool.functions.get(id.0) { + if !func.header.marked { + (true, func.upvalues.clone(), func.chunk.constants.clone()) + } else { + (false, vec![], vec![]) + } + } else { + (false, vec![], vec![]) + } + }; + + if should_mark { + // Now we can safely mark + if let Some(func) = pool.functions.get_mut(id.0) { + func.header.marked = true; + } + + // Mark upvalues separately + for upval_id in upvalue_ids { + if let Some(upval) = pool.upvalues.get_mut(upval_id.0) { + if !upval.header.marked { + upval.header.marked = true; + if let UpvalueState::Closed(v) = &upval.state { + worklist.push(*v); } } } } + + // Add constants to worklist + worklist.extend(constants); } } } - } - } - None - } - - 
/// Clear weak references from a weak table after GC - /// This removes entries where the key or value was collected - pub fn clear_weak_entries( - &self, - table_id: TableId, - weak_mode: &str, - object_pool: &mut ObjectPool, - ) { - let has_weak_keys = weak_mode.contains('k'); - let has_weak_values = weak_mode.contains('v'); - - if !has_weak_keys && !has_weak_values { - return; - } - - // First pass: collect keys to remove (immutable borrow) - let keys_to_remove: Vec = if let Some(table) = object_pool.get_table(table_id) { - table - .iter_all() - .into_iter() - .filter(|(key, value)| { - let mut should_remove = false; - - // Check if weak key was collected - if has_weak_keys { - if let Some(key_table_id) = key.as_table_id() { - if !self - .objects - .contains_key(&(GcObjectType::Table, key_table_id.0)) - { - should_remove = true; + crate::lua_value::LuaValueKind::Thread => { + if let Some(id) = value.as_thread_id() { + // Collect stack values first + let stack_values = { + if let Some(thread) = pool.threads.get(id.0) { + if !thread.header.marked { + Some(thread.data.register_stack.clone()) + } else { + None + } + } else { + None } + }; + + if let Some(values) = stack_values { + if let Some(thread) = pool.threads.get_mut(id.0) { + thread.header.marked = true; + } + worklist.extend(values); } } - - // Check if weak value was collected - if has_weak_values && !should_remove { - if let Some(val_table_id) = value.as_table_id() { - if !self - .objects - .contains_key(&(GcObjectType::Table, val_table_id.0)) - { - should_remove = true; - } + } + crate::lua_value::LuaValueKind::Userdata => { + // Userdata uses Rc internally, no GC needed + } + crate::lua_value::LuaValueKind::String => { + // Mark strings (they can be collected if not fixed) + if let Some(id) = value.as_string_id() { + if let Some(string) = pool.strings.get_mut(id.0) { + string.header.marked = true; } } - - should_remove - }) - .map(|(key, _)| key) - .collect() - } else { - return; - }; - - // Second pass: remove keys (mutable borrow) - if let Some(table) = object_pool.get_table_mut(table_id) { - for key in keys_to_remove { - table.raw_set(key, LuaValue::nil()); + } + _ => {} // Numbers, booleans, nil, CFunction - no marking needed } } } -} -/// Memory pool for small object allocation -/// Reduces allocation overhead and improves cache locality -pub struct MemoryPool { - blocks: Vec>, - block_size: usize, - free_list: Vec<*mut T>, -} + /// Sweep phase - free unmarked objects (skip fixed objects) + fn sweep(&mut self, pool: &mut ObjectPool) -> usize { + let mut collected = 0; -impl MemoryPool { - pub fn new(block_size: usize) -> Self { - MemoryPool { - blocks: Vec::new(), - block_size, - free_list: Vec::new(), + // Collect unmarked tables (skip fixed ones) + let tables_to_free: Vec = pool.tables.iter() + .filter(|(_, t)| !t.header.marked && !t.header.fixed) + .map(|(id, _)| id) + .collect(); + for id in tables_to_free { + pool.tables.free(id); + collected += 1; + self.record_deallocation(256); } - } - pub fn allocate(&mut self) -> *mut T - where - T: Default, - { - if let Some(ptr) = self.free_list.pop() { - return ptr; + // Collect unmarked functions (skip fixed ones) + let funcs_to_free: Vec = pool.functions.iter() + .filter(|(_, f)| !f.header.marked && !f.header.fixed) + .map(|(id, _)| id) + .collect(); + for id in funcs_to_free { + pool.functions.free(id); + collected += 1; + self.record_deallocation(128); } - // Allocate new block - let mut block = Vec::with_capacity(self.block_size); - for _ in 0..self.block_size { - 
block.push(T::default()); + // Collect unmarked upvalues (skip fixed ones) + let upvals_to_free: Vec = pool.upvalues.iter() + .filter(|(_, u)| !u.header.marked && !u.header.fixed) + .map(|(id, _)| id) + .collect(); + for id in upvals_to_free { + pool.upvalues.free(id); + collected += 1; } - let ptr = block.as_mut_ptr(); - self.blocks.push(block); - - // Add remaining slots to free list - unsafe { - for i in 1..self.block_size { - self.free_list.push(ptr.add(i)); - } + // Collect unmarked threads (skip fixed ones) + let threads_to_free: Vec = pool.threads.iter() + .filter(|(_, t)| !t.header.marked && !t.header.fixed) + .map(|(id, _)| id) + .collect(); + for id in threads_to_free { + pool.threads.free(id); + collected += 1; + } + + // Collect unmarked strings (skip fixed ones) + // Note: interned strings are usually kept, but this handles non-interned long strings + let strings_to_free: Vec = pool.strings.iter() + .filter(|(_, s)| !s.header.marked && !s.header.fixed) + .map(|(id, _)| id) + .collect(); + for id in strings_to_free { + pool.strings.free(id); + collected += 1; + self.record_deallocation(64); } - ptr - } + // Note: userdata uses Rc internally, no sweep needed - pub fn deallocate(&mut self, ptr: *mut T) { - self.free_list.push(ptr); + collected } - pub fn clear(&mut self) { - self.blocks.clear(); - self.free_list.clear(); + /// Write barrier - no-op in simple mark-sweep + #[inline(always)] + pub fn barrier_forward(&mut self, _obj_type: GcObjectType, _obj_id: u32) { + // No-op for simple mark-sweep } -} -unsafe impl Send for MemoryPool {} -unsafe impl Sync for MemoryPool {} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_gc_creation() { - let gc = GC::new(); - assert_eq!(gc.collection_count, 0); - assert!(gc.total_bytes == 0); + #[inline(always)] + pub fn barrier_back(&mut self, _value: &LuaValue) { + // No-op for simple mark-sweep } - #[test] - fn test_gc_threshold() { - let mut gc = GC::new(); - assert!(!gc.should_collect()); - - // Allocate enough to trigger GC - gc.record_allocation(500 * 1024); // 500KB > initial debt - assert!(gc.should_collect()); + #[inline(always)] + pub fn is_collectable(_value: &LuaValue) -> bool { + false // Unused } - #[test] - fn test_memory_pool() { - let mut pool: MemoryPool = MemoryPool::new(10); - - let ptr1 = pool.allocate(); - let ptr2 = pool.allocate(); - - assert_ne!(ptr1, ptr2); + pub fn unregister_object(&mut self, _obj_id: u32, obj_type: GcObjectType) { + let size = match obj_type { + GcObjectType::String => 64, + GcObjectType::Table => 256, + GcObjectType::Function => 128, + }; + self.record_deallocation(size); + } - pool.deallocate(ptr1); - let ptr3 = pool.allocate(); + pub fn stats(&self) -> GCStats { + self.stats.clone() + } +} - assert_eq!(ptr1, ptr3); +impl Default for GC { + fn default() -> Self { + Self::new() } } diff --git a/crates/luars/src/gc/object_pool.rs b/crates/luars/src/gc/object_pool.rs index 930f76a4..312e18e1 100644 --- a/crates/luars/src/gc/object_pool.rs +++ b/crates/luars/src/gc/object_pool.rs @@ -3,14 +3,15 @@ // Key Design Principles: // 1. LuaValueV2 stores type tag + object ID (no pointers - Vec may relocate) // 2. All GC objects accessed via ID lookup in Arena -// 3. Arena uses Vec> with free list for O(1) alloc/free +// 3. ChunkedArena uses fixed-size chunks - never reallocates existing data! // 4. No Rc/RefCell overhead - direct access via &mut self // 5. 
GC headers embedded in objects for mark-sweep
 //
 // Memory Layout:
-// - Arena stores objects in Vec<Option<T>>
-// - None = free slot (reusable via free list)
-// - Free list tracks available slots for O(1) allocation
+// - ChunkedArena stores objects in fixed-size chunks (Box<[Option<T>; CHUNK_SIZE]>)
+// - Each chunk is allocated once and never moved
+// - New chunks are added as needed, existing chunks stay in place
+// - This eliminates Vec resize overhead and improves cache locality
 
 use crate::lua_value::{Chunk, LuaThread, LuaUserdata};
 use crate::{LuaString, LuaTable, LuaValue};
@@ -20,12 +21,14 @@ use std::rc::Rc;
 // ============ GC Header ============
 
 /// GC object header - embedded in every GC-managed object
-/// Kept minimal (2 bytes) to reduce memory overhead
+/// Based on Lua 5.4's CommonHeader design
+/// Kept minimal to reduce memory overhead
 #[derive(Clone, Copy, Default)]
 #[repr(C)]
 pub struct GcHeader {
     pub marked: bool,
-    pub age: u8, // For generational GC
+    pub age: u8,     // For generational GC (like Lua's G_NEW, G_SURVIVAL, G_OLD, etc.)
+    pub fixed: bool, // If true, object is never collected (like Lua's fixedgc list)
 }
 
 // ============ Object IDs ============
@@ -134,14 +137,22 @@ pub struct GcThread {
     pub data: LuaThread,
 }
 
-// ============ Arena Storage ============
+// ============ Chunked Arena Storage ============
 
-/// Type-safe arena for storing GC objects
-/// Uses Option<T> internally to mark free slots
-/// Free list enables O(1) allocation after initial growth
+/// Chunk size for arena storage (power of 2 for fast division)
+const CHUNK_SIZE: usize = 256;
+const CHUNK_MASK: usize = CHUNK_SIZE - 1;
+const CHUNK_SHIFT: u32 = 8; // log2(256)
+
+/// High-performance chunked arena that NEVER reallocates existing data
+/// - Uses fixed-size chunks stored in a Vec of Box pointers
+/// - When Vec of chunks grows, only the pointer array moves, not the data
+/// - Each chunk is a Box<[Option<T>; CHUNK_SIZE]> - stable address
+/// - Free list enables O(1) allocation after initial growth
 pub struct Arena<T> {
-    storage: Vec<Option<T>>,
+    chunks: Vec<Box<[Option<T>; CHUNK_SIZE]>>,
     free_list: Vec<u32>,
+    next_id: u32,
     count: usize,
 }
 
@@ -149,21 +160,33 @@ impl<T> Arena<T> {
     #[inline]
     pub fn new() -> Self {
         Self {
-            storage: Vec::new(),
+            chunks: Vec::new(),
             free_list: Vec::new(),
+            next_id: 0,
             count: 0,
         }
     }
 
     #[inline]
     pub fn with_capacity(cap: usize) -> Self {
+        let num_chunks = (cap + CHUNK_SIZE - 1) / CHUNK_SIZE;
         Self {
-            storage: Vec::with_capacity(cap),
+            chunks: Vec::with_capacity(num_chunks),
             free_list: Vec::with_capacity(cap / 8),
+            next_id: 0,
             count: 0,
         }
     }
 
+    /// Create a new empty chunk
+    #[inline]
+    fn new_chunk() -> Box<[Option<T>; CHUNK_SIZE]> {
+        // std::array::from_fn initializes each of the 256 slots to None;
+        // Option<T>::None is cheap to construct, so no unsafe MaybeUninit tricks are needed
+        let chunk: Box<[Option<T>; CHUNK_SIZE]> = Box::new(std::array::from_fn(|_| None));
+        chunk
+    }
+
     /// Allocate a new object and return its ID
     #[inline]
     pub fn alloc(&mut self, value: T) -> u32 {
@@ -171,29 +194,47 @@ impl<T> Arena<T> {
 
         if let Some(free_id) = self.free_list.pop() {
             // Reuse a free slot
-            self.storage[free_id as usize] = Some(value);
-            free_id
-        } else {
-            // Append new slot
-            let id = self.storage.len() as u32;
-            self.storage.push(Some(value));
-            id
+            let chunk_idx = (free_id >> CHUNK_SHIFT) as usize;
+            let slot_idx = (free_id as usize) & CHUNK_MASK;
+            self.chunks[chunk_idx][slot_idx] = Some(value);
+            return free_id;
+        }
+
+        // Allocate new slot
+        let id = self.next_id;
+        let chunk_idx = (id >> CHUNK_SHIFT) as usize;
+        let slot_idx = (id as 
usize) & CHUNK_MASK; + + // Add new chunk if needed + if chunk_idx >= self.chunks.len() { + self.chunks.push(Self::new_chunk()); } + + self.chunks[chunk_idx][slot_idx] = Some(value); + self.next_id += 1; + id } /// Get immutable reference by ID #[inline(always)] pub fn get(&self, id: u32) -> Option<&T> { - self.storage.get(id as usize).and_then(|opt| opt.as_ref()) + let chunk_idx = (id >> CHUNK_SHIFT) as usize; + let slot_idx = (id as usize) & CHUNK_MASK; + self.chunks + .get(chunk_idx) + .and_then(|chunk| chunk[slot_idx].as_ref()) } /// Get reference by ID without bounds checking (caller must ensure validity) /// SAFETY: id must be a valid index returned from alloc() and not freed #[inline(always)] pub unsafe fn get_unchecked(&self, id: u32) -> &T { + let chunk_idx = (id >> CHUNK_SHIFT) as usize; + let slot_idx = (id as usize) & CHUNK_MASK; unsafe { - self.storage - .get_unchecked(id as usize) + self.chunks + .get_unchecked(chunk_idx) + .get_unchecked(slot_idx) .as_ref() .unwrap_unchecked() } @@ -202,18 +243,23 @@ impl Arena { /// Get mutable reference by ID #[inline(always)] pub fn get_mut(&mut self, id: u32) -> Option<&mut T> { - self.storage - .get_mut(id as usize) - .and_then(|opt| opt.as_mut()) + let chunk_idx = (id >> CHUNK_SHIFT) as usize; + let slot_idx = (id as usize) & CHUNK_MASK; + self.chunks + .get_mut(chunk_idx) + .and_then(|chunk| chunk[slot_idx].as_mut()) } /// Get mutable reference by ID without bounds checking (caller must ensure validity) /// SAFETY: id must be a valid index returned from alloc() and not freed #[inline(always)] pub unsafe fn get_mut_unchecked(&mut self, id: u32) -> &mut T { + let chunk_idx = (id >> CHUNK_SHIFT) as usize; + let slot_idx = (id as usize) & CHUNK_MASK; unsafe { - self.storage - .get_unchecked_mut(id as usize) + self.chunks + .get_unchecked_mut(chunk_idx) + .get_unchecked_mut(slot_idx) .as_mut() .unwrap_unchecked() } @@ -222,9 +268,11 @@ impl Arena { /// Free a slot (mark for reuse) #[inline] pub fn free(&mut self, id: u32) { - if let Some(slot) = self.storage.get_mut(id as usize) { - if slot.is_some() { - *slot = None; + let chunk_idx = (id >> CHUNK_SHIFT) as usize; + let slot_idx = (id as usize) & CHUNK_MASK; + if let Some(chunk) = self.chunks.get_mut(chunk_idx) { + if chunk[slot_idx].is_some() { + chunk[slot_idx] = None; self.free_list.push(id); self.count -= 1; } @@ -234,9 +282,11 @@ impl Arena { /// Check if a slot is occupied #[inline(always)] pub fn is_valid(&self, id: u32) -> bool { - self.storage - .get(id as usize) - .map(|opt| opt.is_some()) + let chunk_idx = (id >> CHUNK_SHIFT) as usize; + let slot_idx = (id as usize) & CHUNK_MASK; + self.chunks + .get(chunk_idx) + .map(|chunk| chunk[slot_idx].is_some()) .unwrap_or(false) } @@ -248,30 +298,45 @@ impl Arena { /// Iterate over all live objects pub fn iter(&self) -> impl Iterator { - self.storage - .iter() - .enumerate() - .filter_map(|(i, opt)| opt.as_ref().map(|v| (i as u32, v))) + self.chunks.iter().enumerate().flat_map(|(chunk_idx, chunk)| { + chunk.iter().enumerate().filter_map(move |(slot_idx, opt)| { + opt.as_ref() + .map(|v| (((chunk_idx as u32) << CHUNK_SHIFT) | (slot_idx as u32), v)) + }) + }) } /// Iterate over all live objects mutably pub fn iter_mut(&mut self) -> impl Iterator { - self.storage + self.chunks .iter_mut() .enumerate() - .filter_map(|(i, opt)| opt.as_mut().map(|v| (i as u32, v))) - } - - /// Shrink internal storage + .flat_map(|(chunk_idx, chunk)| { + chunk + .iter_mut() + .enumerate() + .filter_map(move |(slot_idx, opt)| { + opt.as_mut() + .map(|v| 
(((chunk_idx as u32) << CHUNK_SHIFT) | (slot_idx as u32), v)) + }) + }) + } + + /// Shrink internal storage (only shrinks chunk vector capacity, not individual chunks) pub fn shrink_to_fit(&mut self) { - self.storage.shrink_to_fit(); + self.chunks.shrink_to_fit(); self.free_list.shrink_to_fit(); } /// Clear all objects pub fn clear(&mut self) { - self.storage.clear(); + for chunk in &mut self.chunks { + for slot in chunk.iter_mut() { + *slot = None; + } + } self.free_list.clear(); + self.next_id = 0; self.count = 0; } } @@ -299,6 +364,40 @@ pub struct ObjectPoolV2 { // Uses linear probing with string content comparison for collision handling string_intern: StringInternTable, max_intern_length: usize, + + // Pre-cached metamethod name StringIds (like Lua's G(L)->tmname[]) + // These are created at initialization and never collected + // Stored as StringId to avoid repeated hash lookup in hot paths + pub tm_index: StringId, // "__index" + pub tm_newindex: StringId, // "__newindex" + pub tm_call: StringId, // "__call" + pub tm_tostring: StringId, // "__tostring" + pub tm_len: StringId, // "__len" + pub tm_pairs: StringId, // "__pairs" + pub tm_ipairs: StringId, // "__ipairs" + pub tm_gc: StringId, // "__gc" + pub tm_close: StringId, // "__close" + pub tm_mode: StringId, // "__mode" + pub tm_name: StringId, // "__name" + pub tm_eq: StringId, // "__eq" + pub tm_lt: StringId, // "__lt" + pub tm_le: StringId, // "__le" + pub tm_add: StringId, // "__add" + pub tm_sub: StringId, // "__sub" + pub tm_mul: StringId, // "__mul" + pub tm_div: StringId, // "__div" + pub tm_mod: StringId, // "__mod" + pub tm_pow: StringId, // "__pow" + pub tm_unm: StringId, // "__unm" + pub tm_idiv: StringId, // "__idiv" + pub tm_band: StringId, // "__band" + pub tm_bor: StringId, // "__bor" + pub tm_bxor: StringId, // "__bxor" + pub tm_bnot: StringId, // "__bnot" + pub tm_shl: StringId, // "__shl" + pub tm_shr: StringId, // "__shr" + pub tm_concat: StringId, // "__concat" + pub tm_metatable: StringId, // "__metatable" } // ============ Lua-style String Interning Table ============ @@ -486,7 +585,7 @@ impl StringInternTable { impl ObjectPoolV2 { pub fn new() -> Self { - Self { + let mut pool = Self { strings: Arena::with_capacity(256), tables: Arena::with_capacity(64), functions: Arena::with_capacity(32), @@ -495,6 +594,144 @@ impl ObjectPoolV2 { threads: Arena::with_capacity(8), string_intern: StringInternTable::with_capacity(256), max_intern_length: 64, // Strings <= 64 bytes are interned + // Placeholder values - will be initialized below + tm_index: StringId(0), + tm_newindex: StringId(0), + tm_call: StringId(0), + tm_tostring: StringId(0), + tm_len: StringId(0), + tm_pairs: StringId(0), + tm_ipairs: StringId(0), + tm_gc: StringId(0), + tm_close: StringId(0), + tm_mode: StringId(0), + tm_name: StringId(0), + tm_eq: StringId(0), + tm_lt: StringId(0), + tm_le: StringId(0), + tm_add: StringId(0), + tm_sub: StringId(0), + tm_mul: StringId(0), + tm_div: StringId(0), + tm_mod: StringId(0), + tm_pow: StringId(0), + tm_unm: StringId(0), + tm_idiv: StringId(0), + tm_band: StringId(0), + tm_bor: StringId(0), + tm_bxor: StringId(0), + tm_bnot: StringId(0), + tm_shl: StringId(0), + tm_shr: StringId(0), + tm_concat: StringId(0), + tm_metatable: StringId(0), + }; + + // Pre-create all metamethod name strings (like Lua's luaT_init) + // These strings are interned and will never be collected + pool.tm_index = pool.create_string("__index"); + pool.tm_newindex = pool.create_string("__newindex"); + pool.tm_call = 
pool.create_string("__call"); + pool.tm_tostring = pool.create_string("__tostring"); + pool.tm_len = pool.create_string("__len"); + pool.tm_pairs = pool.create_string("__pairs"); + pool.tm_ipairs = pool.create_string("__ipairs"); + pool.tm_gc = pool.create_string("__gc"); + pool.tm_close = pool.create_string("__close"); + pool.tm_mode = pool.create_string("__mode"); + pool.tm_name = pool.create_string("__name"); + pool.tm_eq = pool.create_string("__eq"); + pool.tm_lt = pool.create_string("__lt"); + pool.tm_le = pool.create_string("__le"); + pool.tm_add = pool.create_string("__add"); + pool.tm_sub = pool.create_string("__sub"); + pool.tm_mul = pool.create_string("__mul"); + pool.tm_div = pool.create_string("__div"); + pool.tm_mod = pool.create_string("__mod"); + pool.tm_pow = pool.create_string("__pow"); + pool.tm_unm = pool.create_string("__unm"); + pool.tm_idiv = pool.create_string("__idiv"); + pool.tm_band = pool.create_string("__band"); + pool.tm_bor = pool.create_string("__bor"); + pool.tm_bxor = pool.create_string("__bxor"); + pool.tm_bnot = pool.create_string("__bnot"); + pool.tm_shl = pool.create_string("__shl"); + pool.tm_shr = pool.create_string("__shr"); + pool.tm_concat = pool.create_string("__concat"); + pool.tm_metatable = pool.create_string("__metatable"); + + // Fix all metamethod name strings - they should never be collected + // (like Lua's luaC_fix in luaT_init) + pool.fix_string(pool.tm_index); + pool.fix_string(pool.tm_newindex); + pool.fix_string(pool.tm_call); + pool.fix_string(pool.tm_tostring); + pool.fix_string(pool.tm_len); + pool.fix_string(pool.tm_pairs); + pool.fix_string(pool.tm_ipairs); + pool.fix_string(pool.tm_gc); + pool.fix_string(pool.tm_close); + pool.fix_string(pool.tm_mode); + pool.fix_string(pool.tm_name); + pool.fix_string(pool.tm_eq); + pool.fix_string(pool.tm_lt); + pool.fix_string(pool.tm_le); + pool.fix_string(pool.tm_add); + pool.fix_string(pool.tm_sub); + pool.fix_string(pool.tm_mul); + pool.fix_string(pool.tm_div); + pool.fix_string(pool.tm_mod); + pool.fix_string(pool.tm_pow); + pool.fix_string(pool.tm_unm); + pool.fix_string(pool.tm_idiv); + pool.fix_string(pool.tm_band); + pool.fix_string(pool.tm_bor); + pool.fix_string(pool.tm_bxor); + pool.fix_string(pool.tm_bnot); + pool.fix_string(pool.tm_shl); + pool.fix_string(pool.tm_shr); + pool.fix_string(pool.tm_concat); + pool.fix_string(pool.tm_metatable); + + pool + } + + /// Get pre-cached metamethod StringId by TM enum value + /// This is the fast path for metamethod lookup in hot code + /// TMS enum from ltm.h: + /// TM_INDEX=0, TM_NEWINDEX=1, TM_GC=2, TM_MODE=3, TM_LEN=4, TM_EQ=5, + /// TM_ADD=6, TM_SUB=7, TM_MUL=8, TM_MOD=9, TM_POW=10, TM_DIV=11, + /// TM_IDIV=12, TM_BAND=13, TM_BOR=14, TM_BXOR=15, TM_SHL=16, TM_SHR=17, + /// TM_UNM=18, TM_BNOT=19, TM_LT=20, TM_LE=21, TM_CONCAT=22, TM_CALL=23 + #[inline] + pub fn get_binop_tm(&self, tm: u8) -> StringId { + match tm { + 0 => self.tm_index, + 1 => self.tm_newindex, + 2 => self.tm_gc, + 3 => self.tm_mode, + 4 => self.tm_len, + 5 => self.tm_eq, + 6 => self.tm_add, + 7 => self.tm_sub, + 8 => self.tm_mul, + 9 => self.tm_mod, + 10 => self.tm_pow, + 11 => self.tm_div, + 12 => self.tm_idiv, + 13 => self.tm_band, + 14 => self.tm_bor, + 15 => self.tm_bxor, + 16 => self.tm_shl, + 17 => self.tm_shr, + 18 => self.tm_unm, + 19 => self.tm_bnot, + 20 => self.tm_lt, + 21 => self.tm_le, + 22 => self.tm_concat, + 23 => self.tm_call, + 24 => self.tm_close, + _ => self.tm_index, // Fallback to __index } } @@ -610,6 +847,25 @@ impl ObjectPoolV2 { 
self.strings.get(id.0).map(|gs| gs.data.as_str()) } + /// Mark a string as fixed (never collected) - like Lua's luaC_fix() + /// Used for metamethod names and other permanent strings + #[inline] + pub fn fix_string(&mut self, id: StringId) { + if let Some(gs) = self.strings.get_mut(id.0) { + gs.header.fixed = true; + gs.header.marked = true; // Always considered marked + } + } + + /// Mark a table as fixed (never collected) + #[inline] + pub fn fix_table(&mut self, id: TableId) { + if let Some(gt) = self.tables.get_mut(id.0) { + gt.header.fixed = true; + gt.header.marked = true; + } + } + // ==================== Table Operations ==================== #[inline] diff --git a/crates/luars/src/lua_value/lua_table.rs b/crates/luars/src/lua_value/lua_table.rs index 7c3470b2..85c5ca88 100644 --- a/crates/luars/src/lua_value/lua_table.rs +++ b/crates/luars/src/lua_value/lua_table.rs @@ -298,6 +298,7 @@ impl LuaTable { } /// Fast integer key write + /// OPTIMIZED: Use resize_with for better performance #[inline] pub fn set_int(&mut self, key: i64, value: LuaValue) { if value.is_nil() { @@ -316,15 +317,27 @@ impl LuaTable { let array_len = self.array.len(); if idx < array_len { + // Fast path: within existing array self.array[idx] = value; return; } else if idx == array_len { + // Sequential append - use reserve to reduce reallocations + if self.array.capacity() == array_len { + // Need to grow - reserve extra space + let extra = if array_len == 0 { 8 } else { array_len }; + self.array.reserve(extra); + } self.array.push(value); return; + } else if idx < array_len + 8 && idx < 256 { + // Small gap - fill with nils and extend + self.array.resize_with(idx + 1, LuaValue::nil); + self.array[idx] = value; + return; } } - // Out of array range, use hash + // Out of array range or large gap, use hash self.set_in_hash(LuaValue::integer(key), value); } diff --git a/crates/luars/src/lua_vm/execute/arithmetic_instructions.rs b/crates/luars/src/lua_vm/execute/arithmetic_instructions.rs index 3d395203..70812654 100644 --- a/crates/luars/src/lua_vm/execute/arithmetic_instructions.rs +++ b/crates/luars/src/lua_vm/execute/arithmetic_instructions.rs @@ -325,8 +325,8 @@ pub fn exec_unm(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> Lua } else if let Some(f) = value.as_number() { LuaValue::number(-f) } else { - // Try metamethod - let mm_key = vm.create_string("__unm"); + // Try metamethod - use pre-cached __unm StringId + let mm_key = LuaValue::string(vm.object_pool.tm_unm); if let Some(mt) = vm.table_get_metatable(&value) { if let Some(metamethod) = vm.table_get_with_meta(&mt, &mm_key) { if !metamethod.is_nil() { @@ -948,8 +948,8 @@ pub fn exec_bnot(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> Lu return Ok(()); } - // Try metamethod for non-integer values - let mm_key = vm.create_string("__bnot"); + // Try metamethod for non-integer values - use pre-cached __bnot StringId + let mm_key = LuaValue::string(vm.object_pool.tm_bnot); if let Some(mt) = vm.table_get_metatable(&value) { if let Some(metamethod) = vm.table_get_with_meta(&mt, &mm_key) { if !metamethod.is_nil() { @@ -996,7 +996,8 @@ pub fn exec_len(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> Lua // Check for __len metamethod first (for tables) if value.is_table() { - let mm_key = vm.create_string("__len"); + // Use pre-cached __len StringId + let mm_key = LuaValue::string(vm.object_pool.tm_len); if let Some(mt) = vm.table_get_metatable(&value) { if let Some(metamethod) = vm.table_get_with_meta(&mt, &mm_key) { if 
!metamethod.is_nil() { @@ -1032,37 +1033,6 @@ pub fn exec_len(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> Lua Ok(()) } -/// Get metamethod name for binary operation -fn get_binop_metamethod(tm: u8) -> &'static str { - // TMS enum from ltm.h: - // TM_INDEX=0, TM_NEWINDEX=1, TM_GC=2, TM_MODE=3, TM_LEN=4, TM_EQ=5, - // TM_ADD=6, TM_SUB=7, TM_MUL=8, TM_MOD=9, TM_POW=10, TM_DIV=11, - // TM_IDIV=12, TM_BAND=13, TM_BOR=14, TM_BXOR=15, TM_SHL=16, TM_SHR=17, - // TM_UNM=18, TM_BNOT=19, TM_LT=20, TM_LE=21, TM_CONCAT=22, TM_CALL=23, TM_CLOSE=24 - match tm { - 6 => "__add", - 7 => "__sub", - 8 => "__mul", - 9 => "__mod", - 10 => "__pow", - 11 => "__div", - 12 => "__idiv", - 13 => "__band", - 14 => "__bor", - 15 => "__bxor", - 16 => "__shl", - 17 => "__shr", - 22 => "__concat", - 5 => "__eq", - 20 => "__lt", - 21 => "__le", - 18 => "__unm", - 19 => "__bnot", - 4 => "__len", - _ => "__unknown", - } -} - /// MmBin: Metamethod binary operation (register, register) #[inline(always)] pub fn exec_mmbin(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaResult<()> { @@ -1085,8 +1055,8 @@ pub fn exec_mmbin(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> L let ra = *vm.register_stack.as_ptr().add(base_ptr + a); let rb = *vm.register_stack.as_ptr().add(base_ptr + b); - let metamethod_name = get_binop_metamethod(c as u8); - let mm_key = vm.create_string(metamethod_name); + // Use pre-cached metamethod StringId + let mm_key = LuaValue::string(vm.object_pool.get_binop_tm(c as u8)); let metamethod = if let Some(mt) = vm.table_get_metatable(&ra) { vm.table_get_with_meta(&mt, &mm_key) @@ -1130,8 +1100,8 @@ pub fn exec_mmbini(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> let rb = *vm.register_stack.as_ptr().add(base_ptr + a); let rc = LuaValue::integer(sb as i64); - let metamethod_name = get_binop_metamethod(c as u8); - let mm_key = vm.create_string(metamethod_name); + // Use pre-cached metamethod StringId + let mm_key = LuaValue::string(vm.object_pool.get_binop_tm(c as u8)); let metamethod = if let Some(mt) = vm.table_get_metatable(&rb) { vm.table_get_with_meta(&mt, &mm_key) @@ -1186,8 +1156,8 @@ pub fn exec_mmbink(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> return Ok(()); }; - let metamethod_name = get_binop_metamethod(c as u8); - let mm_key = vm.create_string(metamethod_name); + // Use pre-cached metamethod StringId + let mm_key = LuaValue::string(vm.object_pool.get_binop_tm(c as u8)); let (left, right) = if !k { (ra, kb) } else { (kb, ra) }; diff --git a/crates/luars/src/lua_vm/execute/control_instructions.rs b/crates/luars/src/lua_vm/execute/control_instructions.rs index 79d5505b..05771a92 100644 --- a/crates/luars/src/lua_vm/execute/control_instructions.rs +++ b/crates/luars/src/lua_vm/execute/control_instructions.rs @@ -244,7 +244,8 @@ pub fn exec_eq(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaR // If not equal by value, try __eq metamethod // IMPORTANT: Both operands must have the SAME __eq metamethod (Lua 5.4 spec) if !is_equal && (left.is_table() || right.is_table()) { - let mm_key = vm.create_string("__eq"); + // Use pre-cached __eq StringId + let mm_key = LuaValue::string(vm.object_pool.tm_eq); let left_mt = vm.table_get_metatable(&left); let right_mt = vm.table_get_metatable(&right); @@ -356,7 +357,8 @@ fn exec_lt_metamethod( k: bool, frame_ptr: *mut LuaCallFrame, ) -> LuaResult<()> { - let mm_key = vm.create_string("__lt"); + // Use pre-cached __lt StringId + let mm_key = LuaValue::string(vm.object_pool.tm_lt); 
let mut found_metamethod = false; if let Some(mt) = vm.table_get_metatable(&left) { @@ -452,8 +454,8 @@ pub fn exec_le(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaR // String comparison left <= right } else { - // Try __le metamethod first - let mm_key_le = vm.create_string("__le"); + // Try __le metamethod first - use pre-cached StringId + let mm_key_le = LuaValue::string(vm.object_pool.tm_le); let mut found_metamethod = false; if let Some(mt) = vm.table_get_metatable(&left) { @@ -494,7 +496,8 @@ pub fn exec_le(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> LuaR // If __le not found, try __lt and compute !(b < a) if !found_metamethod { - let mm_key_lt = vm.create_string("__lt"); + // Use pre-cached __lt StringId + let mm_key_lt = LuaValue::string(vm.object_pool.tm_lt); if let Some(mt) = vm.table_get_metatable(&right) { if let Some(metamethod) = vm.table_get_with_meta(&mt, &mm_key_lt) { @@ -809,7 +812,8 @@ pub fn exec_call( }); if let Some(metatable) = metatable_opt { - let call_key = vm.create_string("__call"); + // Use pre-cached __call StringId + let call_key = LuaValue::string(vm.object_pool.tm_call); if let Some(call_func) = vm.table_get_with_meta(&metatable, &call_key) { if call_func.is_callable() { if call_func.is_cfunction() { @@ -859,6 +863,12 @@ fn exec_call_lua_function( call_metamethod_self: LuaValue, frame_ptr_ptr: &mut *mut LuaCallFrame, // Use passed frame_ptr! ) -> LuaResult<()> { + // Safepoint GC check: run GC at function call boundaries + // This is much cheaper than checking on every table operation + if vm.gc_debt_local > 1024 * 1024 { + vm.check_gc_slow_pub(); + } + // Get function ID - FAST PATH: assume valid function let func_id = unsafe { func.as_function_id().unwrap_unchecked() }; diff --git a/crates/luars/src/lua_vm/execute/table_instructions.rs b/crates/luars/src/lua_vm/execute/table_instructions.rs index 8572b6dd..88108e79 100644 --- a/crates/luars/src/lua_vm/execute/table_instructions.rs +++ b/crates/luars/src/lua_vm/execute/table_instructions.rs @@ -60,8 +60,7 @@ pub fn exec_newtable(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) { *vm.register_stack.get_unchecked_mut(base_ptr + a) = table; } - // GC checkpoint: table now safely stored in register - vm.check_gc(); + // GC checkpoint disabled for testing } /// GETTABLE A B C @@ -164,12 +163,7 @@ pub fn exec_settable(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) - lua_table.raw_set(key_value, set_value); } - // GC barrier - if crate::gc::GC::is_collectable(&set_value) { - vm.gc - .barrier_forward(crate::gc::GcObjectType::Table, table_id.0); - vm.gc.barrier_back(&set_value); - } + // Note: GC barrier is handled lazily during collection return Ok(()); } } @@ -258,12 +252,8 @@ pub fn exec_seti(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> Lu let lua_table = unsafe { vm.object_pool.get_table_mut_unchecked(table_id) }; lua_table.set_int(b, set_value); - // GC barrier - only for collectable values - if crate::gc::GC::is_collectable(&set_value) { - vm.gc - .barrier_forward(crate::gc::GcObjectType::Table, table_id.0); - vm.gc.barrier_back(&set_value); - } + // Note: GC barrier is handled lazily during collection + // This significantly improves write performance return Ok(()); } } @@ -377,12 +367,7 @@ pub fn exec_setfield(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) - // Ultra-fast path: direct set without any metamethod checks table_ref.raw_set(key_value.clone(), set_value.clone()); - // GC barrier - only for collectable values - if 
crate::gc::GC::is_collectable(&set_value) { - vm.gc - .barrier_forward(crate::gc::GcObjectType::Table, table_id.0); - vm.gc.barrier_back(&set_value); - } + // Note: GC barrier is handled lazily during collection return Ok(()); } } @@ -497,12 +482,7 @@ pub fn exec_settabup(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) - // Ultra-fast path: direct set without any metamethod checks table_ref.raw_set(key_value.clone(), set_value.clone()); - // GC barrier - only for collectable values - if crate::gc::GC::is_collectable(&set_value) { - vm.gc - .barrier_forward(crate::gc::GcObjectType::Table, table_id.0); - vm.gc.barrier_back(&set_value); - } + // Note: GC barrier is handled lazily during collection return Ok(()); } } diff --git a/crates/luars/src/lua_vm/execute/upvalue_instructions.rs b/crates/luars/src/lua_vm/execute/upvalue_instructions.rs index d00fff1a..7f31bc87 100644 --- a/crates/luars/src/lua_vm/execute/upvalue_instructions.rs +++ b/crates/luars/src/lua_vm/execute/upvalue_instructions.rs @@ -302,8 +302,8 @@ pub fn exec_concat(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) -> let concat_result = l + &r; result_value = vm.create_string_owned(concat_result); } else { - // Try __concat metamethod - let mm_key = vm.create_string("__concat"); + // Try __concat metamethod - use pre-cached StringId + let mm_key = LuaValue::string(vm.object_pool.tm_concat); let mut found_metamethod = false; if let Some(mt) = vm.table_get_metatable(&result_value) { diff --git a/crates/luars/src/lua_vm/lua_error.rs b/crates/luars/src/lua_vm/lua_error.rs index bfda2c91..ac56b9e6 100644 --- a/crates/luars/src/lua_vm/lua_error.rs +++ b/crates/luars/src/lua_vm/lua_error.rs @@ -1,5 +1,3 @@ -use crate::LuaValue; - /// Lightweight error enum - only 1 byte! /// Actual error data stored in VM to reduce Result size #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -24,22 +22,3 @@ impl std::fmt::Display for LuaError { } } } - -/// Legacy error type for compatibility (will be phased out) -/// Used during transition period -#[derive(Debug, Clone)] -pub enum LuaErrorLegacy { - RuntimeError(String), - CompileError(String), - Yield(Vec), -} - -impl From for LuaError { - fn from(legacy: LuaErrorLegacy) -> Self { - match legacy { - LuaErrorLegacy::RuntimeError(_) => LuaError::RuntimeError, - LuaErrorLegacy::CompileError(_) => LuaError::CompileError, - LuaErrorLegacy::Yield(_) => LuaError::Yield, - } - } -} diff --git a/crates/luars/src/lua_vm/mod.rs b/crates/luars/src/lua_vm/mod.rs index 71a0a45f..effe2147 100644 --- a/crates/luars/src/lua_vm/mod.rs +++ b/crates/luars/src/lua_vm/mod.rs @@ -28,6 +28,15 @@ pub struct LuaVM { // Global environment table (_G and _ENV point to this) pub(crate) global_value: LuaValue, + // Registry table (like Lua's LUA_REGISTRYINDEX) + // Used to store objects that should be protected from GC but not visible to Lua code + // This is a GC root and all values in it are protected + pub(crate) registry: LuaValue, + + // Hot path GC debt counter - placed early in struct for cache locality + // This is updated on every allocation and checked frequently + pub(crate) gc_debt_local: isize, + // Call stack - Pre-allocated Vec with fixed capacity // Using Vec directly (no Box indirection) for cache efficiency // Vec is pre-allocated to MAX_CALL_DEPTH and never reallocated @@ -40,7 +49,11 @@ pub struct LuaVM { // Global register stack (unified stack architecture, like Lua 5.4) pub register_stack: Vec, - // Garbage collector + // Object pool for unified object management (new architecture) + // 
Placed near top for cache locality with hot operations
+    pub(crate) object_pool: ObjectPool,
+
+    // Garbage collector (cold path - only accessed during actual GC)
     pub(crate) gc: GC,
     // Multi-return value buffer (temporary storage for function returns)
@@ -78,9 +91,6 @@ pub struct LuaVM {
     // String metatable (shared by all strings) - stored as TableId in LuaValue
     pub(crate) string_metatable: Option<LuaValue>,
-    // Object pool for unified object management (new architecture)
-    pub(crate) object_pool: ObjectPool,
-
     // Async executor for Lua-Rust async bridge
     #[cfg(feature = "async")]
     pub(crate) async_executor: AsyncExecutor,
@@ -105,9 +115,12 @@ impl LuaVM {
         let mut vm = LuaVM {
             global_value: LuaValue::nil(),
+            registry: LuaValue::nil(), // Will be initialized below
+            gc_debt_local: -(200 * 1024), // Start with negative debt (can allocate 200KB before GC)
             frames,
             frame_count: 0,
             register_stack: Vec::with_capacity(256), // Pre-allocate for initial stack
+            object_pool: ObjectPool::new(),
             gc: GC::new(),
             return_values: Vec::with_capacity(16),
             open_upvalues: Vec::new(),
@@ -121,7 +134,6 @@
             current_thread_value: None,
             main_thread_value: None, // Will be initialized lazily
             string_metatable: None,
-            object_pool: ObjectPool::new(),
             #[cfg(feature = "async")]
             async_executor: AsyncExecutor::new(),
             // Initialize error storage
@@ -129,14 +141,75 @@
             yield_values: Vec::new(),
         };
+
+        // Initialize registry (like Lua's init_registry)
+        // Registry is a GC root and protects all values stored in it
+        let registry = vm.create_table(2, 8);
+        if let Some(registry_id) = registry.as_table_id() {
+            // Fix the registry table so it's never collected
+            vm.object_pool.fix_table(registry_id);
+        }
+        vm.registry = registry;
+
         // Set _G to point to the global table itself
         let globals_ref = vm.create_table(0, 20);
         vm.global_value = globals_ref;
         vm.set_global("_G", globals_ref);
         vm.set_global("_ENV", globals_ref);
+
+        // Store globals in registry (like Lua's LUA_RIDX_GLOBALS)
+        vm.registry_set_integer(1, globals_ref);
         vm
     }
+
+    /// Set a value in the registry by integer key
+    pub fn registry_set_integer(&mut self, key: i64, value: LuaValue) {
+        if let Some(reg_id) = self.registry.as_table_id() {
+            if let Some(reg_table) = self.object_pool.get_table_mut(reg_id) {
+                reg_table.set_int(key, value);
+            }
+        }
+    }
+
+    /// Get a value from the registry by integer key
+    pub fn registry_get_integer(&self, key: i64) -> Option<LuaValue> {
+        if let Some(reg_id) = self.registry.as_table_id() {
+            if let Some(reg_table) = self.object_pool.get_table(reg_id) {
+                return reg_table.get_int(key);
+            }
+        }
+        None
+    }
+
+    /// Set a value in the registry by string key
+    pub fn registry_set(&mut self, key: &str, value: LuaValue) {
+        let key_value = self.create_string(key);
+        if let Some(reg_id) = self.registry.as_table_id() {
+            if let Some(reg_table) = self.object_pool.get_table_mut(reg_id) {
+                reg_table.raw_set(key_value, value);
+            }
+        }
+    }
+
+    /// Get a value from the registry by string key
+    pub fn registry_get(&self, key: &str) -> Option<LuaValue> {
+        // We can't use create_string here as it requires &mut self
+        // So we do a linear search (registry is typically small)
+        if let Some(reg_id) = self.registry.as_table_id() {
+            if let Some(reg_table) = self.object_pool.get_table(reg_id) {
+                for (k, v) in reg_table.iter_all() {
+                    if let Some(k_id) = k.as_string_id() {
+                        if let Some(k_str) = self.object_pool.get_string_str(k_id) {
+                            if k_str == key {
+                                return Some(v);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        None
+    }
    // Register access helpers for unified stack architecture
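The registry introduced above plays the same role as Lua's `LUA_REGISTRYINDEX`: host code can park values there so the collector treats them as roots even when no Lua code references them. A minimal host-side usage sketch, not part of this patch, using only the `registry_*` helpers defined in the hunk above (the key name is hypothetical):

```rust
// Sketch: protecting a host-created value from GC via the registry.
let mut vm = LuaVM::new();

// Create a table and keep it alive by stashing it in the registry.
let config = vm.create_table(0, 4);
vm.registry_set("host.config", config); // "host.config" is an illustrative key

// ...later, any GC cycle has still kept the table reachable.
if let Some(cfg) = vm.registry_get("host.config") {
    assert!(cfg.is_table());
}
```

Because `registry_get` scans the registry table linearly, string keys are best reserved for a handful of long-lived entries; hot paths should prefer the integer-keyed variants.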
#[inline(always)] @@ -421,8 +494,8 @@ impl LuaVM { // Create the metatable let metatable = self.create_table(0, 1); - // Create the __index key before any borrowing - let index_key = self.create_string("__index"); + // Use pre-cached __index StringId for fast lookup + let index_key = LuaValue::string(self.object_pool.tm_index); // Get the table reference to set __index if let Some(mt_ref) = self.get_table_mut(&metatable) { @@ -797,7 +870,8 @@ impl LuaVM { if lua_table_value.is_string() { // Strings use a shared metatable if let Some(string_mt) = self.get_string_metatable() { - let index_key = self.create_string("__index"); + // Use pre-cached __index StringId for fast lookup + let index_key = LuaValue::string(self.object_pool.tm_index); // Get the __index field from string metatable if let Some(index_table) = self.table_get_with_meta(&string_mt, &index_key) { @@ -829,7 +903,8 @@ impl LuaVM { if let Some(mt) = meta_value && let Some(meta_id) = mt.as_table_id() { - let index_key = self.create_string("__index"); + // Use pre-cached __index StringId - avoids hash computation and intern lookup + let index_key = LuaValue::string(self.object_pool.tm_index); let index_value = { let metatable = self.object_pool.get_table(meta_id)?; @@ -841,8 +916,17 @@ impl LuaVM { LuaValueKind::Table => { return self.table_get_with_meta(&index_val, key); } - LuaValueKind::CFunction | LuaValueKind::Function => { - let args = vec![lua_table_value.clone(), key.clone()]; + // Fast path for CFunction __index + LuaValueKind::CFunction => { + if let Some(cfunc) = index_val.as_cfunction() { + match self.call_cfunc_metamethod_2(cfunc, *lua_table_value, *key) { + Ok(result) => return result, + Err(_) => return None, + } + } + } + LuaValueKind::Function => { + let args = [*lua_table_value, *key]; match self.call_metamethod(&index_val, &args) { Ok(result) => return result, Err(_) => return None, @@ -874,7 +958,8 @@ impl LuaVM { }; if let Some(mt_id) = metatable.as_table_id() { - let index_key = self.create_string("__index"); + // Use pre-cached __index StringId + let index_key = LuaValue::string(self.object_pool.tm_index); let index_value = { let mt = self.object_pool.get_table(mt_id)?; @@ -886,9 +971,18 @@ impl LuaVM { // __index is a table - look up in that table LuaValueKind::Table => return self.table_get_with_meta(&index_val, key), - // __index is a function - call it with (userdata, key) - LuaValueKind::CFunction | LuaValueKind::Function => { - let args = vec![lua_userdata_value.clone(), key.clone()]; + // Fast path for CFunction __index + LuaValueKind::CFunction => { + if let Some(cfunc) = index_val.as_cfunction() { + match self.call_cfunc_metamethod_2(cfunc, *lua_userdata_value, *key) { + Ok(result) => return result, + Err(_) => return None, + } + } + } + // Lua function - use slower path + LuaValueKind::Function => { + let args = [*lua_userdata_value, *key]; match self.call_metamethod(&index_val, &args) { Ok(result) => return result, Err(_) => return None, @@ -905,7 +999,8 @@ impl LuaVM { /// Get value from string with metatable support /// Handles __index metamethod for strings pub fn string_get(&mut self, string_val: &LuaValue, key: &LuaValue) -> Option { - let index_key = self.create_string("__index"); + // Use pre-cached __index StringId + let index_key = LuaValue::string(self.object_pool.tm_index); // Check for __index metamethod in string metatable if let Some(mt) = &self.string_metatable.clone() { let index_value = if let Some(mt_ref) = self.get_table(mt) { @@ -918,9 +1013,18 @@ impl LuaVM { match 
index_val.kind() { // __index is a table - look up in that table (this is the common case for strings) LuaValueKind::Table => return self.table_get_with_meta(&index_val, key), - // __index is a function - call it with (string, key) - LuaValueKind::CFunction | LuaValueKind::Function => { - let args = vec![string_val.clone(), key.clone()]; + // Fast path for CFunction __index + LuaValueKind::CFunction => { + if let Some(cfunc) = index_val.as_cfunction() { + match self.call_cfunc_metamethod_2(cfunc, *string_val, *key) { + Ok(result) => return result, + Err(_) => return None, + } + } + } + // Lua function - slower path + LuaValueKind::Function => { + let args = [*string_val, *key]; match self.call_metamethod(&index_val, &args) { Ok(result) => return result, Err(_) => return None, @@ -973,7 +1077,8 @@ impl LuaVM { if let Some(mt) = meta_value && let Some(mt_id) = mt.as_table_id() { - let newindex_key = self.create_string("__newindex"); + // Use pre-cached __newindex StringId - avoids hash computation and intern lookup + let newindex_key = LuaValue::string(self.object_pool.tm_newindex); let newindex_value = { let Some(metatable) = self.object_pool.get_table(mt_id) else { @@ -987,8 +1092,18 @@ impl LuaVM { LuaValueKind::Table => { return self.table_set_with_meta(newindex_val, key, value); } - LuaValueKind::CFunction | LuaValueKind::Function => { - let args = vec![lua_table_val, key, value]; + // Fast path for CFunction __newindex + LuaValueKind::CFunction => { + if let Some(cfunc) = newindex_val.as_cfunction() { + match self.call_cfunc_metamethod_3(cfunc, lua_table_val, key, value) { + Ok(_) => return Ok(()), + Err(e) => return Err(e), + } + } + } + // Lua function - slower path + LuaValueKind::Function => { + let args = [lua_table_val, key, value]; match self.call_metamethod(&newindex_val, &args) { Ok(_) => return Ok(()), Err(e) => return Err(e), @@ -1018,10 +1133,178 @@ impl LuaVM { func: &LuaValue, args: &[LuaValue], ) -> LuaResult> { - // Use call_function_internal for both C functions and Lua functions + // Fast path for CFunction + if let Some(cfunc) = func.as_cfunction() { + match args.len() { + 1 => return self.call_cfunc_metamethod_1(cfunc, args[0]), + 2 => return self.call_cfunc_metamethod_2(cfunc, args[0], args[1]), + 3 => return self.call_cfunc_metamethod_3(cfunc, args[0], args[1], args[2]), + _ => {} + } + } + + // Slow path for Lua functions and general cases let result = self.call_function_internal(func.clone(), args.to_vec())?; Ok(result.get(0).cloned()) } + + /// Fast path for calling CFunction metamethods with 2 arguments + /// Used by __index, __newindex, etc. Avoids Vec allocation. + /// Returns the first return value. 
+ #[inline(always)] + pub fn call_cfunc_metamethod_2( + &mut self, + cfunc: crate::lua_value::CFunction, + arg1: LuaValue, + arg2: LuaValue, + ) -> LuaResult> { + // Calculate new base position - use current frame's top area + let new_base = if self.frame_count > 0 { + let current_frame = &self.frames[self.frame_count - 1]; + let caller_base = current_frame.base_ptr; + let caller_max_stack = + if let Some(func_id) = current_frame.function_value.as_function_id() { + self.object_pool + .get_function(func_id) + .map(|f| f.chunk.max_stack_size) + .unwrap_or(256) + } else { + 256 + }; + caller_base + caller_max_stack + } else { + 0 + }; + + let stack_size = 3; // func + 2 args + self.ensure_stack_capacity(new_base + stack_size); + + // Set up arguments directly (no Vec allocation) + self.register_stack[new_base] = LuaValue::cfunction(cfunc); + self.register_stack[new_base + 1] = arg1; + self.register_stack[new_base + 2] = arg2; + + // Create C function frame + let temp_frame = LuaCallFrame::new_c_function(new_base, stack_size); + self.push_frame(temp_frame); + + // Call CFunction + let result = match cfunc(self) { + Ok(r) => { + self.pop_frame_discard(); + Ok(r.first()) + } + Err(LuaError::Yield) => Err(LuaError::Yield), + Err(e) => { + self.pop_frame_discard(); + Err(e) + } + }; + + result + } + + /// Fast path for calling CFunction metamethods with 1 argument + /// Used by __len, __unm, __bnot, etc. Avoids Vec allocation. + #[inline(always)] + pub fn call_cfunc_metamethod_1( + &mut self, + cfunc: crate::lua_value::CFunction, + arg1: LuaValue, + ) -> LuaResult> { + let new_base = if self.frame_count > 0 { + let current_frame = &self.frames[self.frame_count - 1]; + let caller_base = current_frame.base_ptr; + let caller_max_stack = + if let Some(func_id) = current_frame.function_value.as_function_id() { + self.object_pool + .get_function(func_id) + .map(|f| f.chunk.max_stack_size) + .unwrap_or(256) + } else { + 256 + }; + caller_base + caller_max_stack + } else { + 0 + }; + + let stack_size = 2; // func + 1 arg + self.ensure_stack_capacity(new_base + stack_size); + + self.register_stack[new_base] = LuaValue::cfunction(cfunc); + self.register_stack[new_base + 1] = arg1; + + let temp_frame = LuaCallFrame::new_c_function(new_base, stack_size); + self.push_frame(temp_frame); + + let result = match cfunc(self) { + Ok(r) => { + self.pop_frame_discard(); + Ok(r.first()) + } + Err(LuaError::Yield) => Err(LuaError::Yield), + Err(e) => { + self.pop_frame_discard(); + Err(e) + } + }; + + result + } + + /// Fast path for calling CFunction metamethods with 3 arguments + /// Used by __newindex. Avoids Vec allocation. 
+ #[inline(always)] + pub fn call_cfunc_metamethod_3( + &mut self, + cfunc: crate::lua_value::CFunction, + arg1: LuaValue, + arg2: LuaValue, + arg3: LuaValue, + ) -> LuaResult> { + let new_base = if self.frame_count > 0 { + let current_frame = &self.frames[self.frame_count - 1]; + let caller_base = current_frame.base_ptr; + let caller_max_stack = + if let Some(func_id) = current_frame.function_value.as_function_id() { + self.object_pool + .get_function(func_id) + .map(|f| f.chunk.max_stack_size) + .unwrap_or(256) + } else { + 256 + }; + caller_base + caller_max_stack + } else { + 0 + }; + + let stack_size = 4; // func + 3 args + self.ensure_stack_capacity(new_base + stack_size); + + self.register_stack[new_base] = LuaValue::cfunction(cfunc); + self.register_stack[new_base + 1] = arg1; + self.register_stack[new_base + 2] = arg2; + self.register_stack[new_base + 3] = arg3; + + let temp_frame = LuaCallFrame::new_c_function(new_base, stack_size); + self.push_frame(temp_frame); + + let result = match cfunc(self) { + Ok(r) => { + self.pop_frame_discard(); + Ok(r.first()) + } + Err(LuaError::Yield) => Err(LuaError::Yield), + Err(e) => { + self.pop_frame_discard(); + Err(e) + } + }; + + result + } // Integer division @@ -1192,8 +1475,8 @@ impl LuaVM { continue; } - // Try to get __close metamethod - let close_key = self.create_string("__close"); + // Try to get __close metamethod using pre-cached StringId + let close_key = LuaValue::string(self.object_pool.tm_close); let metamethod = if let Some(mt) = self.table_get_metatable(&value) { self.table_get_with_meta(&mt, &close_key) } else { @@ -1259,16 +1542,14 @@ impl LuaVM { } /// Create a new table in object pool + /// OPTIMIZATION: Only update local debt counter, no function calls #[inline(always)] pub fn create_table(&mut self, array_size: usize, hash_size: usize) -> LuaValue { let id = self.object_pool.create_table(array_size, hash_size); - // Register with GC - ultra-lightweight, just update debt - self.gc - .register_object(id.0, crate::gc::GcObjectType::Table); - - // GC check MUST NOT happen here - object not yet protected! 
- // Caller must call check_gc() AFTER storing value in register + // Lightweight GC tracking: just increment debt + // This is a single integer add, should be very fast + self.gc_debt_local += 256; LuaValue::table(id) } @@ -1519,13 +1800,33 @@ impl LuaVM { /// This is called after allocating new objects (strings, tables, functions) /// Uses GC debt mechanism: runs when debt > 0 /// - /// OPTIMIZATION: Use incremental collection with work budget + /// OPTIMIZATION: Fast path is inlined, slow path is separate function + #[inline(always)] fn check_gc(&mut self) { - // Fast path: check debt without collecting roots - if !self.gc.should_collect() { + // Ultra-fast path: single integer comparison with local debt counter + // Only check if debt exceeds a significant threshold (1MB) + // This reduces the overhead of frequent checks dramatically + if self.gc_debt_local <= 1024 * 1024 { return; } + // Slow path: actual GC work + self.check_gc_slow(); + } + + /// Slow path for GC - separate function to keep hot path small + /// Public version for direct inline checks + #[cold] + #[inline(never)] + pub fn check_gc_slow_pub(&mut self) { + self.check_gc_slow(); + } + #[cold] + #[inline(never)] + fn check_gc_slow(&mut self) { + // Sync local debt to GC + self.gc.gc_debt = self.gc_debt_local; + // Incremental GC: only collect every N checks to reduce overhead self.gc.increment_check_counter(); if !self.gc.should_run_collection() { @@ -1538,14 +1839,21 @@ impl LuaVM { // 1. Global table roots.push(self.global_value); - // 2. String metatable + // 2. Registry table (persistent objects storage) + roots.push(self.registry); + + // 3. String metatable if let Some(mt) = &self.string_metatable { roots.push(*mt); } - // 3. ALL frame registers (not just current frame) + // 3. ALL frame registers AND function values (not just current frame) // This is critical - any register in any active frame must be kept alive - for frame in &self.frames { + // Also, the function being executed in each frame must be kept alive! + for frame in &self.frames[..self.frame_count] { + // Add the function value for this frame - this is CRITICAL! 
+ roots.push(frame.function_value); + let base_ptr = frame.base_ptr; let top = frame.top; for i in 0..top { @@ -1626,12 +1934,25 @@ impl LuaVM { // Add the global table itself as a root roots.push(self.global_value); - // Add all frame registers as roots - for frame in &self.frames { + // Add registry table as a root (persistent objects) + roots.push(self.registry); + + // Add string metatable if present + if let Some(mt) = &self.string_metatable { + roots.push(*mt); + } + + // Add all frame registers AND function values as roots + for frame in &self.frames[..self.frame_count] { + // CRITICAL: Add the function being executed + roots.push(frame.function_value); + let base_ptr = frame.base_ptr; let top = frame.top; for i in 0..top { - roots.push(self.register_stack[base_ptr + i]); + if base_ptr + i < self.register_stack.len() { + roots.push(self.register_stack[base_ptr + i]); + } } } @@ -2207,13 +2528,20 @@ impl LuaVM { self.ensure_stack_capacity(new_base + max_stack_size); - // Initialize registers with nil, then copy args - for i in new_base..(new_base + max_stack_size) { - self.register_stack[i] = LuaValue::nil(); - } - for (i, arg) in args.iter().enumerate() { - if i < max_stack_size { - self.register_stack[new_base + i] = *arg; + // Copy args first, then initialize remaining with nil (only beyond args) + let arg_count = args.len().min(max_stack_size); + unsafe { + let dst = self.register_stack.as_mut_ptr().add(new_base); + // Copy arguments + for (i, arg) in args.iter().enumerate() { + if i < max_stack_size { + *dst.add(i) = *arg; + } + } + // Initialize remaining registers with nil + let nil_val = LuaValue::nil(); + for i in arg_count..max_stack_size { + *dst.add(i) = nil_val; } } @@ -2242,13 +2570,9 @@ impl LuaVM { self.pop_frame_discard(); let result = std::mem::take(&mut self.return_values); - // Clear the stack region used by this call to release references - // This prevents GC from scanning stale objects after dofile/pcall - for i in new_base..(new_base + max_stack_size) { - if i < self.register_stack.len() { - self.register_stack[i] = LuaValue::nil(); - } - } + // NOTE: We intentionally don't clear the stack here anymore. + // The stack will be overwritten on next call, and GC can handle + // any stale references. This gives significant performance improvement. 
         Ok(result)
     }
diff --git a/crates/luars/src/stdlib/basic.rs b/crates/luars/src/stdlib/basic.rs
index 4a96d72e..9e4c6866 100644
--- a/crates/luars/src/stdlib/basic.rs
+++ b/crates/luars/src/stdlib/basic.rs
@@ -254,39 +254,36 @@ fn lua_ipairs(vm: &mut LuaVM) -> LuaResult<MultiValue> {
 /// Iterator function for ipairs - Optimized for performance
 #[inline]
 fn ipairs_next(vm: &mut LuaVM) -> LuaResult<MultiValue> {
-    // Fast path: direct argument access without validation
-    let table_val = if let Some(val) = get_arg(vm, 1) {
-        val
-    } else {
-        return Err(vm.error("ipairs iterator: table expected".to_string()));
-    };
-
-    let index_val = if let Some(val) = get_arg(vm, 2) {
-        val
-    } else {
-        return Err(vm.error("ipairs iterator: index expected".to_string()));
-    };
-
-    // Use ObjectPool API for table access
+    // ULTRA-FAST PATH: Direct register access without get_arg overhead
+    let frame = vm.current_frame();
+    let base_ptr = frame.base_ptr;
+
+    // Arguments are at base_ptr + 1 (table) and base_ptr + 2 (index)
+    // Avoid bounds checking in hot path
+    let table_val = unsafe { *vm.register_stack.get_unchecked(base_ptr + 1) };
+    let index_val = unsafe { *vm.register_stack.get_unchecked(base_ptr + 2) };
+
+    // Fast path: both table and index are valid
     if let Some(table_id) = table_val.as_table_id() {
         if let Some(index) = index_val.as_integer() {
             let next_index = index + 1;
-            // Access table via ObjectPool
+            // Access table via ObjectPool - unchecked for speed
             if let Some(table) = vm.object_pool.get_table(table_id) {
                 if let Some(value) = table.get_int(next_index) {
-                    return Ok(MultiValue::multiple(vec![
+                    // Use MultiValue::two() to avoid Vec allocation
+                    return Ok(MultiValue::two(
                         LuaValue::integer(next_index),
                         value,
-                    ]));
+                    ));
                 }
-                // Reached end of array
+                // Reached end of array - return single nil
                 return Ok(MultiValue::single(LuaValue::nil()));
             }
         }
     }
-    // Slow path with proper validation
+    // Slow path with error
     Err(vm.error("ipairs iterator: invalid table or index".to_string()))
 }
 
@@ -309,9 +306,19 @@ fn lua_pairs(vm: &mut LuaVM) -> LuaResult<MultiValue> {
 }
 
 /// next(table [, index]) - Return next key-value pair
+/// OPTIMIZED: Avoid Vec allocation for common 2-return case
 fn lua_next(vm: &mut LuaVM) -> LuaResult<MultiValue> {
-    let table_val = require_arg(vm, 1, "next")?;
-    let index_val = get_arg(vm, 2).unwrap_or(LuaValue::nil());
+    // Fast path: direct register access
+    let frame = vm.current_frame();
+    let base_ptr = frame.base_ptr;
+    let top = frame.top;
+
+    let table_val = unsafe { *vm.register_stack.get_unchecked(base_ptr + 1) };
+    let index_val = if top > 2 {
+        unsafe { *vm.register_stack.get_unchecked(base_ptr + 2) }
+    } else {
+        LuaValue::nil()
+    };
 
     // Use ObjectPool API for table access
     if let Some(table_id) = table_val.as_table_id() {
@@ -319,7 +326,8 @@ fn lua_next(vm: &mut LuaVM) -> LuaResult<MultiValue> {
             let result = table.next(&index_val);
 
             match result {
-                Some((key, value)) => Ok(MultiValue::multiple(vec![key, value])),
+                // Use MultiValue::two() to avoid Vec allocation
+                Some((key, value)) => Ok(MultiValue::two(key, value)),
                 None => Ok(MultiValue::single(LuaValue::nil())),
             }
         } else {