@@ -9,89 +9,113 @@ use crate::{
99
1010/// FORPREP A Bx
1111/// Prepare numeric for loop: R[A]-=R[A+2]; R[A+3]=R[A]; if (skip) pc+=Bx+1
12+ /// OPTIMIZED: Uses frame_ptr directly, no i128, unsafe register access
1213#[ inline( always) ]
13- pub fn exec_forprep ( vm : & mut LuaVM , instr : u32 ) -> LuaResult < ( ) > {
14+ pub fn exec_forprep ( vm : & mut LuaVM , instr : u32 , frame_ptr : * mut LuaCallFrame ) -> LuaResult < ( ) > {
1415 let a = Instruction :: get_a ( instr) as usize ;
1516 let bx = Instruction :: get_bx ( instr) as usize ;
1617
17- let frame = vm. current_frame ( ) ;
18- let base_ptr = frame. base_ptr ;
18+ unsafe {
19+ let base_ptr = ( * frame_ptr) . base_ptr ;
20+ let reg_base = vm. register_stack . as_mut_ptr ( ) . add ( base_ptr + a) ;
21+
22+ let init = * reg_base;
23+ let limit = * reg_base. add ( 1 ) ;
24+ let step = * reg_base. add ( 2 ) ;
1925
20- let init = vm. register_stack [ base_ptr + a] ;
21- let limit = vm. register_stack [ base_ptr + a + 1 ] ;
22- let step = vm. register_stack [ base_ptr + a + 2 ] ;
26+ // Check for integer loop using type tags
27+ let init_tag = init. primary & TYPE_MASK ;
28+ let limit_tag = limit. primary & TYPE_MASK ;
29+ let step_tag = step. primary & TYPE_MASK ;
2330
24- // Check for integer loop
25- if let ( Some ( init_i) , Some ( limit_i) , Some ( step_i) ) =
26- ( init. as_integer ( ) , limit. as_integer ( ) , step. as_integer ( ) )
27- {
28- if step_i == 0 {
29- return Err ( vm. error ( "'for' step is zero" . to_string ( ) ) ) ;
30- }
31+ if init_tag == TAG_INTEGER && limit_tag == TAG_INTEGER && step_tag == TAG_INTEGER {
32+ let init_i = init. secondary as i64 ;
33+ let limit_i = limit. secondary as i64 ;
34+ let step_i = step. secondary as i64 ;
3135
32- // Set control variable (R[A+3] = init)
33- vm. register_stack [ base_ptr + a + 3 ] = LuaValue :: integer ( init_i) ;
36+ if step_i == 0 {
37+ return Err ( vm. error ( "'for' step is zero" . to_string ( ) ) ) ;
38+ }
39+
40+ // Set control variable (R[A+3] = init)
41+ * reg_base. add ( 3 ) = LuaValue :: integer ( init_i) ;
42+
43+ // Calculate loop count using u64 arithmetic (avoid i128!)
44+ // Lua 5.4 style: cast to u64 and use wrapping subtraction so limit - init cannot overflow
45+ let count: u64 = if step_i > 0 {
46+ // Ascending loop
47+ if limit_i < init_i {
48+ 0
49+ } else {
50+ // (limit - init) / step, using unsigned division
51+ let diff = ( limit_i as u64 ) . wrapping_sub ( init_i as u64 ) ;
52+ diff / ( step_i as u64 )
53+ }
54+ } else {
55+ // Descending loop
56+ if init_i < limit_i {
57+ 0
58+ } else {
59+ // (init - limit) / (-step)
60+ let diff = ( init_i as u64 ) . wrapping_sub ( limit_i as u64 ) ;
61+ let neg_step = ( -( step_i as i64 ) ) as u64 ;
62+ diff / neg_step
63+ }
64+ } ;
3465
35- // Calculate loop count (Lua 5.4 uses counter for integer loops)
36- let count = if step_i > 0 {
37- // Ascending: count = (limit - init) / step
38- if limit_i < init_i {
39- 0 // skip loop
66+ if count == 0 {
67+ // Skip the entire loop body and FORLOOP
68+ ( * frame_ptr) . pc += bx;
4069 } else {
41- let diff = ( limit_i as i128 ) - ( init_i as i128 ) ;
42- ( diff / ( step_i as i128 ) ) as u64
70+ // Store count in R[A+1] (replacing limit)
71+ * reg_base . add ( 1 ) = LuaValue :: integer ( count as i64 ) ;
4372 }
4473 } else {
45- // Descending: count = (init - limit) / (-(step+1)+1)
46- if init_i < limit_i {
47- 0 // skip loop
74+ // Float loop - convert to f64
75+ let init_f = if init_tag == TAG_INTEGER {
76+ init. secondary as i64 as f64
77+ } else if init_tag == TAG_FLOAT {
78+ f64:: from_bits ( init. secondary )
4879 } else {
49- let diff = ( init_i as i128 ) - ( limit_i as i128 ) ;
50- let divisor = -( ( step_i + 1 ) as i128 ) + 1 ;
51- ( diff / divisor) as u64
52- }
53- } ;
80+ return Err ( vm. error ( "'for' initial value must be a number" . to_string ( ) ) ) ;
81+ } ;
5482
55- if count == 0 {
56- // Skip the entire loop body and FORLOOP
57- vm. current_frame_mut ( ) . pc = vm. current_frame ( ) . pc + bx;
58- } else {
59- // Store count in R[A+1] (replacing limit)
60- vm. register_stack [ base_ptr + a + 1 ] = LuaValue :: integer ( count as i64 ) ;
61- // R[A] keeps init value (will be updated by FORLOOP)
62- // Don't modify R[A] here!
63- }
64- } else {
65- // Float loop
66- let Some ( init_f) = init. as_number ( ) else {
67- return Err ( vm. error ( "'for' initial value must be a number" . to_string ( ) ) ) ;
68- } ;
69- let Some ( limit_f) = limit. as_number ( ) else {
70- return Err ( vm. error ( "'for' limit must be a number" . to_string ( ) ) ) ;
71- } ;
72- let Some ( step_f) = step. as_number ( ) else {
73- return Err ( vm. error ( "'for' step must be a number" . to_string ( ) ) ) ;
74- } ;
75-
76- if step_f == 0.0 {
77- return Err ( vm. error ( "'for' step is zero" . to_string ( ) ) ) ;
78- }
83+ let limit_f = if limit_tag == TAG_INTEGER {
84+ limit. secondary as i64 as f64
85+ } else if limit_tag == TAG_FLOAT {
86+ f64:: from_bits ( limit. secondary )
87+ } else {
88+ return Err ( vm. error ( "'for' limit must be a number" . to_string ( ) ) ) ;
89+ } ;
7990
80- // Set control variable
81- vm. register_stack [ base_ptr + a + 3 ] = LuaValue :: number ( init_f) ;
91+ let step_f = if step_tag == TAG_INTEGER {
92+ step. secondary as i64 as f64
93+ } else if step_tag == TAG_FLOAT {
94+ f64:: from_bits ( step. secondary )
95+ } else {
96+ return Err ( vm. error ( "'for' step must be a number" . to_string ( ) ) ) ;
97+ } ;
8298
83- // Check if we should skip
84- let should_skip = if step_f > 0.0 {
85- init_f > limit_f
86- } else {
87- init_f < limit_f
88- } ;
99+ if step_f == 0.0 {
100+ return Err ( vm. error ( "'for' step is zero" . to_string ( ) ) ) ;
101+ }
89102
90- if should_skip {
91- vm. current_frame_mut ( ) . pc = vm. current_frame ( ) . pc + bx;
92- } else {
93- // Prepare internal index
94- vm. register_stack [ base_ptr + a] = LuaValue :: number ( init_f - step_f) ;
103+ // Set control variable
104+ * reg_base. add ( 3 ) = LuaValue :: number ( init_f) ;
105+
106+ // Check if we should skip
107+ let should_skip = if step_f > 0.0 {
108+ init_f > limit_f
109+ } else {
110+ init_f < limit_f
111+ } ;
112+
113+ if should_skip {
114+ ( * frame_ptr) . pc += bx;
115+ } else {
116+ // Prepare internal index
117+ * reg_base = LuaValue :: number ( init_f - step_f) ;
118+ }
95119 }
96120 }
97121
@@ -102,7 +126,7 @@ pub fn exec_forprep(vm: &mut LuaVM, instr: u32) -> LuaResult<()> {
102126/// R[A]+=R[A+2];
103127/// if R[A] <?= R[A+1] then { pc-=Bx; R[A+3]=R[A] }
104128///
105- /// ULTRA-OPTIMIZED: Uses pre-fetched frame_ptr + direct bit-mask type checking
129+ /// ULTRA-OPTIMIZED: Minimized memory access, branch prediction friendly
106130#[ inline( always) ]
107131pub fn exec_forloop ( vm : & mut LuaVM , instr : u32 , frame_ptr : * mut LuaCallFrame ) -> LuaResult < ( ) > {
108132 let a = Instruction :: get_a ( instr) as usize ;
@@ -112,71 +136,80 @@ pub fn exec_forloop(vm: &mut LuaVM, instr: u32, frame_ptr: *mut LuaCallFrame) ->
112136 let base_ptr = ( * frame_ptr) . base_ptr ;
113137 let reg_base = vm. register_stack . as_mut_ptr ( ) . add ( base_ptr + a) ;
114138
115- let idx = * reg_base;
116- let counter_or_limit = * reg_base. add ( 1 ) ;
117- let step = * reg_base. add ( 2 ) ;
118-
119- let combined_tags = ( idx. primary | counter_or_limit. primary | step. primary ) & TYPE_MASK ;
139+ // Read counter first - this is the hot path check
140+ let counter = ( * reg_base. add ( 1 ) ) . secondary as i64 ;
141+
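142+ // Fast path: a counter payload > 0 means the loop continues (the common case)
143+ // NOTE(review): R[A+1]'s type tag is not checked before this test, so a float-loop limit whose bits are positive as i64 would also take this branch - confirm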
144+ if counter > 0 {
145+ // Only read other values if we're continuing
146+ let idx_i = ( * reg_base) . secondary as i64 ;
147+ let step_i = ( * reg_base. add ( 2 ) ) . secondary as i64 ;
148+ let new_idx = idx_i. wrapping_add ( step_i) ;
149+
150+ // Write back - minimize writes
151+ ( * reg_base) . secondary = new_idx as u64 ;
152+ ( * reg_base. add ( 1 ) ) . secondary = ( counter - 1 ) as u64 ;
153+ ( * reg_base. add ( 3 ) ) . secondary = new_idx as u64 ;
154+ // Note: type tags stay TAG_INTEGER, no need to rewrite primary
155+
156+ ( * frame_ptr) . pc -= bx;
157+ return Ok ( ( ) ) ;
158+ }
120159
121- // Fast path: All integers
160+ // Check if this is actually an integer loop (counter == 0 means loop ended)
161+ let idx = * reg_base;
162+ let combined_tags = ( idx. primary | ( * reg_base. add ( 1 ) ) . primary | ( * reg_base. add ( 2 ) ) . primary ) & TYPE_MASK ;
163+
122164 if combined_tags == TAG_INTEGER {
123- let count = counter_or_limit. secondary as i64 ;
124-
125- if count > 0 {
126- let idx_i = idx. secondary as i64 ;
127- let step_i = step. secondary as i64 ;
128- let new_idx = idx_i. wrapping_add ( step_i) ;
129-
130- * reg_base = LuaValue :: integer ( new_idx) ;
131- * reg_base. add ( 1 ) = LuaValue :: integer ( count - 1 ) ;
132- * reg_base. add ( 3 ) = LuaValue :: integer ( new_idx) ;
133-
134- ( * frame_ptr) . pc -= bx;
135- }
165+ // Integer loop ended (counter == 0)
166+ return Ok ( ( ) ) ;
136167 }
137- // Slow path: at least one non-integer
138- else {
139- let step_tag = step. primary & TYPE_MASK ;
140- let counter_tag = counter_or_limit. primary & TYPE_MASK ;
141- let idx_tag = idx. primary & TYPE_MASK ;
142-
143- if ( step_tag == TAG_FLOAT || step_tag == TAG_INTEGER )
144- && ( counter_tag == TAG_FLOAT || counter_tag == TAG_INTEGER )
145- && ( idx_tag == TAG_FLOAT || idx_tag == TAG_INTEGER )
146- {
147- let idx_f = if idx_tag == TAG_FLOAT {
148- f64:: from_bits ( idx. secondary )
149- } else {
150- idx. secondary as i64 as f64
151- } ;
152168
153- let limit_f = if counter_tag == TAG_FLOAT {
154- f64:: from_bits ( counter_or_limit. secondary )
155- } else {
156- counter_or_limit. secondary as i64 as f64
157- } ;
169+ // Slow path: float loop
170+ let counter_or_limit = * reg_base. add ( 1 ) ;
171+ let step = * reg_base. add ( 2 ) ;
172+
173+ let step_tag = step. primary & TYPE_MASK ;
174+ let limit_tag = counter_or_limit. primary & TYPE_MASK ;
175+ let idx_tag = idx. primary & TYPE_MASK ;
176+
177+ if ( step_tag == TAG_FLOAT || step_tag == TAG_INTEGER )
178+ && ( limit_tag == TAG_FLOAT || limit_tag == TAG_INTEGER )
179+ && ( idx_tag == TAG_FLOAT || idx_tag == TAG_INTEGER )
180+ {
181+ let idx_f = if idx_tag == TAG_FLOAT {
182+ f64:: from_bits ( idx. secondary )
183+ } else {
184+ idx. secondary as i64 as f64
185+ } ;
158186
159- let step_f = if step_tag == TAG_FLOAT {
160- f64:: from_bits ( step . secondary )
161- } else {
162- step . secondary as i64 as f64
163- } ;
187+ let limit_f = if limit_tag == TAG_FLOAT {
188+ f64:: from_bits ( counter_or_limit . secondary )
189+ } else {
190+ counter_or_limit . secondary as i64 as f64
191+ } ;
164192
165- let new_idx_f = idx_f + step_f;
166- let should_continue = if step_f > 0.0 {
167- new_idx_f <= limit_f
168- } else {
169- new_idx_f >= limit_f
170- } ;
193+ let step_f = if step_tag == TAG_FLOAT {
194+ f64:: from_bits ( step. secondary )
195+ } else {
196+ step. secondary as i64 as f64
197+ } ;
171198
172- if should_continue {
173- * reg_base = LuaValue :: number ( new_idx_f) ;
174- * reg_base. add ( 3 ) = LuaValue :: number ( new_idx_f) ;
175- ( * frame_ptr) . pc -= bx;
176- }
199+ let new_idx_f = idx_f + step_f;
200+ let should_continue = if step_f > 0.0 {
201+ new_idx_f <= limit_f
177202 } else {
178- return Err ( vm. error ( "'for' values must be numbers" . to_string ( ) ) ) ;
203+ new_idx_f >= limit_f
204+ } ;
205+
206+ if should_continue {
207+ * reg_base = LuaValue :: number ( new_idx_f) ;
208+ * reg_base. add ( 3 ) = LuaValue :: number ( new_idx_f) ;
209+ ( * frame_ptr) . pc -= bx;
179210 }
211+ } else {
212+ return Err ( vm. error ( "'for' values must be numbers" . to_string ( ) ) ) ;
180213 }
181214 }
182215
0 commit comments