@@ -127,6 +127,10 @@ impl Aarch64CodeGen {
127127 }
128128
129129 /// Set up register arguments for standard AAPCS64 calls, spilling any overflow arguments to the stack
130+ ///
131+ /// AAPCS64 requires stack arguments to be placed in parameter order at
132+ /// consecutive 8-byte slots starting from SP. Unlike x86-64, we don't use
133+ /// push instructions - instead we pre-allocate space and store directly.
130134 pub ( super ) fn setup_register_args (
131135 & mut self ,
132136 insn : & Instruction ,
@@ -136,9 +140,17 @@ impl Aarch64CodeGen {
136140 ) -> i32 {
137141 let int_arg_regs = Reg :: arg_regs ( ) ;
138142 let fp_arg_regs = VReg :: arg_regs ( ) ;
143+
144+ // First pass: identify which args go to registers vs stack
145+ // Collect stack args with their info for the second pass
146+ struct StackArg {
147+ pseudo : PseudoId ,
148+ is_fp : bool ,
149+ size : u32 ,
150+ }
151+ let mut stack_args_info: Vec < StackArg > = Vec :: new ( ) ;
139152 let mut int_arg_idx = 0 ;
140153 let mut fp_arg_idx = 0 ;
141- let mut stack_args = 0 ;
142154
143155 for ( i, & arg) in insn. src . iter ( ) . enumerate ( ) . skip ( args_start) {
144156 let arg_type = insn. arg_types . get ( i) . copied ( ) ;
@@ -168,52 +180,96 @@ impl Aarch64CodeGen {
168180 ) ;
169181 fp_arg_idx += 2 ;
170182 } else {
171- stack_args += 2 ;
183+ // Complex on stack needs 2 slots
184+ stack_args_info. push ( StackArg {
185+ pseudo : arg,
186+ is_fp : true ,
187+ size : arg_size,
188+ } ) ;
189+ stack_args_info. push ( StackArg {
190+ pseudo : arg,
191+ is_fp : true ,
192+ size : arg_size,
193+ } ) ;
172194 }
173195 } else if is_fp {
174- let fp_size = if let Some ( typ) = arg_type {
175- types. size_bits ( typ)
176- } else {
177- 64
178- } ;
179196 if fp_arg_idx < fp_arg_regs. len ( ) {
197+ let fp_size = if let Some ( typ) = arg_type {
198+ types. size_bits ( typ)
199+ } else {
200+ 64
201+ } ;
180202 self . emit_fp_move ( arg, fp_arg_regs[ fp_arg_idx] , fp_size, frame_size) ;
181203 fp_arg_idx += 1 ;
182204 } else {
183- self . emit_fp_move ( arg, VReg :: V16 , fp_size, frame_size) ;
184- let fp_sz = if fp_size == 32 {
185- FpSize :: Single
186- } else {
187- FpSize :: Double
188- } ;
189- self . push_lir ( Aarch64Inst :: StrFp {
190- size : fp_sz,
191- src : VReg :: V16 ,
192- addr : MemAddr :: PreIndex {
193- base : Reg :: SP ,
194- offset : -16 ,
195- } ,
205+ stack_args_info. push ( StackArg {
206+ pseudo : arg,
207+ is_fp : true ,
208+ size : arg_size,
196209 } ) ;
197- stack_args += 1 ;
198210 }
199211 } else if int_arg_idx < int_arg_regs. len ( ) {
200212 self . emit_move ( arg, int_arg_regs[ int_arg_idx] , arg_size, frame_size) ;
201213 int_arg_idx += 1 ;
202214 } else {
203- self . emit_move ( arg, Reg :: X9 , arg_size, frame_size) ;
215+ stack_args_info. push ( StackArg {
216+ pseudo : arg,
217+ is_fp : false ,
218+ size : arg_size,
219+ } ) ;
220+ }
221+ }
222+
223+ // If no stack args, we're done
224+ if stack_args_info. is_empty ( ) {
225+ return 0 ;
226+ }
227+
228+ // Pre-allocate stack space for all stack args (8 bytes each, 16-byte aligned)
229+ let num_stack_args = stack_args_info. len ( ) ;
230+ let stack_bytes = ( num_stack_args * 8 ) as i32 ;
231+ let aligned_bytes = ( stack_bytes + 15 ) & !15 ;
232+
233+ self . push_lir ( Aarch64Inst :: Sub {
234+ size : OperandSize :: B64 ,
235+ src1 : Reg :: sp ( ) ,
236+ src2 : GpOperand :: Imm ( aligned_bytes as i64 ) ,
237+ dst : Reg :: sp ( ) ,
238+ } ) ;
239+
240+ // Store each stack arg at its proper offset from SP (in parameter order)
241+ for ( idx, stack_arg) in stack_args_info. into_iter ( ) . enumerate ( ) {
242+ let offset = ( idx * 8 ) as i32 ;
243+ if stack_arg. is_fp {
244+ self . emit_fp_move ( stack_arg. pseudo , VReg :: V16 , stack_arg. size , frame_size) ;
245+ let fp_sz = if stack_arg. size == 32 {
246+ FpSize :: Single
247+ } else {
248+ FpSize :: Double
249+ } ;
250+ self . push_lir ( Aarch64Inst :: StrFp {
251+ size : fp_sz,
252+ src : VReg :: V16 ,
253+ addr : MemAddr :: BaseOffset {
254+ base : Reg :: SP ,
255+ offset,
256+ } ,
257+ } ) ;
258+ } else {
259+ self . emit_move ( stack_arg. pseudo , Reg :: X9 , stack_arg. size , frame_size) ;
204260 self . push_lir ( Aarch64Inst :: Str {
205261 size : OperandSize :: B64 ,
206262 src : Reg :: X9 ,
207- addr : MemAddr :: PreIndex {
263+ addr : MemAddr :: BaseOffset {
208264 base : Reg :: SP ,
209- offset : - 16 ,
265+ offset,
210266 } ,
211267 } ) ;
212- stack_args += 1 ;
213268 }
214269 }
215270
216- stack_args
271+ // Return number of 16-byte units allocated (for cleanup); aligned_bytes is already a multiple of 16, so the +15 below does not change the result
272+ ( aligned_bytes + 15 ) / 16
217273 }
218274
219275 /// Set up a complex number argument (real + imaginary in two V registers)
0 commit comments