Skip to content

Commit 62a3f7d

Browse files
committed
[cc] aarch64 callee saved FP registers
1. cc/arch/aarch64/regalloc.rs - Added callee_saved_fp_used() getter (line 915)
2. cc/arch/aarch64/lir.rs - Added new LIR instructions:
   - StpFp - Store pair of FP registers (line 587)
   - LdpFp - Load pair of FP registers (line 595)
   - Emit implementations for both (lines 1310-1334)
3. cc/arch/aarch64/codegen.rs - Full integration:
   - Get FP callee-saved registers from allocator (line 280)
   - Frame size calculation includes FP callee-saved space (lines 295-297)
   - frame_info type extended to (i32, Vec<Reg>, Vec<VReg>) (lines 604, 620, 639)
   - Prologue saves FP callee-saved registers using stp, falling back to str for an unpaired last register (lines 435-481)
   - Epilogue restores FP callee-saved registers using ldp, falling back to ldr for an unpaired last register (lines 707-733)

Key AAPCS64 compliance: Per the ABI, only the lower 64 bits of V8-V15 need preservation, so we save/restore as d8-d15 (using FpSize::Double).
1 parent 91b3e82 commit 62a3f7d

File tree

3 files changed

+142
-9
lines changed

3 files changed

+142
-9
lines changed

cc/arch/aarch64/codegen.rs

Lines changed: 95 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -277,6 +277,7 @@ impl Aarch64CodeGen {
277277

278278
let stack_size = alloc.stack_size();
279279
let callee_saved = alloc.callee_saved_used().to_vec();
280+
let callee_saved_fp = alloc.callee_saved_fp_used().to_vec();
280281

281282
// For variadic functions on Linux/FreeBSD, we need extra space for the register save area
282283
// AAPCS64: 8 GP regs (x0-x7) * 8 bytes = 64 bytes
@@ -286,16 +287,20 @@ impl Aarch64CodeGen {
286287
let reg_save_area_size: i32 = if is_variadic && !is_darwin { 64 } else { 0 };
287288

288289
// Calculate total frame size
289-
// Need space for: fp/lr (16 bytes) + callee-saved regs + local vars + reg save area
290-
// Round up callee-saved count to even for 16-byte alignment
291-
let callee_saved_pairs = (callee_saved.len() + 1) / 2;
292-
let callee_saved_size = callee_saved_pairs as i32 * 16;
290+
// Need space for: fp/lr (16 bytes) + GP callee-saved + FP callee-saved + local vars + reg save area
291+
// Round up callee-saved counts to even for 16-byte alignment
292+
// Note: AAPCS64 only requires the lower 64 bits of V8-V15 to be preserved (d8-d15)
293+
let callee_saved_gp_pairs = (callee_saved.len() + 1) / 2;
294+
let callee_saved_gp_size = callee_saved_gp_pairs as i32 * 16;
295+
let callee_saved_fp_pairs = (callee_saved_fp.len() + 1) / 2;
296+
let callee_saved_fp_size = callee_saved_fp_pairs as i32 * 16; // 8 bytes per d-reg, 16 per pair
297+
let callee_saved_size = callee_saved_gp_size + callee_saved_fp_size;
293298
let total_frame = 16 + callee_saved_size + stack_size + reg_save_area_size;
294299
// Ensure 16-byte alignment
295300
let total_frame = (total_frame + 15) & !15;
296301

297302
// Track register save area offset for va_start (offset from FP)
298-
// Layout: [fp/lr][callee-saved][locals][reg_save_area]
303+
// Layout: [fp/lr][GP callee-saved][FP callee-saved][locals][reg_save_area]
299304
// The save area is at FP + 16 + callee_saved_size + stack_size
300305
self.reg_save_area_offset = if is_variadic {
301306
16 + callee_saved_size + stack_size
@@ -426,6 +431,54 @@ impl Aarch64CodeGen {
426431
}
427432
offset += 16;
428433
}
434+
435+
// Save FP callee-saved registers (d8-d15) in pairs
436+
// AAPCS64 only requires preserving the lower 64 bits
437+
let mut i = 0;
438+
while i < callee_saved_fp.len() {
439+
if i + 1 < callee_saved_fp.len() {
440+
self.push_lir(Aarch64Inst::StpFp {
441+
size: FpSize::Double,
442+
src1: callee_saved_fp[i],
443+
src2: callee_saved_fp[i + 1],
444+
addr: MemAddr::BaseOffset {
445+
base: Reg::X29, // fp
446+
offset,
447+
},
448+
});
449+
if self.emit_debug {
450+
let cfi_offset1 = -(total_frame - offset);
451+
let cfi_offset2 = -(total_frame - offset - 8);
452+
self.push_lir(Aarch64Inst::Directive(Directive::cfi_offset(
453+
callee_saved_fp[i].name_d(),
454+
cfi_offset1,
455+
)));
456+
self.push_lir(Aarch64Inst::Directive(Directive::cfi_offset(
457+
callee_saved_fp[i + 1].name_d(),
458+
cfi_offset2,
459+
)));
460+
}
461+
i += 2;
462+
} else {
463+
self.push_lir(Aarch64Inst::StrFp {
464+
size: FpSize::Double,
465+
src: callee_saved_fp[i],
466+
addr: MemAddr::BaseOffset {
467+
base: Reg::X29, // fp
468+
offset,
469+
},
470+
});
471+
if self.emit_debug {
472+
let cfi_offset = -(total_frame - offset);
473+
self.push_lir(Aarch64Inst::Directive(Directive::cfi_offset(
474+
callee_saved_fp[i].name_d(),
475+
cfi_offset,
476+
)));
477+
}
478+
i += 1;
479+
}
480+
offset += 16;
481+
}
429482
} else {
430483
// Minimal frame: stp x29, x30, [sp, #-16]!
431484
self.push_lir(Aarch64Inst::Stp {
@@ -548,7 +601,7 @@ impl Aarch64CodeGen {
548601
}
549602

550603
// Store frame size for epilogue
551-
let frame_info = (total_frame, callee_saved.clone());
604+
let frame_info = (total_frame, callee_saved.clone(), callee_saved_fp.clone());
552605

553606
// Emit basic blocks
554607
for block in &func.blocks {
@@ -564,7 +617,7 @@ impl Aarch64CodeGen {
564617
fn emit_block(
565618
&mut self,
566619
block: &crate::ir::BasicBlock,
567-
frame_info: &(i32, Vec<Reg>),
620+
frame_info: &(i32, Vec<Reg>, Vec<VReg>),
568621
types: &TypeTable,
569622
) {
570623
// Always emit block ID label for consistency with jumps
@@ -580,11 +633,16 @@ impl Aarch64CodeGen {
580633
}
581634
}
582635

583-
fn emit_insn(&mut self, insn: &Instruction, frame_info: &(i32, Vec<Reg>), types: &TypeTable) {
636+
fn emit_insn(
637+
&mut self,
638+
insn: &Instruction,
639+
frame_info: &(i32, Vec<Reg>, Vec<VReg>),
640+
types: &TypeTable,
641+
) {
584642
// Emit .loc directive for debug info
585643
self.emit_loc(insn);
586644

587-
let (total_frame, callee_saved) = frame_info;
645+
let (total_frame, callee_saved, callee_saved_fp) = frame_info;
588646

589647
match insn.op {
590648
Opcode::Entry => {
@@ -645,6 +703,34 @@ impl Aarch64CodeGen {
645703
}
646704
offset += 16;
647705
}
706+
707+
// Restore FP callee-saved registers (d8-d15)
708+
let mut i = 0;
709+
while i < callee_saved_fp.len() {
710+
if i + 1 < callee_saved_fp.len() {
711+
self.push_lir(Aarch64Inst::LdpFp {
712+
size: FpSize::Double,
713+
addr: MemAddr::BaseOffset {
714+
base: Reg::sp(),
715+
offset,
716+
},
717+
dst1: callee_saved_fp[i],
718+
dst2: callee_saved_fp[i + 1],
719+
});
720+
i += 2;
721+
} else {
722+
self.push_lir(Aarch64Inst::LdrFp {
723+
size: FpSize::Double,
724+
addr: MemAddr::BaseOffset {
725+
base: Reg::sp(),
726+
offset,
727+
},
728+
dst: callee_saved_fp[i],
729+
});
730+
i += 1;
731+
}
732+
offset += 16;
733+
}
648734
}
649735

650736
// Restore fp/lr and deallocate stack

cc/arch/aarch64/lir.rs

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -584,6 +584,22 @@ pub enum Aarch64Inst {
584584
addr: MemAddr,
585585
},
586586

587+
/// STP (FP) - Store pair of FP registers (for callee-saved register saves)
588+
StpFp {
589+
size: FpSize,
590+
src1: VReg,
591+
src2: VReg,
592+
addr: MemAddr,
593+
},
594+
595+
/// LDP (FP) - Load pair of FP registers (for callee-saved register restores)
596+
LdpFp {
597+
size: FpSize,
598+
addr: MemAddr,
599+
dst1: VReg,
600+
dst2: VReg,
601+
},
602+
587603
/// FADD - FP add
588604
Fadd {
589605
size: FpSize,
@@ -1291,6 +1307,32 @@ impl EmitAsm for Aarch64Inst {
12911307
let _ = writeln!(out, " str {}, {}", name, addr.format());
12921308
}
12931309

1310+
Aarch64Inst::StpFp {
1311+
size,
1312+
src1,
1313+
src2,
1314+
addr,
1315+
} => {
1316+
let (name1, name2) = match size {
1317+
FpSize::Single => (src1.name_s(), src2.name_s()),
1318+
FpSize::Double => (src1.name_d(), src2.name_d()),
1319+
};
1320+
let _ = writeln!(out, " stp {}, {}, {}", name1, name2, addr.format());
1321+
}
1322+
1323+
Aarch64Inst::LdpFp {
1324+
size,
1325+
addr,
1326+
dst1,
1327+
dst2,
1328+
} => {
1329+
let (name1, name2) = match size {
1330+
FpSize::Single => (dst1.name_s(), dst2.name_s()),
1331+
FpSize::Double => (dst1.name_d(), dst2.name_d()),
1332+
};
1333+
let _ = writeln!(out, " ldp {}, {}, {}", name1, name2, addr.format());
1334+
}
1335+
12941336
Aarch64Inst::LdrFpSymOffset {
12951337
size,
12961338
sym,

cc/arch/aarch64/regalloc.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -910,6 +910,11 @@ impl RegAlloc {
910910
pub fn callee_saved_used(&self) -> &[Reg] {
911911
&self.used_callee_saved
912912
}
913+
914+
/// Get callee-saved floating-point registers that need to be preserved
915+
pub fn callee_saved_fp_used(&self) -> &[VReg] {
916+
&self.used_callee_saved_fp
917+
}
913918
}
914919

915920
impl Default for RegAlloc {

0 commit comments

Comments
 (0)