diff --git a/cc/README.md b/cc/README.md index 9ed9ba688..3a5213104 100644 --- a/cc/README.md +++ b/cc/README.md @@ -77,7 +77,6 @@ Supported: Not yet implemented (exceptions to C99, or features we want to add): - Actual inlining optimization (the `inline` keyword is supported but functions are not inlined) -- multi-register returns (for structs larger than 8 bytes) - -fverbose-asm - top builtins to implement: __builtin_expect diff --git a/cc/arch/aarch64/codegen.rs b/cc/arch/aarch64/codegen.rs index 5fe5b3f25..4f00dc128 100644 --- a/cc/arch/aarch64/codegen.rs +++ b/cc/arch/aarch64/codegen.rs @@ -789,12 +789,19 @@ impl Aarch64CodeGen { Opcode::Ret => { // Move return value to x0 (integer), v0 (float), or v0+v1 (complex) if present + // Two-register struct returns (9-16 bytes) go in X0+X1 if let Some(&src) = insn.src.first() { let src_loc = self.get_location(src); let is_complex = insn.typ.is_some_and(|t| types.is_complex(t)); let is_fp = matches!(src_loc, Loc::VReg(_) | Loc::FImm(..)); - if is_complex { + if insn.is_two_reg_return { + // Two-register struct return: src[0] -> X0, src[1] -> X1 + self.emit_move(src, Reg::X0, 64, *total_frame); + if let Some(&src2) = insn.src.get(1) { + self.emit_move(src2, Reg::X1, 64, *total_frame); + } + } else if is_complex { // Complex return value: load real into V0, imag into V1 // The source is a pointer to the complex value let (fp_size, imag_offset) = complex_fp_info(types, insn.typ.unwrap()); @@ -2633,7 +2640,47 @@ impl Aarch64CodeGen { // Get return value size from type let ret_size = insn.size.max(32); - if is_complex_result { + if insn.is_two_reg_return { + // Two-register struct return: X0 has low 8 bytes, X1 has high 8 bytes + // Store both halves to the target: a stack slot, or memory addressed through a register + match dst_loc { + Loc::Stack(offset) => { + let actual_offset = self.stack_offset(frame_size, offset); + // Store X0 (low 8 bytes) + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: Reg::X0, + addr: 
MemAddr::BaseOffset { + base: Reg::X29, + offset: actual_offset, + }, + }); + // Store X1 (high 8 bytes) + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: Reg::X1, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: actual_offset + 8, + }, + }); + } + Loc::Reg(r) => { + // Address in register - store through it + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: Reg::X0, + addr: MemAddr::BaseOffset { base: r, offset: 0 }, + }); + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: Reg::X1, + addr: MemAddr::BaseOffset { base: r, offset: 8 }, + }); + } + _ => {} + } + } else if is_complex_result { // Complex return value is in V0 (real) + V1 (imag) // Store both parts to the target location let (fp_size, imag_offset) = complex_fp_info(types, insn.typ.unwrap()); diff --git a/cc/arch/x86_64/codegen.rs b/cc/arch/x86_64/codegen.rs index c2841971a..59bc2e5ca 100644 --- a/cc/arch/x86_64/codegen.rs +++ b/cc/arch/x86_64/codegen.rs @@ -594,12 +594,19 @@ impl X86_64CodeGen { Opcode::Ret => { // Move return value to appropriate register if present // System V AMD64 ABI: integers in RAX, floats in XMM0, complex in XMM0+XMM1 + // Two-register struct returns (9-16 bytes) go in RAX+RDX if let Some(src) = insn.src.first() { let src_loc = self.get_location(*src); let is_complex = insn.typ.is_some_and(|t| types.is_complex(t)); let is_fp = matches!(src_loc, Loc::Xmm(_) | Loc::FImm(..)) || insn.typ.is_some_and(|t| types.is_float(t)); - if is_complex { + if insn.is_two_reg_return { + // Two-register struct return: src[0] -> RAX, src[1] -> RDX + self.emit_move(*src, Reg::Rax, 64); + if let Some(&src2) = insn.src.get(1) { + self.emit_move(src2, Reg::Rdx, 64); + } + } else if is_complex { // Complex return value: load real into XMM0, imag into XMM1 // The source is a pointer to the complex value let (fp_size, imag_offset) = complex_fp_info(types, insn.typ.unwrap()); @@ -3026,7 +3033,47 @@ impl X86_64CodeGen { // Get return value size 
from type let ret_size = insn.size.max(32); - if is_complex_result { + if insn.is_two_reg_return { + // Two-register struct return: RAX has low 8 bytes, RDX has high 8 bytes + // Store both halves to the target: a stack slot, or memory addressed through a register + match dst_loc { + Loc::Stack(offset) => { + let adjusted = offset + self.callee_saved_offset; + // Store RAX (low 8 bytes) + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }), + }); + // Store RDX (high 8 bytes) + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rdx), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted + 8, + }), + }); + } + Loc::Reg(r) => { + // Address in register - store through it + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { base: r, offset: 0 }), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rdx), + dst: GpOperand::Mem(MemAddr::BaseOffset { base: r, offset: 8 }), + }); + } + _ => {} + } + } else if is_complex_result { // Complex return value is in XMM0 (real) + XMM1 (imag) // Store both parts to the target location let (fp_size, imag_offset) = complex_fp_info(types, insn.typ.unwrap()); diff --git a/cc/doc/C99.md b/cc/doc/C99.md index 1f16c1404..c53d52f5f 100644 --- a/cc/doc/C99.md +++ b/cc/doc/C99.md @@ -6,9 +6,7 @@ organized by priority and complexity. 
## Table of Contents - [Medium Priority](#medium-priority) - - [Array Parameter Qualifiers](#array-parameter-qualifiers) - [`[*]` in Function Prototypes](#-in-function-prototypes) - - [Multi-Register Struct Returns](#multi-register-struct-returns) - [Low Priority](#low-priority) - [Inline Function Optimization](#inline-function-optimization) - [FP Register Save for Variadic Functions](#fp-register-save-for-variadic-functions) @@ -17,34 +15,6 @@ organized by priority and complexity. ## Medium Priority -### Array Parameter Qualifiers - -**Status**: NOT IMPLEMENTED - -**C99 Reference**: 6.7.5.3 - -**Description**: Qualifiers and `static` in array parameter declarations: -```c -void f(int a[const 10]); // const-qualified array parameter -void f(int a[static 10]); // at least 10 elements guaranteed -void f(int a[restrict]); // restrict-qualified -void f(int a[const static 10]); // combined -``` - -**Current Behavior**: Parse error: "expected ']'" - -**Location**: `cc/parse/parser.rs` array declarator parsing - -**Implementation Notes**: -- Extend array declarator parsing to allow qualifiers after `[` -- `static` indicates minimum size (optimization hint) -- Qualifiers apply to the pointer the array decays to -- These are hints for optimization, not semantic requirements - -**Complexity**: Medium - ---- - ### `[*]` in Function Prototypes **Status**: NOT IMPLEMENTED @@ -67,32 +37,6 @@ void f(int n, int arr[*]); // VLA parameter with unspecified size --- -### Multi-Register Struct Returns - -**Status**: PARTIAL - -**C99 Reference**: ABI-dependent (not in C99 itself) - -**Description**: Structs that fit in two registers should be returned in two registers per ABI. 
- -**Current Behavior**: -- Structs > 8 bytes use sret (hidden pointer parameter) -- Structs 9-16 bytes should use two registers but don't - -**Location**: -- `cc/README.md` line 85 -- `cc/target.rs` lines 99-100 - -**Implementation Notes**: -- x86-64 SysV: structs up to 16 bytes can be returned in RAX+RDX -- AArch64: structs up to 16 bytes can be returned in X0+X1 -- Need to classify struct fields according to ABI rules -- Affects both caller and callee code generation - -**Complexity**: Medium-High - ---- - ## Low Priority These items are implemented but incomplete or have known limitations that rarely affect real code. @@ -145,10 +89,7 @@ These items are implemented but incomplete or have known limitations that rarely | Feature | Status | Complexity | Priority | |---------|--------|------------|----------| -| `_Complex` | **Implemented** | High | High | -| Array parameter qualifiers | Not implemented | Medium | Medium | | `[*]` in prototypes | Not implemented | Low | Medium | -| Multi-register struct returns | Partial | Medium-High | Medium | | Inline optimization | Partial | High | Low | | FP variadic save | Partial | Medium | Low | diff --git a/cc/ir/linearize.rs b/cc/ir/linearize.rs index 039c7cfba..8f08bfcc2 100644 --- a/cc/ir/linearize.rs +++ b/cc/ir/linearize.rs @@ -86,10 +86,12 @@ pub struct Linearizer<'a> { types: &'a TypeTable, /// String table for converting StringId to String at IR boundary strings: &'a StringTable, - /// Hidden struct return pointer (for functions returning large structs) + /// Hidden struct return pointer (for functions returning large structs via sret) struct_return_ptr: Option, - /// Size of struct being returned (for functions returning large structs) + /// Size of struct being returned (for functions returning large structs via sret) struct_return_size: u32, + /// Type of struct being returned via two registers (9-16 bytes, per ABI) + two_reg_return_type: Option, /// Current function name (for generating unique static local 
names) current_func_name: String, /// Counter for generating unique static local names @@ -135,6 +137,7 @@ impl<'a> Linearizer<'a> { strings, struct_return_ptr: None, struct_return_size: 0, + two_reg_return_type: None, current_func_name: String::new(), static_local_counter: 0, compound_literal_counter: 0, @@ -756,6 +759,7 @@ impl<'a> Linearizer<'a> { self.continue_targets.clear(); self.struct_return_ptr = None; self.struct_return_size = 0; + self.two_reg_return_type = None; self.current_func_name = self.str(func.name).to_string(); // Note: static_locals is NOT cleared - it persists across functions @@ -798,6 +802,17 @@ impl<'a> Linearizer<'a> { self.struct_return_size = self.types.size_bits(func.return_type); } + // Check if function returns a medium struct (9-16 bytes) via two registers + // This is the ABI-compliant way to return structs that fit in two GP registers + let struct_size_bits = self.types.size_bits(func.return_type); + let returns_two_reg_struct = (ret_kind == TypeKind::Struct || ret_kind == TypeKind::Union) + && struct_size_bits > 64 + && struct_size_bits <= 128 + && !returns_large_struct; // Only if not using sret + if returns_two_reg_struct { + self.two_reg_return_type = Some(func.return_type); + } + // Add parameters // For struct/union parameters, we need to copy them to local storage // so member access works properly @@ -853,9 +868,11 @@ impl<'a> Linearizer<'a> { } let typ_size = self.types.size_bits(typ); - // For large structs, arg_pseudo is a pointer to the struct + // For large structs (> 64 bits), arg_pseudo is a pointer to the struct // We need to copy the data from that pointer to local storage - if typ_size > self.target.max_aggregate_register_bits { + // Note: We receive struct parameters > 64 bits as pointers, even though + // the ABI allows two-register passing for 9-16 byte structs. 
+ if typ_size > 64 { // arg_pseudo is a pointer - copy each 8-byte chunk let struct_size = typ_size / 8; let mut offset = 0i64; @@ -1048,6 +1065,48 @@ impl<'a> Linearizer<'a> { self.types.void_ptr_id, 64, )); + } else if let Some(ret_type) = self.two_reg_return_type { + // Two-register struct return (9-16 bytes) + // Load struct data into two temps and emit ret with is_two_reg_return=true + let src_addr = self.linearize_lvalue(e); + let struct_size = self.types.size_bits(ret_type); + + // Load first 8 bytes + let low_temp = self.alloc_pseudo(); + let low_pseudo = Pseudo::reg(low_temp, low_temp.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(low_pseudo); + } + self.emit(Instruction::load( + low_temp, + src_addr, + 0, + self.types.long_id, + 64, + )); + + // Load second portion (remaining bytes, up to 8) + let high_temp = self.alloc_pseudo(); + let high_pseudo = Pseudo::reg(high_temp, high_temp.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(high_pseudo); + } + // Load up to 8 bytes for the high part (may be less for structs < 16 bytes) + let high_size = std::cmp::min(64, struct_size - 64); + self.emit(Instruction::load( + high_temp, + src_addr, + 8, + self.types.long_id, + high_size, + )); + + // Emit return with both values and two_reg_return flag + let mut ret_insn = + Instruction::ret_typed(Some(low_temp), ret_type, struct_size); + ret_insn.src.push(high_temp); // High part in src[1] + ret_insn.is_two_reg_return = true; + self.emit(ret_insn); } else if self.types.is_complex(typ) { // Complex return: codegen expects an address (pointer to the complex value) let addr = self.linearize_lvalue(e); @@ -2648,9 +2707,15 @@ impl<'a> Linearizer<'a> { // Check if function returns a large struct or complex type // Large structs: allocate space and pass address as hidden first argument // Complex types: allocate local storage for result (needs stack for 16-byte value) + // Two-register structs (9-16 bytes): allocate local storage, 
codegen stores two regs let typ_kind = self.types.kind(typ); + let struct_size_bits = self.types.size_bits(typ); let returns_large_struct = (typ_kind == TypeKind::Struct || typ_kind == TypeKind::Union) - && self.types.size_bits(typ) > self.target.max_aggregate_register_bits; + && struct_size_bits > self.target.max_aggregate_register_bits; + let returns_two_reg_struct = (typ_kind == TypeKind::Struct || typ_kind == TypeKind::Union) + && struct_size_bits > 64 + && struct_size_bits <= 128 + && !returns_large_struct; let returns_complex = self.types.is_complex(typ); let (result_sym, mut arg_vals, mut arg_types_vec) = if returns_large_struct { @@ -2683,6 +2748,17 @@ impl<'a> Linearizer<'a> { // Hidden return pointer is the first argument (pointer type) (sret_sym, vec![sret_addr], vec![self.types.pointer_to(typ)]) + } else if returns_two_reg_struct { + // Two-register struct returns: allocate local storage for the result + // Codegen will store RAX+RDX (x86-64) or X0+X1 (AArch64) to this location + let local_sym = self.alloc_pseudo(); + let unique_name = format!("__2reg_{}", local_sym.0); + let local_pseudo = Pseudo::sym(local_sym, unique_name.clone()); + if let Some(func) = &mut self.current_func { + func.add_pseudo(local_pseudo); + func.add_local(&unique_name, local_sym, typ, false, self.current_bb); + } + (local_sym, Vec::new(), Vec::new()) } else if returns_complex { // Complex returns: allocate local storage for the result // Complex values are 16 bytes and need stack storage @@ -2701,13 +2777,15 @@ impl<'a> Linearizer<'a> { // Linearize regular arguments // For large structs, pass by reference (address) instead of by value + // Note: We pass structs > 64 bits by reference. While the ABI allows + // two-register passing for 9-16 byte structs, we don't implement that yet. 
// For complex types, pass address so codegen can load real/imag into XMM registers // For arrays (including VLAs), decay to pointer for a in args.iter() { let arg_type = self.expr_type(a); let arg_kind = self.types.kind(arg_type); let arg_val = if (arg_kind == TypeKind::Struct || arg_kind == TypeKind::Union) - && self.types.size_bits(arg_type) > self.target.max_aggregate_register_bits + && self.types.size_bits(arg_type) > 64 { // Large struct: pass address instead of value // The argument type becomes a pointer @@ -2766,6 +2844,7 @@ impl<'a> Linearizer<'a> { ); call_insn.variadic_arg_start = variadic_arg_start; call_insn.is_noreturn_call = is_noreturn_call; + call_insn.is_two_reg_return = returns_two_reg_struct; self.emit(call_insn); result_sym } diff --git a/cc/ir/mod.rs b/cc/ir/mod.rs index 54b0905a2..4e8ad597d 100644 --- a/cc/ir/mod.rs +++ b/cc/ir/mod.rs @@ -490,6 +490,9 @@ pub struct Instruction { /// For calls: true if the called function is noreturn (never returns). /// Code after a noreturn call is unreachable. pub is_noreturn_call: bool, + /// For calls/returns: true if this returns a 9-16 byte struct via two registers + /// (RAX+RDX on x86-64, X0+X1 on AArch64) per ABI. + pub is_two_reg_return: bool, /// Source position for debug info pub pos: Option, } @@ -514,6 +517,7 @@ impl Default for Instruction { variadic_arg_start: None, is_sret_call: false, is_noreturn_call: false, + is_two_reg_return: false, pos: None, } } diff --git a/cc/target.rs b/cc/target.rs index 5a14cd354..ad6456792 100644 --- a/cc/target.rs +++ b/cc/target.rs @@ -94,11 +94,10 @@ impl Target { }; // Maximum aggregate size that can be returned in registers. - // Both x86-64 SysV ABI and AAPCS64 technically support returning - // 16-byte structs in registers (rax+rdx or x0+x1), but pcc currently - // only supports single-register returns. Use sret for >8 byte structs - // until multi-register returns are implemented. 
- let max_aggregate_register_bits = 64; + // Both x86-64 SysV ABI and AAPCS64 support returning 16-byte structs + // in two registers (rax+rdx or x0+x1). Structs larger than 16 bytes + // use sret (hidden pointer parameter). + let max_aggregate_register_bits = 128; Self { arch, diff --git a/cc/tests/datatypes/struct_type.rs b/cc/tests/datatypes/struct_type.rs index 4c02b0080..c9c35642d 100644 --- a/cc/tests/datatypes/struct_type.rs +++ b/cc/tests/datatypes/struct_type.rs @@ -311,6 +311,103 @@ int main(void) { assert_eq!(compile_and_run("struct_return_large", code), 0); } +// ============================================================================ +// Two-Register Struct Return: Structs 9-16 bytes returned in RAX+RDX or X0+X1 +// Per System V AMD64 ABI and AAPCS64, structs of 9-16 bytes with only integer/pointer +// members are returned in two general-purpose registers (not via sret hidden pointer). +// ============================================================================ + +#[test] +fn struct_return_two_register() { + let code = r#" +// 16-byte struct (12 bytes data + 4 bytes padding) - uses two-register return (RAX+RDX or X0+X1) +struct medium { + long first; // 8 bytes + int second; // 4 bytes (+ 4 bytes padding for alignment; total 16 bytes) +}; + +struct medium make_medium(long a, int b) { + struct medium s; + s.first = a; + s.second = b; + return s; +} + +// 16-byte struct - boundary case, also uses two registers +struct sixteen { + long first; + long second; +}; + +struct sixteen make_sixteen(long a, long b) { + struct sixteen s; + s.first = a; + s.second = b; + return s; +} + +int main(void) { + // Test 12-byte struct (with padding to 16) + struct medium m = make_medium(0x123456789ABCDEF0L, 42); + if (m.first != 0x123456789ABCDEF0L) return 1; + if (m.second != 42) return 2; + + // Test 16-byte struct (boundary case) + struct sixteen s = make_sixteen(100, 200); + if (s.first != 100) return 3; + if (s.second != 200) return 4; + + // Test with different values to ensure correct 
member mapping + m = make_medium(999, 888); + if (m.first != 999) return 5; + if (m.second != 888) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("struct_return_two_register", code), 0); +} + +// ============================================================================ +// Large Struct Return (>16 bytes): Must use sret (hidden pointer parameter) +// ============================================================================ + +#[test] +fn struct_return_sret_24_bytes() { + let code = r#" +// 24-byte struct - must use sret (hidden pointer) +struct huge { + long a; + long b; + long c; +}; + +struct huge make_huge(long x, long y, long z) { + struct huge h; + h.a = x; + h.b = y; + h.c = z; + return h; +} + +int main(void) { + struct huge h = make_huge(111, 222, 333); + if (h.a != 111) return 1; + if (h.b != 222) return 2; + if (h.c != 333) return 3; + + // Test with different values + h = make_huge(1000000, 2000000, 3000000); + if (h.a != 1000000) return 4; + if (h.b != 2000000) return 5; + if (h.c != 3000000) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("struct_return_sret_24_bytes", code), 0); +} + // ============================================================================ // Compound Literals (C99 6.5.2.5) // ============================================================================