diff --git a/.github/workflows/TestingCI.yml b/.github/workflows/TestingCI.yml index ad4ccb586..94082c4db 100644 --- a/.github/workflows/TestingCI.yml +++ b/.github/workflows/TestingCI.yml @@ -6,12 +6,24 @@ on: env: CARGO_TERM_COLOR: always + RUST_BACKTRACE: 1 jobs: linux-ubuntu: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - name: System info + run: | + echo "=== Architecture ===" + uname -a + echo "=== CPU ===" + lscpu | head -20 + echo "=== Toolchain ===" + rustc --version + cargo --version + cc --version || true + as --version || true - name: Fetch run: cargo fetch - name: Build @@ -39,9 +51,26 @@ jobs: with: path: ~/.cargo/registry/cache key: registry-${{ runner.os }}-${{ steps.rust-version.outputs.version }}-${{ hashFiles('Cargo.lock') }} + - name: System info + run: | + echo "=== Architecture ===" + uname -a + arch + echo "=== Toolchain ===" + rustc --version + cargo --version + cc --version || true + as -version || true + echo "=== Xcode ===" + xcodebuild -version || true - name: Fetch run: cargo fetch - name: Build run: cargo build --release --verbose - name: Run tests run: cargo test --release --verbose + - name: Run cc tests (detailed) + if: failure() + run: | + echo "=== Re-running cc tests with more detail ===" + cargo test --release -p posixutils-cc --verbose -- --nocapture --test-threads=1 2>&1 | head -500 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 14ec62343..b1440a3dc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -71,6 +71,9 @@ There are several ways to contribute to posixutils-rs: static input data, and compares output with static output data (OS reference data). * Use plib's TestPlan framework for integration tests. +* Integration test harness should ONLY contain `mod` statements. + Test logic is in $module/tests/$category/mod.rs files. * Only "quick" tests should be run automatically in `cargo test` * Longer tests, or tests requiring root access, should be triggered via special environment variables. + diff --git a/Cargo.lock b/Cargo.lock index d721f1b6e..00d5b8cc2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1460,6 +1460,17 @@ dependencies = [ "rustyline", ] +[[package]] +name = "posixutils-cc" +version = "0.2.2" +dependencies = [ + "bitflags 2.9.0", + "clap", + "gettext-rs", + "plib", + "tempfile", +] + [[package]] name = "posixutils-datetime" version = "0.2.2" diff --git a/Cargo.toml b/Cargo.toml index 40adc69fc..b8c4b628a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ resolver = "2" members = [ "awk", "calc", + "cc", "datetime", "dev", "display", diff --git a/cc/BUILTIN.md b/cc/BUILTIN.md new file mode 100644 index 000000000..57e36c8f8 --- /dev/null +++ b/cc/BUILTIN.md @@ -0,0 +1,418 @@ +# Compiler Builtins + +This document describes the compiler builtin functions and types supported by pcc. + +## Variadic Function Support + +These builtins provide C99-compliant variadic function support, matching GCC and Clang behavior. + +### `__builtin_va_list` + +**Type**: Platform-specific variadic argument list type. + +**Platform sizes**: +- x86-64 (System V ABI): 24 bytes (struct with gp_offset, fp_offset, overflow_arg_area, reg_save_area) +- aarch64-macos (Apple ARM64): 8 bytes (simple char* pointer) +- aarch64-linux (AAPCS64): 32 bytes (struct with stack, gr_top, vr_top, gr_offs, vr_offs) + +**Usage**: +```c +__builtin_va_list ap; +``` + +### `__builtin_va_start(ap, last_param)` + +**Purpose**: Initialize a va_list to point to the first variadic argument. + +**Parameters**: +- `ap`: A variable of type `__builtin_va_list` +- `last_param`: The name of the last fixed parameter before the `...` + +**Usage**: +```c +void my_printf(const char *fmt, ...) { + __builtin_va_list ap; + __builtin_va_start(ap, fmt); + // ... use ap ... + __builtin_va_end(ap); +} +``` + +### `__builtin_va_arg(ap, type)` + +**Purpose**: Retrieve the next variadic argument of the specified type and advance the va_list. + +**Parameters**: +- `ap`: A variable of type `__builtin_va_list` +- `type`: The type of the argument to retrieve (e.g., `int`, `long`, `char*`) + +**Returns**: The next argument, interpreted as the specified type. + +**Usage**: +```c +int value = __builtin_va_arg(ap, int); +char *str = __builtin_va_arg(ap, char*); +``` + +### `__builtin_va_end(ap)` + +**Purpose**: Clean up a va_list after use. Required for portability, though it's a no-op on most platforms. + +**Parameters**: +- `ap`: A variable of type `__builtin_va_list` + +**Usage**: +```c +__builtin_va_end(ap); +``` + +### `__builtin_va_copy(dest, src)` + +**Purpose**: Copy one va_list to another. The copy can be used independently of the original. + +**Parameters**: +- `dest`: Destination `__builtin_va_list` (uninitialized) +- `src`: Source `__builtin_va_list` (previously initialized with va_start) + +**Usage**: +```c +__builtin_va_list ap, ap_copy; +__builtin_va_start(ap, last_param); +__builtin_va_copy(ap_copy, ap); +// ap and ap_copy can now be used independently +__builtin_va_end(ap); +__builtin_va_end(ap_copy); +``` + +## Standard Header Compatibility + +The system `` header typically defines macros that expand to these builtins: + +```c +typedef __builtin_va_list va_list; +#define va_start(ap, param) __builtin_va_start(ap, param) +#define va_arg(ap, type) __builtin_va_arg(ap, type) +#define va_end(ap) __builtin_va_end(ap) +#define va_copy(dest, src) __builtin_va_copy(dest, src) +``` + +This means code using the standard `va_list`, `va_start`, `va_arg`, `va_end`, and `va_copy` will work correctly once preprocessor support for `#include` is available. + +## Implementation Notes + +### Caller Side (Variadic Function Calls) + +When calling a variadic function, pcc: +1. Passes fixed arguments normally (in registers per ABI) +2. Pushes variadic arguments onto the stack in reverse order +3. This ensures variadic arguments are accessible via simple stack pointer offsets + +### Callee Side (Variadic Function Definitions) + +When implementing a variadic function, pcc: +1. `va_start` initializes the overflow_arg_area to point to stack arguments (rbp+16 on x86-64) +2. `va_arg` reads from the overflow_arg_area and advances the pointer +3. `va_copy` performs a byte-for-byte copy of the va_list structure +4. `va_end` is currently a no-op + +### Limitations + +- Register save area is not currently implemented; all variadic arguments are passed on the stack +- This is ABI-compatible for calling external functions but may be less efficient for register-heavy variadic calls + +## Byte-Swapping Builtins + +These builtins reverse the byte order of integers, commonly used for endianness conversion between big-endian (network byte order) and little-endian (host byte order on x86/ARM). + +### `__builtin_bswap16(x)` + +**Signature**: `uint16_t __builtin_bswap16(uint16_t x)` + +**Purpose**: Returns `x` with the order of bytes reversed. + +**Example**: `0xABCD` becomes `0xCDAB` + +**Usage**: +```c +unsigned short val = 0xABCD; +unsigned short swapped = __builtin_bswap16(val); // 0xCDAB +``` + +**Implementation**: +- x86-64: Uses `ror $8, %ax` (rotate right 8 bits) +- AArch64: Uses `rev16` instruction + +### `__builtin_bswap32(x)` + +**Signature**: `uint32_t __builtin_bswap32(uint32_t x)` + +**Purpose**: Returns `x` with the order of bytes reversed. + +**Example**: `0x12345678` becomes `0x78563412` + +**Usage**: +```c +unsigned int val = 0x12345678; +unsigned int swapped = __builtin_bswap32(val); // 0x78563412 +``` + +**Implementation**: +- x86-64: Uses native `bswap %eax` instruction +- AArch64: Uses `rev` instruction with 32-bit register + +### `__builtin_bswap64(x)` + +**Signature**: `uint64_t __builtin_bswap64(uint64_t x)` + +**Purpose**: Returns `x` with the order of bytes reversed. + +**Example**: `0x0102030405060708` becomes `0x0807060504030201` + +**Usage**: +```c +unsigned long long val = 0x0102030405060708ULL; +unsigned long long swapped = __builtin_bswap64(val); // 0x0807060504030201ULL +``` + +**Implementation**: +- x86-64: Uses native `bswap %rax` instruction +- AArch64: Uses `rev` instruction with 64-bit register + +### Common Use Cases + +**Network byte order conversion**: +```c +// Convert network byte order (big-endian) to host byte order (little-endian) +unsigned int ip_addr_host = __builtin_bswap32(ip_addr_network); + +// Convert 16-bit port number +unsigned short port_host = __builtin_bswap16(port_network); +``` + +**Binary file format parsing**: +```c +// Read a big-endian 32-bit value from a file +unsigned int val = __builtin_bswap32(*(unsigned int *)buffer); +``` + +### Standard Header Compatibility + +These builtins are compatible with GCC and Clang. The `` header (on Linux/glibc) typically provides macros that may expand to these builtins: + +```c +#define bswap_16(x) __builtin_bswap16(x) +#define bswap_32(x) __builtin_bswap32(x) +#define bswap_64(x) __builtin_bswap64(x) +``` + +## Compile-Time Introspection + +### `__builtin_constant_p(expr)` + +**Signature**: `int __builtin_constant_p(expr)` + +**Purpose**: Determines if an expression is known to be constant at compile time. Returns 1 if the expression is a constant, 0 otherwise. + +**Parameters**: +- `expr`: Any C expression to test for constant-ness + +**Returns**: Integer 1 if the expression is a compile-time constant, 0 otherwise. + +**Important Notes**: +- The expression is **not evaluated** - side effects are discarded +- A return of 0 does not necessarily mean the expression isn't constant, only that the compiler cannot prove it's constant +- The result is itself a compile-time constant, allowing use in `#if` preprocessor directives (when supported) and static initializers + +**Usage**: +```c +// Basic constant detection +if (__builtin_constant_p(42)) { + // This branch is taken - 42 is constant +} + +// Variable is not constant +int x = 10; +if (__builtin_constant_p(x)) { + // This branch is NOT taken - x is a variable +} + +// Constant expressions are detected +if (__builtin_constant_p(10 + 20 * 3)) { + // This branch is taken - arithmetic on constants +} + +// Common optimization pattern +#define my_abs(x) \ + (__builtin_constant_p(x) ? ((x) < 0 ? -(x) : (x)) : runtime_abs(x)) +``` + +**What Counts as Constant**: +- Integer literals: `42`, `0xFF` +- Character literals: `'a'`, `'\n'` +- Arithmetic on constants: `10 + 20`, `3 * 4` +- Comparison on constants: `5 < 10` +- Logical operations: `1 && 1`, `0 || 1` +- Bitwise operations: `0xFF & 0x0F` +- Shift operations: `1 << 4` +- Conditional expressions with constant condition: `1 ? 100 : 200` +- Enum constants + +**What Is NOT Constant**: +- Variables (even if initialized with a constant) +- Pointers to variables +- Function calls +- Array subscripts with variable indices + +## Stack Allocation + +### `__builtin_alloca(size)` + +**Signature**: `void *__builtin_alloca(size_t size)` + +**Purpose**: Allocates `size` bytes of memory on the stack frame of the calling function. The allocated memory is automatically freed when the function returns. + +**Parameters**: +- `size`: The number of bytes to allocate + +**Returns**: A `void*` pointer to the allocated memory, suitably aligned for any type. + +**Important Notes**: +- The memory is allocated on the **stack**, not the heap +- Memory is automatically freed when the function **returns** (not when it goes out of scope) +- Do **not** return a pointer to alloca'd memory from a function - it becomes invalid after return +- Large allocations may cause stack overflow +- The allocation size does not need to be a compile-time constant + +**Usage**: +```c +void process_data(int n) { + // Allocate n integers on the stack + int *arr = __builtin_alloca(n * sizeof(int)); + + for (int i = 0; i < n; i++) { + arr[i] = i * 2; + } + + // arr is automatically freed when process_data returns +} + +void example(void) { + // Variable-length buffer without heap allocation + char *buffer = __builtin_alloca(256); + // Use buffer... + // No need to free - automatically cleaned up on return +} +``` + +**Common Use Cases**: +- Variable-length temporary buffers when size is known at runtime +- Avoiding heap allocation overhead for small, short-lived allocations +- Implementing variable-length arrays (VLAs) internally + +**Comparison with malloc**: +| Feature | `__builtin_alloca` | `malloc` | +|---------|-------------------|----------| +| Allocation location | Stack | Heap | +| Deallocation | Automatic on return | Manual with `free()` | +| Speed | Very fast | Slower | +| Size limit | Limited by stack size | Limited by heap/RAM | +| Persistence | Until function returns | Until explicitly freed | + +**Implementation**: +- x86-64: Adjusts the stack pointer (`rsp`) and returns the new address +- AArch64: Adjusts the stack pointer (`sp`) and returns the new address + +**Standard Header Compatibility**: +The `` header typically provides: +```c +#define alloca(size) __builtin_alloca(size) +``` + +## Type Introspection + +### `__builtin_types_compatible_p(type1, type2)` + +**Signature**: `int __builtin_types_compatible_p(type1, type2)` + +**Purpose**: Determines if two types are compatible, ignoring top-level type qualifiers (const, volatile, restrict). Returns 1 if the types are considered compatible, 0 otherwise. + +**Parameters**: +- `type1`: A C type name (e.g., `int`, `char*`, `struct Point`) +- `type2`: A C type name to compare against + +**Returns**: Integer 1 if the types are compatible, 0 otherwise. + +**Important Notes**: +- Top-level qualifiers (`const`, `volatile`, `restrict`) are **ignored** when comparing types +- Signedness **matters**: `int` and `unsigned int` are NOT compatible +- Different struct/union types are NOT compatible, even if they have identical layouts +- The result is a compile-time constant + +**Usage**: +```c +// Basic type comparison +if (__builtin_types_compatible_p(int, int)) { + // This branch is taken - same types +} + +// Different types +if (__builtin_types_compatible_p(int, long)) { + // NOT taken - different types +} + +// Qualifiers are ignored +if (__builtin_types_compatible_p(const int, int)) { + // This branch IS taken - qualifiers ignored +} + +if (__builtin_types_compatible_p(volatile int, int)) { + // This branch IS taken +} + +// Signedness matters +if (__builtin_types_compatible_p(unsigned int, int)) { + // NOT taken - different signedness +} + +// Pointer types +if (__builtin_types_compatible_p(int*, int*)) { + // This branch is taken +} + +if (__builtin_types_compatible_p(int*, char*)) { + // NOT taken - different base types +} +``` + +**What Types Are Compatible**: +- Same basic types: `int` and `int`, `char` and `char` +- Same pointer types: `int*` and `int*` +- Same struct/union types: `struct Point` and `struct Point` +- Types differing only in qualifiers: `const int` and `int` + +**What Types Are NOT Compatible**: +- Different basic types: `int` and `long`, `char` and `int` +- Different signedness: `int` and `unsigned int` +- Different pointer base types: `int*` and `char*` +- Different struct/union types (even with same layout): `struct Point` and `struct Size` +- `void*` and other pointer types: `void*` and `int*` + +**Common Use Cases**: + +**Type-generic macros**: +```c +#define process(x) \ + (__builtin_types_compatible_p(typeof(x), int) ? process_int(x) : \ + __builtin_types_compatible_p(typeof(x), float) ? process_float(x) : \ + process_generic(x)) +``` + +**Static assertions**: +```c +#define STATIC_ASSERT_SAME_TYPE(a, b) \ + _Static_assert(__builtin_types_compatible_p(typeof(a), typeof(b)), \ + "Types must be compatible") +``` + +**Standard Header Compatibility**: +This builtin is compatible with GCC and Clang. It is commonly used in conjunction with `typeof()` for type-generic programming. diff --git a/cc/Cargo.toml b/cc/Cargo.toml new file mode 100644 index 000000000..a78a31b1d --- /dev/null +++ b/cc/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "posixutils-cc" +version = "0.2.2" +authors = ["Jeff Garzik"] +repository.workspace = true +license.workspace = true +edition.workspace = true +rust-version.workspace = true + +[dependencies] +bitflags = "2" +clap.workspace = true +gettext-rs.workspace = true +plib = { path = "../plib" } + +[dev-dependencies] +tempfile = "3" + +[[bin]] +name = "pcc" +path = "./main.rs" + diff --git a/cc/README.md b/cc/README.md new file mode 100644 index 000000000..086f5cc1b --- /dev/null +++ b/cc/README.md @@ -0,0 +1,133 @@ +# Guide to working on pcc, our C99 compiler + +## Overview + +pcc is a C99 compiler targeting POSIX.2024 compliance. It currently supports x86-64 and AArch64 (ARM64) on Linux and macOS. + +Reference: [ISO/IEC 9899:1999 (C99)](https://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf) + +## Architecture + +The compiler pipeline: + +``` +Source → Lexer → Preprocessor → Parser → Type Check → Linearize → SSA → Lower → Codegen → Assembly +``` + +Key source files: + +| File | Purpose | +|------|---------| +| `token/lexer.rs` | Tokenization | +| `token/preprocess.rs` | C preprocessor (#include, #define, #ifdef, etc.) | +| `parse/parser.rs` | Recursive descent parser producing AST | +| `parse/ast.rs` | AST node definitions | +| `types.rs` | C type system | +| `symbol.rs` | Symbol table with scope management | +| `linearize.rs` | AST → IR conversion, SSA construction | +| `ir.rs` | Intermediate representation definitions | +| `ssa.rs` | SSA phi node insertion | +| `lower.rs` | IR lowering passes (phi elimination) | +| `dominate.rs` | Dominator tree and dominance frontiers | +| `codegen/x86_64/` | x86-64 code generator | +| `codegen/aarch64/` | AArch64 code generator | + +## Building + +```bash +cargo build && cargo build --release +``` + +This builds the whole tree, which is usually the best solution. + +## Testing + +For frequent compiler development, this runs the compiler subset of tests: + +```bash +cargo test --release -p posixutils-cc +``` + +## Debugging + +The compiler supports C input via `-` for stdin, and can output intermediate representations: + +- `--dump-asm` - Output assembly to stdout (instead of invoking assembler/linker) +- `--dump-ir` - Output IR before code generation + +Examples: + +```bash +# Compile from stdin, view generated assembly +echo 'int main() { return 42; }' | ./target/release/pcc - --dump-asm + +# View IR for a source file +./target/release/pcc myfile.c --dump-ir + +# Using heredoc for multi-line test cases +./target/release/pcc - --dump-asm <<'EOF' +int add(int a, int b) { + return a + b; +} +int main() { + return add(1, 2); +} +EOF +``` + +## Current Limitations + +Supported: +- Basic types: void, char, short, int, long, long long (signed/unsigned), float, double, \_Bool +- Pointers (including void\*), arrays, structs, unions +- Control flow: if/else, while, do-while, for, switch, goto +- Functions with parameters and return values +- Function pointer declarations +- Enums +- Variadic functions (va\_list, va\_start, va\_arg, va\_copy, va\_end) +- Storage class specifiers: static, extern + - Static local variables (block scope with static duration) + - Static functions (internal linkage) + - Static global variables (internal linkage) + - Extern declarations +- C preprocessor basics (#include, #define, #ifdef, #ifndef, #if, #elif, #else, #endif) +- Bitfields (named, unnamed, zero-width for alignment) + +Not yet implemented: +- `inline` and inlining support +- -fverbose-asm +- Complex initializers +- VLAs (variable-length arrays) +- _Complex and _Atomic types +- Thread-local storage, alignas, etc. +- top builtins to implement: + __builtin_expect + __builtin_clz / clzl / clzll + __builtin_ctz / ctzl / ctzll + __sync_synchronize + __sync_fetch_and_add (and maybe a couple of its siblings) + __builtin_unreachable (helps optimizations + silences some warnings) +- string interning +- DCE and other opt passes +- assembly peephole optimizations + +## Known Issues + +### Preprocessor + +- **Chained macro expansion after token paste**: When a token paste (`##`) creates an identifier that is itself a function-like macro, the subsequent macro expansion may not work correctly. For example: + ```c + #define BAR_test(y) y + #define FOO(x) BAR_ ## x(1) + FOO(test) // Should expand to 1, but may produce incorrect output + ``` + +## Code Quality + +Please run `cargo fmt` before committing code, and `cargo clippy` regularly while working. Code should build without warnings. + +```bash +cargo fmt && cargo clippy -p posixutils-cc +``` + +Read CONTRIBUTING.md in the root of the repository for more details. diff --git a/cc/TODO.md b/cc/TODO.md new file mode 100644 index 000000000..c6ae64cef --- /dev/null +++ b/cc/TODO.md @@ -0,0 +1,159 @@ +# pcc TODO + +## Technical Debt + +### R10 reserved globally for division scratch + +**Location**: `arch/x86_64/codegen.rs` lines 160-179 + +**Issue**: R10 is permanently excluded from the allocatable register pool because x86-64 `div`/`idiv` instructions clobber RAX (quotient) and RDX (remainder). When the divisor operand happens to be in RAX or RDX, we need a scratch register to hold it during the division. + +**Current approach**: Reserve R10 globally so it's always available as scratch for division. + +**Cost**: All generated code loses one general-purpose register, even functions that never use division/modulo operations. + +**Better solutions**: + +1. **Per-function reservation**: Only exclude R10 from allocation in functions that contain `DivS`, `DivU`, `ModS`, or `ModU` instructions. Scan the IR before register allocation. + +2. **Instruction-level constraints**: Integrate scratch register requirements into the register allocator. Mark division instructions as needing a scratch and ensure one is available only when needed. + +3. **Pre-coloring**: During register allocation, constrain the divisor operand to never be assigned RAX or RDX, eliminating the need for a scratch register. + +4. **Spill to stack**: When divisor is in RAX/RDX, use a temporary stack slot instead of a dedicated scratch register. + +Production compilers (GCC, LLVM) use approaches 2-4. The current approach is simple and correct but sacrifices some optimization potential. + +### XMM register save area for variadic functions (x86-64) + +**Location**: `arch/x86_64/codegen.rs` lines 1012-1015 + +**Issue**: On x86-64 SysV ABI, variadic functions that receive floating-point arguments via XMM0-XMM7 should save those registers in the register save area during the prologue. This allows `va_arg(ap, double)` to retrieve FP arguments passed in registers. + +**Current state**: The compiler correctly: +- Passes FP arguments to variadic functions (caller side) in XMM registers +- Sets AL to the count of XMM registers used for variadic calls +- Saves GP registers (RDI, RSI, RDX, RCX, R8, R9) in the register save area + +However, XMM registers are not saved, so user-defined variadic functions cannot use `va_arg` to retrieve `double` arguments. + +**Impact**: Low. Most POSIX variadic functions (printf, sprintf, scanf, etc.) are provided by libc and work correctly. This only affects user-defined variadic functions that accept floating-point variadic arguments. + +**Solution**: In the function prologue for variadic functions: +1. Allocate additional space in the register save area (8 XMM regs × 16 bytes = 128 bytes) +2. Use `movaps` to save XMM0-XMM7 at their ABI-specified offsets +3. Update `va_start` to initialize the FP register save area pointer correctly +4. Update `va_arg` to read from XMM save area when extracting FP types + +--- + +## Future Features + +### C11 Atomics (`_Atomic`) Implementation + +#### Overview + +Add C11 `_Atomic` type qualifier/specifier support to achieve full C11 compliance. + +#### Research Summary + +**C11 `_Atomic` has two syntactic forms:** +1. **Type specifier**: `_Atomic(type-name)` - creates a new atomic type +2. **Type qualifier**: `_Atomic type-name` - can combine with `const`, `volatile`, `restrict` + +**Key semantics:** +- Atomic types may have different size/alignment than non-atomic counterparts +- Normal assignment and compound assignment are sequentially consistent +- Increment/decrement are atomic read-modify-write operations +- Cannot take address of atomic struct member (UB) +- Arrays and functions cannot be made atomic + +**Target instructions:** +- **x86-64**: `LOCK` prefix + `ADD`, `XADD`, `CMPXCHG`, etc. (`XCHG` has implicit lock) +- **ARM64**: `LDXR`/`STXR` (load/store exclusive) or LSE `LDADD`, `STADD`, etc. + +#### Implementation Phases + +**Phase 1: Type System (`cc/types.rs`)** + +1. Add `ATOMIC` modifier to `TypeModifiers`: `const ATOMIC = 1 << 14;` +2. Add helper methods: `is_atomic()`, `atomic_of()`, `strip_atomic()` +3. Size/alignment: atomic types can have same size as non-atomic (x86-64 compatible) + +**Phase 2: Parser (`cc/parse/parser.rs`)** + +1. Recognize `_Atomic` keyword in tokenizer/lexer +2. Parse type specifier form: `_Atomic(type-name)` +3. Parse type qualifier form: `_Atomic` as qualifier +4. Validation: reject on array/function types, reject double-atomic + +**Phase 3: IR Extensions (`cc/ir.rs`)** + +Add new atomic opcodes: +- `AtomicLoad`, `AtomicStore` - with memory ordering +- `AtomicRmwAdd`, `AtomicRmwSub`, `AtomicRmwAnd`, `AtomicRmwOr`, `AtomicRmwXor` +- `AtomicRmwXchg`, `AtomicCmpXchg`, `AtomicFence` + +Add `MemoryOrder` enum: `Relaxed`, `Consume`, `Acquire`, `Release`, `AcqRel`, `SeqCst` + +**Phase 4: Linearizer (`cc/linearize.rs`)** + +1. Detect atomic type in load/store → emit `AtomicLoad`/`AtomicStore` +2. Handle compound assignment: `atomic_var += 1` → `AtomicRmwAdd` +3. Handle increment/decrement: `++atomic_var` → `AtomicRmwAdd` with value 1 + +**Phase 5: Code Generation (`cc/arch/*/codegen.rs`)** + +x86-64: +- `AtomicLoad`: simple MOV (x86 loads are atomic if aligned) +- `AtomicStore (SeqCst)`: `xchg [addr], reg` or `mov + mfence` +- `AtomicRmwAdd`: `lock xadd [addr], reg` +- `AtomicCmpXchg`: `lock cmpxchg [addr], reg` + +ARM64: +- `AtomicLoad (Acquire)`: `ldar` +- `AtomicStore (Release)`: `stlr` +- `AtomicRmwAdd (LSE)`: `ldaddal` +- `AtomicRmwAdd (LL/SC)`: `ldaxr`/`stlxr` loop + +**Phase 6: `` Support** + +- Type aliases: `atomic_bool`, `atomic_int`, `atomic_uint`, etc. +- Core functions: `atomic_load()`, `atomic_store()`, `atomic_exchange()`, etc. +- Fence operations: `atomic_thread_fence()`, `atomic_signal_fence()` + +**Phase 7: Semantic Validation** + +- Reject `&(atomic_struct.member)` +- `sizeof` on atomic type returns atomic size + +#### Implementation Order + +| Order | Phase | Complexity | Dependencies | +|-------|-------|------------|--------------| +| 1 | Type system (`ATOMIC` modifier) | Low | None | +| 2 | Parser (`_Atomic` keyword) | Medium | Phase 1 | +| 3 | IR opcodes | Medium | None | +| 4 | Linearizer (atomic load/store) | Medium | Phases 1-3 | +| 5 | x86-64 codegen | High | Phases 1-4 | +| 6 | ARM64 codegen | High | Phases 1-4 | +| 7 | stdatomic.h builtins | Medium | Phases 1-5 | +| 8 | Semantic validation | Low | Phases 1-2 | + +#### Test Cases Needed + +1. Parser tests: `_Atomic(int)`, `_Atomic int`, combined qualifiers +2. Basic atomic ops: load, store, assignment +3. Compound assignment: `+=`, `-=`, `&=`, `|=`, `^=` +4. Increment/decrement: `++`, `--` (pre and post) +5. Struct with atomic members +6. Pointer to atomic +7. stdatomic.h functions (when implemented) + +#### References + +- [Atomic types - cppreference.com](https://en.cppreference.com/w/c/language/atomic) +- [stdatomic.h - cppreference.com](https://en.cppreference.com/w/c/header/stdatomic) +- [C/C++11 mappings to processors](https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html) +- [LOCK prefix - x86 reference](https://www.felixcloutier.com/x86/lock) +- [AArch64 atomic access - Microsoft](https://devblogs.microsoft.com/oldnewthing/20220811-00/?p=106963) diff --git a/cc/arch/aarch64/codegen.rs b/cc/arch/aarch64/codegen.rs new file mode 100644 index 000000000..03d4b6231 --- /dev/null +++ b/cc/arch/aarch64/codegen.rs @@ -0,0 +1,4538 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// AArch64 Code Generator +// Converts IR to AArch64 assembly +// +// Uses linear scan register allocation and AAPCS64 calling convention. +// +// Stack Pointer Policy: All stack variable accesses use the frame pointer (X29/FP) +// rather than the stack pointer (SP). This is required for alloca support, since +// alloca modifies SP at runtime, invalidating SP-relative offsets. SP is only used +// for prologue/epilogue, call stack argument passing, and alloca itself. +// + +use crate::arch::aarch64::lir::{Aarch64Inst, CallTarget, Cond, GpOperand, MemAddr}; +use crate::arch::codegen::CodeGenerator; +use crate::arch::lir::{Directive, FpSize, Label, OperandSize, Symbol}; +use crate::ir::{Function, Initializer, Instruction, Module, Opcode, Pseudo, PseudoId, PseudoKind}; +use crate::linearize::MAX_REGISTER_AGGREGATE_BITS; +use crate::target::Target; +use crate::types::{Type, TypeModifiers}; +use std::collections::HashMap; + +// ============================================================================ +// AArch64 Register Definitions +// ============================================================================ + +/// AArch64 physical registers +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Reg { + // General purpose registers x0-x28 + X0, + X1, + X2, + X3, + X4, + X5, + X6, + X7, + X8, + X9, + X10, + X11, + X12, + X13, + X14, + X15, + X16, + X17, + X18, + X19, + X20, + X21, + X22, + X23, + X24, + X25, + X26, + X27, + X28, + // Frame pointer + X29, + // Link register + X30, + // Stack pointer (special, shares encoding with XZR in some contexts) + SP, +} + +impl Reg { + /// Get 64-bit register name + pub fn name64(&self) -> &'static str { + match self { + Reg::X0 => "x0", + Reg::X1 => "x1", + Reg::X2 => "x2", + Reg::X3 => "x3", + Reg::X4 => "x4", + Reg::X5 => "x5", + Reg::X6 => "x6", + Reg::X7 => "x7", + Reg::X8 => "x8", + Reg::X9 => "x9", + Reg::X10 => "x10", + Reg::X11 => "x11", + Reg::X12 => "x12", + Reg::X13 => "x13", + Reg::X14 => "x14", + Reg::X15 => "x15", + Reg::X16 => "x16", + Reg::X17 => "x17", + Reg::X18 => "x18", + Reg::X19 => "x19", + Reg::X20 => "x20", + Reg::X21 => "x21", + Reg::X22 => "x22", + Reg::X23 => "x23", + Reg::X24 => "x24", + Reg::X25 => "x25", + Reg::X26 => "x26", + Reg::X27 => "x27", + Reg::X28 => "x28", + Reg::X29 => "x29", + Reg::X30 => "x30", + Reg::SP => "sp", + } + } + + /// Get 32-bit register name + pub fn name32(&self) -> &'static str { + match self { + Reg::X0 => "w0", + Reg::X1 => "w1", + Reg::X2 => "w2", + Reg::X3 => "w3", + Reg::X4 => "w4", + Reg::X5 => "w5", + Reg::X6 => "w6", + Reg::X7 => "w7", + Reg::X8 => "w8", + Reg::X9 => "w9", + Reg::X10 => "w10", + Reg::X11 => "w11", + Reg::X12 => "w12", + Reg::X13 => "w13", + Reg::X14 => "w14", + Reg::X15 => "w15", + Reg::X16 => "w16", + Reg::X17 => "w17", + Reg::X18 => "w18", + Reg::X19 => "w19", + Reg::X20 => "w20", + Reg::X21 => "w21", + Reg::X22 => "w22", + Reg::X23 => "w23", + Reg::X24 => "w24", + Reg::X25 => "w25", + Reg::X26 => "w26", + Reg::X27 => "w27", + Reg::X28 => "w28", + Reg::X29 => "w29", + Reg::X30 => "w30", + Reg::SP => "sp", // SP doesn't have a 32-bit form in normal use + } + } + + /// Get register name for a given bit size + pub fn name_for_size(&self, bits: u32) -> &'static str { + match bits { + 8 | 16 | 32 => self.name32(), + _ => self.name64(), + } + } + + /// Is this a callee-saved register? + pub fn is_callee_saved(&self) -> bool { + matches!( + self, + Reg::X19 + | Reg::X20 + | Reg::X21 + | Reg::X22 + | Reg::X23 + | Reg::X24 + | Reg::X25 + | Reg::X26 + | Reg::X27 + | Reg::X28 + ) + } + + /// Argument registers in order (AAPCS64) + pub fn arg_regs() -> &'static [Reg] { + &[ + Reg::X0, + Reg::X1, + Reg::X2, + Reg::X3, + Reg::X4, + Reg::X5, + Reg::X6, + Reg::X7, + ] + } + + /// All allocatable registers + /// Excludes: x8 (indirect result), x16/x17 (linker scratch), + /// x18 (platform), x29 (fp), x30 (lr), sp + pub fn allocatable() -> &'static [Reg] { + &[ + Reg::X0, + Reg::X1, + Reg::X2, + Reg::X3, + Reg::X4, + Reg::X5, + Reg::X6, + Reg::X7, + // Skip x8 (indirect result register for large struct returns per AAPCS64) + Reg::X9, + Reg::X10, + Reg::X11, + Reg::X12, + Reg::X13, + Reg::X14, + Reg::X15, + // Skip x16, x17 (linker scratch) + // Skip x18 (platform reserved) + Reg::X19, + Reg::X20, + Reg::X21, + Reg::X22, + Reg::X23, + Reg::X24, + Reg::X25, + Reg::X26, + Reg::X27, + Reg::X28, + // Skip x29 (fp), x30 (lr) + ] + } + + /// Scratch registers for codegen (not allocatable, used for temporaries) + /// x16 (IP0) and x17 (IP1) are linker scratch registers + pub fn scratch_regs() -> (Reg, Reg) { + (Reg::X16, Reg::X17) + } + + /// Frame pointer register + pub fn fp() -> Reg { + Reg::X29 + } + + /// Link register + pub fn lr() -> Reg { + Reg::X30 + } + + /// Stack pointer register + pub fn sp() -> Reg { + Reg::SP + } + + /// Platform reserved register (x18) + /// Should not be used; this is only for documentation and completeness + pub fn platform_reserved() -> Reg { + Reg::X18 + } +} + +// ============================================================================ +// AArch64 Floating-Point Register Definitions +// ============================================================================ + +/// AArch64 SIMD/FP registers (V0-V31, accessed as D0-D31 for double, S0-S31 for float) +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum VReg { + V0, + V1, + V2, + V3, + V4, + V5, + V6, + V7, + V8, + V9, + V10, + V11, + V12, + V13, + V14, + V15, + V16, + V17, + V18, + V19, + V20, + V21, + V22, + V23, + V24, + V25, + V26, + V27, + V28, + V29, + V30, + V31, +} + +impl VReg { + /// Get 64-bit (double) register name + pub fn name_d(&self) -> &'static str { + match self { + VReg::V0 => "d0", + VReg::V1 => "d1", + VReg::V2 => "d2", + VReg::V3 => "d3", + VReg::V4 => "d4", + VReg::V5 => "d5", + VReg::V6 => "d6", + VReg::V7 => "d7", + VReg::V8 => "d8", + VReg::V9 => "d9", + VReg::V10 => "d10", + VReg::V11 => "d11", + VReg::V12 => "d12", + VReg::V13 => "d13", + VReg::V14 => "d14", + VReg::V15 => "d15", + VReg::V16 => "d16", + VReg::V17 => "d17", + VReg::V18 => "d18", + VReg::V19 => "d19", + VReg::V20 => "d20", + VReg::V21 => "d21", + VReg::V22 => "d22", + VReg::V23 => "d23", + VReg::V24 => "d24", + VReg::V25 => "d25", + VReg::V26 => "d26", + VReg::V27 => "d27", + VReg::V28 => "d28", + VReg::V29 => "d29", + VReg::V30 => "d30", + VReg::V31 => "d31", + } + } + + /// Get 32-bit (float) register name + pub fn name_s(&self) -> &'static str { + match self { + VReg::V0 => "s0", + VReg::V1 => "s1", + VReg::V2 => "s2", + VReg::V3 => "s3", + VReg::V4 => "s4", + VReg::V5 => "s5", + VReg::V6 => "s6", + VReg::V7 => "s7", + VReg::V8 => "s8", + VReg::V9 => "s9", + VReg::V10 => "s10", + VReg::V11 => "s11", + VReg::V12 => "s12", + VReg::V13 => "s13", + VReg::V14 => "s14", + VReg::V15 => "s15", + VReg::V16 => "s16", + VReg::V17 => "s17", + VReg::V18 => "s18", + VReg::V19 => "s19", + VReg::V20 => "s20", + VReg::V21 => "s21", + VReg::V22 => "s22", + VReg::V23 => "s23", + VReg::V24 => "s24", + VReg::V25 => "s25", + VReg::V26 => "s26", + VReg::V27 => "s27", + VReg::V28 => "s28", + VReg::V29 => "s29", + VReg::V30 => "s30", + VReg::V31 => "s31", + } + } + + /// Get register name for a given size (32 = float, 64 = double) + pub fn name_for_size(&self, bits: u32) -> &'static str { + if bits <= 32 { + self.name_s() + } else { + self.name_d() + } + } + + /// Is this a callee-saved FP register? + /// AAPCS64: v8-v15 (lower 64 bits) are callee-saved + pub fn is_callee_saved(&self) -> bool { + matches!( + self, + VReg::V8 + | VReg::V9 + | VReg::V10 + | VReg::V11 + | VReg::V12 + | VReg::V13 + | VReg::V14 + | VReg::V15 + ) + } + + /// FP argument registers in order (AAPCS64) + pub fn arg_regs() -> &'static [VReg] { + &[ + VReg::V0, + VReg::V1, + VReg::V2, + VReg::V3, + VReg::V4, + VReg::V5, + VReg::V6, + VReg::V7, + ] + } + + /// All allocatable FP registers + /// Note: V16, V17, V18 are reserved as scratch registers for codegen + pub fn allocatable() -> &'static [VReg] { + &[ + VReg::V0, + VReg::V1, + VReg::V2, + VReg::V3, + VReg::V4, + VReg::V5, + VReg::V6, + VReg::V7, + VReg::V8, + VReg::V9, + VReg::V10, + VReg::V11, + VReg::V12, + VReg::V13, + VReg::V14, + VReg::V15, + // V16, V17, V18 reserved for scratch + VReg::V19, + VReg::V20, + VReg::V21, + VReg::V22, + VReg::V23, + VReg::V24, + VReg::V25, + VReg::V26, + VReg::V27, + VReg::V28, + VReg::V29, + VReg::V30, + VReg::V31, + ] + } +} + +// ============================================================================ +// Operand - Location of a value (register or memory) +// ============================================================================ + +/// Location of a value +#[derive(Debug, Clone)] +pub enum Loc { + /// In a register + Reg(Reg), + /// In a floating-point register + VReg(VReg), + /// On the stack at [sp + offset] (positive offset from sp after allocation) + Stack(i32), + /// Immediate constant + Imm(i64), + /// Floating-point immediate constant + FImm(f64), + /// Global symbol + Global(String), +} + +// ============================================================================ +// Register Allocator (Linear Scan) +// ============================================================================ + +/// Live interval for a pseudo +#[derive(Debug, Clone)] +struct LiveInterval { + pseudo: PseudoId, + start: usize, + end: usize, +} + +/// Simple linear scan register allocator for AArch64 +pub struct RegAlloc { + /// Mapping from pseudo to location + locations: HashMap, + /// Free registers + free_regs: Vec, + /// Free FP registers + free_fp_regs: Vec, + /// Active intervals (sorted by end point) + active: Vec<(LiveInterval, Reg)>, + /// Active FP intervals (sorted by end point) + active_fp: Vec<(LiveInterval, VReg)>, + /// Next stack slot offset + stack_offset: i32, + /// Callee-saved registers that were used + used_callee_saved: Vec, + /// Callee-saved FP registers that were used + used_callee_saved_fp: Vec, + /// Set of pseudos that need FP registers (determined by analyzing the IR) + fp_pseudos: std::collections::HashSet, +} + +impl RegAlloc { + pub fn new() -> Self { + Self { + locations: HashMap::new(), + free_regs: Reg::allocatable().to_vec(), + free_fp_regs: VReg::allocatable().to_vec(), + active: Vec::new(), + active_fp: Vec::new(), + stack_offset: 0, + used_callee_saved: Vec::new(), + used_callee_saved_fp: Vec::new(), + fp_pseudos: std::collections::HashSet::new(), + } + } + + /// Perform register allocation for a function + pub fn allocate(&mut self, func: &Function) -> HashMap { + // Reset state + self.locations.clear(); + self.free_regs = Reg::allocatable().to_vec(); + self.free_fp_regs = VReg::allocatable().to_vec(); + self.active.clear(); + self.active_fp.clear(); + self.stack_offset = 0; + self.used_callee_saved.clear(); + self.used_callee_saved_fp.clear(); + self.fp_pseudos.clear(); + + // Identify which pseudos need FP registers + self.identify_fp_pseudos(func); + + // Pre-allocate argument registers (integer and FP separately) + let int_arg_regs = Reg::arg_regs(); + let fp_arg_regs = VReg::arg_regs(); + let mut int_arg_idx = 0usize; + let mut fp_arg_idx = 0usize; + + // AAPCS64: Detect if there's a hidden return pointer for large struct returns. + // Unlike x86-64 where sret goes in RDI (first arg), AAPCS64 uses X8 (indirect + // result register) which does NOT shift other arguments. + let sret_pseudo = func + .pseudos + .iter() + .find(|p| matches!(p.kind, PseudoKind::Arg(0)) && p.name.as_deref() == Some("__sret")); + + // If there's a hidden return pointer, allocate X8 for it + if let Some(sret) = sret_pseudo { + self.locations.insert(sret.id, Loc::Reg(Reg::X8)); + // X8 is not in allocatable list, so no need to remove from free_regs + } + + // Offset for matching arg_idx: if sret exists, real params have arg_idx starting at 1 + let arg_idx_offset: u32 = if sret_pseudo.is_some() { 1 } else { 0 }; + + for (i, (_name, typ)) in func.params.iter().enumerate() { + // Find the pseudo for this argument + for pseudo in &func.pseudos { + if let PseudoKind::Arg(arg_idx) = pseudo.kind { + // Match by adjusted index (sret shifts arg_idx but not register allocation) + if arg_idx == (i as u32) + arg_idx_offset { + let is_fp = typ.is_float(); + if is_fp { + if fp_arg_idx < fp_arg_regs.len() { + self.locations + .insert(pseudo.id, Loc::VReg(fp_arg_regs[fp_arg_idx])); + self.free_fp_regs.retain(|&r| r != fp_arg_regs[fp_arg_idx]); + self.fp_pseudos.insert(pseudo.id); + } else { + // FP argument on stack + let offset = 16 + (fp_arg_idx - fp_arg_regs.len()) as i32 * 8; + self.locations.insert(pseudo.id, Loc::Stack(offset)); + self.fp_pseudos.insert(pseudo.id); + } + fp_arg_idx += 1; + } else { + if int_arg_idx < int_arg_regs.len() { + self.locations + .insert(pseudo.id, Loc::Reg(int_arg_regs[int_arg_idx])); + self.free_regs.retain(|&r| r != int_arg_regs[int_arg_idx]); + } else { + // Integer argument on stack + let offset = 16 + (int_arg_idx - int_arg_regs.len()) as i32 * 8; + self.locations.insert(pseudo.id, Loc::Stack(offset)); + } + int_arg_idx += 1; + } + break; + } + } + } + } + + // Compute live intervals + let intervals = self.compute_live_intervals(func); + + // Allocate registers using linear scan + for interval in intervals { + self.expire_old_intervals(interval.start); + + // Check if already allocated (e.g., arguments) + if self.locations.contains_key(&interval.pseudo) { + continue; + } + + // Check if this is a constant - constants don't need registers + if let Some(pseudo) = func.pseudos.iter().find(|p| p.id == interval.pseudo) { + match &pseudo.kind { + PseudoKind::Val(v) => { + self.locations.insert(interval.pseudo, Loc::Imm(*v)); + continue; + } + PseudoKind::FVal(v) => { + self.locations.insert(interval.pseudo, Loc::FImm(*v)); + self.fp_pseudos.insert(interval.pseudo); + continue; + } + PseudoKind::Sym(name) => { + // Check if this is a local variable or a global symbol + if let Some(local) = func.locals.get(name) { + // Local variable - allocate stack space based on actual type size + let size = local.typ.size_bytes().max(8) as i32; + // Align to 8 bytes + let aligned_size = (size + 7) & !7; + self.stack_offset += aligned_size; + self.locations + .insert(interval.pseudo, Loc::Stack(-self.stack_offset)); + } else { + // Global symbol + self.locations + .insert(interval.pseudo, Loc::Global(name.clone())); + } + continue; + } + _ => {} + } + } + + // Check if this pseudo needs an FP register + let needs_fp = self.fp_pseudos.contains(&interval.pseudo); + + if needs_fp { + // Try to allocate an FP register + if let Some(reg) = self.free_fp_regs.pop() { + if reg.is_callee_saved() && !self.used_callee_saved_fp.contains(®) { + self.used_callee_saved_fp.push(reg); + } + self.locations.insert(interval.pseudo, Loc::VReg(reg)); + self.active_fp.push((interval.clone(), reg)); + self.active_fp.sort_by_key(|(i, _)| i.end); + } else { + // Spill to stack + self.stack_offset += 8; + self.locations + .insert(interval.pseudo, Loc::Stack(-self.stack_offset)); + } + } else { + // Try to allocate an integer register + if let Some(reg) = self.free_regs.pop() { + if reg.is_callee_saved() && !self.used_callee_saved.contains(®) { + self.used_callee_saved.push(reg); + } + self.locations.insert(interval.pseudo, Loc::Reg(reg)); + self.active.push((interval.clone(), reg)); + self.active.sort_by_key(|(i, _)| i.end); + } else { + // Spill to stack + self.stack_offset += 8; + self.locations + .insert(interval.pseudo, Loc::Stack(-self.stack_offset)); + } + } + } + + self.locations.clone() + } + + /// Identify which pseudos need FP registers by scanning the IR + fn identify_fp_pseudos(&mut self, func: &Function) { + for block in &func.blocks { + for insn in &block.insns { + // Check if this instruction produces a floating-point result + let is_fp_op = matches!( + insn.op, + Opcode::FAdd + | Opcode::FSub + | Opcode::FMul + | Opcode::FDiv + | Opcode::FNeg + | Opcode::FCvtF + | Opcode::UCvtF + | Opcode::SCvtF + ); + + if is_fp_op { + if let Some(target) = insn.target { + self.fp_pseudos.insert(target); + } + } + + // Also check if the type is floating point + // (but exclude comparisons which always produce int regardless of operand type) + if let Some(ref typ) = insn.typ { + if typ.is_float() + && !matches!( + insn.op, + Opcode::FCmpOEq + | Opcode::FCmpONe + | Opcode::FCmpOLt + | Opcode::FCmpOLe + | Opcode::FCmpOGt + | Opcode::FCmpOGe + ) + { + if let Some(target) = insn.target { + self.fp_pseudos.insert(target); + } + } + } + } + } + + // Also mark pseudos that are FVal constants + for pseudo in &func.pseudos { + if matches!(pseudo.kind, PseudoKind::FVal(_)) { + self.fp_pseudos.insert(pseudo.id); + } + } + } + + fn expire_old_intervals(&mut self, point: usize) { + // Expire integer register intervals + let mut to_remove = Vec::new(); + for (i, (interval, reg)) in self.active.iter().enumerate() { + if interval.end < point { + self.free_regs.push(*reg); + to_remove.push(i); + } + } + // Remove in reverse order to preserve indices + for i in to_remove.into_iter().rev() { + self.active.remove(i); + } + + // Expire FP register intervals + let mut to_remove_fp = Vec::new(); + for (i, (interval, reg)) in self.active_fp.iter().enumerate() { + if interval.end < point { + self.free_fp_regs.push(*reg); + to_remove_fp.push(i); + } + } + for i in to_remove_fp.into_iter().rev() { + self.active_fp.remove(i); + } + } + + fn compute_live_intervals(&self, func: &Function) -> Vec { + use crate::ir::BasicBlockId; + + // Track earliest definition, latest definition, and latest use for each pseudo + struct IntervalInfo { + pseudo: PseudoId, + first_def: usize, + last_def: usize, + last_use: usize, + } + + let mut intervals: HashMap = HashMap::new(); + let mut pos = 0usize; + + // First pass: compute block start and end positions + let mut block_start_pos: HashMap = HashMap::new(); + let mut block_end_pos: HashMap = HashMap::new(); + let mut temp_pos = 0usize; + for block in &func.blocks { + block_start_pos.insert(block.id, temp_pos); + temp_pos += block.insns.len(); + block_end_pos.insert(block.id, temp_pos.saturating_sub(1)); + } + + // Collect phi sources with their source blocks for later processing + // Also track phi targets - they need intervals extended to cover all their copy definitions + let mut phi_sources: Vec<(BasicBlockId, PseudoId)> = Vec::new(); + let mut phi_targets: Vec<(BasicBlockId, PseudoId)> = Vec::new(); + + for block in &func.blocks { + for insn in &block.insns { + // Definition point - track both first and last definition + // This is important because phi elimination creates multiple definitions + // of the same pseudo (via Copy instructions in predecessor blocks) + if let Some(target) = insn.target { + intervals + .entry(target) + .and_modify(|info| { + info.first_def = info.first_def.min(pos); + info.last_def = info.last_def.max(pos); + }) + .or_insert(IntervalInfo { + pseudo: target, + first_def: pos, + last_def: pos, + last_use: pos, + }); + } + + // Use points + for &src in &insn.src { + if let Some(info) = intervals.get_mut(&src) { + info.last_use = info.last_use.max(pos); + } else { + intervals.insert( + src, + IntervalInfo { + pseudo: src, + first_def: pos, + last_def: pos, + last_use: pos, + }, + ); + } + } + + // Collect phi sources - they need to be live at the END of their source block + // Also collect phi targets - they have Copy definitions in each source block + for (src_bb, pseudo) in &insn.phi_list { + phi_sources.push((*src_bb, *pseudo)); + // The phi target (if present) is defined via Copy at the end of each source block + if let Some(target) = insn.target { + phi_targets.push((*src_bb, target)); + } + } + + pos += 1; + } + } + + // Process phi sources: extend their live interval to the end of their source block + // This is critical for loops where phi sources come from later blocks via back edges + for (src_bb, pseudo) in phi_sources { + if let Some(&end_pos) = block_end_pos.get(&src_bb) { + if let Some(info) = intervals.get_mut(&pseudo) { + info.last_use = info.last_use.max(end_pos); + info.last_def = info.last_def.max(end_pos); + } else { + intervals.insert( + pseudo, + IntervalInfo { + pseudo, + first_def: end_pos, + last_def: end_pos, + last_use: end_pos, + }, + ); + } + } + } + + // Process phi targets: extend their live interval to cover all Copy definitions + // The phi target is defined via Copy at the end of each source block + // For loops, this means the target must be live until the last Copy (at the loop back edge) + for (src_bb, target) in phi_targets { + if let Some(&end_pos) = block_end_pos.get(&src_bb) { + if let Some(info) = intervals.get_mut(&target) { + info.last_def = info.last_def.max(end_pos); + } else { + intervals.insert( + target, + IntervalInfo { + pseudo: target, + first_def: end_pos, + last_def: end_pos, + last_use: end_pos, + }, + ); + } + } + } + + // Detect loops via back edges and extend lifetimes of variables used in loop bodies + // A back edge is a branch from a block to an earlier block (lower start position) + // Any variable used in a block that's part of a loop must be live until the back edge + let mut loop_back_edges: Vec<(BasicBlockId, BasicBlockId, usize)> = Vec::new(); // (from, to, from_end_pos) + for block in &func.blocks { + // Check the last instruction for branch targets + if let Some(last_insn) = block.insns.last() { + // Collect branch targets (bb_true for unconditional/conditional, bb_false for conditional) + let mut targets = Vec::new(); + if let Some(target) = last_insn.bb_true { + targets.push(target); + } + if let Some(target) = last_insn.bb_false { + targets.push(target); + } + + // Check if any target is a back edge (to an earlier block) + let from_start = block_start_pos.get(&block.id).copied().unwrap_or(0); + for target_bb in targets { + let target_start = block_start_pos.get(&target_bb).copied().unwrap_or(0); + if target_start < from_start { + // This is a back edge - the loop spans from target_bb to block + let from_end = block_end_pos.get(&block.id).copied().unwrap_or(0); + loop_back_edges.push((block.id, target_bb, from_end)); + } + } + } + } + + // For each loop, extend lifetimes of variables used within the loop + for (_from_bb, to_bb, back_edge_pos) in &loop_back_edges { + let loop_start = block_start_pos.get(to_bb).copied().unwrap_or(0); + + // Extend any variable that: + // 1. Is defined before the loop (first_def < loop_start) + // 2. Is used inside the loop (last_use >= loop_start && last_use <= back_edge_pos) + for info in intervals.values_mut() { + if info.first_def < loop_start + && info.last_use >= loop_start + && info.last_use <= *back_edge_pos + { + // This variable is used inside the loop but defined outside + // Extend its lifetime to the back edge + info.last_use = info.last_use.max(*back_edge_pos); + } + } + } + + // Find the maximum position in the function + let max_pos = pos.saturating_sub(1); + + // Convert to LiveInterval + // The interval spans from first_def to max(last_def, last_use) + // This ensures that pseudos with multiple definitions (from phi copies) + // stay live across all their definitions + // + // IMPORTANT: For loop variables, if last_def > last_use (definition comes after use + // due to back edge), the variable must stay live until the end of the function + // because it will be used again in the next loop iteration. + let mut result: Vec<_> = intervals + .into_values() + .map(|info| { + let end = if info.last_def > info.last_use { + // Loop variable: definition after use means we wrap around + // Extend to end of function to ensure it stays live + max_pos + } else { + info.last_def.max(info.last_use) + }; + LiveInterval { + pseudo: info.pseudo, + start: info.first_def, + end, + } + }) + .collect(); + result.sort_by_key(|i| i.start); + result + } + + /// Get stack size needed (aligned to 16 bytes) + pub fn stack_size(&self) -> i32 { + // Align to 16 bytes + (self.stack_offset + 15) & !15 + } + + /// Get callee-saved registers that need to be preserved + pub fn callee_saved_used(&self) -> &[Reg] { + &self.used_callee_saved + } +} + +impl Default for RegAlloc { + fn default() -> Self { + Self::new() + } +} + +// ============================================================================ +// AArch64 Code Generator +// ============================================================================ + +/// AArch64 code generator +pub struct Aarch64CodeGen { + /// Target info + target: Target, + /// Output buffer + output: String, + /// LIR instruction buffer (for deferred emission and future peephole optimization) + lir_buffer: Vec, + /// Current function's register allocation + locations: HashMap, + /// Current function's pseudos (for looking up values) + pseudos: Vec, + /// Current function name (for generating unique labels) + current_fn: String, + /// Total frame size for current function + frame_size: i32, + /// Size of callee-saved register area (for computing local variable offsets) + callee_saved_size: i32, + /// Whether to emit basic unwind tables (cfi_startproc/cfi_endproc) + emit_unwind_tables: bool, + /// Last emitted source line (for avoiding duplicate .loc directives) + last_debug_line: u32, + /// Last emitted source file index + last_debug_file: u16, + /// Whether to emit debug info (.file/.loc directives) + emit_debug: bool, + /// Offset from FP to register save area (for variadic functions) + reg_save_area_offset: i32, + /// Size of register save area (for variadic functions) + /// Used to compute correct FP-relative offsets for local variables + reg_save_area_size: i32, + /// Number of fixed GP parameters (for variadic functions) + num_fixed_gp_params: usize, +} + +impl Aarch64CodeGen { + pub fn new(target: Target) -> Self { + Self { + target, + output: String::new(), + lir_buffer: Vec::new(), + locations: HashMap::new(), + pseudos: Vec::new(), + current_fn: String::new(), + frame_size: 0, + callee_saved_size: 0, + emit_unwind_tables: true, // Default to emitting basic unwind tables + last_debug_line: 0, + last_debug_file: 0, + emit_debug: false, + reg_save_area_offset: 0, + reg_save_area_size: 0, + num_fixed_gp_params: 0, + } + } + + /// Check if a function contains va_start (indicating it's variadic) + fn is_variadic_function(func: &Function) -> bool { + for block in &func.blocks { + for insn in &block.insns { + if matches!(insn.op, crate::ir::Opcode::VaStart) { + return true; + } + } + } + false + } + + /// Compute the actual FP-relative offset for a stack location. + /// For local variables (negative offsets), this accounts for the + /// register save area in varargs functions which is placed at the + /// end of the frame (after locals). + #[inline] + fn stack_offset(&self, frame_size: i32, offset: i32) -> i32 { + if offset < 0 { + // Local variable: use frame size minus reg_save_area + // Layout: [fp/lr][callee-saved][locals][reg_save_area] + // Locals are at offsets from (frame_size - reg_save_area_size) + (frame_size - self.reg_save_area_size) + offset + } else { + // Positive offset (arguments passed on stack) - use as-is + offset + } + } + + /// Push a LIR instruction to the buffer (deferred emission) + fn push_lir(&mut self, inst: Aarch64Inst) { + self.lir_buffer.push(inst); + } + + /// Emit all buffered LIR instructions to the output string + fn emit_all(&mut self) { + use crate::arch::lir::EmitAsm; + for inst in &self.lir_buffer { + inst.emit(&self.target, &mut self.output); + } + } + + /// Emit .loc directive for source line tracking (if debug is enabled and line changed) + fn emit_loc(&mut self, insn: &Instruction) { + if !self.emit_debug { + return; + } + if let Some(pos) = &insn.pos { + // Only emit if line changed (avoid duplicate .loc directives) + let file = pos.stream + 1; // DWARF file indices start at 1 + let line = pos.line; + if line != self.last_debug_line || file != self.last_debug_file { + self.push_lir(Aarch64Inst::Directive(Directive::loc( + file.into(), + line, + pos.col.into(), + ))); + self.last_debug_line = line; + self.last_debug_file = file; + } + } + } + + fn emit_header(&mut self) { + // Header comments with compiler and target info (GCC-style) + for comment in crate::arch::codegen::generate_header_comments(&self.target) { + self.push_lir(Aarch64Inst::Directive(Directive::Comment(comment))); + } + self.push_lir(Aarch64Inst::Directive(Directive::Text)); + } + + fn emit_global(&mut self, name: &str, typ: &Type, init: &Initializer) { + let size = typ.size_bits() / 8; + let size = if size == 0 { 8 } else { size }; // Default to 8 bytes + + // Check storage class - skip .globl for static + let is_static = typ.modifiers.contains(TypeModifiers::STATIC); + + // Get alignment from type info + let align = typ.alignment() as u32; + + // Use .comm for uninitialized external (non-static) globals + let use_bss = matches!(init, Initializer::None) && !is_static; + + if use_bss { + // Use .comm for uninitialized external globals + self.push_lir(Aarch64Inst::Directive(Directive::comm(name, size, align))); + return; + } + + // Data section + self.push_lir(Aarch64Inst::Directive(Directive::Data)); + + // Global visibility (if not static) + if !is_static { + self.push_lir(Aarch64Inst::Directive(Directive::global(name))); + } + + // ELF-only type and size (handled by Directive::emit which skips on macOS) + self.push_lir(Aarch64Inst::Directive(Directive::type_object(name))); + self.push_lir(Aarch64Inst::Directive(Directive::size(name, size))); + + // Alignment + if align > 1 { + self.push_lir(Aarch64Inst::Directive(Directive::Align( + align.trailing_zeros(), + ))); + } + + // Label + self.push_lir(Aarch64Inst::Directive(Directive::global_label(name))); + + // Emit initializer + match init { + Initializer::None => { + // For static uninitialized, use .zero (not .comm) + self.push_lir(Aarch64Inst::Directive(Directive::Zero(size))); + } + Initializer::Int(val) => match size { + 1 => self.push_lir(Aarch64Inst::Directive(Directive::Byte(*val))), + 2 => self.push_lir(Aarch64Inst::Directive(Directive::Short(*val))), + 4 => self.push_lir(Aarch64Inst::Directive(Directive::Long(*val))), + _ => self.push_lir(Aarch64Inst::Directive(Directive::Quad(*val))), + }, + Initializer::Float(val) => { + if size == 4 { + // float - emit as 32-bit IEEE 754 + let bits = (*val as f32).to_bits(); + self.push_lir(Aarch64Inst::Directive(Directive::Long(bits as i64))); + } else { + // double - emit as 64-bit IEEE 754 + let bits = val.to_bits(); + self.push_lir(Aarch64Inst::Directive(Directive::Quad(bits as i64))); + } + } + } + } + + fn emit_strings(&mut self, strings: &[(String, String)]) { + if strings.is_empty() { + return; + } + + // Read-only data section + self.push_lir(Aarch64Inst::Directive(Directive::Rodata)); + + for (label, content) in strings { + // Local label for string literal + self.push_lir(Aarch64Inst::Directive(Directive::local_label(label))); + self.push_lir(Aarch64Inst::Directive(Directive::Asciz( + Self::escape_string(content), + ))); + } + + // Switch back to text section for functions + self.push_lir(Aarch64Inst::Directive(Directive::Text)); + } + + fn escape_string(s: &str) -> String { + let mut result = String::new(); + for c in s.chars() { + match c { + '\n' => result.push_str("\\n"), + '\r' => result.push_str("\\r"), + '\t' => result.push_str("\\t"), + '\\' => result.push_str("\\\\"), + '"' => result.push_str("\\\""), + c if c.is_ascii_graphic() || c == ' ' => result.push(c), + c => { + // Escape non-printable as octal + for byte in c.to_string().as_bytes() { + result.push_str(&format!("\\{:03o}", byte)); + } + } + } + } + result + } + + fn emit_function(&mut self, func: &Function) { + // Check if this function uses varargs + let is_variadic = Self::is_variadic_function(func); + + // Register allocation + let mut alloc = RegAlloc::new(); + self.locations = alloc.allocate(func); + self.pseudos = func.pseudos.clone(); + + let stack_size = alloc.stack_size(); + let callee_saved = alloc.callee_saved_used().to_vec(); + + // For variadic functions on Linux/FreeBSD, we need extra space for the register save area + // AAPCS64: 8 GP regs (x0-x7) * 8 bytes = 64 bytes + // On Darwin (macOS/iOS), variadic args are passed on the stack by the caller, + // so we don't need a register save area. + let is_darwin = self.target.os == crate::target::Os::MacOS; + let reg_save_area_size: i32 = if is_variadic && !is_darwin { 64 } else { 0 }; + + // Calculate total frame size + // Need space for: fp/lr (16 bytes) + callee-saved regs + local vars + reg save area + // Round up callee-saved count to even for 16-byte alignment + let callee_saved_pairs = (callee_saved.len() + 1) / 2; + let callee_saved_size = callee_saved_pairs as i32 * 16; + let total_frame = 16 + callee_saved_size + stack_size + reg_save_area_size; + // Ensure 16-byte alignment + let total_frame = (total_frame + 15) & !15; + + // Track register save area offset for va_start (offset from FP) + // Layout: [fp/lr][callee-saved][locals][reg_save_area] + // The save area is at FP + 16 + callee_saved_size + stack_size + self.reg_save_area_offset = if is_variadic { + 16 + callee_saved_size + stack_size + } else { + 0 + }; + + // Save function name, frame size, and callee-saved size for label generation and offset calculation + self.current_fn = func.name.clone(); + self.frame_size = total_frame; + self.callee_saved_size = callee_saved_size; + self.reg_save_area_size = reg_save_area_size; + + // Function prologue + self.push_lir(Aarch64Inst::Directive(Directive::Blank)); + self.push_lir(Aarch64Inst::Directive(Directive::Text)); + + // Skip .globl for static functions (internal linkage) + if !func.is_static { + self.push_lir(Aarch64Inst::Directive(Directive::global(&func.name))); + } + + // ELF-only type (handled by Directive::emit which skips on macOS) + self.push_lir(Aarch64Inst::Directive(Directive::type_func(&func.name))); + + // Alignment + self.push_lir(Aarch64Inst::Directive(Directive::Align(2))); + + // Function label + self.push_lir(Aarch64Inst::Directive(Directive::global_label(&func.name))); + + // CFI: Start procedure (enables stack unwinding for this function) + if self.emit_unwind_tables { + self.push_lir(Aarch64Inst::Directive(Directive::CfiStartProc)); + } + + // Prologue: save frame pointer and link register, allocate stack + let (scratch0, _scratch1) = Reg::scratch_regs(); + let fp = Reg::fp(); + let lr = Reg::lr(); + // Reference platform_reserved to acknowledge its existence + let _ = Reg::platform_reserved(); + + if total_frame > 0 { + // Combined push and allocate: stp x29, x30, [sp, #-N]! + self.push_lir(Aarch64Inst::Stp { + size: OperandSize::B64, + src1: fp, + src2: lr, + addr: MemAddr::PreIndex { + base: Reg::SP, + offset: -total_frame, + }, + }); + if self.emit_debug { + // CFA is now at sp + total_frame (previous SP value) + self.push_lir(Aarch64Inst::Directive(Directive::cfi_def_cfa( + "sp", + total_frame, + ))); + // x29 (fp) is saved at [sp+0], x30 (lr) is saved at [sp+8] + self.push_lir(Aarch64Inst::Directive(Directive::cfi_offset( + "x29", + -total_frame, + ))); + self.push_lir(Aarch64Inst::Directive(Directive::cfi_offset( + "x30", + -(total_frame - 8), + ))); + } + // Set up frame pointer: mov x29, sp + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::SP), + dst: fp, + }); + if self.emit_debug { + // CFA is now tracked by x29 + total_frame + self.push_lir(Aarch64Inst::Directive(Directive::cfi_def_cfa_register( + "x29", + ))); + } + + // Save callee-saved registers in pairs + let mut offset = 16; // Start after fp/lr + let mut i = 0; + while i < callee_saved.len() { + if i + 1 < callee_saved.len() { + self.push_lir(Aarch64Inst::Stp { + size: OperandSize::B64, + src1: callee_saved[i], + src2: callee_saved[i + 1], + addr: MemAddr::BaseOffset { + base: Reg::X29, // fp + offset, + }, + }); + if self.emit_debug { + let cfi_offset1 = -(total_frame - offset); + let cfi_offset2 = -(total_frame - offset - 8); + self.push_lir(Aarch64Inst::Directive(Directive::cfi_offset( + callee_saved[i].name64(), + cfi_offset1, + ))); + self.push_lir(Aarch64Inst::Directive(Directive::cfi_offset( + callee_saved[i + 1].name64(), + cfi_offset2, + ))); + } + i += 2; + } else { + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: callee_saved[i], + addr: MemAddr::BaseOffset { + base: Reg::X29, // fp + offset, + }, + }); + if self.emit_debug { + let cfi_offset = -(total_frame - offset); + self.push_lir(Aarch64Inst::Directive(Directive::cfi_offset( + callee_saved[i].name64(), + cfi_offset, + ))); + } + i += 1; + } + offset += 16; + } + } else { + // Minimal frame: stp x29, x30, [sp, #-16]! + self.push_lir(Aarch64Inst::Stp { + size: OperandSize::B64, + src1: fp, + src2: lr, + addr: MemAddr::PreIndex { + base: Reg::SP, + offset: -16, + }, + }); + if self.emit_debug { + self.push_lir(Aarch64Inst::Directive(Directive::cfi_def_cfa("sp", 16))); + self.push_lir(Aarch64Inst::Directive(Directive::cfi_offset("x29", -16))); + self.push_lir(Aarch64Inst::Directive(Directive::cfi_offset("x30", -8))); + } + // mov x29, sp + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::SP), + dst: fp, + }); + if self.emit_debug { + self.push_lir(Aarch64Inst::Directive(Directive::cfi_def_cfa_register( + "x29", + ))); + } + } + + // Store scratch register for later use in this function + let _ = scratch0; + + // AAPCS64: Detect if there's a hidden return pointer (sret) for large struct returns. + // Unlike x86-64, AAPCS64 uses X8 for indirect result, which doesn't shift other args. + let has_sret = func + .pseudos + .iter() + .any(|p| matches!(p.kind, PseudoKind::Arg(0)) && p.name.as_deref() == Some("__sret")); + let arg_idx_offset: u32 = if has_sret { 1 } else { 0 }; + + // If there's a sret, spill X8 to stack if needed + if has_sret { + if let Some(sret) = func.pseudos.iter().find(|p| { + matches!(p.kind, PseudoKind::Arg(0)) && p.name.as_deref() == Some("__sret") + }) { + if let Some(Loc::Stack(offset)) = self.locations.get(&sret.id) { + if *offset < 0 { + let actual_offset = self.stack_offset(total_frame, *offset); + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: Reg::X8, + addr: MemAddr::BaseOffset { + base: Reg::X29, // fp + offset: actual_offset, + }, + }); + } + } + } + } + + // For variadic functions on Linux/FreeBSD, save ALL argument registers to the register save area + // This must be done BEFORE any code uses these registers, so va_arg can access them + // AAPCS64: x0-x7 are saved at reg_save_area_offset from FP + // On Darwin, variadic args are passed on the stack by the caller, so we skip this. + if is_variadic && !is_darwin { + let arg_regs = Reg::arg_regs(); + for (i, reg) in arg_regs.iter().enumerate() { + // Each register at offset: reg_save_area_offset + (i * 8) + let offset = self.reg_save_area_offset + (i as i32 * 8); + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: *reg, + addr: MemAddr::BaseOffset { + base: Reg::X29, // fp + offset, + }, + }); + } + } + + // Count fixed GP parameters for va_start + if is_variadic { + self.num_fixed_gp_params = func + .params + .iter() + .filter(|(_, typ)| !typ.is_float()) + .count(); + } + + // Move arguments from registers to their allocated locations if needed + // Note: On AAPCS64, sret uses X8, so regular args still start at X0 + let arg_regs = Reg::arg_regs(); + for (i, (_name, _typ)) in func.params.iter().enumerate() { + if i < arg_regs.len() { + // Find the pseudo for this argument + for pseudo in &func.pseudos { + if let PseudoKind::Arg(arg_idx) = pseudo.kind { + // With sret, params have arg_idx = i + 1, but still use arg_regs[i] + if arg_idx == (i as u32) + arg_idx_offset { + if let Some(Loc::Stack(offset)) = self.locations.get(&pseudo.id) { + // Move from arg register to stack + if *offset < 0 { + let actual_offset = self.stack_offset(total_frame, *offset); + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: arg_regs[i], + addr: MemAddr::BaseOffset { + base: Reg::X29, // fp + offset: actual_offset, + }, + }); + } + } + break; + } + } + } + } + } + + // Store frame size for epilogue + let frame_info = (total_frame, callee_saved.clone()); + + // Emit basic blocks + for block in &func.blocks { + self.emit_block(block, &frame_info); + } + + // CFI: End procedure + if self.emit_unwind_tables { + self.push_lir(Aarch64Inst::Directive(Directive::CfiEndProc)); + } + } + + fn emit_block(&mut self, block: &crate::ir::BasicBlock, frame_info: &(i32, Vec)) { + // Emit block label using LIR (include function name for uniqueness) + if let Some(label) = &block.label { + // LIR: named block label (using Raw since format differs from standard) + self.push_lir(Aarch64Inst::Directive(Directive::Raw(format!( + ".L_{}_{}:", + self.current_fn, label + )))); + } else { + // LIR: numbered block label + self.push_lir(Aarch64Inst::Directive(Directive::BlockLabel(Label::new( + &self.current_fn, + block.id.0, + )))); + } + + // Emit instructions + for insn in &block.insns { + self.emit_insn(insn, frame_info); + } + } + + fn emit_insn(&mut self, insn: &Instruction, frame_info: &(i32, Vec)) { + // Emit .loc directive for debug info + self.emit_loc(insn); + + let (total_frame, callee_saved) = frame_info; + + match insn.op { + Opcode::Entry => { + // Already handled in function prologue + } + + Opcode::Ret => { + // Move return value to x0 (integer) or v0 (float) if present + if let Some(&src) = insn.src.first() { + let src_loc = self.get_location(src); + let is_fp = matches!(src_loc, Loc::VReg(_) | Loc::FImm(_)); + + if is_fp { + // FP return value goes in V0 + self.emit_fp_move(src, VReg::V0, insn.size, *total_frame); + } else { + // Integer return value goes in X0 + self.emit_move(src, Reg::X0, insn.size, *total_frame); + } + } + + // Epilogue: reset SP to FP first (required for alloca support, since + // alloca may have moved SP during function execution) + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::X29), + dst: Reg::SP, + }); + + // Restore callee-saved registers (now safe since SP == FP) + if *total_frame > 16 { + let mut offset = 16; + let mut i = 0; + while i < callee_saved.len() { + if i + 1 < callee_saved.len() { + // LIR: ldp pair restore + self.push_lir(Aarch64Inst::Ldp { + size: OperandSize::B64, + addr: MemAddr::BaseOffset { + base: Reg::sp(), + offset, + }, + dst1: callee_saved[i], + dst2: callee_saved[i + 1], + }); + i += 2; + } else { + // LIR: ldr single restore + self.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B64, + addr: MemAddr::BaseOffset { + base: Reg::sp(), + offset, + }, + dst: callee_saved[i], + }); + i += 1; + } + offset += 16; + } + } + + // Restore fp/lr and deallocate stack + let fp = Reg::fp(); + let lr = Reg::lr(); + let dealloc = if *total_frame > 0 { *total_frame } else { 16 }; + // LIR: ldp with post-increment to restore fp/lr and deallocate + self.push_lir(Aarch64Inst::Ldp { + size: OperandSize::B64, + addr: MemAddr::PostIndex { + base: Reg::sp(), + offset: dealloc, + }, + dst1: fp, + dst2: lr, + }); + // LIR: ret + self.push_lir(Aarch64Inst::Ret); + } + + Opcode::Br => { + if let Some(target) = insn.bb_true { + // LIR: unconditional branch + self.push_lir(Aarch64Inst::B { + target: Label::new(&self.current_fn, target.0), + }); + } + } + + Opcode::Cbr => { + if let Some(&cond) = insn.src.first() { + let loc = self.get_location(cond); + let (scratch0, _) = Reg::scratch_regs(); + match &loc { + Loc::Reg(r) => { + // LIR: compare register with zero + self.push_lir(Aarch64Inst::Cmp { + size: OperandSize::B64, + src1: *r, + src2: GpOperand::Imm(0), + }); + } + Loc::Stack(offset) => { + let actual_offset = self.stack_offset(*total_frame, *offset); + // LIR: load from stack (FP-relative for alloca safety) + self.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B64, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: actual_offset, + }, + dst: scratch0, + }); + // LIR: compare with zero + self.push_lir(Aarch64Inst::Cmp { + size: OperandSize::B64, + src1: scratch0, + src2: GpOperand::Imm(0), + }); + } + Loc::Imm(v) => { + if *v != 0 { + if let Some(target) = insn.bb_true { + // LIR: unconditional branch (constant true) + self.push_lir(Aarch64Inst::B { + target: Label::new(&self.current_fn, target.0), + }); + } + return; + } else { + if let Some(target) = insn.bb_false { + // LIR: unconditional branch (constant false) + self.push_lir(Aarch64Inst::B { + target: Label::new(&self.current_fn, target.0), + }); + } + return; + } + } + Loc::Global(name) => { + self.emit_load_global(name, scratch0, OperandSize::B64); + // LIR: compare with zero + self.push_lir(Aarch64Inst::Cmp { + size: OperandSize::B64, + src1: scratch0, + src2: GpOperand::Imm(0), + }); + } + Loc::VReg(v) => { + // LIR: compare FP register with zero + self.push_lir(Aarch64Inst::FcmpZero { + size: FpSize::Double, + src: *v, + }); + } + Loc::FImm(f) => { + // FP immediate as condition - branch based on non-zero + if *f != 0.0 { + if let Some(target) = insn.bb_true { + // LIR: unconditional branch (constant true) + self.push_lir(Aarch64Inst::B { + target: Label::new(&self.current_fn, target.0), + }); + } + return; + } else { + if let Some(target) = insn.bb_false { + // LIR: unconditional branch (constant false) + self.push_lir(Aarch64Inst::B { + target: Label::new(&self.current_fn, target.0), + }); + } + return; + } + } + } + if let Some(target) = insn.bb_true { + // LIR: conditional branch (not equal = true) + self.push_lir(Aarch64Inst::BCond { + cond: Cond::Ne, + target: Label::new(&self.current_fn, target.0), + }); + } + if let Some(target) = insn.bb_false { + // LIR: unconditional branch (fallthrough to false) + self.push_lir(Aarch64Inst::B { + target: Label::new(&self.current_fn, target.0), + }); + } + } + } + + Opcode::Switch => { + // Switch uses target as the value to switch on + if let Some(val) = insn.target { + let loc = self.get_location(val); + let (scratch0, scratch1) = Reg::scratch_regs(); + let size = insn.size.max(32); + let op_size = OperandSize::from_bits(size); + + // Move switch value to scratch0 + match &loc { + Loc::Reg(r) => { + if *r != scratch0 { + // LIR: move register to scratch + self.push_lir(Aarch64Inst::Mov { + size: op_size, + src: GpOperand::Reg(*r), + dst: scratch0, + }); + } + } + Loc::Stack(offset) => { + let actual_offset = self.stack_offset(*total_frame, *offset); + // LIR: load from stack (FP-relative for alloca safety) + self.push_lir(Aarch64Inst::Ldr { + size: op_size, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: actual_offset, + }, + dst: scratch0, + }); + } + Loc::Imm(v) => { + // LIR: move immediate + self.push_lir(Aarch64Inst::Mov { + size: op_size, + src: GpOperand::Imm(*v), + dst: scratch0, + }); + } + Loc::Global(name) => { + self.emit_load_global(name, scratch0, op_size); + } + Loc::VReg(_) | Loc::FImm(_) => { + // FP values shouldn't be used in switch statements + // This is unreachable in valid C code + } + } + + // Generate comparisons for each case + for (case_val, target_bb) in &insn.switch_cases { + // Load case value to scratch1 if it doesn't fit in immediate + if *case_val >= 0 && *case_val < 4096 { + // LIR: compare with immediate + self.push_lir(Aarch64Inst::Cmp { + size: op_size, + src1: scratch0, + src2: GpOperand::Imm(*case_val), + }); + } else { + // LIR: load large constant + self.push_lir(Aarch64Inst::Mov { + size: op_size, + src: GpOperand::Imm(*case_val), + dst: scratch1, + }); + // LIR: compare registers + self.push_lir(Aarch64Inst::Cmp { + size: op_size, + src1: scratch0, + src2: GpOperand::Reg(scratch1), + }); + } + // LIR: conditional branch on equal + self.push_lir(Aarch64Inst::BCond { + cond: Cond::Eq, + target: Label::new(&self.current_fn, target_bb.0), + }); + } + + // Jump to default + if let Some(default_bb) = insn.switch_default { + // LIR: unconditional branch to default + self.push_lir(Aarch64Inst::B { + target: Label::new(&self.current_fn, default_bb.0), + }); + } + } + } + + Opcode::Add + | Opcode::Sub + | Opcode::And + | Opcode::Or + | Opcode::Xor + | Opcode::Shl + | Opcode::Lsr + | Opcode::Asr => { + self.emit_binop(insn, *total_frame); + } + + Opcode::Mul => { + self.emit_mul(insn, *total_frame); + } + + Opcode::DivS | Opcode::DivU | Opcode::ModS | Opcode::ModU => { + self.emit_div(insn, *total_frame); + } + + Opcode::SetEq + | Opcode::SetNe + | Opcode::SetLt + | Opcode::SetLe + | Opcode::SetGt + | Opcode::SetGe + | Opcode::SetB + | Opcode::SetBe + | Opcode::SetA + | Opcode::SetAe => { + self.emit_compare(insn, *total_frame); + } + + Opcode::Neg => { + self.emit_neg(insn, *total_frame); + } + + Opcode::Not => { + self.emit_not(insn, *total_frame); + } + + Opcode::Load => { + self.emit_load(insn, *total_frame); + } + + Opcode::Store => { + self.emit_store(insn, *total_frame); + } + + Opcode::Call => { + self.emit_call(insn, *total_frame); + } + + Opcode::SetVal => { + if let Some(target) = insn.target { + if let Some(pseudo) = self.pseudos.iter().find(|p| p.id == target) { + match self.locations.get(&target).cloned() { + Some(Loc::Reg(r)) => { + if let PseudoKind::Val(v) = &pseudo.kind { + self.emit_mov_imm(r, *v, insn.size); + } + } + Some(Loc::VReg(v)) => { + if let PseudoKind::FVal(f) = &pseudo.kind { + // Load FP constant using integer register + let (scratch0, _) = Reg::scratch_regs(); + let bits = if insn.size <= 32 { + (*f as f32).to_bits() as i64 + } else { + f.to_bits() as i64 + }; + self.emit_mov_imm(scratch0, bits, 64); + // LIR: fmov from GP to FP register + let fp_size = if insn.size <= 32 { + FpSize::Single + } else { + FpSize::Double + }; + self.push_lir(Aarch64Inst::FmovFromGp { + size: fp_size, + src: scratch0, + dst: v, + }); + } + } + _ => {} + } + } + } + } + + Opcode::Copy => { + if let (Some(target), Some(&src)) = (insn.target, insn.src.first()) { + // Pass the type for proper sign/zero extension + self.emit_copy_with_type( + src, + target, + insn.size, + insn.typ.as_ref(), + *total_frame, + ); + } + } + + Opcode::SymAddr => { + if let (Some(target), Some(&src)) = (insn.target, insn.src.first()) { + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X9, + }; + let src_loc = self.get_location(src); + match src_loc { + Loc::Global(name) => { + self.emit_load_addr(&name, dst_reg); + } + Loc::Stack(offset) => { + // Get address of stack location (FP-relative for alloca safety) + let adjusted = self.stack_offset(*total_frame, offset); + self.push_lir(Aarch64Inst::Add { + size: OperandSize::B64, + src1: Reg::X29, + src2: GpOperand::Imm(adjusted as i64), + dst: dst_reg, + }); + } + _ => {} + } + // Move to final destination if needed + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, 64, *total_frame); + } + } + } + + Opcode::Select => { + self.emit_select(insn, *total_frame); + } + + Opcode::Zext | Opcode::Sext | Opcode::Trunc => { + self.emit_extend(insn, *total_frame); + } + + // Floating-point arithmetic operations + Opcode::FAdd | Opcode::FSub | Opcode::FMul | Opcode::FDiv => { + self.emit_fp_binop(insn, *total_frame); + } + + // Floating-point negation + Opcode::FNeg => { + self.emit_fp_neg(insn, *total_frame); + } + + // Floating-point comparisons + Opcode::FCmpOEq + | Opcode::FCmpONe + | Opcode::FCmpOLt + | Opcode::FCmpOLe + | Opcode::FCmpOGt + | Opcode::FCmpOGe => { + self.emit_fp_compare(insn, *total_frame); + } + + // Int to float conversions + Opcode::UCvtF | Opcode::SCvtF => { + self.emit_int_to_float(insn, *total_frame); + } + + // Float to int conversions + Opcode::FCvtU | Opcode::FCvtS => { + self.emit_float_to_int(insn, *total_frame); + } + + // Float to float conversions (size changes) + Opcode::FCvtF => { + self.emit_float_to_float(insn, *total_frame); + } + + // ================================================================ + // Variadic function support (va_* builtins) + // ================================================================ + Opcode::VaStart => { + self.emit_va_start(insn, *total_frame); + } + + Opcode::VaArg => { + self.emit_va_arg(insn, *total_frame); + } + + Opcode::VaEnd => { + // va_end is a no-op on all platforms + // The C standard says it must be called, but it does nothing + } + + Opcode::VaCopy => { + self.emit_va_copy(insn, *total_frame); + } + + // ================================================================ + // Byte-swapping builtins + // ================================================================ + Opcode::Bswap16 => { + self.emit_bswap16(insn, *total_frame); + } + + Opcode::Bswap32 => { + self.emit_bswap32(insn, *total_frame); + } + + Opcode::Bswap64 => { + self.emit_bswap64(insn, *total_frame); + } + + Opcode::Alloca => { + self.emit_alloca(insn, *total_frame); + } + + // Skip no-ops and unimplemented + _ => {} + } + } + + fn get_location(&self, pseudo: PseudoId) -> Loc { + self.locations.get(&pseudo).cloned().unwrap_or(Loc::Imm(0)) + } + + /// Load address of a global symbol into a register + fn emit_load_addr(&mut self, name: &str, dst: Reg) { + // Local labels (starting with '.') don't get the _ prefix on macOS + let sym = if name.starts_with('.') { + Symbol::local(name) + } else { + Symbol::global(name) + }; + + // ADRP + ADD sequence for PIC address loading + self.push_lir(Aarch64Inst::Adrp { + sym: sym.clone(), + dst, + }); + self.push_lir(Aarch64Inst::AddSymOffset { + sym, + base: dst, + dst, + }); + } + + /// Load value of a global symbol into a register with specified size + fn emit_load_global(&mut self, name: &str, dst: Reg, size: OperandSize) { + // Local labels (starting with '.') don't get the _ prefix on macOS + let sym = if name.starts_with('.') { + Symbol::local(name) + } else { + Symbol::global(name) + }; + + // ADRP + LDR sequence for PIC value loading + self.push_lir(Aarch64Inst::Adrp { + sym: sym.clone(), + dst, + }); + self.push_lir(Aarch64Inst::LdrSymOffset { + size, + sym, + base: dst, + dst, + }); + } + + /// Move immediate value to register + fn emit_mov_imm(&mut self, dst: Reg, val: i64, size: u32) { + let op_size = OperandSize::from_bits(size.max(32)); + + // AArch64 can only move 16-bit immediates directly + // For larger values, we need movz + movk sequence + if (0..=0xFFFF).contains(&val) { + // LIR: simple mov immediate + self.push_lir(Aarch64Inst::Mov { + size: op_size, + src: GpOperand::Imm(val), + dst, + }); + } else if (-0x8000..0).contains(&val) { + // Small negative number - use mov (assembler handles movn) + self.push_lir(Aarch64Inst::Mov { + size: op_size, + src: GpOperand::Imm(val), + dst, + }); + } else { + // Use movz + movk for larger values + let uval = val as u64; + // LIR: movz base + self.push_lir(Aarch64Inst::Movz { + size: OperandSize::B64, + imm: (uval & 0xFFFF) as u16, + shift: 0, + dst, + }); + if (uval >> 16) & 0xFFFF != 0 { + // LIR: movk shift 16 + self.push_lir(Aarch64Inst::Movk { + size: OperandSize::B64, + imm: ((uval >> 16) & 0xFFFF) as u16, + shift: 16, + dst, + }); + } + if (uval >> 32) & 0xFFFF != 0 { + // LIR: movk shift 32 + self.push_lir(Aarch64Inst::Movk { + size: OperandSize::B64, + imm: ((uval >> 32) & 0xFFFF) as u16, + shift: 32, + dst, + }); + } + if (uval >> 48) & 0xFFFF != 0 { + // LIR: movk shift 48 + self.push_lir(Aarch64Inst::Movk { + size: OperandSize::B64, + imm: ((uval >> 48) & 0xFFFF) as u16, + shift: 48, + dst, + }); + } + } + } + + fn emit_move(&mut self, src: PseudoId, dst: Reg, size: u32, frame_size: i32) { + let size = size.max(32); + let loc = self.get_location(src); + let op_size = OperandSize::from_bits(size); + + match loc { + Loc::Reg(r) if r == dst => {} + Loc::Reg(r) => { + // LIR: mov register to register + self.push_lir(Aarch64Inst::Mov { + size: op_size, + src: GpOperand::Reg(r), + dst, + }); + } + Loc::Stack(offset) => { + let actual_offset = self.stack_offset(frame_size, offset); + // LIR: load from stack (FP-relative for alloca safety) + self.push_lir(Aarch64Inst::Ldr { + size: op_size, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: actual_offset, + }, + dst, + }); + } + Loc::Imm(v) => { + self.emit_mov_imm(dst, v, size); + } + Loc::Global(name) => { + let load_size = OperandSize::from_bits(size.max(32)); + self.emit_load_global(&name, dst, load_size); + } + Loc::VReg(v) => { + // LIR: fmov from FP to GP register + let fp_size = if size <= 32 { + FpSize::Single + } else { + FpSize::Double + }; + self.push_lir(Aarch64Inst::FmovToGp { + size: fp_size, + src: v, + dst, + }); + } + Loc::FImm(f) => { + // Load FP immediate as integer bits + let bits = if size <= 32 { + (f as f32).to_bits() as i64 + } else { + f.to_bits() as i64 + }; + self.emit_mov_imm(dst, bits, size); + } + } + } + + fn emit_move_to_loc(&mut self, src: Reg, dst: &Loc, size: u32, frame_size: i32) { + // For stack stores, use actual size to properly handle char/short + // For register-to-register, use minimum 32-bit + match dst { + Loc::Reg(r) if *r == src => {} + Loc::Reg(r) => { + let reg_size = size.max(32); + // LIR: mov register to register + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::from_bits(reg_size), + src: GpOperand::Reg(src), + dst: *r, + }); + } + Loc::Stack(offset) => { + let actual_offset = self.stack_offset(frame_size, *offset); + // LIR: store to stack (FP-relative for alloca safety) + let op_size = OperandSize::from_bits(size); + self.push_lir(Aarch64Inst::Str { + size: op_size, + src, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: actual_offset, + }, + }); + } + _ => {} + } + } + + fn emit_binop(&mut self, insn: &Instruction, frame_size: i32) { + let size = insn.size.max(32); + let op_size = OperandSize::from_bits(size); + let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { + (Some(&s1), Some(&s2)) => (s1, s2), + _ => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let dst_loc = self.get_location(target); + let work_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X9, + }; + + // Load first operand + self.emit_move(src1, work_reg, size, frame_size); + + // Get second operand as GpOperand + let src2_loc = self.get_location(src2); + let src2_operand = match &src2_loc { + Loc::Reg(r) => GpOperand::Reg(*r), + Loc::Imm(v) if *v >= 0 && *v <= 4095 => GpOperand::Imm(*v), + _ => { + self.emit_move(src2, Reg::X10, size, frame_size); + GpOperand::Reg(Reg::X10) + } + }; + + // Emit the appropriate LIR instruction + match insn.op { + Opcode::Add => self.push_lir(Aarch64Inst::Add { + size: op_size, + src1: work_reg, + src2: src2_operand, + dst: work_reg, + }), + Opcode::Sub => self.push_lir(Aarch64Inst::Sub { + size: op_size, + src1: work_reg, + src2: src2_operand, + dst: work_reg, + }), + Opcode::And => self.push_lir(Aarch64Inst::And { + size: op_size, + src1: work_reg, + src2: src2_operand, + dst: work_reg, + }), + Opcode::Or => self.push_lir(Aarch64Inst::Orr { + size: op_size, + src1: work_reg, + src2: src2_operand, + dst: work_reg, + }), + Opcode::Xor => self.push_lir(Aarch64Inst::Eor { + size: op_size, + src1: work_reg, + src2: src2_operand, + dst: work_reg, + }), + Opcode::Shl => self.push_lir(Aarch64Inst::Lsl { + size: op_size, + src: work_reg, + amount: src2_operand, + dst: work_reg, + }), + Opcode::Lsr => self.push_lir(Aarch64Inst::Lsr { + size: op_size, + src: work_reg, + amount: src2_operand, + dst: work_reg, + }), + Opcode::Asr => self.push_lir(Aarch64Inst::Asr { + size: op_size, + src: work_reg, + amount: src2_operand, + dst: work_reg, + }), + _ => return, + } + + if !matches!(&dst_loc, Loc::Reg(r) if *r == work_reg) { + self.emit_move_to_loc(work_reg, &dst_loc, size, frame_size); + } + } + + fn emit_mul(&mut self, insn: &Instruction, frame_size: i32) { + let size = insn.size.max(32); + let op_size = OperandSize::from_bits(size); + let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { + (Some(&s1), Some(&s2)) => (s1, s2), + _ => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X9, + }; + + self.emit_move(src1, Reg::X10, size, frame_size); + self.emit_move(src2, Reg::X11, size, frame_size); + + self.push_lir(Aarch64Inst::Mul { + size: op_size, + src1: Reg::X10, + src2: Reg::X11, + dst: dst_reg, + }); + + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, size, frame_size); + } + } + + fn emit_div(&mut self, insn: &Instruction, frame_size: i32) { + let size = insn.size.max(32); + let op_size = OperandSize::from_bits(size); + let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { + (Some(&s1), Some(&s2)) => (s1, s2), + _ => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + + self.emit_move(src1, Reg::X10, size, frame_size); + self.emit_move(src2, Reg::X11, size, frame_size); + + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X9, + }; + + // Division instruction + match insn.op { + Opcode::DivS | Opcode::ModS => self.push_lir(Aarch64Inst::Sdiv { + size: op_size, + src1: Reg::X10, + src2: Reg::X11, + dst: dst_reg, + }), + Opcode::DivU | Opcode::ModU => self.push_lir(Aarch64Inst::Udiv { + size: op_size, + src1: Reg::X10, + src2: Reg::X11, + dst: dst_reg, + }), + _ => return, + } + + // For modulo, compute remainder: r = n - (n / d) * d + // Using msub: msub Rd, Rm, Rn, Ra -> Rd = Ra - Rm * Rn + if matches!(insn.op, Opcode::ModS | Opcode::ModU) { + // dst_reg now has quotient, compute: src1 - quotient * src2 + self.push_lir(Aarch64Inst::Msub { + size: op_size, + mul1: dst_reg, + mul2: Reg::X11, + sub: Reg::X10, + dst: dst_reg, + }); + } + + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, size, frame_size); + } + } + + fn emit_compare(&mut self, insn: &Instruction, frame_size: i32) { + let size = insn.size.max(32); + let op_size = OperandSize::from_bits(size); + let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { + (Some(&s1), Some(&s2)) => (s1, s2), + _ => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + + self.emit_move(src1, Reg::X10, size, frame_size); + + // Try to use immediate for comparison if possible + let src2_loc = self.get_location(src2); + let src2_operand = match &src2_loc { + Loc::Imm(v) if *v >= 0 && *v <= 4095 => GpOperand::Imm(*v), + _ => { + self.emit_move(src2, Reg::X11, size, frame_size); + GpOperand::Reg(Reg::X11) + } + }; + + self.push_lir(Aarch64Inst::Cmp { + size: op_size, + src1: Reg::X10, + src2: src2_operand, + }); + + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X9, + }; + + // Use cset to set register based on condition + let cond = match insn.op { + Opcode::SetEq => Cond::Eq, + Opcode::SetNe => Cond::Ne, + Opcode::SetLt => Cond::Lt, + Opcode::SetLe => Cond::Le, + Opcode::SetGt => Cond::Gt, + Opcode::SetGe => Cond::Ge, + Opcode::SetB => Cond::Cc, // unsigned less than (lo) + Opcode::SetBe => Cond::Ls, // unsigned less than or equal + Opcode::SetA => Cond::Hi, // unsigned greater than + Opcode::SetAe => Cond::Cs, // unsigned greater than or equal (hs) + _ => return, + }; + + self.push_lir(Aarch64Inst::Cset { cond, dst: dst_reg }); + + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, size, frame_size); + } + } + + fn emit_neg(&mut self, insn: &Instruction, frame_size: i32) { + let size = insn.size.max(32); + let op_size = OperandSize::from_bits(size); + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let dst_loc = self.get_location(target); + let work_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X9, + }; + + self.emit_move(src, work_reg, size, frame_size); + self.push_lir(Aarch64Inst::Neg { + size: op_size, + src: work_reg, + dst: work_reg, + }); + + if !matches!(&dst_loc, Loc::Reg(r) if *r == work_reg) { + self.emit_move_to_loc(work_reg, &dst_loc, size, frame_size); + } + } + + fn emit_not(&mut self, insn: &Instruction, frame_size: i32) { + let size = insn.size.max(32); + let op_size = OperandSize::from_bits(size); + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let dst_loc = self.get_location(target); + let work_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X9, + }; + + self.emit_move(src, work_reg, size, frame_size); + self.push_lir(Aarch64Inst::Mvn { + size: op_size, + src: work_reg, + dst: work_reg, + }); + + if !matches!(&dst_loc, Loc::Reg(r) if *r == work_reg) { + self.emit_move_to_loc(work_reg, &dst_loc, size, frame_size); + } + } + + fn emit_load(&mut self, insn: &Instruction, frame_size: i32) { + let mem_size = insn.size; + let reg_size = insn.size.max(32); + let addr = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let dst_loc = self.get_location(target); + + // Check if this is an FP load + let is_fp = + insn.typ.as_ref().is_some_and(|t| t.is_float()) || matches!(dst_loc, Loc::VReg(_)); + + if is_fp { + self.emit_fp_load(insn, frame_size); + return; + } + + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X9, + }; + + // Determine if we need sign or zero extension for small types + // For plain char, use target.char_signed to determine signedness + let is_unsigned = insn.typ.as_ref().is_some_and(|t| { + if t.is_unsigned() { + true + } else if t.is_plain_char() { + // Plain char: unsigned if target says char is not signed + !self.target.char_signed + } else { + false + } + }); + + // Helper to emit the appropriate load instruction + let emit_load_lir = |this: &mut Self, mem_addr: MemAddr| match mem_size { + 8 if is_unsigned => { + this.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B8, + addr: mem_addr, + dst: dst_reg, + }); + } + 8 => { + this.push_lir(Aarch64Inst::Ldrs { + src_size: OperandSize::B8, + dst_size: OperandSize::from_bits(reg_size), + addr: mem_addr, + dst: dst_reg, + }); + } + 16 if is_unsigned => { + this.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B16, + addr: mem_addr, + dst: dst_reg, + }); + } + 16 => { + this.push_lir(Aarch64Inst::Ldrs { + src_size: OperandSize::B16, + dst_size: OperandSize::from_bits(reg_size), + addr: mem_addr, + dst: dst_reg, + }); + } + _ => { + this.push_lir(Aarch64Inst::Ldr { + size: OperandSize::from_bits(mem_size), + addr: mem_addr, + dst: dst_reg, + }); + } + }; + + let addr_loc = self.get_location(addr); + match addr_loc { + Loc::Reg(r) => { + let mem_addr = MemAddr::BaseOffset { + base: r, + offset: insn.offset as i32, + }; + emit_load_lir(self, mem_addr); + } + Loc::Stack(offset) => { + // Check if the address operand is a symbol (local variable) or a temp (spilled address) + let is_symbol = self + .pseudos + .iter() + .find(|p| p.id == addr) + .is_some_and(|p| matches!(p.kind, PseudoKind::Sym(_))); + + if is_symbol { + // Local variable - load directly from stack slot (FP-relative for alloca safety) + let total_offset = self.stack_offset(frame_size, offset) + insn.offset as i32; + let mem_addr = MemAddr::BaseOffset { + base: Reg::X29, + offset: total_offset, + }; + emit_load_lir(self, mem_addr); + } else { + // Spilled address - load address first (FP-relative for alloca safety) + let adjusted = self.stack_offset(frame_size, offset); + self.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B64, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: adjusted, + }, + dst: Reg::X16, + }); + let mem_addr = MemAddr::BaseOffset { + base: Reg::X16, + offset: insn.offset as i32, + }; + emit_load_lir(self, mem_addr); + } + } + Loc::Global(name) => { + // Load from global with correct size + let load_size = OperandSize::from_bits(mem_size); + self.emit_load_global(&name, dst_reg, load_size); + } + _ => { + self.emit_move(addr, Reg::X16, 64, frame_size); + let mem_addr = MemAddr::BaseOffset { + base: Reg::X16, + offset: insn.offset as i32, + }; + emit_load_lir(self, mem_addr); + } + } + + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, reg_size, frame_size); + } + } + + /// Emit a floating-point load instruction + fn emit_fp_load(&mut self, insn: &Instruction, frame_size: i32) { + let size = insn.size.max(32); + let addr = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let dst_loc = self.get_location(target); + let dst_vreg = match &dst_loc { + Loc::VReg(v) => *v, + _ => VReg::V17, + }; + let addr_loc = self.get_location(addr); + + let fp_size = if size <= 32 { + FpSize::Single + } else { + FpSize::Double + }; + + match addr_loc { + Loc::Reg(r) => { + self.push_lir(Aarch64Inst::LdrFp { + size: fp_size, + addr: MemAddr::BaseOffset { + base: r, + offset: insn.offset as i32, + }, + dst: dst_vreg, + }); + } + Loc::Stack(offset) => { + // Check if the address operand is a symbol (local variable) or a temp (spilled address) + let is_symbol = self + .pseudos + .iter() + .find(|p| p.id == addr) + .is_some_and(|p| matches!(p.kind, PseudoKind::Sym(_))); + + if is_symbol { + // Local variable - load directly from stack slot (FP-relative for alloca safety) + let total_offset = self.stack_offset(frame_size, offset) + insn.offset as i32; + self.push_lir(Aarch64Inst::LdrFp { + size: fp_size, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: total_offset, + }, + dst: dst_vreg, + }); + } else { + // Spilled address - load address first (FP-relative for alloca safety) + let adjusted = self.stack_offset(frame_size, offset); + self.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B64, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: adjusted, + }, + dst: Reg::X16, + }); + self.push_lir(Aarch64Inst::LdrFp { + size: fp_size, + addr: MemAddr::BaseOffset { + base: Reg::X16, + offset: insn.offset as i32, + }, + dst: dst_vreg, + }); + } + } + Loc::Global(name) => { + // Load FP value from global using ADRP + LDR + let sym = if name.starts_with('.') { + Symbol::local(&name) + } else { + Symbol::global(&name) + }; + let (scratch0, _) = Reg::scratch_regs(); + self.push_lir(Aarch64Inst::Adrp { + sym: sym.clone(), + dst: scratch0, + }); + self.push_lir(Aarch64Inst::LdrFpSymOffset { + size: fp_size, + sym, + base: scratch0, + dst: dst_vreg, + }); + } + _ => { + self.emit_move(addr, Reg::X16, 64, frame_size); + self.push_lir(Aarch64Inst::LdrFp { + size: fp_size, + addr: MemAddr::BaseOffset { + base: Reg::X16, + offset: insn.offset as i32, + }, + dst: dst_vreg, + }); + } + } + + // Move to final destination if needed + if !matches!(&dst_loc, Loc::VReg(v) if *v == dst_vreg) { + self.emit_fp_move_to_loc(dst_vreg, &dst_loc, size, frame_size); + } + } + + fn emit_store(&mut self, insn: &Instruction, frame_size: i32) { + // Use actual size for memory stores (8, 16, 32, 64 bits) + let mem_size = insn.size; + let reg_size = insn.size.max(32); + + let (addr, value) = match (insn.src.first(), insn.src.get(1)) { + (Some(&a), Some(&v)) => (a, v), + _ => return, + }; + + // For struct stores (size > 64), we need to copy multiple words + // The value is a symbol containing the struct data + if mem_size > 64 { + self.emit_struct_store(insn, addr, value, frame_size); + return; + } + + self.emit_move(value, Reg::X9, reg_size, frame_size); + + // Get the store size based on mem_size + let store_size = OperandSize::from_bits(mem_size); + + // Helper to emit store instruction + let emit_store_lir = |this: &mut Self, mem_addr: MemAddr| { + this.push_lir(Aarch64Inst::Str { + size: store_size, + src: Reg::X9, + addr: mem_addr, + }); + }; + + let addr_loc = self.get_location(addr); + match addr_loc { + Loc::Reg(r) => { + let mem_addr = MemAddr::BaseOffset { + base: r, + offset: insn.offset as i32, + }; + emit_store_lir(self, mem_addr); + } + Loc::Stack(offset) => { + // Check if the address operand is a symbol (local variable) or a temp (spilled address) + let is_symbol = self + .pseudos + .iter() + .find(|p| p.id == addr) + .is_some_and(|p| matches!(p.kind, PseudoKind::Sym(_))); + + if is_symbol { + // Local variable - store directly to stack slot (FP-relative for alloca safety) + let total_offset = self.stack_offset(frame_size, offset) + insn.offset as i32; + let mem_addr = MemAddr::BaseOffset { + base: Reg::X29, + offset: total_offset, + }; + emit_store_lir(self, mem_addr); + } else { + // Spilled address - load address first (FP-relative for alloca safety) + let adjusted = self.stack_offset(frame_size, offset); + self.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B64, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: adjusted, + }, + dst: Reg::X16, + }); + let mem_addr = MemAddr::BaseOffset { + base: Reg::X16, + offset: insn.offset as i32, + }; + emit_store_lir(self, mem_addr); + } + } + Loc::Global(name) => { + self.emit_load_addr(&name, Reg::X16); + let mem_addr = MemAddr::Base(Reg::X16); + emit_store_lir(self, mem_addr); + } + _ => { + self.emit_move(addr, Reg::X16, 64, frame_size); + let mem_addr = MemAddr::BaseOffset { + base: Reg::X16, + offset: insn.offset as i32, + }; + emit_store_lir(self, mem_addr); + } + } + } + + /// Emit a struct copy (store of size > 64 bits) + /// The value is a symbol containing the source struct data + fn emit_struct_store( + &mut self, + insn: &Instruction, + addr: PseudoId, + value: PseudoId, + frame_size: i32, + ) { + let struct_size = insn.size; // Size in bits + let num_qwords = (struct_size + 63) / 64; + + // Get source address (where the struct data is) + let value_loc = self.get_location(value); + // Get destination address + let addr_loc = self.get_location(addr); + + // Load source address into X16 (FP-relative for alloca safety) + match value_loc { + Loc::Stack(offset) => { + let total_offset = self.stack_offset(frame_size, offset); + self.push_lir(Aarch64Inst::Add { + size: OperandSize::B64, + src1: Reg::X29, + src2: GpOperand::Imm(total_offset as i64), + dst: Reg::X16, + }); + } + Loc::Reg(r) => { + if r != Reg::X16 { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(r), + dst: Reg::X16, + }); + } + } + Loc::Global(ref name) => { + self.emit_load_addr(name, Reg::X16); + } + _ => return, + } + + // Load destination address into X17 (FP-relative for alloca safety) + match addr_loc { + Loc::Stack(offset) => { + let total_offset = self.stack_offset(frame_size, offset) + insn.offset as i32; + self.push_lir(Aarch64Inst::Add { + size: OperandSize::B64, + src1: Reg::X29, + src2: GpOperand::Imm(total_offset as i64), + dst: Reg::X17, + }); + } + Loc::Reg(r) => { + if insn.offset != 0 { + self.push_lir(Aarch64Inst::Add { + size: OperandSize::B64, + src1: r, + src2: GpOperand::Imm(insn.offset), + dst: Reg::X17, + }); + } else if r != Reg::X17 { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(r), + dst: Reg::X17, + }); + } + } + Loc::Global(ref name) => { + self.emit_load_addr(name, Reg::X17); + } + _ => return, + } + + // Copy qword by qword using X9 as temp + for i in 0..num_qwords { + let byte_offset = (i * 8) as i32; + self.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B64, + addr: MemAddr::BaseOffset { + base: Reg::X16, + offset: byte_offset, + }, + dst: Reg::X9, + }); + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: Reg::X9, + addr: MemAddr::BaseOffset { + base: Reg::X17, + offset: byte_offset, + }, + }); + } + } + + fn emit_call(&mut self, insn: &Instruction, frame_size: i32) { + let func_name = match &insn.func_name { + Some(n) => n.clone(), + None => return, + }; + + // AAPCS64 calling convention: + // - Integer arguments: X0-X7 (8 registers) + // - Floating-point arguments: V0-V7 (8 registers) + // - Indirect result (large struct return): X8 + // Each class has its own register allocation + // + // Apple ARM64 (Darwin) divergence for variadic functions: + // ALL variadic arguments (after the last named parameter) must be passed + // on the stack, not in registers. This differs from standard AAPCS64. + let int_arg_regs = Reg::arg_regs(); + let fp_arg_regs = VReg::arg_regs(); + + let mut int_arg_idx = 0; + let mut fp_arg_idx = 0; + let mut stack_args = 0; + + // For Darwin variadic calls, determine where variadic args start + let variadic_start = insn.variadic_arg_start.unwrap_or(usize::MAX); + let is_darwin_variadic = + self.target.os == crate::target::Os::MacOS && insn.variadic_arg_start.is_some(); + + // Check if this call returns a large struct + // If so, the first argument is the sret pointer and goes in X8 (not X0) + let returns_large_struct = insn.typ.as_ref().is_some_and(|t| { + (t.kind == crate::types::TypeKind::Struct || t.kind == crate::types::TypeKind::Union) + && t.size_bits() > MAX_REGISTER_AGGREGATE_BITS + }); + + // Also check if return type is a pointer to a large struct (linearizer wraps it) + let returns_large_struct = returns_large_struct + || insn.typ.as_ref().is_some_and(|t| { + if let Some(pointee) = t.get_base() { + (pointee.kind == crate::types::TypeKind::Struct + || pointee.kind == crate::types::TypeKind::Union) + && pointee.size_bits() > MAX_REGISTER_AGGREGATE_BITS + } else { + false + } + }); + + let args_start = if returns_large_struct && !insn.src.is_empty() { + // First argument is sret pointer - move to X8 + self.emit_move(insn.src[0], Reg::X8, 64, frame_size); + 1 // Skip first arg in main loop + } else { + 0 + }; + + // For Darwin variadic calls, we need to: + // 1. First pass fixed args in registers as normal + // 2. Allocate stack space for variadic args (with 16-byte alignment) + // 3. Store variadic args at 8-byte offsets from sp (like clang does) + if is_darwin_variadic { + // Collect variadic args and fixed args separately + let mut variadic_args: Vec<(PseudoId, bool, u32)> = Vec::new(); + + // Process all arguments + for (i, &arg) in insn.src.iter().enumerate().skip(args_start) { + let arg_type = insn.arg_types.get(i); + let is_fp = if let Some(typ) = arg_type { + typ.is_float() + } else { + let arg_loc = self.get_location(arg); + matches!(arg_loc, Loc::VReg(_) | Loc::FImm(_)) + }; + + let arg_size = if let Some(typ) = arg_type { + typ.size_bits().max(32) + } else { + 64 + }; + + if i >= variadic_start { + // Variadic arg - collect for stack storing + variadic_args.push((arg, is_fp, arg_size)); + } else { + // Fixed arg - use registers as normal + if is_fp { + let fp_size = if let Some(typ) = arg_type { + typ.size_bits() + } else { + 64 + }; + if fp_arg_idx < fp_arg_regs.len() { + self.emit_fp_move(arg, fp_arg_regs[fp_arg_idx], fp_size, frame_size); + fp_arg_idx += 1; + } + } else if int_arg_idx < int_arg_regs.len() { + self.emit_move(arg, int_arg_regs[int_arg_idx], arg_size, frame_size); + int_arg_idx += 1; + } + } + } + + // Calculate stack space for variadic args: 8 bytes each, rounded up to 16 + let num_variadic = variadic_args.len(); + if num_variadic > 0 { + let variadic_bytes = (num_variadic * 8) as i32; + let aligned_bytes = (variadic_bytes + 15) & !15; // Round up to 16 + + // Allocate stack space for variadic args + self.push_lir(Aarch64Inst::Sub { + size: OperandSize::B64, + src1: Reg::sp(), + src2: GpOperand::Imm(aligned_bytes as i64), + dst: Reg::sp(), + }); + + // Store variadic args at 8-byte offsets from sp + // First variadic arg at [sp+0], second at [sp+8], etc. + for (idx, (arg, is_fp, arg_size)) in variadic_args.into_iter().enumerate() { + let offset = (idx * 8) as i32; + if is_fp { + self.emit_fp_move(arg, VReg::V16, arg_size, frame_size); + self.push_lir(Aarch64Inst::StrFp { + size: FpSize::Double, + src: VReg::V16, + addr: MemAddr::BaseOffset { + base: Reg::SP, + offset, + }, + }); + } else { + self.emit_move(arg, Reg::X9, arg_size, frame_size); + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: Reg::X9, + addr: MemAddr::BaseOffset { + base: Reg::SP, + offset, + }, + }); + } + } + + // Track stack space for cleanup + stack_args = (aligned_bytes + 15) / 16; // Number of 16-byte units + } + } else { + // Standard AAPCS64 (Linux, FreeBSD) or non-variadic calls + // Move arguments to registers + for (i, &arg) in insn.src.iter().enumerate().skip(args_start) { + // Get argument type if available, otherwise fall back to location-based detection + let arg_type = insn.arg_types.get(i); + let is_fp = if let Some(typ) = arg_type { + typ.is_float() + } else { + // Fall back to location-based detection for backwards compatibility + let arg_loc = self.get_location(arg); + matches!(arg_loc, Loc::VReg(_) | Loc::FImm(_)) + }; + + // Get argument size from type, with minimum 32-bit for register ops + let arg_size = if let Some(typ) = arg_type { + typ.size_bits().max(32) + } else { + 64 // Default for backwards compatibility + }; + + if is_fp { + // FP size from type (32 for float, 64 for double) + let fp_size = if let Some(typ) = arg_type { + typ.size_bits() + } else { + 64 + }; + if fp_arg_idx < fp_arg_regs.len() { + self.emit_fp_move(arg, fp_arg_regs[fp_arg_idx], fp_size, frame_size); + fp_arg_idx += 1; + } else { + // FP arg on stack + self.emit_fp_move(arg, VReg::V16, fp_size, frame_size); + // LIR: store FP reg to stack with pre-decrement + let fp_sz = if fp_size == 32 { + FpSize::Single + } else { + FpSize::Double + }; + self.push_lir(Aarch64Inst::StrFp { + size: fp_sz, + src: VReg::V16, + addr: MemAddr::PreIndex { + base: Reg::SP, + offset: -16, + }, + }); + stack_args += 1; + } + } else if int_arg_idx < int_arg_regs.len() { + self.emit_move(arg, int_arg_regs[int_arg_idx], arg_size, frame_size); + int_arg_idx += 1; + } else { + // Integer arg on stack - always store 8 bytes on aarch64 + self.emit_move(arg, Reg::X9, arg_size, frame_size); + // LIR: store GP reg to stack with pre-decrement + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: Reg::X9, + addr: MemAddr::PreIndex { + base: Reg::SP, + offset: -16, + }, + }); + stack_args += 1; + } + } + } + + // Call the function + self.push_lir(Aarch64Inst::Bl { + target: CallTarget::Direct(Symbol::global(&func_name)), + }); + + // Clean up stack arguments + if stack_args > 0 { + self.push_lir(Aarch64Inst::Add { + size: OperandSize::B64, + src1: Reg::sp(), + src2: GpOperand::Imm((stack_args * 16) as i64), + dst: Reg::sp(), + }); + } + + // Move return value if needed + if let Some(target) = insn.target { + let dst_loc = self.get_location(target); + // Check if return value is floating-point based on type or location + let is_fp_result = if let Some(ref typ) = insn.typ { + typ.is_float() + } else { + matches!(dst_loc, Loc::VReg(_) | Loc::FImm(_)) + }; + + // Get return value size from type + let ret_size = insn.size.max(32); + + if is_fp_result { + // FP return value is in V0 + self.emit_fp_move_to_loc(VReg::V0, &dst_loc, ret_size, frame_size); + } else { + // Integer return value is in X0 + self.emit_move_to_loc(Reg::X0, &dst_loc, ret_size, frame_size); + } + } + } + + fn emit_select(&mut self, insn: &Instruction, frame_size: i32) { + let (cond, then_val, else_val) = match (insn.src.first(), insn.src.get(1), insn.src.get(2)) + { + (Some(&c), Some(&t), Some(&e)) => (c, t, e), + _ => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let size = insn.size.max(32); + let op_size = OperandSize::from_bits(size); + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X9, + }; + + // Load condition, then and else values + self.emit_move(cond, Reg::X10, 64, frame_size); + self.emit_move(then_val, Reg::X11, size, frame_size); + self.emit_move(else_val, Reg::X12, size, frame_size); + + // LIR: compare condition with zero + self.push_lir(Aarch64Inst::Cmp { + size: OperandSize::B64, + src1: Reg::X10, + src2: GpOperand::Imm(0), + }); + + // Use csel: if cond != 0, select then_val, else select else_val + self.push_lir(Aarch64Inst::Csel { + size: op_size, + cond: Cond::Ne, + src_true: Reg::X11, + src_false: Reg::X12, + dst: dst_reg, + }); + + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, size, frame_size); + } + } + + fn emit_extend(&mut self, insn: &Instruction, frame_size: i32) { + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X9, + }; + + match insn.op { + Opcode::Zext => { + // Zero extend: use uxtb, uxth, or just mov for 32->64 + self.emit_move(src, dst_reg, 64, frame_size); + match insn.size { + 8 => { + self.push_lir(Aarch64Inst::Uxtb { + src: dst_reg, + dst: dst_reg, + }); + } + 16 => { + self.push_lir(Aarch64Inst::Uxth { + src: dst_reg, + dst: dst_reg, + }); + } + 32 => { + // 32-bit ops automatically zero-extend on AArch64 + } + _ => {} + } + } + Opcode::Sext => { + // Sign extend: use sxtb, sxth, sxtw based on source size + self.emit_move(src, dst_reg, 64, frame_size); + match insn.src_size { + 8 => { + self.push_lir(Aarch64Inst::Sxtb { + dst_size: OperandSize::B64, + src: dst_reg, + dst: dst_reg, + }); + } + 16 => { + self.push_lir(Aarch64Inst::Sxth { + dst_size: OperandSize::B64, + src: dst_reg, + dst: dst_reg, + }); + } + 32 => { + self.push_lir(Aarch64Inst::Sxtw { + src: dst_reg, + dst: dst_reg, + }); + } + _ => {} + } + } + Opcode::Trunc => { + // Truncate: move value then mask to target size + self.emit_move(src, dst_reg, 64, frame_size); + // Mask to target size using AND + match insn.size { + 8 => { + self.push_lir(Aarch64Inst::And { + size: OperandSize::B32, + src1: dst_reg, + src2: GpOperand::Imm(0xff), + dst: dst_reg, + }); + } + 16 => { + self.push_lir(Aarch64Inst::And { + size: OperandSize::B32, + src1: dst_reg, + src2: GpOperand::Imm(0xffff), + dst: dst_reg, + }); + } + 32 => { + // 32-bit already handled - writing to w register zeros upper 32 bits + } + _ => {} + } + } + _ => {} + } + + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, insn.size, frame_size); + } + } + + fn emit_copy_with_type( + &mut self, + src: PseudoId, + dst: PseudoId, + size: u32, + typ: Option<&Type>, + frame_size: i32, + ) { + // Keep actual size for handling narrow types + let actual_size = size; + let reg_size = size.max(32); + let dst_loc = self.get_location(dst); + let src_loc = self.get_location(src); + + // Check if this is a FP copy (source or dest is in VReg or is FImm) + let is_fp_copy = + matches!(&src_loc, Loc::VReg(_) | Loc::FImm(_)) || matches!(&dst_loc, Loc::VReg(_)); + + // Determine if the type is unsigned (for proper sign/zero extension) + // For plain char, use target.char_signed to determine signedness + let is_unsigned = typ.is_some_and(|t| { + if t.is_unsigned() { + true + } else if t.is_plain_char() { + // Plain char: unsigned if target says char is not signed + !self.target.char_signed + } else { + false + } + }); + + if is_fp_copy { + // Handle FP copy + let dst_vreg = match &dst_loc { + Loc::VReg(v) => *v, + _ => VReg::V16, // Use scratch register + }; + + self.emit_fp_move(src, dst_vreg, reg_size, frame_size); + + if !matches!(&dst_loc, Loc::VReg(v) if *v == dst_vreg) { + self.emit_fp_move_to_loc(dst_vreg, &dst_loc, reg_size, frame_size); + } + } else { + // Integer copy + match &dst_loc { + Loc::Reg(r) => { + self.emit_move(src, *r, reg_size, frame_size); + // For narrow types (8 or 16 bits), extend to correct width + // AARCH64: UXTB/UXTH for unsigned, SXTB/SXTH for signed + if actual_size == 8 { + if is_unsigned { + self.push_lir(Aarch64Inst::Uxtb { src: *r, dst: *r }); + } else { + self.push_lir(Aarch64Inst::Sxtb { + dst_size: OperandSize::B32, + src: *r, + dst: *r, + }); + } + } else if actual_size == 16 { + if is_unsigned { + self.push_lir(Aarch64Inst::Uxth { src: *r, dst: *r }); + } else { + self.push_lir(Aarch64Inst::Sxth { + dst_size: OperandSize::B32, + src: *r, + dst: *r, + }); + } + } + } + Loc::Stack(_) => { + self.emit_move(src, Reg::X9, reg_size, frame_size); + // For narrow types stored to stack, use the actual size + if actual_size <= 16 { + self.emit_move_to_loc(Reg::X9, &dst_loc, actual_size, frame_size); + } else { + self.emit_move_to_loc(Reg::X9, &dst_loc, reg_size, frame_size); + } + } + _ => {} + } + } + } + + // ======================================================================== + // Floating-Point Operations + // ======================================================================== + + /// Move FP value to a VReg + fn emit_fp_move(&mut self, src: PseudoId, dst: VReg, size: u32, frame_size: i32) { + let loc = self.get_location(src); + let fp_size = FpSize::from_bits(size.max(32)); + + match loc { + Loc::VReg(v) if v == dst => {} + Loc::VReg(v) => { + self.push_lir(Aarch64Inst::FmovReg { + size: fp_size, + src: v, + dst, + }); + } + Loc::Stack(offset) => { + let actual_offset = self.stack_offset(frame_size, offset); + // FP-relative for alloca safety + self.push_lir(Aarch64Inst::LdrFp { + size: fp_size, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: actual_offset, + }, + dst, + }); + } + Loc::FImm(f) => { + // Load FP constant using integer register + let (scratch0, _) = Reg::scratch_regs(); + let bits = if size <= 32 { + (f as f32).to_bits() as i64 + } else { + f.to_bits() as i64 + }; + self.emit_mov_imm(scratch0, bits, 64); + self.push_lir(Aarch64Inst::FmovFromGp { + size: fp_size, + src: scratch0, + dst, + }); + } + Loc::Reg(r) => { + // Move from integer register to FP register + self.push_lir(Aarch64Inst::FmovFromGp { + size: fp_size, + src: r, + dst, + }); + } + Loc::Imm(v) => { + // Load integer immediate and move to FP + let (scratch0, _) = Reg::scratch_regs(); + self.emit_mov_imm(scratch0, v, 64); + self.push_lir(Aarch64Inst::FmovFromGp { + size: fp_size, + src: scratch0, + dst, + }); + } + Loc::Global(name) => { + // Load from global - use size matching FP precision + let (scratch0, _) = Reg::scratch_regs(); + let load_size = match fp_size { + FpSize::Single => OperandSize::B32, + FpSize::Double => OperandSize::B64, + }; + self.emit_load_global(&name, scratch0, load_size); + self.push_lir(Aarch64Inst::FmovFromGp { + size: fp_size, + src: scratch0, + dst, + }); + } + } + } + + /// Move FP register value to a location + fn emit_fp_move_to_loc(&mut self, src: VReg, dst: &Loc, size: u32, frame_size: i32) { + let fp_size = FpSize::from_bits(size.max(32)); + + match dst { + Loc::VReg(v) if *v == src => {} + Loc::VReg(v) => { + self.push_lir(Aarch64Inst::FmovReg { + size: fp_size, + src, + dst: *v, + }); + } + Loc::Stack(offset) => { + let actual_offset = self.stack_offset(frame_size, *offset); + // FP-relative for alloca safety + self.push_lir(Aarch64Inst::StrFp { + size: fp_size, + src, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: actual_offset, + }, + }); + } + Loc::Reg(r) => { + // Move from FP register to integer register + self.push_lir(Aarch64Inst::FmovToGp { + size: fp_size, + src, + dst: *r, + }); + } + _ => {} + } + } + + /// Emit FP binary operation (fadd, fsub, fmul, fdiv) + fn emit_fp_binop(&mut self, insn: &Instruction, frame_size: i32) { + let size = insn.size.max(32); + let fp_size = FpSize::from_bits(size); + let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { + (Some(&s1), Some(&s2)) => (s1, s2), + _ => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let dst_loc = self.get_location(target); + let work_reg = match &dst_loc { + Loc::VReg(v) => *v, + _ => VReg::V16, + }; + + // Load operands + self.emit_fp_move(src1, VReg::V17, size, frame_size); + self.emit_fp_move(src2, VReg::V18, size, frame_size); + + match insn.op { + Opcode::FAdd => { + self.push_lir(Aarch64Inst::Fadd { + size: fp_size, + src1: VReg::V17, + src2: VReg::V18, + dst: work_reg, + }); + } + Opcode::FSub => { + self.push_lir(Aarch64Inst::Fsub { + size: fp_size, + src1: VReg::V17, + src2: VReg::V18, + dst: work_reg, + }); + } + Opcode::FMul => { + self.push_lir(Aarch64Inst::Fmul { + size: fp_size, + src1: VReg::V17, + src2: VReg::V18, + dst: work_reg, + }); + } + Opcode::FDiv => { + self.push_lir(Aarch64Inst::Fdiv { + size: fp_size, + src1: VReg::V17, + src2: VReg::V18, + dst: work_reg, + }); + } + _ => return, + } + + if !matches!(&dst_loc, Loc::VReg(v) if *v == work_reg) { + self.emit_fp_move_to_loc(work_reg, &dst_loc, size, frame_size); + } + } + + /// Emit FP negation + fn emit_fp_neg(&mut self, insn: &Instruction, frame_size: i32) { + let size = insn.size.max(32); + let fp_size = FpSize::from_bits(size); + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let dst_loc = self.get_location(target); + let work_reg = match &dst_loc { + Loc::VReg(v) => *v, + _ => VReg::V16, + }; + + self.emit_fp_move(src, VReg::V17, size, frame_size); + + self.push_lir(Aarch64Inst::Fneg { + size: fp_size, + src: VReg::V17, + dst: work_reg, + }); + + if !matches!(&dst_loc, Loc::VReg(v) if *v == work_reg) { + self.emit_fp_move_to_loc(work_reg, &dst_loc, size, frame_size); + } + } + + /// Emit FP comparison + fn emit_fp_compare(&mut self, insn: &Instruction, frame_size: i32) { + let size = insn.size.max(32); + let fp_size = FpSize::from_bits(size); + let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { + (Some(&s1), Some(&s2)) => (s1, s2), + _ => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + + // Load operands to FP registers + self.emit_fp_move(src1, VReg::V17, size, frame_size); + self.emit_fp_move(src2, VReg::V18, size, frame_size); + + // Perform comparison + self.push_lir(Aarch64Inst::Fcmp { + size: fp_size, + src1: VReg::V17, + src2: VReg::V18, + }); + + // Get result location + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X9, + }; + + // Set result based on condition + let cond = match insn.op { + Opcode::FCmpOEq => Cond::Eq, + Opcode::FCmpONe => Cond::Ne, + Opcode::FCmpOLt => Cond::Lt, + Opcode::FCmpOLe => Cond::Le, + Opcode::FCmpOGt => Cond::Gt, + Opcode::FCmpOGe => Cond::Ge, + _ => return, + }; + + self.push_lir(Aarch64Inst::Cset { cond, dst: dst_reg }); + + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, 32, frame_size); + } + } + + /// Emit integer to float conversion + fn emit_int_to_float(&mut self, insn: &Instruction, frame_size: i32) { + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + + let src_size = insn.src_size.max(32); + let dst_size = insn.size.max(32); + let fp_size = FpSize::from_bits(dst_size); + let int_size = OperandSize::from_bits(src_size); + + let dst_loc = self.get_location(target); + let dst_vreg = match &dst_loc { + Loc::VReg(v) => *v, + _ => VReg::V16, + }; + + // Load source to integer register + let (scratch0, _) = Reg::scratch_regs(); + self.emit_move(src, scratch0, src_size, frame_size); + + // Convert integer to float + // scvtf/ucvtf: signed/unsigned int to float + match insn.op { + Opcode::SCvtF => { + self.push_lir(Aarch64Inst::Scvtf { + int_size, + fp_size, + src: scratch0, + dst: dst_vreg, + }); + } + Opcode::UCvtF => { + self.push_lir(Aarch64Inst::Ucvtf { + int_size, + fp_size, + src: scratch0, + dst: dst_vreg, + }); + } + _ => return, + } + + if !matches!(&dst_loc, Loc::VReg(v) if *v == dst_vreg) { + self.emit_fp_move_to_loc(dst_vreg, &dst_loc, dst_size, frame_size); + } + } + + /// Emit float to integer conversion + fn emit_float_to_int(&mut self, insn: &Instruction, frame_size: i32) { + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + + let src_size = insn.src_size.max(32); + let dst_size = insn.size.max(32); + let fp_size = FpSize::from_bits(src_size); + let int_size = OperandSize::from_bits(dst_size); + + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X9, + }; + + // Load source to FP register + self.emit_fp_move(src, VReg::V17, src_size, frame_size); + + // Convert float to integer using truncation toward zero + // fcvtzu/fcvtzs: float to unsigned/signed int with truncation + match insn.op { + Opcode::FCvtU => { + self.push_lir(Aarch64Inst::Fcvtzu { + fp_size, + int_size, + src: VReg::V17, + dst: dst_reg, + }); + } + Opcode::FCvtS => { + self.push_lir(Aarch64Inst::Fcvtzs { + fp_size, + int_size, + src: VReg::V17, + dst: dst_reg, + }); + } + _ => return, + } + + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, dst_size, frame_size); + } + } + + /// Emit float to float conversion (size change) + fn emit_float_to_float(&mut self, insn: &Instruction, frame_size: i32) { + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + + let src_size = insn.src_size.max(32); + let dst_size = insn.size.max(32); + let src_fp_size = FpSize::from_bits(src_size); + let dst_fp_size = FpSize::from_bits(dst_size); + + let dst_loc = self.get_location(target); + let dst_vreg = match &dst_loc { + Loc::VReg(v) => *v, + _ => VReg::V16, + }; + + // Load source to FP register + self.emit_fp_move(src, VReg::V17, src_size, frame_size); + + // Convert between float sizes if they differ + // fcvt: convert between single and double precision + // Note: On Apple Silicon, long double == double (both 64-bit), + // so skip fcvt when sizes are equal to avoid invalid "fcvt d, d" + if src_fp_size != dst_fp_size { + self.push_lir(Aarch64Inst::Fcvt { + src_size: src_fp_size, + dst_size: dst_fp_size, + src: VReg::V17, + dst: dst_vreg, + }); + } else if dst_vreg != VReg::V17 { + // Same size, just move if needed + self.push_lir(Aarch64Inst::FmovReg { + size: dst_fp_size, + src: VReg::V17, + dst: dst_vreg, + }); + } + + if !matches!(&dst_loc, Loc::VReg(v) if *v == dst_vreg) { + self.emit_fp_move_to_loc(dst_vreg, &dst_loc, dst_size, frame_size); + } + } + + // ======================================================================== + // Variadic function support (va_* builtins) + // ======================================================================== + // + // Platform-specific va_list handling: + // + // Linux/FreeBSD (AAPCS64): + // - va_list is a char* pointing to register save area + // - In prologue, we save x0-x7 to the register save area + // - va_start computes: reg_save_area + (num_fixed_gp_params * 8) + // + // Darwin (macOS/iOS): + // - Variadic args are passed on the stack by the caller + // - va_list is a char* pointing to the caller's stack + // - va_start computes: FP + frame_size (where caller placed variadic args) + + /// Emit va_start: Initialize va_list to point to first variadic arg + /// Note: ap_addr is the ADDRESS of the va_list variable (from symaddr), not the va_list itself + fn emit_va_start(&mut self, insn: &Instruction, frame_size: i32) { + let ap_addr = match insn.src.first() { + Some(&s) => s, + None => return, + }; + + let ap_loc = self.get_location(ap_addr); + let (scratch0, scratch1) = Reg::scratch_regs(); + + // Compute address of first variadic argument + let is_darwin = self.target.os == crate::target::Os::MacOS; + let vararg_offset = if is_darwin { + // Darwin: Variadic args are on the stack, placed by the caller + // They start at FP + frame_size (the original SP before prologue) + frame_size + } else { + // Linux/FreeBSD: Variadic args are in the register save area + // First variadic arg is at: reg_save_area_offset + (num_fixed_gp_params * 8) + self.reg_save_area_offset + (self.num_fixed_gp_params as i32 * 8) + }; + + self.push_lir(Aarch64Inst::Add { + size: OperandSize::B64, + src1: Reg::X29, + src2: GpOperand::Imm(vararg_offset as i64), + dst: scratch0, + }); + + // ap_loc contains the ADDRESS of the va_list variable + // We need to store scratch0 TO that address (indirect store) + match ap_loc { + Loc::Stack(offset) => { + // The stack location contains the address of ap, load it first + let actual_offset = self.stack_offset(frame_size, offset); + self.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B64, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: actual_offset, + }, + dst: scratch1, + }); + // Store the computed va_list pointer to *scratch1 + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: scratch0, + addr: MemAddr::Base(scratch1), + }); + } + Loc::Reg(r) => { + // r contains the address of ap, store scratch0 to [r] + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: scratch0, + addr: MemAddr::Base(r), + }); + } + _ => {} + } + } + + /// Emit va_arg: Get the next variadic argument of the specified type + /// Note: ap_addr is the ADDRESS of the va_list variable (from symaddr), not the va_list itself + fn emit_va_arg(&mut self, insn: &Instruction, frame_size: i32) { + let ap_addr = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + + let default_type = Type::basic(crate::types::TypeKind::Int); + let arg_type = insn.typ.as_ref().unwrap_or(&default_type); + let arg_size = arg_type.size_bits().max(32); + let arg_bytes = (arg_size / 8).max(8) as i64; // Minimum 8 bytes per slot on ARM64 + + let ap_loc = self.get_location(ap_addr); + let dst_loc = self.get_location(target); + let (scratch0, scratch1) = Reg::scratch_regs(); + + // ap_loc contains the ADDRESS of the va_list variable (from symaddr) + // First, get the address of ap into scratch1, then load ap value from there + let ap_ptr_reg = match &ap_loc { + Loc::Stack(offset) => { + // Stack location contains the address of ap + let actual_offset = self.stack_offset(frame_size, *offset); + self.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B64, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: actual_offset, + }, + dst: scratch1, + }); + scratch1 + } + Loc::Reg(r) => { + // Register contains the address of ap + if *r != scratch1 { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(*r), + dst: scratch1, + }); + } + scratch1 + } + _ => return, + }; + + // Load the current ap pointer value from [ap_ptr_reg] + self.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B64, + addr: MemAddr::Base(ap_ptr_reg), + dst: scratch0, + }); + + // Load the argument from *ap + if arg_type.is_float() { + // Load floating point value + let fp_size = arg_type.size_bits(); + let fp_size_enum = FpSize::from_bits(fp_size); + self.push_lir(Aarch64Inst::LdrFp { + size: fp_size_enum, + addr: MemAddr::Base(scratch0), + dst: VReg::V16, + }); + + // Store to destination + match &dst_loc { + Loc::VReg(v) => { + self.push_lir(Aarch64Inst::FmovReg { + size: fp_size_enum, + src: VReg::V16, + dst: *v, + }); + } + Loc::Stack(offset) => { + // FP-relative for alloca safety + let actual_offset = self.stack_offset(frame_size, *offset); + self.push_lir(Aarch64Inst::StrFp { + size: fp_size_enum, + src: VReg::V16, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: actual_offset, + }, + }); + } + _ => {} + } + } else { + // Load integer value + let op_size = OperandSize::from_bits(arg_size); + self.push_lir(Aarch64Inst::Ldr { + size: op_size, + addr: MemAddr::Base(scratch0), + dst: scratch1, + }); + + // Store to destination + match &dst_loc { + Loc::Reg(r) => { + if *r != scratch1 { + self.push_lir(Aarch64Inst::Mov { + size: op_size, + src: GpOperand::Reg(scratch1), + dst: *r, + }); + } + } + Loc::Stack(offset) => { + // FP-relative for alloca safety + let actual_offset = self.stack_offset(frame_size, *offset); + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: scratch1, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: actual_offset, + }, + }); + } + _ => {} + } + } + + // Advance ap by the argument size + self.push_lir(Aarch64Inst::Add { + size: OperandSize::B64, + src1: scratch0, + src2: GpOperand::Imm(arg_bytes), + dst: scratch0, + }); + + // Store updated ap back to [&ap] (ap_loc contains the address of ap) + // Need to reload the address since scratch1 may have been clobbered + match &ap_loc { + Loc::Stack(offset) => { + // Reload the address of ap into scratch1 + let actual_offset = self.stack_offset(frame_size, *offset); + self.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B64, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: actual_offset, + }, + dst: scratch1, + }); + // Store updated ap to [scratch1] + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: scratch0, + addr: MemAddr::Base(scratch1), + }); + } + Loc::Reg(r) => { + // r contains the address of ap, store scratch0 to [r] + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: scratch0, + addr: MemAddr::Base(*r), + }); + } + _ => {} + } + } + + /// Emit va_copy: Copy a va_list + /// Note: Both addresses are pointers to va_list variables (from symaddr) + fn emit_va_copy(&mut self, insn: &Instruction, frame_size: i32) { + let dest_addr = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let src_addr = match insn.src.get(1) { + Some(&s) => s, + None => return, + }; + + let dest_loc = self.get_location(dest_addr); + let src_loc = self.get_location(src_addr); + let (scratch0, scratch1) = Reg::scratch_regs(); + + // Both src_loc and dest_loc contain ADDRESSES of va_list variables + // Get the address of src va_list into scratch1 + let src_ptr_reg = match &src_loc { + Loc::Stack(offset) => { + let actual_offset = self.stack_offset(frame_size, *offset); + self.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B64, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: actual_offset, + }, + dst: scratch1, + }); + scratch1 + } + Loc::Reg(r) => *r, + _ => return, + }; + + // Load the src va_list pointer value from [src_ptr_reg] + self.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B64, + addr: MemAddr::Base(src_ptr_reg), + dst: scratch0, + }); + + // Get the address of dest va_list and store scratch0 there + match &dest_loc { + Loc::Stack(offset) => { + let actual_offset = self.stack_offset(frame_size, *offset); + self.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B64, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: actual_offset, + }, + dst: scratch1, + }); + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: scratch0, + addr: MemAddr::Base(scratch1), + }); + } + Loc::Reg(r) => { + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: scratch0, + addr: MemAddr::Base(*r), + }); + } + _ => {} + } + } + + // ========================================================================= + // Byte-swapping builtins + // ========================================================================= + + /// Emit bswap16: Byte-swap a 16-bit value + /// ARM64 uses rev16 to reverse bytes within each 16-bit halfword + fn emit_bswap16(&mut self, insn: &Instruction, frame_size: i32) { + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let dst = match insn.target { + Some(t) => t, + None => return, + }; + + let src_loc = self.get_location(src); + let dst_loc = self.get_location(dst); + let scratch = Reg::X9; + + // Load source into scratch register + match src_loc { + Loc::Reg(r) => { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(r), + dst: scratch, + }); + } + Loc::Stack(off) => { + // FP-relative for alloca safety + let adjusted = frame_size + off; + self.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B16, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: adjusted, + }, + dst: scratch, + }); + } + Loc::Imm(v) => { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Imm(v), + dst: scratch, + }); + } + _ => return, + } + + // rev16 reverses bytes within each halfword + self.push_lir(Aarch64Inst::Rev16 { + size: OperandSize::B32, + src: scratch, + dst: scratch, + }); + + // Mask to 16 bits + self.push_lir(Aarch64Inst::And { + size: OperandSize::B32, + src1: scratch, + src2: GpOperand::Imm(0xFFFF), + dst: scratch, + }); + + // Store result + match dst_loc { + Loc::Reg(r) => { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(scratch), + dst: r, + }); + } + Loc::Stack(off) => { + // FP-relative for alloca safety + let adjusted = frame_size + off; + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B16, + src: scratch, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: adjusted, + }, + }); + } + _ => {} + } + } + + /// Emit bswap32: Byte-swap a 32-bit value + fn emit_bswap32(&mut self, insn: &Instruction, frame_size: i32) { + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let dst = match insn.target { + Some(t) => t, + None => return, + }; + + let src_loc = self.get_location(src); + let dst_loc = self.get_location(dst); + let scratch = Reg::X9; + + // Load source into scratch register + match src_loc { + Loc::Reg(r) => { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(r), + dst: scratch, + }); + } + Loc::Stack(off) => { + // FP-relative for alloca safety + let adjusted = frame_size + off; + self.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B32, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: adjusted, + }, + dst: scratch, + }); + } + Loc::Imm(v) => { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Imm(v), + dst: scratch, + }); + } + _ => return, + } + + // rev reverses all bytes in the register + self.push_lir(Aarch64Inst::Rev { + size: OperandSize::B32, + src: scratch, + dst: scratch, + }); + + // Store result + match dst_loc { + Loc::Reg(r) => { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(scratch), + dst: r, + }); + } + Loc::Stack(off) => { + // FP-relative for alloca safety + let adjusted = frame_size + off; + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B32, + src: scratch, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: adjusted, + }, + }); + } + _ => {} + } + } + + /// Emit bswap64: Byte-swap a 64-bit value + fn emit_bswap64(&mut self, insn: &Instruction, frame_size: i32) { + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let dst = match insn.target { + Some(t) => t, + None => return, + }; + + let src_loc = self.get_location(src); + let dst_loc = self.get_location(dst); + let scratch = Reg::X9; + + // Load source into scratch register + match src_loc { + Loc::Reg(r) => { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(r), + dst: scratch, + }); + } + Loc::Stack(off) => { + // FP-relative for alloca safety + let adjusted = frame_size + off; + self.push_lir(Aarch64Inst::Ldr { + size: OperandSize::B64, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: adjusted, + }, + dst: scratch, + }); + } + Loc::Imm(v) => { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Imm(v), + dst: scratch, + }); + } + _ => return, + } + + // rev reverses all bytes in the 64-bit register + self.push_lir(Aarch64Inst::Rev { + size: OperandSize::B64, + src: scratch, + dst: scratch, + }); + + // Store result + match dst_loc { + Loc::Reg(r) => { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(scratch), + dst: r, + }); + } + Loc::Stack(off) => { + // FP-relative for alloca safety + let adjusted = frame_size + off; + self.push_lir(Aarch64Inst::Str { + size: OperandSize::B64, + src: scratch, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: adjusted, + }, + }); + } + _ => {} + } + } + + /// Emit __builtin_alloca - dynamic stack allocation + fn emit_alloca(&mut self, insn: &Instruction, frame_size: i32) { + let size = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + + // Load size into X9 (scratch register) + self.emit_move(size, Reg::X9, 64, frame_size); + + // Round up to 16-byte alignment: (size + 15) & ~15 + self.push_lir(Aarch64Inst::Add { + size: OperandSize::B64, + src1: Reg::X9, + src2: GpOperand::Imm(15), + dst: Reg::X9, + }); + self.push_lir(Aarch64Inst::And { + size: OperandSize::B64, + src1: Reg::X9, + src2: GpOperand::Imm(-16), + dst: Reg::X9, + }); + + // Subtract from stack pointer + self.push_lir(Aarch64Inst::Sub { + size: OperandSize::B64, + src1: Reg::SP, + src2: GpOperand::Reg(Reg::X9), + dst: Reg::SP, + }); + + // Return new stack pointer + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::SP), + dst: Reg::X9, + }); + + // Store result + let dst_loc = self.get_location(target); + self.emit_move_to_loc(Reg::X9, &dst_loc, 64, frame_size); + } +} + +// ============================================================================ +// CodeGenerator trait implementation +// ============================================================================ + +impl CodeGenerator for Aarch64CodeGen { + fn generate(&mut self, module: &Module) -> String { + self.output.clear(); + self.lir_buffer.clear(); + self.last_debug_line = 0; + self.last_debug_file = 0; + self.emit_debug = module.debug; + + // Emit file header + self.emit_header(); + + // Emit .file directives unconditionally (useful for diagnostics/profiling) + for (i, path) in module.source_files.iter().enumerate() { + // File indices in DWARF start at 1 + self.push_lir(Aarch64Inst::Directive(Directive::file( + (i + 1) as u32, + path.as_str(), + ))); + } + + // Emit globals + for (name, typ, init) in &module.globals { + self.emit_global(name, typ, init); + } + + // Emit string literals + self.emit_strings(&module.strings); + + // Emit functions + for func in &module.functions { + self.emit_function(func); + } + + // Flush all buffered LIR instructions to output + self.emit_all(); + + self.output.clone() + } + + fn set_emit_unwind_tables(&mut self, emit: bool) { + self.emit_unwind_tables = emit; + } +} diff --git a/cc/arch/aarch64/lir.rs b/cc/arch/aarch64/lir.rs new file mode 100644 index 000000000..b05780996 --- /dev/null +++ b/cc/arch/aarch64/lir.rs @@ -0,0 +1,1806 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// AArch64 Low-level Intermediate Representation (LIR) +// +// Strongly-typed representation of AArch64 assembly instructions. +// Each instruction variant encodes all operands in typed fields, +// enabling peephole optimizations before final assembly emission. +// + +// Allow unused variants/methods - LIR defines complete instruction set for future use +#![allow(dead_code)] + +use super::codegen::{Reg, VReg}; +use crate::arch::lir::{Directive, EmitAsm, FpSize, Label, OperandSize, Symbol}; +use crate::target::{Os, Target}; +use std::fmt::{self, Write}; + +// ============================================================================ +// Memory Addressing Modes +// ============================================================================ + +/// AArch64 memory addressing mode +#[derive(Debug, Clone, PartialEq)] +pub enum MemAddr { + /// [base] - Register indirect + Base(Reg), + + /// [base, #offset] - Register indirect with immediate offset + BaseOffset { base: Reg, offset: i32 }, + + /// [base, Xm] - Register indirect with register offset + BaseReg { base: Reg, index: Reg }, + + /// [base, Xm, LSL #shift] - Register indirect with shifted register offset + BaseRegShift { base: Reg, index: Reg, shift: u8 }, + + /// [base, #offset]! - Pre-indexed (base updated before access) + PreIndex { base: Reg, offset: i32 }, + + /// [base], #offset - Post-indexed (base updated after access) + PostIndex { base: Reg, offset: i32 }, +} + +impl MemAddr { + /// Format memory operand in AArch64 syntax + pub fn format(&self) -> String { + match self { + MemAddr::Base(base) => format!("[{}]", base.name64()), + MemAddr::BaseOffset { base, offset } => { + if *offset == 0 { + format!("[{}]", base.name64()) + } else { + format!("[{}, #{}]", base.name64(), offset) + } + } + MemAddr::BaseReg { base, index } => { + format!("[{}, {}]", base.name64(), index.name64()) + } + MemAddr::BaseRegShift { base, index, shift } => { + format!("[{}, {}, lsl #{}]", base.name64(), index.name64(), shift) + } + MemAddr::PreIndex { base, offset } => { + format!("[{}, #{}]!", base.name64(), offset) + } + MemAddr::PostIndex { base, offset } => { + format!("[{}], #{}", base.name64(), offset) + } + } + } +} + +// ============================================================================ +// General-Purpose Operands +// ============================================================================ + +/// AArch64 general-purpose operand (register or immediate) +#[derive(Debug, Clone, PartialEq)] +pub enum GpOperand { + /// Register operand + Reg(Reg), + /// Immediate integer value (12-bit for most ALU ops) + Imm(i64), +} + +impl GpOperand { + /// Format operand in AArch64 syntax for given size + pub fn format(&self, size: OperandSize) -> String { + match self { + GpOperand::Reg(r) => r.name_for_size(size.bits()).to_string(), + GpOperand::Imm(v) => format!("#{}", v), + } + } + + /// Check if this is a register operand + pub fn is_reg(&self) -> bool { + matches!(self, GpOperand::Reg(_)) + } + + /// Get register if this is a register operand + pub fn as_reg(&self) -> Option { + match self { + GpOperand::Reg(r) => Some(*r), + _ => None, + } + } +} + +// ============================================================================ +// FP/SIMD Operands +// ============================================================================ + +/// AArch64 FP/SIMD operand (register) +#[derive(Debug, Clone, PartialEq)] +pub enum FpOperand { + /// FP register operand + Reg(VReg), +} + +impl FpOperand { + /// Format operand in AArch64 syntax + pub fn format(&self, size: FpSize) -> String { + match self { + FpOperand::Reg(r) => match size { + FpSize::Single => r.name_s().to_string(), + FpSize::Double => r.name_d().to_string(), + }, + } + } +} + +// ============================================================================ +// Condition Codes +// ============================================================================ + +/// AArch64 condition codes for comparisons and conditional operations +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Cond { + /// Equal (Z=1) + Eq, + /// Not equal (Z=0) + Ne, + /// Carry set / unsigned higher or same (C=1) + Cs, + /// Carry clear / unsigned lower (C=0) + Cc, + /// Minus / negative (N=1) + Mi, + /// Plus / positive or zero (N=0) + Pl, + /// Overflow (V=1) + Vs, + /// No overflow (V=0) + Vc, + /// Unsigned higher (C=1 and Z=0) + Hi, + /// Unsigned lower or same (C=0 or Z=1) + Ls, + /// Signed greater than or equal (N=V) + Ge, + /// Signed less than (N!=V) + Lt, + /// Signed greater than (Z=0 and N=V) + Gt, + /// Signed less than or equal (Z=1 or N!=V) + Le, + /// Always (unconditional) + Al, + /// Never (unconditional inverse, rarely used) + Nv, +} + +impl Cond { + /// AArch64 condition code suffix + pub fn suffix(&self) -> &'static str { + match self { + Cond::Eq => "eq", + Cond::Ne => "ne", + Cond::Cs => "cs", + Cond::Cc => "cc", + Cond::Mi => "mi", + Cond::Pl => "pl", + Cond::Vs => "vs", + Cond::Vc => "vc", + Cond::Hi => "hi", + Cond::Ls => "ls", + Cond::Ge => "ge", + Cond::Lt => "lt", + Cond::Gt => "gt", + Cond::Le => "le", + Cond::Al => "al", + Cond::Nv => "nv", + } + } + + /// Get the inverse condition + pub fn inverse(&self) -> Cond { + match self { + Cond::Eq => Cond::Ne, + Cond::Ne => Cond::Eq, + Cond::Cs => Cond::Cc, + Cond::Cc => Cond::Cs, + Cond::Mi => Cond::Pl, + Cond::Pl => Cond::Mi, + Cond::Vs => Cond::Vc, + Cond::Vc => Cond::Vs, + Cond::Hi => Cond::Ls, + Cond::Ls => Cond::Hi, + Cond::Ge => Cond::Lt, + Cond::Lt => Cond::Ge, + Cond::Gt => Cond::Le, + Cond::Le => Cond::Gt, + Cond::Al => Cond::Nv, + Cond::Nv => Cond::Al, + } + } +} + +impl fmt::Display for Cond { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.suffix()) + } +} + +// ============================================================================ +// Shift Type +// ============================================================================ + +/// Shift type for shifted register operands +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum ShiftType { + /// Logical shift left + Lsl, + /// Logical shift right + Lsr, + /// Arithmetic shift right + Asr, + /// Rotate right + Ror, +} + +impl ShiftType { + pub fn as_str(&self) -> &'static str { + match self { + ShiftType::Lsl => "lsl", + ShiftType::Lsr => "lsr", + ShiftType::Asr => "asr", + ShiftType::Ror => "ror", + } + } +} + +// ============================================================================ +// Extend Type +// ============================================================================ + +/// Extend type for extended register operands +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum ExtendType { + /// Unsigned extend byte + Uxtb, + /// Unsigned extend halfword + Uxth, + /// Unsigned extend word + Uxtw, + /// Unsigned extend doubleword (same as LSL for 64-bit) + Uxtx, + /// Signed extend byte + Sxtb, + /// Signed extend halfword + Sxth, + /// Signed extend word + Sxtw, + /// Signed extend doubleword + Sxtx, +} + +impl ExtendType { + pub fn as_str(&self) -> &'static str { + match self { + ExtendType::Uxtb => "uxtb", + ExtendType::Uxth => "uxth", + ExtendType::Uxtw => "uxtw", + ExtendType::Uxtx => "uxtx", + ExtendType::Sxtb => "sxtb", + ExtendType::Sxth => "sxth", + ExtendType::Sxtw => "sxtw", + ExtendType::Sxtx => "sxtx", + } + } +} + +// ============================================================================ +// Call Target +// ============================================================================ + +/// Target for call instructions +#[derive(Debug, Clone, PartialEq)] +pub enum CallTarget { + /// Direct call to symbol + Direct(Symbol), + /// Indirect call through register + Indirect(Reg), +} + +// ============================================================================ +// AArch64 LIR Instructions +// ============================================================================ + +/// AArch64 Low-level IR instruction +#[derive(Debug, Clone)] +pub enum Aarch64Inst { + // ======================================================================== + // Data Movement + // ======================================================================== + /// MOV - Move register or immediate + Mov { + size: OperandSize, + src: GpOperand, + dst: Reg, + }, + + /// MOVZ - Move wide with zero (clear other bits) + Movz { + size: OperandSize, + imm: u16, + shift: u8, // 0, 16, 32, or 48 + dst: Reg, + }, + + /// MOVK - Move wide with keep (keep other bits) + Movk { + size: OperandSize, + imm: u16, + shift: u8, // 0, 16, 32, or 48 + dst: Reg, + }, + + /// MOVN - Move wide with NOT (inverted immediate) + Movn { + size: OperandSize, + imm: u16, + shift: u8, + dst: Reg, + }, + + /// LDR - Load register + Ldr { + size: OperandSize, + addr: MemAddr, + dst: Reg, + }, + + /// LDRSB/LDRSH/LDRSW - Load register with sign extension + Ldrs { + src_size: OperandSize, + dst_size: OperandSize, + addr: MemAddr, + dst: Reg, + }, + + /// STR - Store register + Str { + size: OperandSize, + src: Reg, + addr: MemAddr, + }, + + /// LDP - Load pair of registers + Ldp { + size: OperandSize, + addr: MemAddr, + dst1: Reg, + dst2: Reg, + }, + + /// STP - Store pair of registers + Stp { + size: OperandSize, + src1: Reg, + src2: Reg, + addr: MemAddr, + }, + + /// ADRP - Form PC-relative address to 4KB page + Adrp { sym: Symbol, dst: Reg }, + + /// ADD with symbol page offset (used with ADRP) + /// macOS: add dst, base, symbol@PAGEOFF + /// Linux: add dst, base, :lo12:symbol + AddSymOffset { sym: Symbol, base: Reg, dst: Reg }, + + /// LDR with symbol page offset (used with ADRP) + /// macOS: ldr dst, [base, symbol@PAGEOFF] + /// Linux: ldr dst, [base, :lo12:symbol] + LdrSymOffset { + size: OperandSize, + sym: Symbol, + base: Reg, + dst: Reg, + }, + + // ======================================================================== + // Integer Arithmetic + // ======================================================================== + /// ADD - Add + Add { + size: OperandSize, + src1: Reg, + src2: GpOperand, + dst: Reg, + }, + + /// SUB - Subtract + Sub { + size: OperandSize, + src1: Reg, + src2: GpOperand, + dst: Reg, + }, + + /// MUL - Multiply + Mul { + size: OperandSize, + src1: Reg, + src2: Reg, + dst: Reg, + }, + + /// SDIV - Signed divide + Sdiv { + size: OperandSize, + src1: Reg, + src2: Reg, + dst: Reg, + }, + + /// UDIV - Unsigned divide + Udiv { + size: OperandSize, + src1: Reg, + src2: Reg, + dst: Reg, + }, + + /// MSUB - Multiply-subtract (for modulo: r = a - (a/b)*b) + Msub { + size: OperandSize, + mul1: Reg, + mul2: Reg, + sub: Reg, + dst: Reg, + }, + + /// NEG - Negate + Neg { + size: OperandSize, + src: Reg, + dst: Reg, + }, + + // ======================================================================== + // Bitwise Operations + // ======================================================================== + /// AND - Bitwise AND + And { + size: OperandSize, + src1: Reg, + src2: GpOperand, + dst: Reg, + }, + + /// ORR - Bitwise OR + Orr { + size: OperandSize, + src1: Reg, + src2: GpOperand, + dst: Reg, + }, + + /// EOR - Bitwise exclusive OR + Eor { + size: OperandSize, + src1: Reg, + src2: GpOperand, + dst: Reg, + }, + + /// MVN - Bitwise NOT (move NOT) + Mvn { + size: OperandSize, + src: Reg, + dst: Reg, + }, + + /// LSL - Logical shift left + Lsl { + size: OperandSize, + src: Reg, + amount: GpOperand, + dst: Reg, + }, + + /// LSR - Logical shift right + Lsr { + size: OperandSize, + src: Reg, + amount: GpOperand, + dst: Reg, + }, + + /// ASR - Arithmetic shift right + Asr { + size: OperandSize, + src: Reg, + amount: GpOperand, + dst: Reg, + }, + + /// ROR - Rotate right + Ror { + size: OperandSize, + src: Reg, + amount: GpOperand, + dst: Reg, + }, + + /// REV - Reverse bytes (byte swap) + Rev { + size: OperandSize, + src: Reg, + dst: Reg, + }, + + /// REV16 - Reverse bytes in halfwords + Rev16 { + size: OperandSize, + src: Reg, + dst: Reg, + }, + + // ======================================================================== + // Comparison and Conditional + // ======================================================================== + /// CMP - Compare (sets flags based on dst - src) + Cmp { + size: OperandSize, + src1: Reg, + src2: GpOperand, + }, + + /// TST - Test (sets flags based on AND) + Tst { + size: OperandSize, + src1: Reg, + src2: GpOperand, + }, + + /// CSET - Conditional set (set to 1 if condition true, else 0) + Cset { cond: Cond, dst: Reg }, + + /// CSEL - Conditional select + Csel { + size: OperandSize, + cond: Cond, + src_true: Reg, + src_false: Reg, + dst: Reg, + }, + + // ======================================================================== + // Sign/Zero Extension + // ======================================================================== + /// SXTB - Sign extend byte to word/doubleword + Sxtb { + dst_size: OperandSize, + src: Reg, + dst: Reg, + }, + + /// SXTH - Sign extend halfword to word/doubleword + Sxth { + dst_size: OperandSize, + src: Reg, + dst: Reg, + }, + + /// SXTW - Sign extend word to doubleword + Sxtw { src: Reg, dst: Reg }, + + /// UXTB - Zero extend byte (AND with 0xFF) + Uxtb { src: Reg, dst: Reg }, + + /// UXTH - Zero extend halfword (AND with 0xFFFF) + Uxth { src: Reg, dst: Reg }, + + // ======================================================================== + // Control Flow + // ======================================================================== + /// B - Unconditional branch + B { target: Label }, + + /// B.cond - Conditional branch + BCond { cond: Cond, target: Label }, + + /// BL - Branch with link (function call) + Bl { target: CallTarget }, + + /// BLR - Branch to register with link + Blr { target: Reg }, + + /// RET - Return from subroutine + Ret, + + // ======================================================================== + // Floating-Point (Scalar) + // ======================================================================== + /// FMOV - FP move (between FP regs or GP<->FP) + FmovReg { size: FpSize, src: VReg, dst: VReg }, + + /// FMOV - Move from GP to FP register + FmovFromGp { size: FpSize, src: Reg, dst: VReg }, + + /// FMOV - Move from FP to GP register + FmovToGp { size: FpSize, src: VReg, dst: Reg }, + + /// LDR (FP) - Load FP register + LdrFp { + size: FpSize, + addr: MemAddr, + dst: VReg, + }, + + /// LDR (FP) with symbol page offset (used with ADRP for global FP loads) + /// macOS: ldr dst, [base, symbol@PAGEOFF] + /// Linux: ldr dst, [base, :lo12:symbol] + LdrFpSymOffset { + size: FpSize, + sym: Symbol, + base: Reg, + dst: VReg, + }, + + /// STR (FP) - Store FP register + StrFp { + size: FpSize, + src: VReg, + addr: MemAddr, + }, + + /// FADD - FP add + Fadd { + size: FpSize, + src1: VReg, + src2: VReg, + dst: VReg, + }, + + /// FSUB - FP subtract + Fsub { + size: FpSize, + src1: VReg, + src2: VReg, + dst: VReg, + }, + + /// FMUL - FP multiply + Fmul { + size: FpSize, + src1: VReg, + src2: VReg, + dst: VReg, + }, + + /// FDIV - FP divide + Fdiv { + size: FpSize, + src1: VReg, + src2: VReg, + dst: VReg, + }, + + /// FNEG - FP negate + Fneg { size: FpSize, src: VReg, dst: VReg }, + + /// FCMP - FP compare + Fcmp { + size: FpSize, + src1: VReg, + src2: VReg, + }, + + /// FCMP with zero + FcmpZero { size: FpSize, src: VReg }, + + /// SCVTF - Signed int to FP + Scvtf { + int_size: OperandSize, + fp_size: FpSize, + src: Reg, + dst: VReg, + }, + + /// UCVTF - Unsigned int to FP + Ucvtf { + int_size: OperandSize, + fp_size: FpSize, + src: Reg, + dst: VReg, + }, + + /// FCVTZS - FP to signed int (truncate toward zero) + Fcvtzs { + fp_size: FpSize, + int_size: OperandSize, + src: VReg, + dst: Reg, + }, + + /// FCVTZU - FP to unsigned int (truncate toward zero) + Fcvtzu { + fp_size: FpSize, + int_size: OperandSize, + src: VReg, + dst: Reg, + }, + + /// FCVT - FP to FP (size conversion) + Fcvt { + src_size: FpSize, + dst_size: FpSize, + src: VReg, + dst: VReg, + }, + + // ======================================================================== + // Directives (Architecture-Independent) + // ======================================================================== + /// Assembler directives (labels, sections, CFI, .loc, data, etc.) + Directive(Directive), +} + +// ============================================================================ +// EmitAsm Implementation +// ============================================================================ + +impl EmitAsm for Aarch64Inst { + fn emit(&self, target: &Target, out: &mut String) { + match self { + // Data Movement + Aarch64Inst::Mov { size, src, dst } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " mov {}, {}", + dst.name_for_size(sz), + src.format(OperandSize::from_bits(sz)) + ); + } + + Aarch64Inst::Movz { + size, + imm, + shift, + dst, + } => { + let sz = size.bits().max(32); + if *shift == 0 { + let _ = writeln!(out, " movz {}, #{}", dst.name_for_size(sz), imm); + } else { + let _ = writeln!( + out, + " movz {}, #{}, lsl #{}", + dst.name_for_size(sz), + imm, + shift + ); + } + } + + Aarch64Inst::Movk { + size, + imm, + shift, + dst, + } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " movk {}, #{}, lsl #{}", + dst.name_for_size(sz), + imm, + shift + ); + } + + Aarch64Inst::Movn { + size, + imm, + shift, + dst, + } => { + let sz = size.bits().max(32); + if *shift == 0 { + let _ = writeln!(out, " movn {}, #{}", dst.name_for_size(sz), imm); + } else { + let _ = writeln!( + out, + " movn {}, #{}, lsl #{}", + dst.name_for_size(sz), + imm, + shift + ); + } + } + + Aarch64Inst::Ldr { size, addr, dst } => { + let insn = match size { + OperandSize::B8 => "ldrb", + OperandSize::B16 => "ldrh", + _ => "ldr", + }; + let reg = dst.name_for_size(size.bits().max(32)); + let _ = writeln!(out, " {} {}, {}", insn, reg, addr.format()); + } + + Aarch64Inst::Ldrs { + src_size, + dst_size, + addr, + dst, + } => { + let insn = match (src_size, dst_size) { + (OperandSize::B8, OperandSize::B32) => "ldrsb", + (OperandSize::B8, OperandSize::B64) => "ldrsb", + (OperandSize::B16, OperandSize::B32) => "ldrsh", + (OperandSize::B16, OperandSize::B64) => "ldrsh", + (OperandSize::B32, OperandSize::B64) => "ldrsw", + _ => "ldr", + }; + let reg = dst.name_for_size(dst_size.bits()); + let _ = writeln!(out, " {} {}, {}", insn, reg, addr.format()); + } + + Aarch64Inst::Str { size, src, addr } => { + let insn = match size { + OperandSize::B8 => "strb", + OperandSize::B16 => "strh", + _ => "str", + }; + let reg = src.name_for_size(size.bits().max(32)); + let _ = writeln!(out, " {} {}, {}", insn, reg, addr.format()); + } + + Aarch64Inst::Ldp { + size, + addr, + dst1, + dst2, + } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " ldp {}, {}, {}", + dst1.name_for_size(sz), + dst2.name_for_size(sz), + addr.format() + ); + } + + Aarch64Inst::Stp { + size, + src1, + src2, + addr, + } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " stp {}, {}, {}", + src1.name_for_size(sz), + src2.name_for_size(sz), + addr.format() + ); + } + + Aarch64Inst::Adrp { sym, dst } => { + let sym_name = sym.format_for_target(target); + match target.os { + Os::MacOS => { + let _ = writeln!(out, " adrp {}, {}@PAGE", dst.name64(), sym_name); + } + Os::Linux | Os::FreeBSD => { + let _ = writeln!(out, " adrp {}, {}", dst.name64(), sym_name); + } + } + } + + Aarch64Inst::AddSymOffset { sym, base, dst } => { + let sym_name = sym.format_for_target(target); + match target.os { + Os::MacOS => { + let _ = writeln!( + out, + " add {}, {}, {}@PAGEOFF", + dst.name64(), + base.name64(), + sym_name + ); + } + Os::Linux | Os::FreeBSD => { + let _ = writeln!( + out, + " add {}, {}, :lo12:{}", + dst.name64(), + base.name64(), + sym_name + ); + } + } + } + + Aarch64Inst::LdrSymOffset { + size, + sym, + base, + dst, + } => { + let sym_name = sym.format_for_target(target); + let sz = size.bits().max(32); + let ldr_insn = match size { + OperandSize::B8 => "ldrb", + OperandSize::B16 => "ldrh", + OperandSize::B32 => "ldr", + OperandSize::B64 => "ldr", + }; + match target.os { + Os::MacOS => { + let _ = writeln!( + out, + " {} {}, [{}, {}@PAGEOFF]", + ldr_insn, + dst.name_for_size(sz), + base.name64(), + sym_name + ); + } + Os::Linux | Os::FreeBSD => { + let _ = writeln!( + out, + " {} {}, [{}, :lo12:{}]", + ldr_insn, + dst.name_for_size(sz), + base.name64(), + sym_name + ); + } + } + } + + // Integer Arithmetic + Aarch64Inst::Add { + size, + src1, + src2, + dst, + } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " add {}, {}, {}", + dst.name_for_size(sz), + src1.name_for_size(sz), + src2.format(OperandSize::from_bits(sz)) + ); + } + + Aarch64Inst::Sub { + size, + src1, + src2, + dst, + } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " sub {}, {}, {}", + dst.name_for_size(sz), + src1.name_for_size(sz), + src2.format(OperandSize::from_bits(sz)) + ); + } + + Aarch64Inst::Mul { + size, + src1, + src2, + dst, + } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " mul {}, {}, {}", + dst.name_for_size(sz), + src1.name_for_size(sz), + src2.name_for_size(sz) + ); + } + + Aarch64Inst::Sdiv { + size, + src1, + src2, + dst, + } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " sdiv {}, {}, {}", + dst.name_for_size(sz), + src1.name_for_size(sz), + src2.name_for_size(sz) + ); + } + + Aarch64Inst::Udiv { + size, + src1, + src2, + dst, + } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " udiv {}, {}, {}", + dst.name_for_size(sz), + src1.name_for_size(sz), + src2.name_for_size(sz) + ); + } + + Aarch64Inst::Msub { + size, + mul1, + mul2, + sub, + dst, + } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " msub {}, {}, {}, {}", + dst.name_for_size(sz), + mul1.name_for_size(sz), + mul2.name_for_size(sz), + sub.name_for_size(sz) + ); + } + + Aarch64Inst::Neg { size, src, dst } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " neg {}, {}", + dst.name_for_size(sz), + src.name_for_size(sz) + ); + } + + // Bitwise Operations + Aarch64Inst::And { + size, + src1, + src2, + dst, + } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " and {}, {}, {}", + dst.name_for_size(sz), + src1.name_for_size(sz), + src2.format(OperandSize::from_bits(sz)) + ); + } + + Aarch64Inst::Orr { + size, + src1, + src2, + dst, + } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " orr {}, {}, {}", + dst.name_for_size(sz), + src1.name_for_size(sz), + src2.format(OperandSize::from_bits(sz)) + ); + } + + Aarch64Inst::Eor { + size, + src1, + src2, + dst, + } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " eor {}, {}, {}", + dst.name_for_size(sz), + src1.name_for_size(sz), + src2.format(OperandSize::from_bits(sz)) + ); + } + + Aarch64Inst::Mvn { size, src, dst } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " mvn {}, {}", + dst.name_for_size(sz), + src.name_for_size(sz) + ); + } + + Aarch64Inst::Lsl { + size, + src, + amount, + dst, + } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " lsl {}, {}, {}", + dst.name_for_size(sz), + src.name_for_size(sz), + amount.format(OperandSize::from_bits(sz)) + ); + } + + Aarch64Inst::Lsr { + size, + src, + amount, + dst, + } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " lsr {}, {}, {}", + dst.name_for_size(sz), + src.name_for_size(sz), + amount.format(OperandSize::from_bits(sz)) + ); + } + + Aarch64Inst::Asr { + size, + src, + amount, + dst, + } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " asr {}, {}, {}", + dst.name_for_size(sz), + src.name_for_size(sz), + amount.format(OperandSize::from_bits(sz)) + ); + } + + Aarch64Inst::Ror { + size, + src, + amount, + dst, + } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " ror {}, {}, {}", + dst.name_for_size(sz), + src.name_for_size(sz), + amount.format(OperandSize::from_bits(sz)) + ); + } + + Aarch64Inst::Rev { size, src, dst } => { + let sz = size.bits().max(32); + // rev for 32-bit uses w registers, rev for 64-bit uses x registers + let _ = writeln!( + out, + " rev {}, {}", + dst.name_for_size(sz), + src.name_for_size(sz) + ); + } + + Aarch64Inst::Rev16 { size, src, dst } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " rev16 {}, {}", + dst.name_for_size(sz), + src.name_for_size(sz) + ); + } + + // Comparison and Conditional + Aarch64Inst::Cmp { size, src1, src2 } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " cmp {}, {}", + src1.name_for_size(sz), + src2.format(OperandSize::from_bits(sz)) + ); + } + + Aarch64Inst::Tst { size, src1, src2 } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " tst {}, {}", + src1.name_for_size(sz), + src2.format(OperandSize::from_bits(sz)) + ); + } + + Aarch64Inst::Cset { cond, dst } => { + let _ = writeln!(out, " cset {}, {}", dst.name64(), cond.suffix()); + } + + Aarch64Inst::Csel { + size, + cond, + src_true, + src_false, + dst, + } => { + let sz = size.bits().max(32); + let _ = writeln!( + out, + " csel {}, {}, {}, {}", + dst.name_for_size(sz), + src_true.name_for_size(sz), + src_false.name_for_size(sz), + cond.suffix() + ); + } + + // Sign/Zero Extension + Aarch64Inst::Sxtb { dst_size, src, dst } => { + let sz = dst_size.bits().max(32); + let _ = writeln!(out, " sxtb {}, {}", dst.name_for_size(sz), src.name32()); + } + + Aarch64Inst::Sxth { dst_size, src, dst } => { + let sz = dst_size.bits().max(32); + let _ = writeln!(out, " sxth {}, {}", dst.name_for_size(sz), src.name32()); + } + + Aarch64Inst::Sxtw { src, dst } => { + let _ = writeln!(out, " sxtw {}, {}", dst.name64(), src.name32()); + } + + Aarch64Inst::Uxtb { src, dst } => { + let _ = writeln!(out, " uxtb {}, {}", dst.name32(), src.name32()); + } + + Aarch64Inst::Uxth { src, dst } => { + let _ = writeln!(out, " uxth {}, {}", dst.name32(), src.name32()); + } + + // Control Flow + Aarch64Inst::B { target: lbl } => { + let _ = writeln!(out, " b {}", lbl.name()); + } + + Aarch64Inst::BCond { cond, target: lbl } => { + let _ = writeln!(out, " b.{} {}", cond.suffix(), lbl.name()); + } + + Aarch64Inst::Bl { + target: call_target, + } => match call_target { + CallTarget::Direct(sym) => { + let sym_name = sym.format_for_target(target); + let _ = writeln!(out, " bl {}", sym_name); + } + CallTarget::Indirect(reg) => { + let _ = writeln!(out, " blr {}", reg.name64()); + } + }, + + Aarch64Inst::Blr { target: reg } => { + let _ = writeln!(out, " blr {}", reg.name64()); + } + + Aarch64Inst::Ret => { + let _ = writeln!(out, " ret"); + } + + // Floating-Point + Aarch64Inst::FmovReg { size, src, dst } => { + let name = match size { + FpSize::Single => (src.name_s(), dst.name_s()), + FpSize::Double => (src.name_d(), dst.name_d()), + }; + let _ = writeln!(out, " fmov {}, {}", name.1, name.0); + } + + Aarch64Inst::FmovFromGp { size, src, dst } => { + let fp_name = match size { + FpSize::Single => dst.name_s(), + FpSize::Double => dst.name_d(), + }; + let gp_name = match size { + FpSize::Single => src.name32(), + FpSize::Double => src.name64(), + }; + let _ = writeln!(out, " fmov {}, {}", fp_name, gp_name); + } + + Aarch64Inst::FmovToGp { size, src, dst } => { + let fp_name = match size { + FpSize::Single => src.name_s(), + FpSize::Double => src.name_d(), + }; + let gp_name = match size { + FpSize::Single => dst.name32(), + FpSize::Double => dst.name64(), + }; + let _ = writeln!(out, " fmov {}, {}", gp_name, fp_name); + } + + Aarch64Inst::LdrFp { size, addr, dst } => { + let name = match size { + FpSize::Single => dst.name_s(), + FpSize::Double => dst.name_d(), + }; + let _ = writeln!(out, " ldr {}, {}", name, addr.format()); + } + + Aarch64Inst::StrFp { size, src, addr } => { + let name = match size { + FpSize::Single => src.name_s(), + FpSize::Double => src.name_d(), + }; + let _ = writeln!(out, " str {}, {}", name, addr.format()); + } + + Aarch64Inst::LdrFpSymOffset { + size, + sym, + base, + dst, + } => { + let sym_name = sym.format_for_target(target); + let fp_name = match size { + FpSize::Single => dst.name_s(), + FpSize::Double => dst.name_d(), + }; + match target.os { + Os::MacOS => { + let _ = writeln!( + out, + " ldr {}, [{}, {}@PAGEOFF]", + fp_name, + base.name64(), + sym_name + ); + } + Os::Linux | Os::FreeBSD => { + let _ = writeln!( + out, + " ldr {}, [{}, :lo12:{}]", + fp_name, + base.name64(), + sym_name + ); + } + } + } + + Aarch64Inst::Fadd { + size, + src1, + src2, + dst, + } => { + let _ = writeln!( + out, + " fadd {}, {}, {}", + dst.name_for_size(size_bits(*size)), + src1.name_for_size(size_bits(*size)), + src2.name_for_size(size_bits(*size)) + ); + } + + Aarch64Inst::Fsub { + size, + src1, + src2, + dst, + } => { + let _ = writeln!( + out, + " fsub {}, {}, {}", + dst.name_for_size(size_bits(*size)), + src1.name_for_size(size_bits(*size)), + src2.name_for_size(size_bits(*size)) + ); + } + + Aarch64Inst::Fmul { + size, + src1, + src2, + dst, + } => { + let _ = writeln!( + out, + " fmul {}, {}, {}", + dst.name_for_size(size_bits(*size)), + src1.name_for_size(size_bits(*size)), + src2.name_for_size(size_bits(*size)) + ); + } + + Aarch64Inst::Fdiv { + size, + src1, + src2, + dst, + } => { + let _ = writeln!( + out, + " fdiv {}, {}, {}", + dst.name_for_size(size_bits(*size)), + src1.name_for_size(size_bits(*size)), + src2.name_for_size(size_bits(*size)) + ); + } + + Aarch64Inst::Fneg { size, src, dst } => { + let _ = writeln!( + out, + " fneg {}, {}", + dst.name_for_size(size_bits(*size)), + src.name_for_size(size_bits(*size)) + ); + } + + Aarch64Inst::Fcmp { size, src1, src2 } => { + let _ = writeln!( + out, + " fcmp {}, {}", + src1.name_for_size(size_bits(*size)), + src2.name_for_size(size_bits(*size)) + ); + } + + Aarch64Inst::FcmpZero { size, src } => { + let _ = writeln!( + out, + " fcmp {}, #0.0", + src.name_for_size(size_bits(*size)) + ); + } + + Aarch64Inst::Scvtf { + int_size, + fp_size, + src, + dst, + } => { + let fp_name = dst.name_for_size(size_bits(*fp_size)); + let gp_name = src.name_for_size(int_size.bits().max(32)); + let _ = writeln!(out, " scvtf {}, {}", fp_name, gp_name); + } + + Aarch64Inst::Ucvtf { + int_size, + fp_size, + src, + dst, + } => { + let fp_name = dst.name_for_size(size_bits(*fp_size)); + let gp_name = src.name_for_size(int_size.bits().max(32)); + let _ = writeln!(out, " ucvtf {}, {}", fp_name, gp_name); + } + + Aarch64Inst::Fcvtzs { + fp_size, + int_size, + src, + dst, + } => { + let fp_name = src.name_for_size(size_bits(*fp_size)); + let gp_name = dst.name_for_size(int_size.bits().max(32)); + let _ = writeln!(out, " fcvtzs {}, {}", gp_name, fp_name); + } + + Aarch64Inst::Fcvtzu { + fp_size, + int_size, + src, + dst, + } => { + let fp_name = src.name_for_size(size_bits(*fp_size)); + let gp_name = dst.name_for_size(int_size.bits().max(32)); + let _ = writeln!(out, " fcvtzu {}, {}", gp_name, fp_name); + } + + Aarch64Inst::Fcvt { + src_size, + dst_size, + src, + dst, + } => { + let _ = writeln!( + out, + " fcvt {}, {}", + dst.name_for_size(size_bits(*dst_size)), + src.name_for_size(size_bits(*src_size)) + ); + } + + // Directives - delegate to shared implementation + Aarch64Inst::Directive(dir) => { + dir.emit(target, out); + } + } + } +} + +/// Helper to convert FpSize to bits +fn size_bits(size: FpSize) -> u32 { + match size { + FpSize::Single => 32, + FpSize::Double => 64, + } +} + +// ============================================================================ +// Utility Functions +// ============================================================================ + +/// Emit a sequence of LIR instructions to assembly text +pub fn emit_instrs(instrs: &[Aarch64Inst], target: &Target) -> String { + let mut out = String::new(); + for inst in instrs { + inst.emit(target, &mut out); + } + out +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::target::{Arch, Os}; + + fn linux_target() -> Target { + Target::new(Arch::Aarch64, Os::Linux) + } + + fn macos_target() -> Target { + Target::new(Arch::Aarch64, Os::MacOS) + } + + #[test] + fn test_mov_emit() { + let target = linux_target(); + let mut out = String::new(); + + let inst = Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::X1), + dst: Reg::X0, + }; + inst.emit(&target, &mut out); + assert_eq!(out.trim(), "mov x0, x1"); + } + + #[test] + fn test_mov_imm() { + let target = linux_target(); + let mut out = String::new(); + + let inst = Aarch64Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Imm(42), + dst: Reg::X0, + }; + inst.emit(&target, &mut out); + assert_eq!(out.trim(), "mov w0, #42"); + } + + #[test] + fn test_ldr_str() { + let target = linux_target(); + + let mut out = String::new(); + let inst = Aarch64Inst::Ldr { + size: OperandSize::B64, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: -8, + }, + dst: Reg::X0, + }; + inst.emit(&target, &mut out); + assert_eq!(out.trim(), "ldr x0, [x29, #-8]"); + + let mut out = String::new(); + let inst = Aarch64Inst::Str { + size: OperandSize::B8, + src: Reg::X1, + addr: MemAddr::BaseOffset { + base: Reg::X29, + offset: 0, + }, + }; + inst.emit(&target, &mut out); + assert_eq!(out.trim(), "strb w1, [x29]"); + } + + #[test] + fn test_stp_ldp() { + let target = linux_target(); + + // Test stp with SP as base (valid prologue pattern) + let mut out = String::new(); + let inst = Aarch64Inst::Stp { + size: OperandSize::B64, + src1: Reg::X29, + src2: Reg::X30, + addr: MemAddr::PreIndex { + base: Reg::SP, + offset: -16, + }, + }; + inst.emit(&target, &mut out); + assert!(out.contains("stp x29, x30, [sp, #-16]!")); + } + + #[test] + fn test_arithmetic() { + let target = linux_target(); + + let mut out = String::new(); + let inst = Aarch64Inst::Add { + size: OperandSize::B64, + src1: Reg::X0, + src2: GpOperand::Imm(100), + dst: Reg::X0, + }; + inst.emit(&target, &mut out); + assert_eq!(out.trim(), "add x0, x0, #100"); + + let mut out = String::new(); + let inst = Aarch64Inst::Mul { + size: OperandSize::B32, + src1: Reg::X0, + src2: Reg::X1, + dst: Reg::X2, + }; + inst.emit(&target, &mut out); + assert_eq!(out.trim(), "mul w2, w0, w1"); + } + + #[test] + fn test_cmp_cset() { + let target = linux_target(); + + let mut out = String::new(); + let inst = Aarch64Inst::Cmp { + size: OperandSize::B64, + src1: Reg::X0, + src2: GpOperand::Imm(0), + }; + inst.emit(&target, &mut out); + assert_eq!(out.trim(), "cmp x0, #0"); + + let mut out = String::new(); + let inst = Aarch64Inst::Cset { + cond: Cond::Eq, + dst: Reg::X0, + }; + inst.emit(&target, &mut out); + assert_eq!(out.trim(), "cset x0, eq"); + } + + #[test] + fn test_branches() { + let target = linux_target(); + + let mut out = String::new(); + let inst = Aarch64Inst::B { + target: Label::new("main", 1), + }; + inst.emit(&target, &mut out); + assert_eq!(out.trim(), "b .Lmain_1"); + + let mut out = String::new(); + let inst = Aarch64Inst::BCond { + cond: Cond::Ne, + target: Label::new("main", 2), + }; + inst.emit(&target, &mut out); + assert_eq!(out.trim(), "b.ne .Lmain_2"); + } + + #[test] + fn test_fp_instructions() { + let target = linux_target(); + + let mut out = String::new(); + let inst = Aarch64Inst::Fadd { + size: FpSize::Double, + src1: VReg::V0, + src2: VReg::V1, + dst: VReg::V2, + }; + inst.emit(&target, &mut out); + assert_eq!(out.trim(), "fadd d2, d0, d1"); + + let mut out = String::new(); + let inst = Aarch64Inst::Scvtf { + int_size: OperandSize::B32, + fp_size: FpSize::Double, + src: Reg::X0, + dst: VReg::V0, + }; + inst.emit(&target, &mut out); + assert_eq!(out.trim(), "scvtf d0, w0"); + } + + #[test] + fn test_symbol_macos_prefix() { + let linux = linux_target(); + let macos = macos_target(); + + // Test BL with symbol + let mut out = String::new(); + let inst = Aarch64Inst::Bl { + target: CallTarget::Direct(Symbol::global("printf")), + }; + inst.emit(&linux, &mut out); + assert!(out.contains("bl printf")); + + let mut out = String::new(); + inst.emit(&macos, &mut out); + assert!(out.contains("bl _printf")); + } + + #[test] + fn test_condition_inverse() { + assert_eq!(Cond::Eq.inverse(), Cond::Ne); + assert_eq!(Cond::Lt.inverse(), Cond::Ge); + assert_eq!(Cond::Hi.inverse(), Cond::Ls); + } + + #[test] + fn test_emit_instrs() { + let target = linux_target(); + + let instrs = vec![ + Aarch64Inst::Stp { + size: OperandSize::B64, + src1: Reg::X29, + src2: Reg::X30, + addr: MemAddr::PreIndex { + base: Reg::X29, + offset: -16, + }, + }, + Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Imm(42), + dst: Reg::X0, + }, + Aarch64Inst::Ret, + ]; + + let out = emit_instrs(&instrs, &target); + assert!(out.contains("stp x29, x30")); + assert!(out.contains("mov x0, #42")); + assert!(out.contains("ret")); + } +} diff --git a/cc/arch/aarch64/macros.rs b/cc/arch/aarch64/macros.rs new file mode 100644 index 000000000..129c78b78 --- /dev/null +++ b/cc/arch/aarch64/macros.rs @@ -0,0 +1,62 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// AArch64-specific predefined macros +// + +/// Get AArch64-specific predefined macros +pub fn get_macros() -> Vec<(&'static str, Option<&'static str>)> { + vec![ + // Architecture identification + ("__aarch64__", Some("1")), + ("__arm64__", Some("1")), // macOS uses this + ("__ARM_ARCH", Some("8")), + ("__ARM_64BIT_STATE", Some("1")), + ("__ARM_ARCH_ISA_A64", Some("1")), + // Byte order (AArch64 is little-endian by default) + ("__BYTE_ORDER__", Some("__ORDER_LITTLE_ENDIAN__")), + ("__ORDER_LITTLE_ENDIAN__", Some("1234")), + ("__ORDER_BIG_ENDIAN__", Some("4321")), + ("__LITTLE_ENDIAN__", Some("1")), + ("__AARCH64EL__", Some("1")), + // Register size + ("__REGISTER_PREFIX__", Some("")), + ("__USER_LABEL_PREFIX__", Some("")), + // Long double is 128-bit IEEE quad precision on AArch64 + ("__SIZEOF_LONG_DOUBLE__", Some("16")), + ("__LDBL_MANT_DIG__", Some("113")), + ("__LDBL_DIG__", Some("33")), + // char is unsigned on ARM by default + ("__CHAR_UNSIGNED__", Some("1")), + // SIMD + ("__ARM_NEON", Some("1")), + ("__ARM_NEON__", Some("1")), + // FP support + ("__ARM_FP", Some("14")), // VFPv3 compatible + ("__ARM_FP16_FORMAT_IEEE", Some("1")), + ("__ARM_FEATURE_FMA", Some("1")), + // Atomic primitives + ("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1", Some("1")), + ("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2", Some("1")), + ("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4", Some("1")), + ("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8", Some("1")), + // Lock-free atomics + ("__GCC_ATOMIC_BOOL_LOCK_FREE", Some("2")), + ("__GCC_ATOMIC_CHAR_LOCK_FREE", Some("2")), + ("__GCC_ATOMIC_SHORT_LOCK_FREE", Some("2")), + ("__GCC_ATOMIC_INT_LOCK_FREE", Some("2")), + ("__GCC_ATOMIC_LONG_LOCK_FREE", Some("2")), + ("__GCC_ATOMIC_LLONG_LOCK_FREE", Some("2")), + ("__GCC_ATOMIC_POINTER_LOCK_FREE", Some("2")), + // ARM-specific features + ("__ARM_SIZEOF_WCHAR_T", Some("4")), + ("__ARM_SIZEOF_MINIMAL_ENUM", Some("4")), + ("__ARM_FEATURE_UNALIGNED", Some("1")), + ("__ARM_FEATURE_CLZ", Some("1")), + ] +} diff --git a/cc/arch/aarch64/mod.rs b/cc/arch/aarch64/mod.rs new file mode 100644 index 000000000..342feccc6 --- /dev/null +++ b/cc/arch/aarch64/mod.rs @@ -0,0 +1,16 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// AArch64 architecture support +// + +pub mod codegen; +pub mod lir; +pub mod macros; + +pub use macros::get_macros; diff --git a/cc/arch/codegen.rs b/cc/arch/codegen.rs new file mode 100644 index 000000000..fc0610afb --- /dev/null +++ b/cc/arch/codegen.rs @@ -0,0 +1,71 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Architecture-independent code generation interface +// + +use crate::ir::Module; +use crate::target::Target; + +/// pcc version string for assembly header +pub const PCC_VERSION: &str = env!("CARGO_PKG_VERSION"); + +/// Generate header comment lines for assembly output (GCC-style) +/// Returns a vector of comment strings to be emitted +pub fn generate_header_comments(target: &Target) -> Vec { + let mut comments = Vec::new(); + + // Compiler identification + comments.push(format!("Generated by pcc {}", PCC_VERSION)); + + // Target triple (normalized format) + let os_triple = match target.os { + crate::target::Os::Linux => "unknown-linux-gnu", + crate::target::Os::MacOS => "apple-darwin", + crate::target::Os::FreeBSD => "unknown-freebsd", + }; + comments.push(format!("Target: {}-{}", target.arch, os_triple)); + + // ABI info + comments.push(format!( + "ABI: LP64, pointer={}bit, long={}bit, char={}", + target.pointer_width, + target.long_width, + if target.char_signed { + "signed" + } else { + "unsigned" + } + )); + + comments +} + +/// Trait for architecture-specific code generators +pub trait CodeGenerator { + /// Generate assembly code for the given IR module + fn generate(&mut self, module: &Module) -> String; + + /// Set whether to emit basic unwind tables (cfi_startproc/cfi_endproc) + fn set_emit_unwind_tables(&mut self, emit: bool); +} + +/// Create a code generator for the given target with options +pub fn create_codegen_with_options( + target: Target, + emit_unwind_tables: bool, +) -> Box { + use crate::target::Arch; + + let mut codegen: Box = match target.arch { + Arch::X86_64 => Box::new(super::x86_64::codegen::X86_64CodeGen::new(target)), + Arch::Aarch64 => Box::new(super::aarch64::codegen::Aarch64CodeGen::new(target)), + }; + codegen.set_emit_unwind_tables(emit_unwind_tables); + codegen +} diff --git a/cc/arch/lir.rs b/cc/arch/lir.rs new file mode 100644 index 000000000..b1d8e6a6c --- /dev/null +++ b/cc/arch/lir.rs @@ -0,0 +1,752 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Low-level Intermediate Representation (LIR) - Shared Types +// +// Common types used across architecture-specific LIR implementations. +// Each architecture (x86-64, AArch64) has its own LIR instruction enum +// that uses these shared operand and size types. +// + +use crate::target::{Os, Target}; +use std::fmt::{self, Write}; + +// ============================================================================ +// Operand Size +// ============================================================================ + +/// Size specifier for operations (in bits) +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum OperandSize { + /// 8-bit (byte) + B8, + /// 16-bit (word on x86, halfword on ARM) + B16, + /// 32-bit (dword on x86, word on ARM) + B32, + /// 64-bit (qword on x86, doubleword on ARM) + B64, +} + +impl OperandSize { + /// Create from bit count + pub fn from_bits(bits: u32) -> Self { + match bits { + 0..=8 => OperandSize::B8, + 9..=16 => OperandSize::B16, + 17..=32 => OperandSize::B32, + _ => OperandSize::B64, + } + } + + /// Get bit count + pub fn bits(&self) -> u32 { + match self { + OperandSize::B8 => 8, + OperandSize::B16 => 16, + OperandSize::B32 => 32, + OperandSize::B64 => 64, + } + } + + /// x86-64 AT&T syntax suffix (b, w, l, q) + pub fn x86_suffix(&self) -> &'static str { + match self { + OperandSize::B8 => "b", + OperandSize::B16 => "w", + OperandSize::B32 => "l", + OperandSize::B64 => "q", + } + } +} + +impl fmt::Display for OperandSize { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.bits()) + } +} + +// ============================================================================ +// Floating-Point Size +// ============================================================================ + +/// Floating-point size specifier +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum FpSize { + /// 32-bit single precision (float) + Single, + /// 64-bit double precision (double) + Double, +} + +impl FpSize { + /// Create from bit count + pub fn from_bits(bits: u32) -> Self { + if bits <= 32 { + FpSize::Single + } else { + FpSize::Double + } + } + + /// x86-64 SSE instruction suffix (ss for single, sd for double) + pub fn x86_suffix(&self) -> &'static str { + match self { + FpSize::Single => "ss", + FpSize::Double => "sd", + } + } + + /// x86-64 packed suffix (ps for single, pd for double) + pub fn x86_packed_suffix(&self) -> &'static str { + match self { + FpSize::Single => "ps", + FpSize::Double => "pd", + } + } +} + +impl fmt::Display for FpSize { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + FpSize::Single => write!(f, "f32"), + FpSize::Double => write!(f, "f64"), + } + } +} + +// ============================================================================ +// Labels and Symbols +// ============================================================================ + +/// Label for local jumps (basic block targets within a function) +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Label { + /// Function name (for unique label generation) + pub func_name: String, + /// Basic block ID within the function + pub block_id: u32, +} + +impl Label { + pub fn new(func_name: impl Into, block_id: u32) -> Self { + Self { + func_name: func_name.into(), + block_id, + } + } + + /// Format as assembly label name + pub fn name(&self) -> String { + format!(".L{}_{}", self.func_name, self.block_id) + } +} + +impl fmt::Display for Label { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.name()) + } +} + +/// Global symbol reference (function, global variable, string literal) +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Symbol { + /// Symbol name + pub name: String, + /// Is this a local symbol (starts with '.') + pub is_local: bool, +} + +impl Symbol { + pub fn local(name: impl Into) -> Self { + Self { + name: name.into(), + is_local: true, + } + } + + pub fn global(name: impl Into) -> Self { + Self { + name: name.into(), + is_local: false, + } + } + + /// Format symbol name for the target OS + /// macOS prepends underscore to external symbols + pub fn format_for_target(&self, target: &Target) -> String { + if self.is_local { + // Local symbols don't get underscore prefix + self.name.clone() + } else { + match target.os { + Os::MacOS => format!("_{}", self.name), + Os::Linux | Os::FreeBSD => self.name.clone(), + } + } + } +} + +impl fmt::Display for Symbol { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.name) + } +} + +// ============================================================================ +// Symbol Type (for ELF .type directive) +// ============================================================================ + +/// Symbol type for ELF .type directive +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum SymbolType { + /// @function + Function, + /// @object + Object, +} + +impl SymbolType { + pub fn as_str(&self) -> &'static str { + match self { + SymbolType::Function => "@function", + SymbolType::Object => "@object", + } + } +} + +// ============================================================================ +// EmitAsm Trait +// ============================================================================ + +/// Trait for LIR instructions that can be emitted to assembly text +pub trait EmitAsm { + /// Emit assembly text for this instruction + fn emit(&self, target: &Target, out: &mut String); +} + +// ============================================================================ +// Assembler Directives (Architecture-Independent) +// ============================================================================ + +/// Assembler directives that are common across architectures. +/// These control the assembler behavior, emit data, or provide metadata. +#[derive(Debug, Clone, PartialEq)] +pub enum Directive { + // ======================================================================== + // Labels + // ======================================================================== + /// Local label for basic blocks within a function + BlockLabel(Label), + + /// Global/function label (emits "name:" or "_name:" on macOS) + GlobalLabel(Symbol), + + // ======================================================================== + // Sections + // ======================================================================== + /// Switch to text section (.text or .section __TEXT,__text) + Text, + + /// Switch to data section (.data or .section __DATA,__data) + Data, + + /// Switch to read-only data section (.section .rodata or __TEXT,__cstring) + Rodata, + + // ======================================================================== + // Symbol Visibility and Attributes + // ======================================================================== + /// .globl symbol - mark symbol as globally visible + Global(Symbol), + + /// .type symbol, @function/@object (ELF only) + Type { sym: Symbol, kind: SymbolType }, + + /// .size symbol, size (ELF only) + Size { sym: Symbol, size: u32 }, + + /// .comm symbol, size, align - allocate common (BSS) storage + Comm { sym: Symbol, size: u32, align: u32 }, + + // ======================================================================== + // Alignment + // ======================================================================== + /// .p2align power (macOS) or .align bytes (Linux) + /// Stores power of 2, emits appropriately for platform + Align(u32), + + // ======================================================================== + // Data Emission + // ======================================================================== + /// .zero N - emit N zero bytes + Zero(u32), + + /// .byte value - emit 8-bit value + Byte(i64), + + /// .short/.value value - emit 16-bit value + Short(i64), + + /// .long value - emit 32-bit value + Long(i64), + + /// .quad value - emit 64-bit value + Quad(i64), + + /// .asciz "string" - emit null-terminated string + Asciz(String), + + // ======================================================================== + // CFI (Call Frame Information) Directives + // ======================================================================== + /// .cfi_startproc - begin procedure CFI + CfiStartProc, + + /// .cfi_endproc - end procedure CFI + CfiEndProc, + + /// .cfi_def_cfa register, offset - set CFA to register + offset + CfiDefCfa { reg: String, offset: i32 }, + + /// .cfi_def_cfa_offset offset - set CFA offset from current register + CfiDefCfaOffset(i32), + + /// .cfi_offset register, offset - register saved at CFA+offset + CfiOffset { reg: String, offset: i32 }, + + /// .cfi_def_cfa_register register - CFA is now based on this register + CfiDefCfaRegister(String), + + // ======================================================================== + // Debug Information + // ======================================================================== + /// .file index "path" - declare source file for debug info + File { index: u32, path: String }, + + /// .loc file line column - source location for debug info + Loc { file: u32, line: u32, col: u32 }, + + // ======================================================================== + // Misc + // ======================================================================== + /// Assembly comment (prefixed with #) + Comment(String), + + /// Blank line for readability + Blank, + + /// Raw assembly line (escape hatch for special cases) + Raw(String), +} + +impl Directive { + // Convenience constructors + + pub fn global_label(name: impl Into) -> Self { + Directive::GlobalLabel(Symbol::global(name)) + } + + pub fn local_label(name: impl Into) -> Self { + Directive::GlobalLabel(Symbol::local(name)) + } + + pub fn loc(file: u32, line: u32, col: u32) -> Self { + Directive::Loc { file, line, col } + } + + pub fn global(name: impl Into) -> Self { + Directive::Global(Symbol::global(name)) + } + + pub fn type_func(name: impl Into) -> Self { + Directive::Type { + sym: Symbol::global(name), + kind: SymbolType::Function, + } + } + + pub fn type_object(name: impl Into) -> Self { + Directive::Type { + sym: Symbol::global(name), + kind: SymbolType::Object, + } + } + + pub fn size(name: impl Into, size: u32) -> Self { + Directive::Size { + sym: Symbol::global(name), + size, + } + } + + pub fn comm(name: impl Into, size: u32, align: u32) -> Self { + Directive::Comm { + sym: Symbol::global(name), + size, + align, + } + } + + pub fn file(index: u32, path: impl Into) -> Self { + Directive::File { + index, + path: path.into(), + } + } + + pub fn cfi_offset(reg: impl Into, offset: i32) -> Self { + Directive::CfiOffset { + reg: reg.into(), + offset, + } + } + + pub fn cfi_def_cfa_register(reg: impl Into) -> Self { + Directive::CfiDefCfaRegister(reg.into()) + } + + pub fn cfi_def_cfa(reg: impl Into, offset: i32) -> Self { + Directive::CfiDefCfa { + reg: reg.into(), + offset, + } + } +} + +impl EmitAsm for Directive { + fn emit(&self, target: &Target, out: &mut String) { + match self { + // Labels + Directive::BlockLabel(lbl) => { + let _ = writeln!(out, "{}:", lbl.name()); + } + Directive::GlobalLabel(sym) => { + let _ = writeln!(out, "{}:", sym.format_for_target(target)); + } + + // Sections + Directive::Text => match target.os { + Os::MacOS => { + let _ = writeln!(out, ".section __TEXT,__text"); + } + Os::Linux | Os::FreeBSD => { + let _ = writeln!(out, ".text"); + } + }, + Directive::Data => match target.os { + Os::MacOS => { + let _ = writeln!(out, ".section __DATA,__data"); + } + Os::Linux | Os::FreeBSD => { + let _ = writeln!(out, ".data"); + } + }, + Directive::Rodata => match target.os { + Os::MacOS => { + let _ = writeln!(out, ".section __TEXT,__cstring,cstring_literals"); + } + Os::Linux | Os::FreeBSD => { + let _ = writeln!(out, ".section .rodata"); + } + }, + + // Symbol visibility + Directive::Global(sym) => { + let _ = writeln!(out, ".globl {}", sym.format_for_target(target)); + } + Directive::Type { sym, kind } => { + // ELF only - skip on macOS + if !matches!(target.os, Os::MacOS) { + let _ = writeln!(out, ".type {}, {}", sym.name, kind.as_str()); + } + } + Directive::Size { sym, size } => { + // ELF only - skip on macOS + if !matches!(target.os, Os::MacOS) { + let _ = writeln!(out, ".size {}, {}", sym.name, size); + } + } + Directive::Comm { sym, size, align } => { + let _ = writeln!( + out, + ".comm {},{},{}", + sym.format_for_target(target), + size, + align + ); + } + + // Alignment + Directive::Align(power) => match target.os { + Os::MacOS => { + let _ = writeln!(out, ".p2align {}", power); + } + Os::Linux | Os::FreeBSD => { + // Linux .align takes bytes, not power of 2 + let _ = writeln!(out, ".align {}", 1u32 << power); + } + }, + + // Data emission + Directive::Zero(n) => { + let _ = writeln!(out, " .zero {}", n); + } + Directive::Byte(v) => { + let _ = writeln!(out, " .byte {}", v); + } + Directive::Short(v) => { + let _ = writeln!(out, " .short {}", v); + } + Directive::Long(v) => { + let _ = writeln!(out, " .long {}", v); + } + Directive::Quad(v) => { + let _ = writeln!(out, " .quad {}", v); + } + Directive::Asciz(s) => { + let _ = writeln!(out, " .asciz \"{}\"", s); + } + + // CFI directives + Directive::CfiStartProc => { + let _ = writeln!(out, " .cfi_startproc"); + } + Directive::CfiEndProc => { + let _ = writeln!(out, " .cfi_endproc"); + } + Directive::CfiDefCfa { reg, offset } => { + let _ = writeln!(out, " .cfi_def_cfa {}, {}", reg, offset); + } + Directive::CfiDefCfaOffset(offset) => { + let _ = writeln!(out, " .cfi_def_cfa_offset {}", offset); + } + Directive::CfiOffset { reg, offset } => { + let _ = writeln!(out, " .cfi_offset {}, {}", reg, offset); + } + Directive::CfiDefCfaRegister(reg) => { + let _ = writeln!(out, " .cfi_def_cfa_register {}", reg); + } + + // Debug info + Directive::File { index, path } => { + let _ = writeln!(out, " .file {} \"{}\"", index, path); + } + Directive::Loc { file, line, col } => { + let _ = writeln!(out, " .loc {} {} {}", file, line, col); + } + + // Misc + Directive::Comment(text) => { + let _ = writeln!(out, "# {}", text); + } + Directive::Blank => { + let _ = writeln!(out); + } + Directive::Raw(text) => { + let _ = writeln!(out, "{}", text); + } + } + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::target::Arch; + + #[test] + fn test_operand_size() { + assert_eq!(OperandSize::from_bits(1), OperandSize::B8); + assert_eq!(OperandSize::from_bits(8), OperandSize::B8); + assert_eq!(OperandSize::from_bits(16), OperandSize::B16); + assert_eq!(OperandSize::from_bits(32), OperandSize::B32); + assert_eq!(OperandSize::from_bits(64), OperandSize::B64); + + assert_eq!(OperandSize::B8.bits(), 8); + + assert_eq!(OperandSize::B8.x86_suffix(), "b"); + assert_eq!(OperandSize::B16.x86_suffix(), "w"); + assert_eq!(OperandSize::B32.x86_suffix(), "l"); + assert_eq!(OperandSize::B64.x86_suffix(), "q"); + } + + #[test] + fn test_fp_size() { + assert_eq!(FpSize::from_bits(32), FpSize::Single); + assert_eq!(FpSize::from_bits(64), FpSize::Double); + + assert_eq!(FpSize::Single.x86_suffix(), "ss"); + assert_eq!(FpSize::Double.x86_suffix(), "sd"); + } + + #[test] + fn test_label() { + let label = Label::new("main", 5); + assert_eq!(label.name(), ".Lmain_5"); + } + + #[test] + fn test_symbol_formatting() { + let sym = Symbol::global("printf"); + + let linux = Target::new(Arch::X86_64, Os::Linux); + let macos = Target::new(Arch::X86_64, Os::MacOS); + + assert_eq!(sym.format_for_target(&linux), "printf"); + assert_eq!(sym.format_for_target(&macos), "_printf"); + + // Local symbols don't get underscore + let local = Symbol::local(".LC0"); + assert_eq!(local.format_for_target(&macos), ".LC0"); + } + + #[test] + fn test_directive_loc() { + let target = Target::new(Arch::X86_64, Os::Linux); + let dir = Directive::loc(1, 42, 5); + let mut out = String::new(); + dir.emit(&target, &mut out); + assert_eq!(out, " .loc 1 42 5\n"); + } + + #[test] + fn test_directive_global_label() { + let linux = Target::new(Arch::X86_64, Os::Linux); + let macos = Target::new(Arch::X86_64, Os::MacOS); + let dir = Directive::global_label("my_func"); + + let mut out = String::new(); + dir.emit(&linux, &mut out); + assert_eq!(out, "my_func:\n"); + + let mut out = String::new(); + dir.emit(&macos, &mut out); + assert_eq!(out, "_my_func:\n"); + } + + #[test] + fn test_directive_sections() { + let linux = Target::new(Arch::X86_64, Os::Linux); + let macos = Target::new(Arch::X86_64, Os::MacOS); + + // Text section + let mut out = String::new(); + Directive::Text.emit(&linux, &mut out); + assert_eq!(out, ".text\n"); + + let mut out = String::new(); + Directive::Text.emit(&macos, &mut out); + assert_eq!(out, ".section __TEXT,__text\n"); + + // Data section + let mut out = String::new(); + Directive::Data.emit(&linux, &mut out); + assert_eq!(out, ".data\n"); + + let mut out = String::new(); + Directive::Data.emit(&macos, &mut out); + assert_eq!(out, ".section __DATA,__data\n"); + } + + #[test] + fn test_directive_global() { + let linux = Target::new(Arch::X86_64, Os::Linux); + let macos = Target::new(Arch::X86_64, Os::MacOS); + + let mut out = String::new(); + Directive::global("main").emit(&linux, &mut out); + assert_eq!(out, ".globl main\n"); + + let mut out = String::new(); + Directive::global("main").emit(&macos, &mut out); + assert_eq!(out, ".globl _main\n"); + } + + #[test] + fn test_directive_type_elf_only() { + let linux = Target::new(Arch::X86_64, Os::Linux); + let macos = Target::new(Arch::X86_64, Os::MacOS); + + let mut out = String::new(); + Directive::type_func("main").emit(&linux, &mut out); + assert_eq!(out, ".type main, @function\n"); + + // Should be empty on macOS + let mut out = String::new(); + Directive::type_func("main").emit(&macos, &mut out); + assert_eq!(out, ""); + } + + #[test] + fn test_directive_align() { + let linux = Target::new(Arch::X86_64, Os::Linux); + let macos = Target::new(Arch::X86_64, Os::MacOS); + + // Align to 16 bytes (power=4, 2^4=16) + let mut out = String::new(); + Directive::Align(4).emit(&linux, &mut out); + assert_eq!(out, ".align 16\n"); + + let mut out = String::new(); + Directive::Align(4).emit(&macos, &mut out); + assert_eq!(out, ".p2align 4\n"); + } + + #[test] + fn test_directive_data_emission() { + let target = Target::new(Arch::X86_64, Os::Linux); + + let mut out = String::new(); + Directive::Zero(16).emit(&target, &mut out); + assert_eq!(out, " .zero 16\n"); + + let mut out = String::new(); + Directive::Byte(42).emit(&target, &mut out); + assert_eq!(out, " .byte 42\n"); + + let mut out = String::new(); + Directive::Long(0x12345678).emit(&target, &mut out); + assert_eq!(out, " .long 305419896\n"); + } + + #[test] + fn test_directive_cfi() { + let target = Target::new(Arch::X86_64, Os::Linux); + + let mut out = String::new(); + Directive::CfiStartProc.emit(&target, &mut out); + assert_eq!(out, " .cfi_startproc\n"); + + let mut out = String::new(); + Directive::CfiDefCfaOffset(16).emit(&target, &mut out); + assert_eq!(out, " .cfi_def_cfa_offset 16\n"); + + let mut out = String::new(); + Directive::cfi_offset("%rbp", -16).emit(&target, &mut out); + assert_eq!(out, " .cfi_offset %rbp, -16\n"); + } + + #[test] + fn test_directive_comm() { + let linux = Target::new(Arch::X86_64, Os::Linux); + let macos = Target::new(Arch::X86_64, Os::MacOS); + + let mut out = String::new(); + Directive::comm("my_var", 8, 8).emit(&linux, &mut out); + assert_eq!(out, ".comm my_var,8,8\n"); + + let mut out = String::new(); + Directive::comm("my_var", 8, 8).emit(&macos, &mut out); + assert_eq!(out, ".comm _my_var,8,8\n"); + } +} diff --git a/cc/arch/mod.rs b/cc/arch/mod.rs new file mode 100644 index 000000000..36282129f --- /dev/null +++ b/cc/arch/mod.rs @@ -0,0 +1,120 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Architecture-specific predefined macros and code generators +// + +pub mod aarch64; +pub mod codegen; +pub mod lir; +pub mod x86_64; + +use crate::target::{Arch, Target}; + +/// Get architecture-specific predefined macros as (name, value) pairs +pub fn get_arch_macros(target: &Target) -> Vec<(&'static str, Option<&'static str>)> { + let mut macros = Vec::new(); + + // Common architecture macros based on type sizes + macros.push(("__CHAR_BIT__", Some("8"))); + + // Pointer size + if target.pointer_width == 64 { + macros.push(("__LP64__", Some("1"))); + macros.push(("_LP64", Some("1"))); + macros.push(("__SIZEOF_POINTER__", Some("8"))); + } else { + macros.push(("__ILP32__", Some("1"))); + macros.push(("__SIZEOF_POINTER__", Some("4"))); + } + + // Type sizes + macros.push(("__SIZEOF_SHORT__", Some("2"))); + macros.push(("__SIZEOF_INT__", Some("4"))); + + if target.long_width == 64 { + macros.push(("__SIZEOF_LONG__", Some("8"))); + } else { + macros.push(("__SIZEOF_LONG__", Some("4"))); + } + + macros.push(("__SIZEOF_LONG_LONG__", Some("8"))); + macros.push(("__SIZEOF_FLOAT__", Some("4"))); + macros.push(("__SIZEOF_DOUBLE__", Some("8"))); + + // Signedness + if target.char_signed { + macros.push(("__CHAR_UNSIGNED__", None)); // Not defined + } else { + macros.push(("__CHAR_UNSIGNED__", Some("1"))); + } + + // Architecture-specific + match target.arch { + Arch::X86_64 => { + macros.extend(x86_64::get_macros()); + } + Arch::Aarch64 => { + macros.extend(aarch64::get_macros()); + } + } + + macros +} + +/// Get type limit macros (for compatibility) +pub fn get_limit_macros(target: &Target) -> Vec<(&'static str, &'static str)> { + let mut macros = Vec::new(); + + // Character limits + macros.push(("__SCHAR_MAX__", "127")); + macros.push(("__SHRT_MAX__", "32767")); + macros.push(("__INT_MAX__", "2147483647")); + + if target.long_width == 64 { + macros.push(("__LONG_MAX__", "9223372036854775807L")); + } else { + macros.push(("__LONG_MAX__", "2147483647L")); + } + + macros.push(("__LONG_LONG_MAX__", "9223372036854775807LL")); + + // Width macros + macros.push(("__SCHAR_WIDTH__", "8")); + macros.push(("__SHRT_WIDTH__", "16")); + macros.push(("__INT_WIDTH__", "32")); + + if target.long_width == 64 { + macros.push(("__LONG_WIDTH__", "64")); + } else { + macros.push(("__LONG_WIDTH__", "32")); + } + + macros.push(("__LLONG_WIDTH__", "64")); + + // Size type + if target.pointer_width == 64 { + macros.push(("__SIZE_MAX__", "18446744073709551615UL")); + macros.push(("__SIZE_WIDTH__", "64")); + macros.push(("__PTRDIFF_MAX__", "9223372036854775807L")); + macros.push(("__PTRDIFF_WIDTH__", "64")); + macros.push(("__INTPTR_MAX__", "9223372036854775807L")); + macros.push(("__INTPTR_WIDTH__", "64")); + macros.push(("__UINTPTR_MAX__", "18446744073709551615UL")); + } else { + macros.push(("__SIZE_MAX__", "4294967295U")); + macros.push(("__SIZE_WIDTH__", "32")); + macros.push(("__PTRDIFF_MAX__", "2147483647")); + macros.push(("__PTRDIFF_WIDTH__", "32")); + macros.push(("__INTPTR_MAX__", "2147483647")); + macros.push(("__INTPTR_WIDTH__", "32")); + macros.push(("__UINTPTR_MAX__", "4294967295U")); + } + + macros +} diff --git a/cc/arch/x86_64/codegen.rs b/cc/arch/x86_64/codegen.rs new file mode 100644 index 000000000..cf52771d1 --- /dev/null +++ b/cc/arch/x86_64/codegen.rs @@ -0,0 +1,5145 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// x86-64 Code Generator +// Converts IR to x86-64 assembly (AT&T syntax) +// +// Uses linear scan register allocation and System V AMD64 ABI. +// + +use crate::arch::codegen::CodeGenerator; +#[allow(unused_imports)] +use crate::arch::lir::{Directive, FpSize, Label, OperandSize, Symbol}; +#[allow(unused_imports)] +use crate::arch::x86_64::lir::{ + CallTarget, GpOperand, IntCC, MemAddr, ShiftCount, X86Inst, XmmOperand, +}; +use crate::ir::{Function, Initializer, Instruction, Module, Opcode, Pseudo, PseudoId, PseudoKind}; +use crate::target::Target; +use crate::types::{Type, TypeModifiers}; +use std::collections::HashMap; + +// ============================================================================ +// x86-64 Register Definitions +// ============================================================================ + +/// x86-64 physical registers +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Reg { + // 64-bit general purpose registers + Rax, + Rbx, + Rcx, + Rdx, + Rsi, + Rdi, + Rbp, + Rsp, + R8, + R9, + R10, + R11, + R12, + R13, + R14, + R15, +} + +impl Reg { + /// Get AT&T syntax name for 64-bit register + pub fn name64(&self) -> &'static str { + match self { + Reg::Rax => "%rax", + Reg::Rbx => "%rbx", + Reg::Rcx => "%rcx", + Reg::Rdx => "%rdx", + Reg::Rsi => "%rsi", + Reg::Rdi => "%rdi", + Reg::Rbp => "%rbp", + Reg::Rsp => "%rsp", + Reg::R8 => "%r8", + Reg::R9 => "%r9", + Reg::R10 => "%r10", + Reg::R11 => "%r11", + Reg::R12 => "%r12", + Reg::R13 => "%r13", + Reg::R14 => "%r14", + Reg::R15 => "%r15", + } + } + + /// Get AT&T syntax name for 32-bit register + pub fn name32(&self) -> &'static str { + match self { + Reg::Rax => "%eax", + Reg::Rbx => "%ebx", + Reg::Rcx => "%ecx", + Reg::Rdx => "%edx", + Reg::Rsi => "%esi", + Reg::Rdi => "%edi", + Reg::Rbp => "%ebp", + Reg::Rsp => "%esp", + Reg::R8 => "%r8d", + Reg::R9 => "%r9d", + Reg::R10 => "%r10d", + Reg::R11 => "%r11d", + Reg::R12 => "%r12d", + Reg::R13 => "%r13d", + Reg::R14 => "%r14d", + Reg::R15 => "%r15d", + } + } + + /// Get AT&T syntax name for 16-bit register + pub fn name16(&self) -> &'static str { + match self { + Reg::Rax => "%ax", + Reg::Rbx => "%bx", + Reg::Rcx => "%cx", + Reg::Rdx => "%dx", + Reg::Rsi => "%si", + Reg::Rdi => "%di", + Reg::Rbp => "%bp", + Reg::Rsp => "%sp", + Reg::R8 => "%r8w", + Reg::R9 => "%r9w", + Reg::R10 => "%r10w", + Reg::R11 => "%r11w", + Reg::R12 => "%r12w", + Reg::R13 => "%r13w", + Reg::R14 => "%r14w", + Reg::R15 => "%r15w", + } + } + + /// Get AT&T syntax name for 8-bit register (low byte) + pub fn name8(&self) -> &'static str { + match self { + Reg::Rax => "%al", + Reg::Rbx => "%bl", + Reg::Rcx => "%cl", + Reg::Rdx => "%dl", + Reg::Rsi => "%sil", + Reg::Rdi => "%dil", + Reg::Rbp => "%bpl", + Reg::Rsp => "%spl", + Reg::R8 => "%r8b", + Reg::R9 => "%r9b", + Reg::R10 => "%r10b", + Reg::R11 => "%r11b", + Reg::R12 => "%r12b", + Reg::R13 => "%r13b", + Reg::R14 => "%r14b", + Reg::R15 => "%r15b", + } + } + + /// Get register name for a given bit size + pub fn name_for_size(&self, bits: u32) -> &'static str { + match bits { + 8 => self.name8(), + 16 => self.name16(), + 32 => self.name32(), + _ => self.name64(), + } + } + + /// Is this a callee-saved register? + pub fn is_callee_saved(&self) -> bool { + matches!( + self, + Reg::Rbx | Reg::Rbp | Reg::R12 | Reg::R13 | Reg::R14 | Reg::R15 + ) + } + + /// Argument registers in order (System V AMD64 ABI) + pub fn arg_regs() -> &'static [Reg] { + &[Reg::Rdi, Reg::Rsi, Reg::Rdx, Reg::Rcx, Reg::R8, Reg::R9] + } + + /// All allocatable registers (excluding RSP, RBP, and R10) + /// R10 is reserved as scratch for division instructions + pub fn allocatable() -> &'static [Reg] { + &[ + Reg::Rax, + Reg::Rbx, + Reg::Rcx, + Reg::Rdx, + Reg::Rsi, + Reg::Rdi, + Reg::R8, + Reg::R9, + // R10 is reserved for division scratch + Reg::R11, + Reg::R12, + Reg::R13, + Reg::R14, + Reg::R15, + ] + } + + /// Stack pointer register + pub fn sp() -> Reg { + Reg::Rsp + } + + /// Base/frame pointer register + pub fn bp() -> Reg { + Reg::Rbp + } +} + +// ============================================================================ +// XMM Register Definitions (SSE/FP) +// ============================================================================ + +/// x86-64 XMM registers for floating-point operations +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum XmmReg { + Xmm0, + Xmm1, + Xmm2, + Xmm3, + Xmm4, + Xmm5, + Xmm6, + Xmm7, + Xmm8, + Xmm9, + Xmm10, + Xmm11, + Xmm12, + Xmm13, + Xmm14, + Xmm15, +} + +impl XmmReg { + /// Get AT&T syntax name for XMM register + pub fn name(&self) -> &'static str { + match self { + XmmReg::Xmm0 => "%xmm0", + XmmReg::Xmm1 => "%xmm1", + XmmReg::Xmm2 => "%xmm2", + XmmReg::Xmm3 => "%xmm3", + XmmReg::Xmm4 => "%xmm4", + XmmReg::Xmm5 => "%xmm5", + XmmReg::Xmm6 => "%xmm6", + XmmReg::Xmm7 => "%xmm7", + XmmReg::Xmm8 => "%xmm8", + XmmReg::Xmm9 => "%xmm9", + XmmReg::Xmm10 => "%xmm10", + XmmReg::Xmm11 => "%xmm11", + XmmReg::Xmm12 => "%xmm12", + XmmReg::Xmm13 => "%xmm13", + XmmReg::Xmm14 => "%xmm14", + XmmReg::Xmm15 => "%xmm15", + } + } + + /// Floating-point argument registers (System V AMD64 ABI) + pub fn arg_regs() -> &'static [XmmReg] { + &[ + XmmReg::Xmm0, + XmmReg::Xmm1, + XmmReg::Xmm2, + XmmReg::Xmm3, + XmmReg::Xmm4, + XmmReg::Xmm5, + XmmReg::Xmm6, + XmmReg::Xmm7, + ] + } + + /// All allocatable XMM registers + /// XMM0-XMM7 are caller-saved (scratch), XMM8-XMM15 are callee-saved on Windows but not on System V + /// XMM14 and XMM15 are reserved as scratch registers for codegen operations + pub fn allocatable() -> &'static [XmmReg] { + &[ + XmmReg::Xmm0, + XmmReg::Xmm1, + XmmReg::Xmm2, + XmmReg::Xmm3, + XmmReg::Xmm4, + XmmReg::Xmm5, + XmmReg::Xmm6, + XmmReg::Xmm7, + XmmReg::Xmm8, + XmmReg::Xmm9, + XmmReg::Xmm10, + XmmReg::Xmm11, + XmmReg::Xmm12, + XmmReg::Xmm13, + // XMM14 and XMM15 are reserved for scratch use in codegen + ] + } +} + +// ============================================================================ +// Operand - Location of a value (register or memory) +// ============================================================================ + +/// Location of a value +#[derive(Debug, Clone)] +pub enum Loc { + /// In a general-purpose register + Reg(Reg), + /// In an XMM register (floating-point) + Xmm(XmmReg), + /// On the stack at [rbp - offset] + Stack(i32), + /// Immediate integer constant + Imm(i64), + /// Immediate float constant (value, size in bits) + FImm(f64, u32), + /// Global symbol + Global(String), +} + +// ============================================================================ +// Register Allocator (Linear Scan) +// ============================================================================ + +/// Live interval for a pseudo +#[derive(Debug, Clone)] +struct LiveInterval { + pseudo: PseudoId, + start: usize, + end: usize, +} + +/// Simple linear scan register allocator for x86-64 +pub struct RegAlloc { + /// Mapping from pseudo to location + locations: HashMap, + /// Free general-purpose registers + free_regs: Vec, + /// Free XMM registers (for floating-point) + free_xmm_regs: Vec, + /// Active integer register intervals (sorted by end point) + active: Vec<(LiveInterval, Reg)>, + /// Active XMM register intervals (sorted by end point) + active_xmm: Vec<(LiveInterval, XmmReg)>, + /// Next stack slot offset + stack_offset: i32, + /// Callee-saved registers that were used + used_callee_saved: Vec, + /// Track which pseudos need FP registers (based on type) + fp_pseudos: std::collections::HashSet, +} + +impl RegAlloc { + pub fn new() -> Self { + Self { + locations: HashMap::new(), + free_regs: Reg::allocatable().to_vec(), + free_xmm_regs: XmmReg::allocatable().to_vec(), + active: Vec::new(), + active_xmm: Vec::new(), + stack_offset: 0, + used_callee_saved: Vec::new(), + fp_pseudos: std::collections::HashSet::new(), + } + } + + /// Perform register allocation for a function + pub fn allocate(&mut self, func: &Function) -> HashMap { + // Reset state + self.locations.clear(); + self.free_regs = Reg::allocatable().to_vec(); + self.free_xmm_regs = XmmReg::allocatable().to_vec(); + self.active.clear(); + self.active_xmm.clear(); + self.stack_offset = 0; + self.used_callee_saved.clear(); + self.fp_pseudos.clear(); + + // Scan instructions to identify which pseudos need FP registers + self.identify_fp_pseudos(func); + + // Pre-allocate argument registers + // System V AMD64 ABI: integer args in RDI, RSI, RDX, RCX, R8, R9 + // FP args in XMM0-XMM7 + let int_arg_regs = Reg::arg_regs(); + let fp_arg_regs = XmmReg::arg_regs(); + let mut int_arg_idx = 0; + let mut fp_arg_idx = 0; + + // Detect if there's a hidden return pointer (for functions returning large structs) + // The __sret pseudo has arg_idx=0 and shifts all other arg indices by 1 + let sret_pseudo = func + .pseudos + .iter() + .find(|p| matches!(p.kind, PseudoKind::Arg(0)) && p.name.as_deref() == Some("__sret")); + let arg_idx_offset: u32 = if sret_pseudo.is_some() { 1 } else { 0 }; + + // If there's a hidden return pointer, allocate RDI for it + if let Some(sret) = sret_pseudo { + self.locations.insert(sret.id, Loc::Reg(int_arg_regs[0])); + self.free_regs.retain(|&r| r != int_arg_regs[0]); + int_arg_idx += 1; + } + + for (i, (_name, typ)) in func.params.iter().enumerate() { + // Find the pseudo for this argument + for pseudo in &func.pseudos { + if let PseudoKind::Arg(arg_idx) = pseudo.kind { + if arg_idx == (i as u32) + arg_idx_offset { + let is_fp = typ.is_float(); + if is_fp { + // FP argument + if fp_arg_idx < fp_arg_regs.len() { + self.locations + .insert(pseudo.id, Loc::Xmm(fp_arg_regs[fp_arg_idx])); + self.free_xmm_regs.retain(|&r| r != fp_arg_regs[fp_arg_idx]); + self.fp_pseudos.insert(pseudo.id); + } else { + // FP arg on stack + let offset = + 16 + (i - int_arg_regs.len() - fp_arg_regs.len()) as i32 * 8; + self.locations.insert(pseudo.id, Loc::Stack(-offset)); + } + fp_arg_idx += 1; + } else { + // Integer argument + if int_arg_idx < int_arg_regs.len() { + self.locations + .insert(pseudo.id, Loc::Reg(int_arg_regs[int_arg_idx])); + self.free_regs.retain(|&r| r != int_arg_regs[int_arg_idx]); + } else { + // Integer arg on stack + let offset = 16 + (i - int_arg_regs.len()) as i32 * 8; + self.locations.insert(pseudo.id, Loc::Stack(-offset)); + } + int_arg_idx += 1; + } + break; + } + } + } + } + + // Compute live intervals (simplified: just walk instructions) + let intervals = self.compute_live_intervals(func); + + // Find all call positions - arguments in caller-saved registers that live across + // calls need to be spilled to stack + let mut call_positions: Vec = Vec::new(); + let mut pos = 0usize; + for block in &func.blocks { + for insn in &block.insns { + if insn.op == Opcode::Call { + call_positions.push(pos); + } + pos += 1; + } + } + + // Check arguments in caller-saved registers - if their interval crosses a call, + // spill them to the stack instead + let int_arg_regs_set: Vec = Reg::arg_regs().to_vec(); + for interval in &intervals { + if let Some(Loc::Reg(reg)) = self.locations.get(&interval.pseudo) { + // Check if this is an argument register (caller-saved) + if int_arg_regs_set.contains(reg) { + // Check if interval crosses any call + let crosses_call = call_positions + .iter() + .any(|&call_pos| interval.start <= call_pos && call_pos < interval.end); + if crosses_call { + // Spill to stack - remove from register, allocate stack slot + let reg_to_restore = *reg; + self.stack_offset += 8; + self.locations + .insert(interval.pseudo, Loc::Stack(self.stack_offset)); + // Restore the register to free list for other uses + self.free_regs.push(reg_to_restore); + } + } + } + } + + // Force alloca results to the stack. Alloca results typically have long live + // ranges (used across multiple loop iterations) and putting them in registers + // leads to clobbering issues when the codegen needs temp registers. + for block in &func.blocks { + for insn in &block.insns { + if insn.op == Opcode::Alloca { + if let Some(target) = insn.target { + self.stack_offset += 8; + self.locations.insert(target, Loc::Stack(self.stack_offset)); + } + } + } + } + + // Allocate registers using linear scan + for interval in intervals { + self.expire_old_intervals(interval.start); + + // Check if already allocated (e.g., arguments) + if self.locations.contains_key(&interval.pseudo) { + continue; + } + + // Check if this is a constant - constants don't need registers + if let Some(pseudo) = func.pseudos.iter().find(|p| p.id == interval.pseudo) { + match &pseudo.kind { + PseudoKind::Val(v) => { + self.locations.insert(interval.pseudo, Loc::Imm(*v)); + continue; + } + PseudoKind::FVal(v) => { + // Float constants are stored as FImm + // Find the size from the SetVal instruction that defines this constant + let size = func + .blocks + .iter() + .flat_map(|b| &b.insns) + .find(|insn| { + insn.op == crate::ir::Opcode::SetVal + && insn.target == Some(interval.pseudo) + }) + .map(|insn| insn.size) + .unwrap_or(64); // Default to double if not found + self.locations.insert(interval.pseudo, Loc::FImm(*v, size)); + self.fp_pseudos.insert(interval.pseudo); + continue; + } + PseudoKind::Sym(name) => { + // Check if this is a local variable or a global symbol + if let Some(local_var) = func.locals.get(name) { + // Local variable - allocate stack space based on type size + let size = (local_var.typ.size_bits() / 8) as i32; + let size = std::cmp::max(size, 8); // Minimum 8 bytes + // Align to 8 bytes + let aligned_size = (size + 7) & !7; + self.stack_offset += aligned_size; + self.locations + .insert(interval.pseudo, Loc::Stack(self.stack_offset)); + // Mark as FP if the type is float + if local_var.typ.is_float() { + self.fp_pseudos.insert(interval.pseudo); + } + } else { + // Global symbol + self.locations + .insert(interval.pseudo, Loc::Global(name.clone())); + } + continue; + } + _ => {} + } + } + + // Determine if this pseudo needs an FP register + let needs_fp = self.fp_pseudos.contains(&interval.pseudo); + + if needs_fp { + // Try to allocate an XMM register + if let Some(xmm) = self.free_xmm_regs.pop() { + self.locations.insert(interval.pseudo, Loc::Xmm(xmm)); + self.active_xmm.push((interval.clone(), xmm)); + self.active_xmm.sort_by_key(|(i, _)| i.end); + } else { + // Spill to stack + self.stack_offset += 8; + self.locations + .insert(interval.pseudo, Loc::Stack(self.stack_offset)); + } + } else { + // Try to allocate a general-purpose register + if let Some(reg) = self.free_regs.pop() { + if reg.is_callee_saved() && !self.used_callee_saved.contains(®) { + self.used_callee_saved.push(reg); + } + self.locations.insert(interval.pseudo, Loc::Reg(reg)); + self.active.push((interval.clone(), reg)); + // Keep active sorted by end point + self.active.sort_by_key(|(i, _)| i.end); + } else { + // Spill to stack + self.stack_offset += 8; + self.locations + .insert(interval.pseudo, Loc::Stack(self.stack_offset)); + } + } + } + + self.locations.clone() + } + + /// Scan function to identify which pseudos need FP registers + fn identify_fp_pseudos(&mut self, func: &Function) { + for block in &func.blocks { + for insn in &block.insns { + // Check instruction type - FP instructions produce FP results + let is_fp_op = matches!( + insn.op, + Opcode::FAdd + | Opcode::FSub + | Opcode::FMul + | Opcode::FDiv + | Opcode::FNeg + | Opcode::FCmpOEq + | Opcode::FCmpONe + | Opcode::FCmpOLt + | Opcode::FCmpOLe + | Opcode::FCmpOGt + | Opcode::FCmpOGe + | Opcode::UCvtF + | Opcode::SCvtF + | Opcode::FCvtF + ); + + // Mark target pseudo as FP if this is an FP operation + // (except comparisons which produce int) + if is_fp_op + && !matches!( + insn.op, + Opcode::FCmpOEq + | Opcode::FCmpONe + | Opcode::FCmpOLt + | Opcode::FCmpOLe + | Opcode::FCmpOGt + | Opcode::FCmpOGe + ) + { + if let Some(target) = insn.target { + self.fp_pseudos.insert(target); + } + } + + // Also check the type if available + // (but exclude comparisons which always produce int regardless of operand type) + if let Some(ref typ) = insn.typ { + if typ.is_float() + && !matches!( + insn.op, + Opcode::FCmpOEq + | Opcode::FCmpONe + | Opcode::FCmpOLt + | Opcode::FCmpOLe + | Opcode::FCmpOGt + | Opcode::FCmpOGe + ) + { + if let Some(target) = insn.target { + self.fp_pseudos.insert(target); + } + } + } + } + } + } + + fn expire_old_intervals(&mut self, point: usize) { + // Expire integer register intervals + let mut to_remove = Vec::new(); + for (i, (interval, reg)) in self.active.iter().enumerate() { + if interval.end < point { + self.free_regs.push(*reg); + to_remove.push(i); + } + } + // Remove in reverse order to preserve indices + for i in to_remove.into_iter().rev() { + self.active.remove(i); + } + + // Expire XMM register intervals + let mut to_remove_xmm = Vec::new(); + for (i, (interval, xmm)) in self.active_xmm.iter().enumerate() { + if interval.end < point { + self.free_xmm_regs.push(*xmm); + to_remove_xmm.push(i); + } + } + for i in to_remove_xmm.into_iter().rev() { + self.active_xmm.remove(i); + } + } + + fn compute_live_intervals(&self, func: &Function) -> Vec { + use crate::ir::BasicBlockId; + + // Track earliest definition, latest definition, and latest use for each pseudo + struct IntervalInfo { + pseudo: PseudoId, + first_def: usize, + last_def: usize, + last_use: usize, + } + + let mut intervals: HashMap = HashMap::new(); + let mut pos = 0usize; + + // First pass: compute block end positions + let mut block_end_pos: HashMap = HashMap::new(); + let mut temp_pos = 0usize; + for block in &func.blocks { + temp_pos += block.insns.len(); + block_end_pos.insert(block.id, temp_pos.saturating_sub(1)); + } + + // Collect phi sources with their source blocks for later processing + // Also track phi targets - they need intervals extended to cover all their copy definitions + let mut phi_sources: Vec<(BasicBlockId, PseudoId)> = Vec::new(); + let mut phi_targets: Vec<(BasicBlockId, PseudoId)> = Vec::new(); + + for block in &func.blocks { + for insn in &block.insns { + // Definition point - track both first and last definition + // This is important because phi elimination creates multiple definitions + // of the same pseudo (via Copy instructions in predecessor blocks) + if let Some(target) = insn.target { + intervals + .entry(target) + .and_modify(|info| { + info.first_def = info.first_def.min(pos); + info.last_def = info.last_def.max(pos); + }) + .or_insert(IntervalInfo { + pseudo: target, + first_def: pos, + last_def: pos, + last_use: pos, + }); + } + + // Use points + for &src in &insn.src { + if let Some(info) = intervals.get_mut(&src) { + info.last_use = info.last_use.max(pos); + } else { + intervals.insert( + src, + IntervalInfo { + pseudo: src, + first_def: pos, + last_def: pos, + last_use: pos, + }, + ); + } + } + + // Collect phi sources - they need to be live at the END of their source block + // Also collect phi targets - they have Copy definitions in each source block + for (src_bb, pseudo) in &insn.phi_list { + phi_sources.push((*src_bb, *pseudo)); + // The phi target (if present) is defined via Copy at the end of each source block + if let Some(target) = insn.target { + phi_targets.push((*src_bb, target)); + } + } + + pos += 1; + } + } + + // Process phi sources: extend their live interval to the end of their source block + // This is critical for loops where phi sources come from later blocks via back edges + // NOTE: We only extend last_use, NOT last_def, because phi sources are USED (not defined) + // at the copy point. Setting last_def incorrectly can confuse the interval calculation. + for (src_bb, pseudo) in phi_sources { + if let Some(&end_pos) = block_end_pos.get(&src_bb) { + if let Some(info) = intervals.get_mut(&pseudo) { + info.last_use = info.last_use.max(end_pos); + } else { + intervals.insert( + pseudo, + IntervalInfo { + pseudo, + first_def: end_pos, + last_def: end_pos, + last_use: end_pos, + }, + ); + } + } + } + + // Process phi targets: extend their live interval to cover all Copy definitions + // The phi target is defined via Copy at the end of each source block + // For loops, this means the target must be live until the last Copy (at the loop back edge) + for (src_bb, target) in phi_targets { + if let Some(&end_pos) = block_end_pos.get(&src_bb) { + if let Some(info) = intervals.get_mut(&target) { + info.last_def = info.last_def.max(end_pos); + } else { + intervals.insert( + target, + IntervalInfo { + pseudo: target, + first_def: end_pos, + last_def: end_pos, + last_use: end_pos, + }, + ); + } + } + } + + // Find the maximum position in the function + let max_pos = pos.saturating_sub(1); + + // Convert to LiveInterval + // The interval spans from first_def to max(last_def, last_use) + // This ensures that pseudos with multiple definitions (from phi copies) + // stay live across all their definitions + // + // IMPORTANT: For loop variables, if last_def > last_use (definition comes after use + // due to back edge), the variable must stay live until the end of the function + // because it will be used again in the next loop iteration. + let mut result: Vec<_> = intervals + .into_values() + .map(|info| { + let end = if info.last_def > info.last_use { + // Loop variable: definition after use means we wrap around + // Extend to end of function to ensure it stays live + max_pos + } else { + info.last_def.max(info.last_use) + }; + LiveInterval { + pseudo: info.pseudo, + start: info.first_def, + end, + } + }) + .collect(); + result.sort_by_key(|i| i.start); + result + } + + /// Get stack size needed (aligned to 16 bytes) + pub fn stack_size(&self) -> i32 { + // Align to 16 bytes + (self.stack_offset + 15) & !15 + } + + /// Get callee-saved registers that need to be preserved + pub fn callee_saved_used(&self) -> &[Reg] { + &self.used_callee_saved + } +} + +impl Default for RegAlloc { + fn default() -> Self { + Self::new() + } +} + +// ============================================================================ +// x86-64 Code Generator +// ============================================================================ + +/// x86-64 code generator +pub struct X86_64CodeGen { + /// Target info + target: Target, + /// Output buffer + output: String, + /// LIR instruction buffer (for deferred emission and future peephole optimization) + lir_buffer: Vec, + /// Current function's register allocation + locations: HashMap, + /// Current function's pseudos (for looking up values) + pseudos: Vec, + /// Current function name (for generating unique labels) + current_fn: String, + /// Callee-saved registers used in current function (for epilogue) + callee_saved_regs: Vec, + /// Offset to add to stack locations to account for callee-saved registers + callee_saved_offset: i32, + /// Offset from rbp to register save area (for variadic functions) + reg_save_area_offset: i32, + /// Number of fixed GP parameters (for variadic functions) + num_fixed_gp_params: usize, + /// Whether to emit basic unwind tables (cfi_startproc/cfi_endproc) + emit_unwind_tables: bool, + /// Last emitted source line (for avoiding duplicate .loc directives) + last_debug_line: u32, + /// Last emitted source file index + last_debug_file: u16, + /// Whether to emit debug info (.file/.loc directives) + emit_debug: bool, + /// Counter for generating unique internal labels + unique_label_counter: u32, +} + +impl X86_64CodeGen { + pub fn new(target: Target) -> Self { + Self { + target, + output: String::new(), + lir_buffer: Vec::new(), + locations: HashMap::new(), + pseudos: Vec::new(), + current_fn: String::new(), + callee_saved_regs: Vec::new(), + callee_saved_offset: 0, + reg_save_area_offset: 0, + num_fixed_gp_params: 0, + emit_unwind_tables: true, // Default to emitting basic unwind tables + last_debug_line: 0, + last_debug_file: 0, + emit_debug: false, + unique_label_counter: 0, + } + } + + /// Push a LIR instruction to the buffer (deferred emission) + fn push_lir(&mut self, inst: X86Inst) { + self.lir_buffer.push(inst); + } + + /// Emit all buffered LIR instructions to the output string + fn emit_all(&mut self) { + use crate::arch::lir::EmitAsm; + for inst in &self.lir_buffer { + inst.emit(&self.target, &mut self.output); + } + } + + /// Convert a Loc to a GpOperand for LIR + fn loc_to_gp_operand(&self, loc: &Loc) -> GpOperand { + match loc { + Loc::Reg(r) => GpOperand::Reg(*r), + Loc::Stack(offset) => { + let adjusted = *offset + self.callee_saved_offset; + GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }) + } + Loc::Imm(v) => GpOperand::Imm(*v), + Loc::FImm(_, _) => GpOperand::Imm(0), // FP immediates handled separately + Loc::Xmm(_) => GpOperand::Imm(0), // XMM handled separately + Loc::Global(name) => GpOperand::Mem(MemAddr::RipRelative(Symbol::global(name.clone()))), + } + } + + /// Emit .loc directive for source line tracking (if debug is enabled and line changed) + fn emit_loc(&mut self, insn: &Instruction) { + if !self.emit_debug { + return; + } + if let Some(pos) = &insn.pos { + // Only emit if line changed (avoid duplicate .loc directives) + let file = pos.stream + 1; // DWARF file indices start at 1 + let line = pos.line; + if line != self.last_debug_line || file != self.last_debug_file { + // LIR: .loc directive for debug info + self.push_lir(X86Inst::Directive(Directive::loc( + file.into(), + line, + pos.col.into(), + ))); + self.last_debug_line = line; + self.last_debug_file = file; + } + } + } + + fn emit_header(&mut self) { + // Header comments with compiler and target info (GCC-style) + for comment in crate::arch::codegen::generate_header_comments(&self.target) { + self.push_lir(X86Inst::Directive(Directive::Comment(comment))); + } + self.push_lir(X86Inst::Directive(Directive::Text)); + } + + fn emit_global(&mut self, name: &str, typ: &Type, init: &Initializer) { + let size = typ.size_bits() / 8; + let size = if size == 0 { 8 } else { size }; // Default to 8 bytes + + // Check storage class - skip .globl for static + let is_static = typ.modifiers.contains(TypeModifiers::STATIC); + + // Get alignment from type info + let align = typ.alignment() as u32; + + // Use .comm for uninitialized external (non-static) globals + let use_bss = matches!(init, Initializer::None) && !is_static; + + if use_bss { + // Use .comm for uninitialized external globals + self.push_lir(X86Inst::Directive(Directive::comm(name, size, align))); + return; + } + + // Data section + self.push_lir(X86Inst::Directive(Directive::Data)); + + // Global visibility (if not static) + if !is_static { + self.push_lir(X86Inst::Directive(Directive::global(name))); + } + + // ELF-only type and size (handled by Directive::emit which skips on macOS) + self.push_lir(X86Inst::Directive(Directive::type_object(name))); + self.push_lir(X86Inst::Directive(Directive::size(name, size))); + + // Alignment + if align > 1 { + self.push_lir(X86Inst::Directive(Directive::Align(align.trailing_zeros()))); + } + + // Label + self.push_lir(X86Inst::Directive(Directive::global_label(name))); + + // Emit initializer + match init { + Initializer::None => { + // For static uninitialized, use .zero (not .comm) + self.push_lir(X86Inst::Directive(Directive::Zero(size))); + } + Initializer::Int(val) => match size { + 1 => self.push_lir(X86Inst::Directive(Directive::Byte(*val))), + 2 => self.push_lir(X86Inst::Directive(Directive::Short(*val))), + 4 => self.push_lir(X86Inst::Directive(Directive::Long(*val))), + _ => self.push_lir(X86Inst::Directive(Directive::Quad(*val))), + }, + Initializer::Float(val) => { + if size == 4 { + // float - emit as 32-bit IEEE 754 + let bits = (*val as f32).to_bits(); + self.push_lir(X86Inst::Directive(Directive::Long(bits as i64))); + } else { + // double - emit as 64-bit IEEE 754 + let bits = val.to_bits(); + self.push_lir(X86Inst::Directive(Directive::Quad(bits as i64))); + } + } + } + } + + /// Check if a function contains va_start (indicating it's variadic) + fn is_variadic_function(func: &Function) -> bool { + for block in &func.blocks { + for insn in &block.insns { + if matches!(insn.op, crate::ir::Opcode::VaStart) { + return true; + } + } + } + false + } + + fn emit_function(&mut self, func: &Function) { + // Save current function name for unique label generation + self.current_fn = func.name.clone(); + + // Check if this function uses varargs + let is_variadic = Self::is_variadic_function(func); + + // Register allocation + let mut alloc = RegAlloc::new(); + self.locations = alloc.allocate(func); + self.pseudos = func.pseudos.clone(); + + let stack_size = alloc.stack_size(); + self.callee_saved_regs = alloc.callee_saved_used().to_vec(); + self.callee_saved_offset = self.callee_saved_regs.len() as i32 * 8; + + // For variadic functions, we need extra space for the register save area + // 6 GP regs * 8 bytes = 48 bytes for GP registers + // (We don't save XMM registers in this implementation for simplicity) + let reg_save_area_size: i32 = if is_variadic { 48 } else { 0 }; + self.reg_save_area_offset = if is_variadic { + // The register save area will be at the end of the stack frame + self.callee_saved_offset + stack_size + reg_save_area_size + } else { + 0 + }; + + // Function prologue + self.push_lir(X86Inst::Directive(Directive::Blank)); + self.push_lir(X86Inst::Directive(Directive::Text)); + + // Skip .globl for static functions (internal linkage) + if !func.is_static { + self.push_lir(X86Inst::Directive(Directive::global(&func.name))); + } + + // ELF-only type (handled by Directive::emit which skips on macOS) + self.push_lir(X86Inst::Directive(Directive::type_func(&func.name))); + + // Function label + self.push_lir(X86Inst::Directive(Directive::global_label(&func.name))); + + // CFI: Start procedure (enables stack unwinding for this function) + if self.emit_unwind_tables { + self.push_lir(X86Inst::Directive(Directive::CfiStartProc)); + } + + // Prologue: save frame pointer and allocate stack + let bp = Reg::bp(); + let sp = Reg::sp(); + self.push_lir(X86Inst::Push { + src: GpOperand::Reg(bp), + }); + if self.emit_debug { + // After pushq %rbp: CFA is now at %rsp+16, and %rbp is saved at CFA-16 + self.push_lir(X86Inst::Directive(Directive::CfiDefCfaOffset(16))); + self.push_lir(X86Inst::Directive(Directive::cfi_offset("%rbp", -16))); + } + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(sp), + dst: GpOperand::Reg(bp), + }); + if self.emit_debug { + // After movq %rsp, %rbp: CFA is now tracked by %rbp+16 + self.push_lir(X86Inst::Directive(Directive::cfi_def_cfa_register("%rbp"))); + } + + // Save callee-saved registers + let mut cfi_offset = -24i32; // First callee-saved is at -24 (after rbp at -16) + for reg in &self.callee_saved_regs.clone() { + self.push_lir(X86Inst::Push { + src: GpOperand::Reg(*reg), + }); + if self.emit_debug { + self.push_lir(X86Inst::Directive(Directive::cfi_offset( + reg.name64(), + cfi_offset, + ))); + } + cfi_offset -= 8; + } + + // Allocate stack space for locals + register save area (if variadic) + let total_stack = + stack_size + (self.callee_saved_regs.len() as i32 * 8) + reg_save_area_size; + // Ensure 16-byte alignment + let aligned_stack = (total_stack + 15) & !15; + if aligned_stack > self.callee_saved_regs.len() as i32 * 8 { + self.push_lir(X86Inst::Sub { + size: OperandSize::B64, + src: GpOperand::Imm( + (aligned_stack - (self.callee_saved_regs.len() as i32 * 8)) as i64, + ), + dst: Reg::Rsp, + }); + } + + // For variadic functions, save argument registers to the register save area + // This must be done before any other code uses these registers + // AMD64 ABI: rdi at offset 0, rsi at offset 8, rdx at offset 16, etc. + if is_variadic { + let int_arg_regs = Reg::arg_regs(); + // Save all 6 GP argument registers at their ABI-specified offsets + for (i, reg) in int_arg_regs.iter().enumerate() { + // offset from rbp = reg_save_area_offset - (i * 8) + // reg at ABI offset i*8 relative to reg_save_area base + let offset = self.reg_save_area_offset - (i as i32 * 8); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(*reg), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -offset, + }), + }); + } + } + + // Move arguments from registers to their allocated locations if needed + // System V AMD64 ABI: integer args in RDI, RSI, RDX, RCX, R8, R9 + // FP args in XMM0-XMM7 (separate counters) + let int_arg_regs = Reg::arg_regs(); + let fp_arg_regs = XmmReg::arg_regs(); + let mut int_arg_idx = 0; + let mut fp_arg_idx = 0; + + // Detect if there's a hidden return pointer (for functions returning large structs) + // The __sret pseudo has arg_idx=0 and shifts all other arg indices by 1 + let has_sret = func + .pseudos + .iter() + .any(|p| matches!(p.kind, PseudoKind::Arg(0)) && p.name.as_deref() == Some("__sret")); + let arg_idx_offset: u32 = if has_sret { 1 } else { 0 }; + + // If there's a hidden return pointer, it takes RDI, so params start from RSI + if has_sret { + int_arg_idx = 1; + } + + for (i, (_name, typ)) in func.params.iter().enumerate() { + // Find the pseudo for this argument + for pseudo in &func.pseudos { + if let PseudoKind::Arg(arg_idx) = pseudo.kind { + if arg_idx == (i as u32) + arg_idx_offset { + let is_fp = typ.is_float(); + if is_fp { + // FP argument + if fp_arg_idx < fp_arg_regs.len() { + if let Some(Loc::Stack(offset)) = self.locations.get(&pseudo.id) { + // Move from FP arg register to stack + let adjusted = offset + self.callee_saved_offset; + let fp_size = if typ.size_bits() == 32 { + FpSize::Single + } else { + FpSize::Double + }; + self.push_lir(X86Inst::MovFp { + size: fp_size, + src: XmmOperand::Reg(fp_arg_regs[fp_arg_idx]), + dst: XmmOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }), + }); + } + } + fp_arg_idx += 1; + } else { + // Integer argument + if int_arg_idx < int_arg_regs.len() { + if let Some(Loc::Stack(offset)) = self.locations.get(&pseudo.id) { + // Move from arg register to stack + let adjusted = offset + self.callee_saved_offset; + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(int_arg_regs[int_arg_idx]), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }), + }); + } + } + int_arg_idx += 1; + } + break; + } + } + } + } + + // Save number of fixed GP params for va_start + // Count how many non-FP params we have + if is_variadic { + self.num_fixed_gp_params = func + .params + .iter() + .filter(|(_, typ)| !typ.is_float()) + .count(); + if has_sret { + self.num_fixed_gp_params += 1; // Account for hidden sret pointer + } + } + + // Emit basic blocks + for block in &func.blocks { + self.emit_block(block); + } + + // CFI: End procedure + if self.emit_unwind_tables { + self.push_lir(X86Inst::Directive(Directive::CfiEndProc)); + } + } + + fn emit_block(&mut self, block: &crate::ir::BasicBlock) { + // Emit block label (include function name for uniqueness) + if let Some(label) = &block.label { + // LIR: named block label (using Raw since format differs from standard) + self.push_lir(X86Inst::Directive(Directive::Raw(format!( + ".L_{}_{}:", + self.current_fn, label + )))); + } else { + // LIR: numbered block label + self.push_lir(X86Inst::Directive(Directive::BlockLabel(Label::new( + &self.current_fn, + block.id.0, + )))); + } + + // Emit instructions + for insn in &block.insns { + self.emit_insn(insn); + } + } + + fn emit_insn(&mut self, insn: &Instruction) { + // Emit .loc directive for debug info + self.emit_loc(insn); + + match insn.op { + Opcode::Entry => { + // Already handled in function prologue + } + + Opcode::Ret => { + // Move return value to appropriate register if present + // System V AMD64 ABI: integers in RAX, floats in XMM0 + if let Some(src) = insn.src.first() { + let src_loc = self.get_location(*src); + let is_fp = matches!(src_loc, Loc::Xmm(_) | Loc::FImm(..)) + || insn.typ.as_ref().is_some_and(|t| t.is_float()); + if is_fp { + // Float return value goes in XMM0 + self.emit_fp_move(*src, XmmReg::Xmm0, insn.size); + } else { + // Integer return value goes in RAX + self.emit_move(*src, Reg::Rax, insn.size); + } + } + // Epilogue: restore callee-saved registers and return + let bp = Reg::bp(); + let num_callee_saved = self.callee_saved_regs.len(); + if num_callee_saved > 0 { + // Set rsp to point to the first callee-saved register (pushed after rbp setup) + let offset = num_callee_saved * 8; + // LIR: lea to restore rsp + self.push_lir(X86Inst::Lea { + addr: MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -(offset as i32), + }, + dst: Reg::Rsp, + }); + // Pop callee-saved registers in reverse order + // Collect to avoid borrow conflict with push_lir + let callee_saved: Vec = + self.callee_saved_regs.iter().rev().copied().collect(); + for reg in callee_saved { + // LIR: pop callee-saved register + self.push_lir(X86Inst::Pop { dst: reg }); + } + } else { + // No callee-saved registers, just restore rsp from rbp + // LIR: mov rbp to rsp + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rbp), + dst: GpOperand::Reg(Reg::Rsp), + }); + } + // LIR: pop rbp + self.push_lir(X86Inst::Pop { dst: bp }); + // LIR: ret + self.push_lir(X86Inst::Ret); + } + + Opcode::Br => { + if let Some(target) = insn.bb_true { + // LIR: unconditional jump + self.push_lir(X86Inst::Jmp { + target: Label::new(&self.current_fn, target.0), + }); + } + } + + Opcode::Cbr => { + if let Some(&cond) = insn.src.first() { + let loc = self.get_location(cond); + let size = insn.size.max(32); + let op_size = OperandSize::from_bits(size); + match &loc { + Loc::Reg(r) => { + // LIR: test register with itself + self.push_lir(X86Inst::Test { + size: op_size, + src: GpOperand::Reg(*r), + dst: GpOperand::Reg(*r), + }); + } + Loc::Stack(offset) => { + let adjusted = offset + self.callee_saved_offset; + // LIR: compare stack location with 0 + self.push_lir(X86Inst::Cmp { + size: op_size, + src: GpOperand::Imm(0), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }), + }); + } + Loc::Imm(v) => { + if *v != 0 { + if let Some(target) = insn.bb_true { + // LIR: unconditional jump (constant true) + self.push_lir(X86Inst::Jmp { + target: Label::new(&self.current_fn, target.0), + }); + } + return; + } else { + if let Some(target) = insn.bb_false { + // LIR: unconditional jump (constant false) + self.push_lir(X86Inst::Jmp { + target: Label::new(&self.current_fn, target.0), + }); + } + return; + } + } + Loc::Global(_) => { + self.emit_move(cond, Reg::Rax, size); + // LIR: test rax with itself + self.push_lir(X86Inst::Test { + size: op_size, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Reg(Reg::Rax), + }); + } + Loc::Xmm(x) => { + // Compare XMM register with zero + // LIR: xorps to zero xmm15, then ucomiss + self.push_lir(X86Inst::XorpsSelf { reg: XmmReg::Xmm15 }); + self.push_lir(X86Inst::UComiFp { + size: FpSize::Single, + src: XmmOperand::Reg(*x), + dst: XmmReg::Xmm15, + }); + } + Loc::FImm(v, _) => { + // Float immediate - check if non-zero + if *v != 0.0 { + if let Some(target) = insn.bb_true { + // LIR: unconditional jump (constant true) + self.push_lir(X86Inst::Jmp { + target: Label::new(&self.current_fn, target.0), + }); + } + return; + } else { + if let Some(target) = insn.bb_false { + // LIR: unconditional jump (constant false) + self.push_lir(X86Inst::Jmp { + target: Label::new(&self.current_fn, target.0), + }); + } + return; + } + } + } + if let Some(target) = insn.bb_true { + // LIR: conditional jump if not equal (non-zero) + self.push_lir(X86Inst::Jcc { + cc: IntCC::Ne, + target: Label::new(&self.current_fn, target.0), + }); + } + if let Some(target) = insn.bb_false { + // LIR: unconditional jump to false branch + self.push_lir(X86Inst::Jmp { + target: Label::new(&self.current_fn, target.0), + }); + } + } + } + + Opcode::Switch => { + // Switch uses target as the value to switch on + if let Some(val) = insn.target { + let size = insn.size.max(32); + let op_size = OperandSize::from_bits(size); + // Move switch value to eax + self.emit_move(val, Reg::Rax, size); + + // Generate comparisons for each case + for (case_val, target_bb) in &insn.switch_cases { + // LIR: compare with case value + self.push_lir(X86Inst::Cmp { + size: op_size, + src: GpOperand::Imm(*case_val), + dst: GpOperand::Reg(Reg::Rax), + }); + // LIR: conditional jump on equal + self.push_lir(X86Inst::Jcc { + cc: IntCC::E, + target: Label::new(&self.current_fn, target_bb.0), + }); + } + + // Jump to default (or fall through if no default) + if let Some(default_bb) = insn.switch_default { + // LIR: unconditional jump to default + self.push_lir(X86Inst::Jmp { + target: Label::new(&self.current_fn, default_bb.0), + }); + } + } + } + + Opcode::Add + | Opcode::Sub + | Opcode::And + | Opcode::Or + | Opcode::Xor + | Opcode::Shl + | Opcode::Lsr + | Opcode::Asr => { + self.emit_binop(insn); + } + + Opcode::Mul => { + self.emit_mul(insn); + } + + Opcode::DivS | Opcode::DivU | Opcode::ModS | Opcode::ModU => { + self.emit_div(insn); + } + + // Floating-point arithmetic operations + Opcode::FAdd | Opcode::FSub | Opcode::FMul | Opcode::FDiv => { + self.emit_fp_binop(insn); + } + + Opcode::FNeg => { + self.emit_fp_neg(insn); + } + + // Floating-point comparisons + Opcode::FCmpOEq + | Opcode::FCmpONe + | Opcode::FCmpOLt + | Opcode::FCmpOLe + | Opcode::FCmpOGt + | Opcode::FCmpOGe => { + self.emit_fp_compare(insn); + } + + // Integer to float conversions + Opcode::UCvtF | Opcode::SCvtF => { + self.emit_int_to_float(insn); + } + + // Float to integer conversions + Opcode::FCvtU | Opcode::FCvtS => { + self.emit_float_to_int(insn); + } + + // Float to float conversions (e.g., float to double) + Opcode::FCvtF => { + self.emit_float_to_float(insn); + } + + Opcode::SetEq + | Opcode::SetNe + | Opcode::SetLt + | Opcode::SetLe + | Opcode::SetGt + | Opcode::SetGe + | Opcode::SetB + | Opcode::SetBe + | Opcode::SetA + | Opcode::SetAe => { + self.emit_compare(insn); + } + + Opcode::Neg => { + self.emit_neg(insn); + } + + Opcode::Not => { + self.emit_not(insn); + } + + Opcode::Load => { + self.emit_load(insn); + } + + Opcode::Store => { + self.emit_store(insn); + } + + Opcode::Call => { + self.emit_call(insn); + } + + Opcode::SetVal => { + if let Some(target) = insn.target { + if let Some(pseudo) = self.pseudos.iter().find(|p| p.id == target) { + let target_loc = self.locations.get(&target).cloned(); + match &pseudo.kind { + PseudoKind::Val(v) => { + if let Some(Loc::Reg(r)) = target_loc { + self.push_lir(X86Inst::Mov { + size: OperandSize::from_bits(insn.size), + src: GpOperand::Imm(*v), + dst: GpOperand::Reg(r), + }); + } + } + PseudoKind::FVal(v) => { + // Only emit code if the target is in an XMM register + // FImm locations are materialized inline at use sites + if let Some(Loc::Xmm(_)) = target_loc { + self.emit_fp_const_load(target, *v, insn.size); + } + // For FImm locations, do nothing - the value will be + // loaded inline when used in operations + } + _ => {} + } + } + } + } + + Opcode::Copy => { + if let (Some(target), Some(&src)) = (insn.target, insn.src.first()) { + // Pass the type to emit_copy for proper sign/zero extension + self.emit_copy_with_type(src, target, insn.size, insn.typ.as_ref()); + } + } + + Opcode::SymAddr => { + if let (Some(target), Some(&src)) = (insn.target, insn.src.first()) { + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::Rax, + }; + let src_loc = self.get_location(src); + match src_loc { + Loc::Global(name) => { + // Check if it's a local label (starts with '.') or global symbol + let is_local_label = name.starts_with('.'); + self.push_lir(X86Inst::Lea { + addr: MemAddr::RipRelative(Symbol { + name: name.clone(), + is_local: is_local_label, + }), + dst: dst_reg, + }); + } + Loc::Stack(offset) => { + // Get address of stack location + let adjusted = offset + self.callee_saved_offset; + self.push_lir(X86Inst::Lea { + addr: MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }, + dst: dst_reg, + }); + } + _ => {} + } + // Move to final destination if needed + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, 64); + } + } + } + + Opcode::Select => { + self.emit_select(insn); + } + + Opcode::Zext | Opcode::Sext | Opcode::Trunc => { + self.emit_extend(insn); + } + + // ================================================================ + // Variadic function support (va_* builtins) + // ================================================================ + Opcode::VaStart => { + self.emit_va_start(insn); + } + + Opcode::VaArg => { + self.emit_va_arg(insn); + } + + Opcode::VaEnd => { + // va_end is a no-op on all platforms + } + + Opcode::VaCopy => { + self.emit_va_copy(insn); + } + + // ================================================================ + // Byte-swapping builtins + // ================================================================ + Opcode::Bswap16 => { + self.emit_bswap16(insn); + } + + Opcode::Bswap32 => { + self.emit_bswap32(insn); + } + + Opcode::Bswap64 => { + self.emit_bswap64(insn); + } + + Opcode::Alloca => { + self.emit_alloca(insn); + } + + // Skip no-ops and unimplemented + _ => {} + } + } + + fn get_location(&self, pseudo: PseudoId) -> Loc { + self.locations.get(&pseudo).cloned().unwrap_or(Loc::Imm(0)) + } + + fn emit_move(&mut self, src: PseudoId, dst: Reg, size: u32) { + let size = size.max(32); + let op_size = OperandSize::from_bits(size); + let loc = self.get_location(src); + match loc { + Loc::Reg(r) if r == dst => { + // No-op: same register + } + Loc::Reg(r) => { + // LIR: register-to-register move + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Reg(r), + dst: GpOperand::Reg(dst), + }); + } + Loc::Stack(offset) => { + let adjusted = offset + self.callee_saved_offset; + // LIR: memory-to-register move + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }), + dst: GpOperand::Reg(dst), + }); + } + Loc::Imm(v) => { + // x86-64: movl sign-extends to 64-bit, movq only works with 32-bit signed immediates + // For values outside 32-bit signed range, use movabsq + if size == 64 && (v > i32::MAX as i64 || v < i32::MIN as i64) { + // LIR: 64-bit immediate move + self.push_lir(X86Inst::MovAbs { imm: v, dst }); + } else { + // LIR: immediate-to-register move + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Imm(v), + dst: GpOperand::Reg(dst), + }); + } + } + Loc::Global(name) => { + // LIR: RIP-relative memory-to-register move + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Mem(MemAddr::RipRelative(Symbol::global(name.clone()))), + dst: GpOperand::Reg(dst), + }); + } + Loc::Xmm(x) => { + // Move from XMM to general-purpose register + // LIR: XMM to GP move + self.push_lir(X86Inst::MovXmmGp { + size: OperandSize::B64, + src: x, + dst, + }); + } + Loc::FImm(..) => { + // Float immediate cannot be directly moved to GP register + // This should not normally happen + } + } + } + + fn emit_move_to_loc(&mut self, src: Reg, dst: &Loc, size: u32) { + // For stack stores, use actual size to properly handle char/short + // For register-to-register, use minimum 32-bit + match dst { + Loc::Reg(r) if *r == src => { + // No-op: same register + } + Loc::Reg(r) => { + let reg_size = size.max(32); + let op_size = OperandSize::from_bits(reg_size); + // LIR: register-to-register move + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Reg(src), + dst: GpOperand::Reg(*r), + }); + } + Loc::Stack(offset) => { + // Use actual size for memory stores (8, 16, 32, 64 bits) + let adjusted = offset + self.callee_saved_offset; + let op_size = OperandSize::from_bits(size); + // LIR: register-to-memory move + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Reg(src), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }), + }); + } + _ => {} + } + } + + fn emit_binop(&mut self, insn: &Instruction) { + let size = insn.size.max(32); + let op_size = OperandSize::from_bits(size); + let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { + (Some(&s1), Some(&s2)) => (s1, s2), + _ => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let dst_loc = self.get_location(target); + let work_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::Rax, + }; + self.emit_move(src1, work_reg, size); + if matches!(insn.op, Opcode::Shl | Opcode::Lsr | Opcode::Asr) { + let src2_loc = self.get_location(src2); + match src2_loc { + Loc::Imm(v) => { + // LIR: shift by immediate + let shift_count = ShiftCount::Imm(v as u8); + match insn.op { + Opcode::Shl => self.push_lir(X86Inst::Shl { + size: op_size, + count: shift_count, + dst: work_reg, + }), + Opcode::Lsr => self.push_lir(X86Inst::Shr { + size: op_size, + count: shift_count, + dst: work_reg, + }), + Opcode::Asr => self.push_lir(X86Inst::Sar { + size: op_size, + count: shift_count, + dst: work_reg, + }), + _ => {} + } + } + _ => { + self.emit_move(src2, Reg::Rcx, 8); + // LIR: shift by %cl + let shift_count = ShiftCount::Cl; + match insn.op { + Opcode::Shl => self.push_lir(X86Inst::Shl { + size: op_size, + count: shift_count, + dst: work_reg, + }), + Opcode::Lsr => self.push_lir(X86Inst::Shr { + size: op_size, + count: shift_count, + dst: work_reg, + }), + Opcode::Asr => self.push_lir(X86Inst::Sar { + size: op_size, + count: shift_count, + dst: work_reg, + }), + _ => {} + } + } + } + } else { + let src2_loc = self.get_location(src2); + let src2_gp = self.loc_to_gp_operand(&src2_loc); + // LIR: binary operation (add, sub, and, or, xor) + match insn.op { + Opcode::Add => self.push_lir(X86Inst::Add { + size: op_size, + src: src2_gp, + dst: work_reg, + }), + Opcode::Sub => self.push_lir(X86Inst::Sub { + size: op_size, + src: src2_gp, + dst: work_reg, + }), + Opcode::And => self.push_lir(X86Inst::And { + size: op_size, + src: src2_gp, + dst: work_reg, + }), + Opcode::Or => self.push_lir(X86Inst::Or { + size: op_size, + src: src2_gp, + dst: work_reg, + }), + Opcode::Xor => self.push_lir(X86Inst::Xor { + size: op_size, + src: src2_gp, + dst: work_reg, + }), + _ => {} + } + } + if !matches!(&dst_loc, Loc::Reg(r) if *r == work_reg) { + self.emit_move_to_loc(work_reg, &dst_loc, size); + } + } + + fn emit_mul(&mut self, insn: &Instruction) { + let size = insn.size.max(32); + let op_size = OperandSize::from_bits(size); + let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { + (Some(&s1), Some(&s2)) => (s1, s2), + _ => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::Rax, + }; + self.emit_move(src1, dst_reg, size); + let src2_loc = self.get_location(src2); + // LIR: 2-operand imul instruction + let src2_gp = self.loc_to_gp_operand(&src2_loc); + self.push_lir(X86Inst::IMul2 { + size: op_size, + src: src2_gp, + dst: dst_reg, + }); + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, size); + } + } + + fn emit_div(&mut self, insn: &Instruction) { + let size = insn.size.max(32); + let op_size = OperandSize::from_bits(size); + let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { + (Some(&s1), Some(&s2)) => (s1, s2), + _ => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + self.emit_move(src1, Reg::Rax, size); + match insn.op { + Opcode::DivS | Opcode::ModS => { + if size == 64 { + // LIR: sign-extend RAX to RDX:RAX + self.push_lir(X86Inst::Cqto); + } else { + // LIR: sign-extend EAX to EDX:EAX + self.push_lir(X86Inst::Cltd); + } + } + Opcode::DivU | Opcode::ModU => { + // LIR: zero RDX for unsigned division + self.push_lir(X86Inst::Xor { + size: OperandSize::B32, + src: GpOperand::Reg(Reg::Rdx), + dst: Reg::Rdx, + }); + } + _ => {} + } + let src2_loc = self.get_location(src2); + let divisor_reg = match &src2_loc { + Loc::Reg(r) if *r != Reg::Rax && *r != Reg::Rdx => *r, + _ => { + self.emit_move(src2, Reg::R10, size); + Reg::R10 + } + }; + // LIR: division instruction + match insn.op { + Opcode::DivS | Opcode::ModS => { + self.push_lir(X86Inst::IDiv { + size: op_size, + divisor: GpOperand::Reg(divisor_reg), + }); + } + _ => { + self.push_lir(X86Inst::Div { + size: op_size, + divisor: GpOperand::Reg(divisor_reg), + }); + } + } + let dst_loc = self.get_location(target); + let result_reg = match insn.op { + Opcode::DivS | Opcode::DivU => Reg::Rax, + Opcode::ModS | Opcode::ModU => Reg::Rdx, + _ => Reg::Rax, + }; + self.emit_move_to_loc(result_reg, &dst_loc, size); + } + + fn emit_compare(&mut self, insn: &Instruction) { + let size = insn.size.max(32); + let op_size = OperandSize::from_bits(size); + let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { + (Some(&s1), Some(&s2)) => (s1, s2), + _ => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + self.emit_move(src1, Reg::Rax, size); + let src2_loc = self.get_location(src2); + // x86-64 cmp instruction only supports 32-bit signed immediates + // For larger values, we need to load into a register first + let src2_gp = match &src2_loc { + Loc::Imm(v) if *v > i32::MAX as i64 || *v < i32::MIN as i64 => { + // Large immediate - load into rcx first + // LIR: movabsq for large immediate + self.push_lir(X86Inst::MovAbs { + imm: *v, + dst: Reg::Rcx, + }); + GpOperand::Reg(Reg::Rcx) + } + _ => self.loc_to_gp_operand(&src2_loc), + }; + // LIR: compare instruction + self.push_lir(X86Inst::Cmp { + size: op_size, + src: src2_gp, + dst: GpOperand::Reg(Reg::Rax), + }); + // Map opcode to condition code + let cc = match insn.op { + Opcode::SetEq => IntCC::E, + Opcode::SetNe => IntCC::Ne, + Opcode::SetLt => IntCC::L, + Opcode::SetLe => IntCC::Le, + Opcode::SetGt => IntCC::G, + Opcode::SetGe => IntCC::Ge, + Opcode::SetB => IntCC::B, + Opcode::SetBe => IntCC::Be, + Opcode::SetA => IntCC::A, + Opcode::SetAe => IntCC::Ae, + _ => return, + }; + // LIR: setCC instruction + self.push_lir(X86Inst::SetCC { cc, dst: Reg::Rax }); + // LIR: zero extend AL to EAX + self.push_lir(X86Inst::Movzx { + src_size: OperandSize::B8, + dst_size: OperandSize::B32, + src: GpOperand::Reg(Reg::Rax), + dst: Reg::Rax, + }); + let dst_loc = self.get_location(target); + self.emit_move_to_loc(Reg::Rax, &dst_loc, 32); + } + + fn emit_neg(&mut self, insn: &Instruction) { + let size = insn.size.max(32); + let op_size = OperandSize::from_bits(size); + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let dst_loc = self.get_location(target); + let work_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::Rax, + }; + self.emit_move(src, work_reg, size); + // LIR: negate instruction + self.push_lir(X86Inst::Neg { + size: op_size, + dst: work_reg, + }); + if !matches!(&dst_loc, Loc::Reg(r) if *r == work_reg) { + self.emit_move_to_loc(work_reg, &dst_loc, size); + } + } + + fn emit_not(&mut self, insn: &Instruction) { + let size = insn.size.max(32); + let op_size = OperandSize::from_bits(size); + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let dst_loc = self.get_location(target); + let work_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::Rax, + }; + self.emit_move(src, work_reg, size); + // LIR: bitwise not instruction + self.push_lir(X86Inst::Not { + size: op_size, + dst: work_reg, + }); + if !matches!(&dst_loc, Loc::Reg(r) if *r == work_reg) { + self.emit_move_to_loc(work_reg, &dst_loc, size); + } + } + + // ======================================================================== + // Floating-Point Instructions (SSE) + // ======================================================================== + + /// Emit a floating-point load operation + fn emit_fp_load(&mut self, insn: &Instruction) { + let size = insn.size.max(32); + let addr = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let dst_loc = self.get_location(target); + let dst_xmm = match &dst_loc { + Loc::Xmm(x) => *x, + _ => XmmReg::Xmm15, + }; + let addr_loc = self.get_location(addr); + + let fp_size = if size <= 32 { + FpSize::Single + } else { + FpSize::Double + }; + + match addr_loc { + Loc::Reg(r) => { + self.push_lir(X86Inst::MovFp { + size: fp_size, + src: XmmOperand::Mem(MemAddr::BaseOffset { + base: r, + offset: insn.offset as i32, + }), + dst: XmmOperand::Reg(dst_xmm), + }); + } + Loc::Stack(offset) => { + // Check if the address operand is a symbol (local variable) or a temp (spilled address) + let is_symbol = self + .pseudos + .iter() + .find(|p| p.id == addr) + .is_some_and(|p| matches!(p.kind, PseudoKind::Sym(_))); + + if is_symbol { + // Local variable - load directly from stack slot + let total_offset = offset - insn.offset as i32 + self.callee_saved_offset; + self.push_lir(X86Inst::MovFp { + size: fp_size, + src: XmmOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -total_offset, + }), + dst: XmmOperand::Reg(dst_xmm), + }); + } else { + // Spilled address - load address first, then load from that address + let adjusted = offset + self.callee_saved_offset; + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }), + dst: GpOperand::Reg(Reg::R11), + }); + self.push_lir(X86Inst::MovFp { + size: fp_size, + src: XmmOperand::Mem(MemAddr::BaseOffset { + base: Reg::R11, + offset: insn.offset as i32, + }), + dst: XmmOperand::Reg(dst_xmm), + }); + } + } + Loc::Global(name) => { + self.push_lir(X86Inst::MovFp { + size: fp_size, + src: XmmOperand::Mem(MemAddr::RipRelative(Symbol { + name: name.to_string(), + is_local: false, + })), + dst: XmmOperand::Reg(dst_xmm), + }); + } + _ => { + // Load address into R11, then load from that address + self.emit_move(addr, Reg::R11, 64); + self.push_lir(X86Inst::MovFp { + size: fp_size, + src: XmmOperand::Mem(MemAddr::BaseOffset { + base: Reg::R11, + offset: insn.offset as i32, + }), + dst: XmmOperand::Reg(dst_xmm), + }); + } + } + + // If destination is not the XMM register we loaded to, move it + if !matches!(&dst_loc, Loc::Xmm(x) if *x == dst_xmm) { + self.emit_fp_move_from_xmm(dst_xmm, &dst_loc, size); + } + } + + /// Emit a floating-point store operation + fn emit_fp_store(&mut self, insn: &Instruction) { + let size = insn.size.max(32); + let (addr, value) = match (insn.src.first(), insn.src.get(1)) { + (Some(&a), Some(&v)) => (a, v), + _ => return, + }; + let fp_size = if size <= 32 { + FpSize::Single + } else { + FpSize::Double + }; + + // IMPORTANT: Check address location BEFORE emit_fp_move, because emit_fp_move + // may clobber RAX when loading immediate values. If addr is in RAX, we need + // to save it to R11 first. + let addr_loc = self.get_location(addr); + let addr_reg = match &addr_loc { + Loc::Reg(Reg::Rax) => { + // Address is in RAX - move it to R11 before emit_fp_move clobbers it + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Reg(Reg::R11), + }); + Some(Reg::R11) + } + Loc::Reg(r) => Some(*r), + _ => None, + }; + + // Move value to XMM15 (scratch register) - this may clobber RAX + self.emit_fp_move(value, XmmReg::Xmm15, size); + + match addr_loc { + Loc::Reg(_) => { + // Use the saved register (R11 if it was RAX, otherwise original) + let r = addr_reg.unwrap_or(Reg::Rax); + self.push_lir(X86Inst::MovFp { + size: fp_size, + src: XmmOperand::Reg(XmmReg::Xmm15), + dst: XmmOperand::Mem(MemAddr::BaseOffset { + base: r, + offset: insn.offset as i32, + }), + }); + } + Loc::Stack(offset) => { + // Check if the address operand is a symbol (local variable) or a temp (spilled address) + let is_symbol = self + .pseudos + .iter() + .find(|p| p.id == addr) + .is_some_and(|p| matches!(p.kind, PseudoKind::Sym(_))); + + if is_symbol { + // Local variable - store directly to stack slot + let total_offset = offset - insn.offset as i32 + self.callee_saved_offset; + self.push_lir(X86Inst::MovFp { + size: fp_size, + src: XmmOperand::Reg(XmmReg::Xmm15), + dst: XmmOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -total_offset, + }), + }); + } else { + // Spilled address - load address first, then store through it + let adjusted = offset + self.callee_saved_offset; + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }), + dst: GpOperand::Reg(Reg::R11), + }); + self.push_lir(X86Inst::MovFp { + size: fp_size, + src: XmmOperand::Reg(XmmReg::Xmm15), + dst: XmmOperand::Mem(MemAddr::BaseOffset { + base: Reg::R11, + offset: insn.offset as i32, + }), + }); + } + } + Loc::Global(name) => { + self.push_lir(X86Inst::MovFp { + size: fp_size, + src: XmmOperand::Reg(XmmReg::Xmm15), + dst: XmmOperand::Mem(MemAddr::RipRelative(Symbol { + name: name.to_string(), + is_local: false, + })), + }); + } + _ => { + // Load address into R11, then store + self.emit_move(addr, Reg::R11, 64); + self.push_lir(X86Inst::MovFp { + size: fp_size, + src: XmmOperand::Reg(XmmReg::Xmm15), + dst: XmmOperand::Mem(MemAddr::BaseOffset { + base: Reg::R11, + offset: insn.offset as i32, + }), + }); + } + } + } + + /// Emit floating-point binary operation (addss/addsd, subss/subsd, etc.) + fn emit_fp_binop(&mut self, insn: &Instruction) { + let size = insn.size.max(32); + let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { + (Some(&s1), Some(&s2)) => (s1, s2), + _ => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + + let dst_loc = self.get_location(target); + let dst_xmm = match &dst_loc { + Loc::Xmm(x) => *x, + _ => XmmReg::Xmm0, // Use XMM0 as work register + }; + let fp_size = FpSize::from_bits(size); + + // Helper to emit the FP binop LIR instruction + let emit_fp_binop_lir = |cg: &mut Self, src: XmmOperand, dst: XmmReg| match insn.op { + Opcode::FAdd => cg.push_lir(X86Inst::AddFp { + size: fp_size, + src, + dst, + }), + Opcode::FSub => cg.push_lir(X86Inst::SubFp { + size: fp_size, + src, + dst, + }), + Opcode::FMul => cg.push_lir(X86Inst::MulFp { + size: fp_size, + src, + dst, + }), + Opcode::FDiv => cg.push_lir(X86Inst::DivFp { + size: fp_size, + src, + dst, + }), + _ => {} + }; + + // Move first operand to destination XMM register + self.emit_fp_move(src1, dst_xmm, size); + + // Apply operation with second operand + let src2_loc = self.get_location(src2); + match src2_loc { + Loc::Xmm(x) => { + emit_fp_binop_lir(self, XmmOperand::Reg(x), dst_xmm); + } + Loc::Stack(offset) => { + let adjusted = offset + self.callee_saved_offset; + emit_fp_binop_lir( + self, + XmmOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }), + dst_xmm, + ); + } + Loc::FImm(v, _) => { + // Load float immediate to a scratch register, then operate + // Use XMM14 if dst is XMM15, otherwise use XMM15 + let scratch = if dst_xmm == XmmReg::Xmm15 { + XmmReg::Xmm14 + } else { + XmmReg::Xmm15 + }; + self.emit_fp_imm_to_xmm(v, scratch, size); + emit_fp_binop_lir(self, XmmOperand::Reg(scratch), dst_xmm); + } + _ => { + // Move to a scratch register first + // Use XMM14 if dst is XMM15, otherwise use XMM15 + let scratch = if dst_xmm == XmmReg::Xmm15 { + XmmReg::Xmm14 + } else { + XmmReg::Xmm15 + }; + self.emit_fp_move(src2, scratch, size); + emit_fp_binop_lir(self, XmmOperand::Reg(scratch), dst_xmm); + } + } + + // Move result to destination if not already there + if !matches!(&dst_loc, Loc::Xmm(x) if *x == dst_xmm) { + self.emit_fp_move_from_xmm(dst_xmm, &dst_loc, size); + } + } + + /// Emit floating-point negation + fn emit_fp_neg(&mut self, insn: &Instruction) { + let size = insn.size.max(32); + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + + let dst_loc = self.get_location(target); + let dst_xmm = match &dst_loc { + Loc::Xmm(x) => *x, + _ => XmmReg::Xmm0, + }; + let fp_size = FpSize::from_bits(size); + + // Move source to destination + self.emit_fp_move(src, dst_xmm, size); + + // XOR with sign bit mask to negate + // For float: 0x80000000, for double: 0x8000000000000000 + // Use a scratch register that's not dst_xmm to hold the sign mask + let scratch_xmm = if dst_xmm == XmmReg::Xmm15 { + XmmReg::Xmm14 + } else { + XmmReg::Xmm15 + }; + if size <= 32 { + // Create sign mask in scratch register: all zeros except sign bit + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Imm(0x80000000), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::MovGpXmm { + size: OperandSize::B32, + src: Reg::Rax, + dst: scratch_xmm, + }); + self.push_lir(X86Inst::XorFp { + size: fp_size, + src: scratch_xmm, + dst: dst_xmm, + }); + } else { + self.push_lir(X86Inst::MovAbs { + imm: 0x8000000000000000u64 as i64, + dst: Reg::Rax, + }); + self.push_lir(X86Inst::MovGpXmm { + size: OperandSize::B64, + src: Reg::Rax, + dst: scratch_xmm, + }); + self.push_lir(X86Inst::XorFp { + size: fp_size, + src: scratch_xmm, + dst: dst_xmm, + }); + } + + if !matches!(&dst_loc, Loc::Xmm(x) if *x == dst_xmm) { + self.emit_fp_move_from_xmm(dst_xmm, &dst_loc, size); + } + } + + /// Emit floating-point comparison + fn emit_fp_compare(&mut self, insn: &Instruction) { + let size = insn.size.max(32); + let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { + (Some(&s1), Some(&s2)) => (s1, s2), + _ => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let fp_size = FpSize::from_bits(size); + + // Load first operand to XMM0 + self.emit_fp_move(src1, XmmReg::Xmm0, size); + + // Compare with second operand using ucomiss/ucomisd + let src2_loc = self.get_location(src2); + match src2_loc { + Loc::Xmm(x) => { + self.push_lir(X86Inst::UComiFp { + size: fp_size, + src: XmmOperand::Reg(x), + dst: XmmReg::Xmm0, + }); + } + Loc::Stack(offset) => { + let adjusted = offset + self.callee_saved_offset; + self.push_lir(X86Inst::UComiFp { + size: fp_size, + src: XmmOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }), + dst: XmmReg::Xmm0, + }); + } + Loc::FImm(v, _) => { + self.emit_fp_imm_to_xmm(v, XmmReg::Xmm15, size); + self.push_lir(X86Inst::UComiFp { + size: fp_size, + src: XmmOperand::Reg(XmmReg::Xmm15), + dst: XmmReg::Xmm0, + }); + } + _ => { + self.emit_fp_move(src2, XmmReg::Xmm15, size); + self.push_lir(X86Inst::UComiFp { + size: fp_size, + src: XmmOperand::Reg(XmmReg::Xmm15), + dst: XmmReg::Xmm0, + }); + } + } + + // Set result based on comparison type + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::Rax, + }; + + // Use appropriate setcc instruction + // Note: FP comparisons set flags differently - need to handle unordered (NaN) cases + let cc = match insn.op { + Opcode::FCmpOEq => IntCC::E, // Equal (ZF=1, PF=0) + Opcode::FCmpONe => IntCC::Ne, // Not equal + Opcode::FCmpOLt => IntCC::B, // Below (CF=1) - for ordered less than + Opcode::FCmpOLe => IntCC::Be, // Below or equal + Opcode::FCmpOGt => IntCC::A, // Above (CF=0, ZF=0) + Opcode::FCmpOGe => IntCC::Ae, // Above or equal + _ => return, + }; + + self.push_lir(X86Inst::SetCC { cc, dst: dst_reg }); + self.push_lir(X86Inst::Movzx { + src_size: OperandSize::B8, + dst_size: OperandSize::B32, + src: GpOperand::Reg(dst_reg), + dst: dst_reg, + }); + + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, 32); + } + } + + /// Emit integer to float conversion + fn emit_int_to_float(&mut self, insn: &Instruction) { + let dst_size = insn.size.max(32); + let src_size = insn.src_size.max(32); + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + + let dst_loc = self.get_location(target); + let dst_xmm = match &dst_loc { + Loc::Xmm(x) => *x, + _ => XmmReg::Xmm0, + }; + + // Move integer to RAX first + self.emit_move(src, Reg::Rax, src_size); + + // Convert using cvtsi2ss/cvtsi2sd + let int_size = OperandSize::from_bits(src_size); + let fp_size = FpSize::from_bits(dst_size); + self.push_lir(X86Inst::CvtIntToFp { + int_size, + fp_size, + src: GpOperand::Reg(Reg::Rax), + dst: dst_xmm, + }); + + if !matches!(&dst_loc, Loc::Xmm(x) if *x == dst_xmm) { + self.emit_fp_move_from_xmm(dst_xmm, &dst_loc, dst_size); + } + } + + /// Emit float to integer conversion + fn emit_float_to_int(&mut self, insn: &Instruction) { + let dst_size = insn.size.max(32); + let src_size = insn.src_size.max(32); + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + + // Move float to XMM0 + self.emit_fp_move(src, XmmReg::Xmm0, src_size); + + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::Rax, + }; + + // Convert using cvttss2si/cvttsd2si (truncate toward zero) + let fp_size = FpSize::from_bits(src_size); + let int_size = OperandSize::from_bits(dst_size); + self.push_lir(X86Inst::CvtFpToInt { + fp_size, + int_size, + src: XmmOperand::Reg(XmmReg::Xmm0), + dst: dst_reg, + }); + + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, dst_size); + } + } + + /// Emit float to float conversion (e.g., float to double) + fn emit_float_to_float(&mut self, insn: &Instruction) { + let dst_size = insn.size.max(32); + let src_size = insn.src_size.max(32); + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + + let dst_loc = self.get_location(target); + let dst_xmm = match &dst_loc { + Loc::Xmm(x) => *x, + _ => XmmReg::Xmm0, + }; + + // Move source to XMM0 + self.emit_fp_move(src, XmmReg::Xmm0, src_size); + + // Convert + if src_size <= 32 && dst_size > 32 { + // float to double: cvtss2sd + self.push_lir(X86Inst::CvtFpFp { + src_size: FpSize::Single, + dst_size: FpSize::Double, + src: XmmReg::Xmm0, + dst: dst_xmm, + }); + } else if src_size > 32 && dst_size <= 32 { + // double to float: cvtsd2ss + self.push_lir(X86Inst::CvtFpFp { + src_size: FpSize::Double, + dst_size: FpSize::Single, + src: XmmReg::Xmm0, + dst: dst_xmm, + }); + } else { + // Same size, just move + if dst_xmm != XmmReg::Xmm0 { + let fp_size = FpSize::from_bits(dst_size); + self.push_lir(X86Inst::MovFp { + size: fp_size, + src: XmmOperand::Reg(XmmReg::Xmm0), + dst: XmmOperand::Reg(dst_xmm), + }); + } + } + + if !matches!(&dst_loc, Loc::Xmm(x) if *x == dst_xmm) { + self.emit_fp_move_from_xmm(dst_xmm, &dst_loc, dst_size); + } + } + + /// Load a floating-point constant into an XMM register + fn emit_fp_const_load(&mut self, target: PseudoId, value: f64, size: u32) { + let dst_loc = self.get_location(target); + let dst_xmm = match &dst_loc { + Loc::Xmm(x) => *x, + _ => XmmReg::Xmm0, + }; + + self.emit_fp_imm_to_xmm(value, dst_xmm, size); + + if !matches!(&dst_loc, Loc::Xmm(x) if *x == dst_xmm) { + self.emit_fp_move_from_xmm(dst_xmm, &dst_loc, size); + } + } + + /// Load a float immediate value into an XMM register + fn emit_fp_imm_to_xmm(&mut self, value: f64, xmm: XmmReg, size: u32) { + if value == 0.0 { + // Use xorps/xorpd to zero the register (faster) + let fp_size = FpSize::from_bits(size); + self.push_lir(X86Inst::XorFp { + size: fp_size, + src: xmm, + dst: xmm, + }); + } else if size <= 32 { + // Float: load via integer register + let bits = (value as f32).to_bits(); + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Imm(bits as i64), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::MovGpXmm { + size: OperandSize::B32, + src: Reg::Rax, + dst: xmm, + }); + } else { + // Double: load via integer register + let bits = value.to_bits(); + self.push_lir(X86Inst::MovAbs { + imm: bits as i64, + dst: Reg::Rax, + }); + self.push_lir(X86Inst::MovGpXmm { + size: OperandSize::B64, + src: Reg::Rax, + dst: xmm, + }); + } + } + + /// Move a value to an XMM register + fn emit_fp_move(&mut self, src: PseudoId, dst: XmmReg, size: u32) { + let src_loc = self.get_location(src); + let fp_size = FpSize::from_bits(size); + + match src_loc { + Loc::Xmm(x) if x == dst => { + // Already in destination + } + Loc::Xmm(x) => { + self.push_lir(X86Inst::MovFp { + size: fp_size, + src: XmmOperand::Reg(x), + dst: XmmOperand::Reg(dst), + }); + } + Loc::Stack(offset) => { + let adjusted = offset + self.callee_saved_offset; + self.push_lir(X86Inst::MovFp { + size: fp_size, + src: XmmOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }), + dst: XmmOperand::Reg(dst), + }); + } + Loc::FImm(v, imm_size) => { + // Use the size from the FImm, not the passed-in size + // This ensures float constants are loaded as float, not double + self.emit_fp_imm_to_xmm(v, dst, imm_size); + } + Loc::Reg(r) => { + // Move from GP register to XMM (unusual but possible) + self.push_lir(X86Inst::MovGpXmm { + size: OperandSize::B64, + src: r, + dst, + }); + } + Loc::Imm(v) => { + // Integer immediate to float + if size <= 32 { + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Imm(v), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::MovGpXmm { + size: OperandSize::B32, + src: Reg::Rax, + dst, + }); + } else { + self.push_lir(X86Inst::MovAbs { + imm: v, + dst: Reg::Rax, + }); + self.push_lir(X86Inst::MovGpXmm { + size: OperandSize::B64, + src: Reg::Rax, + dst, + }); + } + } + Loc::Global(name) => { + self.push_lir(X86Inst::MovFp { + size: fp_size, + src: XmmOperand::Mem(MemAddr::RipRelative(Symbol::global(name.clone()))), + dst: XmmOperand::Reg(dst), + }); + } + } + } + + /// Move from XMM register to a location + fn emit_fp_move_from_xmm(&mut self, src: XmmReg, dst: &Loc, size: u32) { + let fp_size = FpSize::from_bits(size); + + match dst { + Loc::Xmm(x) if *x == src => { + // Already in destination + } + Loc::Xmm(x) => { + self.push_lir(X86Inst::MovFp { + size: fp_size, + src: XmmOperand::Reg(src), + dst: XmmOperand::Reg(*x), + }); + } + Loc::Stack(offset) => { + let adjusted = offset + self.callee_saved_offset; + self.push_lir(X86Inst::MovFp { + size: fp_size, + src: XmmOperand::Reg(src), + dst: XmmOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }), + }); + } + Loc::Reg(r) => { + // Move from XMM to GP register + self.push_lir(X86Inst::MovXmmGp { + size: OperandSize::B64, + src, + dst: *r, + }); + } + _ => {} + } + } + + fn emit_load(&mut self, insn: &Instruction) { + let mem_size = insn.size; + let reg_size = insn.size.max(32); + let addr = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let dst_loc = self.get_location(target); + + // Check if this is an FP load + let is_fp = + insn.typ.as_ref().is_some_and(|t| t.is_float()) || matches!(dst_loc, Loc::Xmm(_)); + + if is_fp { + self.emit_fp_load(insn); + return; + } + + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::Rax, + }; + + // Determine if we need sign or zero extension for small types + // is_unsigned() returns true for explicitly unsigned types + // For plain char, use target.char_signed to determine signedness + let is_unsigned = insn.typ.as_ref().is_some_and(|t| { + if t.is_unsigned() { + true + } else if t.is_plain_char() { + // Plain char: unsigned if target says char is not signed + !self.target.char_signed + } else { + false + } + }); + + let addr_loc = self.get_location(addr); + match addr_loc { + Loc::Reg(r) => { + if mem_size <= 16 { + // Use sign/zero extending load + // LIR: use Movzx or Movsx + let src_size = OperandSize::from_bits(mem_size); + if is_unsigned { + self.push_lir(X86Inst::Movzx { + src_size, + dst_size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: r, + offset: insn.offset as i32, + }), + dst: dst_reg, + }); + } else { + self.push_lir(X86Inst::Movsx { + src_size, + dst_size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: r, + offset: insn.offset as i32, + }), + dst: dst_reg, + }); + } + } else { + // 32/64-bit load + let op_size = OperandSize::from_bits(reg_size); + // LIR: simple Mov + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: r, + offset: insn.offset as i32, + }), + dst: GpOperand::Reg(dst_reg), + }); + } + } + Loc::Stack(offset) => { + // Check if the address operand is a symbol (local variable) or a temp (spilled address) + let is_symbol = self + .pseudos + .iter() + .find(|p| p.id == addr) + .is_some_and(|p| matches!(p.kind, PseudoKind::Sym(_))); + + if is_symbol { + // Local variable - load directly from stack slot + let total_offset = offset - insn.offset as i32 + self.callee_saved_offset; + if mem_size <= 16 { + // LIR: sign/zero extending load from stack + let src_size = OperandSize::from_bits(mem_size); + if is_unsigned { + self.push_lir(X86Inst::Movzx { + src_size, + dst_size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -total_offset, + }), + dst: dst_reg, + }); + } else { + self.push_lir(X86Inst::Movsx { + src_size, + dst_size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -total_offset, + }), + dst: dst_reg, + }); + } + } else { + // LIR: regular load from stack + let op_size = OperandSize::from_bits(reg_size); + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -total_offset, + }), + dst: GpOperand::Reg(dst_reg), + }); + } + } else { + // Spilled address - load address first, then load from that address + let adjusted = offset + self.callee_saved_offset; + // LIR: load spilled address + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }), + dst: GpOperand::Reg(Reg::R11), + }); + if mem_size <= 16 { + // LIR: sign/zero extending load through R11 + let src_size = OperandSize::from_bits(mem_size); + if is_unsigned { + self.push_lir(X86Inst::Movzx { + src_size, + dst_size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::R11, + offset: insn.offset as i32, + }), + dst: dst_reg, + }); + } else { + self.push_lir(X86Inst::Movsx { + src_size, + dst_size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::R11, + offset: insn.offset as i32, + }), + dst: dst_reg, + }); + } + } else { + // LIR: regular load through R11 + let op_size = OperandSize::from_bits(reg_size); + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::R11, + offset: insn.offset as i32, + }), + dst: GpOperand::Reg(dst_reg), + }); + } + } + } + Loc::Global(name) => { + if mem_size <= 16 { + // LIR: sign/zero extending load from global + let src_size = OperandSize::from_bits(mem_size); + if is_unsigned { + self.push_lir(X86Inst::Movzx { + src_size, + dst_size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::RipRelative(Symbol::global(name.clone()))), + dst: dst_reg, + }); + } else { + self.push_lir(X86Inst::Movsx { + src_size, + dst_size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::RipRelative(Symbol::global(name.clone()))), + dst: dst_reg, + }); + } + } else { + // LIR: regular load from global + let op_size = OperandSize::from_bits(reg_size); + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Mem(MemAddr::RipRelative(Symbol::global(name.clone()))), + dst: GpOperand::Reg(dst_reg), + }); + } + } + _ => { + self.emit_move(addr, Reg::R11, 64); + if mem_size <= 16 { + // LIR: sign/zero extending load through R11 + let src_size = OperandSize::from_bits(mem_size); + if is_unsigned { + self.push_lir(X86Inst::Movzx { + src_size, + dst_size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::R11, + offset: insn.offset as i32, + }), + dst: dst_reg, + }); + } else { + self.push_lir(X86Inst::Movsx { + src_size, + dst_size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::R11, + offset: insn.offset as i32, + }), + dst: dst_reg, + }); + } + } else { + // LIR: regular load through R11 + let op_size = OperandSize::from_bits(reg_size); + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::R11, + offset: insn.offset as i32, + }), + dst: GpOperand::Reg(dst_reg), + }); + } + } + } + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, reg_size); + } + } + + fn emit_store(&mut self, insn: &Instruction) { + // Use actual size for memory stores (8, 16, 32, 64 bits) + // This is critical for char/short types that need byte/word stores + let mem_size = insn.size; + // Register operations use minimum 32-bit + let reg_size = insn.size.max(32); + + let (addr, value) = match (insn.src.first(), insn.src.get(1)) { + (Some(&a), Some(&v)) => (a, v), + _ => return, + }; + + // Check if this is an FP store + let value_loc = self.get_location(value); + let is_fp = insn.typ.as_ref().is_some_and(|t| t.is_float()) + || matches!(value_loc, Loc::Xmm(_) | Loc::FImm(..)); + + if is_fp { + self.emit_fp_store(insn); + return; + } + + // For struct stores (size > 64), we need to copy multiple words + // The value is a symbol containing the struct data + if mem_size > 64 { + self.emit_struct_store(insn, addr, value); + return; + } + + self.emit_move(value, Reg::Rax, reg_size); + let addr_loc = self.get_location(addr); + match addr_loc { + Loc::Reg(r) => { + let op_size = OperandSize::from_bits(mem_size); + // LIR: store through register + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: r, + offset: insn.offset as i32, + }), + }); + } + Loc::Stack(offset) => { + // Check if the address operand is a symbol (local variable) or a temp (spilled address) + let is_symbol = self + .pseudos + .iter() + .find(|p| p.id == addr) + .is_some_and(|p| matches!(p.kind, PseudoKind::Sym(_))); + + let op_size = OperandSize::from_bits(mem_size); + if is_symbol { + // Local variable - store directly to stack slot + let total_offset = offset - insn.offset as i32 + self.callee_saved_offset; + // LIR: store to stack slot + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -total_offset, + }), + }); + } else { + // Spilled address - load address first, then store through it + let adjusted = offset + self.callee_saved_offset; + // LIR: load spilled address + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }), + dst: GpOperand::Reg(Reg::R11), + }); + // LIR: store through loaded address + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::R11, + offset: insn.offset as i32, + }), + }); + } + } + Loc::Global(name) => { + let op_size = OperandSize::from_bits(mem_size); + // LIR: store to global via RIP-relative + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::RipRelative(Symbol::global(name.clone()))), + }); + } + _ => { + self.emit_move(addr, Reg::R11, 64); + let op_size = OperandSize::from_bits(mem_size); + // LIR: store through R11 + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::R11, + offset: insn.offset as i32, + }), + }); + } + } + } + + /// Emit a struct copy (store of size > 64 bits) + /// The value is a symbol containing the source struct data + fn emit_struct_store(&mut self, insn: &Instruction, addr: PseudoId, value: PseudoId) { + let struct_size = insn.size; // Size in bits + let num_qwords = (struct_size + 63) / 64; + + // Get source address (where the struct data is) + let value_loc = self.get_location(value); + // Get destination address + let addr_loc = self.get_location(addr); + + // Load source address into R10 + match value_loc { + Loc::Stack(offset) => { + let adjusted = offset + self.callee_saved_offset; + // LIR: lea for source address + self.push_lir(X86Inst::Lea { + addr: MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }, + dst: Reg::R10, + }); + } + Loc::Reg(r) => { + if r != Reg::R10 { + // LIR: mov for source address + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(r), + dst: GpOperand::Reg(Reg::R10), + }); + } + } + Loc::Global(ref name) => { + // LIR: lea for global source address + self.push_lir(X86Inst::Lea { + addr: MemAddr::RipRelative(Symbol::global(name.clone())), + dst: Reg::R10, + }); + } + _ => return, + } + + // Load destination address into R11 + match addr_loc { + Loc::Stack(offset) => { + let adjusted = offset - insn.offset as i32 + self.callee_saved_offset; + // LIR: lea for destination address + self.push_lir(X86Inst::Lea { + addr: MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -adjusted, + }, + dst: Reg::R11, + }); + } + Loc::Reg(r) => { + if insn.offset != 0 { + // LIR: lea with offset + self.push_lir(X86Inst::Lea { + addr: MemAddr::BaseOffset { + base: r, + offset: insn.offset as i32, + }, + dst: Reg::R11, + }); + } else if r != Reg::R11 { + // LIR: mov for destination address + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(r), + dst: GpOperand::Reg(Reg::R11), + }); + } + } + Loc::Global(ref name) => { + // LIR: lea for global destination address + self.push_lir(X86Inst::Lea { + addr: MemAddr::RipRelative(Symbol::global(name.clone())), + dst: Reg::R11, + }); + } + _ => return, + } + + // Copy qword by qword + for i in 0..num_qwords { + let byte_offset = (i * 8) as i32; + // LIR: load from source + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::R10, + offset: byte_offset, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + // LIR: store to destination + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::R11, + offset: byte_offset, + }), + }); + } + } + + fn emit_call(&mut self, insn: &Instruction) { + let func_name = match &insn.func_name { + Some(n) => n.clone(), + None => return, + }; + + // System V AMD64 ABI: + // - Integer arguments: RDI, RSI, RDX, RCX, R8, R9 (6 registers) + // - Floating-point arguments: XMM0-XMM7 (8 registers) + // - Each class has its own counter (int and FP are independent) + // - For variadic functions: float args go in XMM registers (printf needs this), + // but variadic integer args go on stack (our va_arg impl reads from stack) + // - AL must be set to the number of XMM registers used for variadic calls + let int_arg_regs = Reg::arg_regs(); + let fp_arg_regs = XmmReg::arg_regs(); + let mut int_arg_idx = 0; + let mut fp_arg_idx = 0; + let mut stack_args = 0; + + let variadic_start = insn.variadic_arg_start.unwrap_or(usize::MAX); + + // First pass: determine which args go on stack + // For variadic calls: variadic INTEGER args go on stack, variadic FLOAT args use XMM + // For non-variadic calls: overflow args go on stack + let mut stack_arg_indices = Vec::new(); + let mut temp_int_idx = 0; + let mut temp_fp_idx = 0; + + for i in 0..insn.src.len() { + let arg_type = insn.arg_types.get(i); + let is_fp = if let Some(typ) = arg_type { + typ.is_float() + } else { + let arg_loc = self.get_location(insn.src[i]); + matches!(arg_loc, Loc::Xmm(_) | Loc::FImm(..)) + }; + + // Note: We pass all args (fixed and variadic) the same way per SysV ABI + // Variadic args start at index variadic_start, but use same register sequence + let _is_variadic_arg = i >= variadic_start; + + if is_fp { + // Float args go in XMM registers (all args, including variadic) + if temp_fp_idx >= fp_arg_regs.len() { + stack_arg_indices.push(i); + } + temp_fp_idx += 1; + } else { + // Integer args go in GP registers (all args, including variadic) + // per x86-64 SysV ABI + if temp_int_idx >= int_arg_regs.len() { + stack_arg_indices.push(i); + } + temp_int_idx += 1; + } + } + + // Ensure 16-byte stack alignment before call + // If we're pushing an odd number of 8-byte args, add padding + let needs_padding = stack_arg_indices.len() % 2 == 1; + if needs_padding { + self.push_lir(X86Inst::Sub { + size: OperandSize::B64, + src: GpOperand::Imm(8), + dst: Reg::Rsp, + }); + } + + // Push stack arguments in reverse order + for &i in stack_arg_indices.iter().rev() { + let arg = insn.src[i]; + let arg_type = insn.arg_types.get(i); + let is_fp = if let Some(typ) = arg_type { + typ.is_float() + } else { + let arg_loc = self.get_location(arg); + matches!(arg_loc, Loc::Xmm(_) | Loc::FImm(..)) + }; + + if is_fp { + let fp_size = if let Some(typ) = arg_type { + typ.size_bits() + } else { + 64 + }; + self.emit_fp_move(arg, XmmReg::Xmm15, fp_size); + self.push_lir(X86Inst::Sub { + size: OperandSize::B64, + src: GpOperand::Imm(8), + dst: Reg::Rsp, + }); + let fp_lir_size = FpSize::from_bits(fp_size); + self.push_lir(X86Inst::MovFp { + size: fp_lir_size, + src: XmmOperand::Reg(XmmReg::Xmm15), + dst: XmmOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rsp, + offset: 0, + }), + }); + } else { + let arg_size = if let Some(typ) = arg_type { + typ.size_bits().max(32) + } else { + 64 + }; + self.emit_move(arg, Reg::Rax, arg_size); + self.push_lir(X86Inst::Push { + src: GpOperand::Reg(Reg::Rax), + }); + } + stack_args += 1; + } + + // Now handle register arguments + for i in 0..insn.src.len() { + // Skip args that went to stack + if stack_arg_indices.contains(&i) { + continue; + } + let arg = insn.src[i]; + // Get argument type if available, otherwise fall back to location-based detection + let arg_type = insn.arg_types.get(i); + let is_fp = if let Some(typ) = arg_type { + typ.is_float() + } else { + // Fall back to location-based detection for backwards compatibility + let arg_loc = self.get_location(arg); + matches!(arg_loc, Loc::Xmm(_) | Loc::FImm(..)) + }; + + // Get argument size from type, with minimum 32-bit for register ops + let arg_size = if let Some(typ) = arg_type { + typ.size_bits().max(32) + } else { + 64 // Default for backwards compatibility + }; + + if is_fp { + // FP size from type (32 for float, 64 for double) + let fp_size = if let Some(typ) = arg_type { + typ.size_bits() + } else { + 64 + }; + // Float args go in XMM registers + self.emit_fp_move(arg, fp_arg_regs[fp_arg_idx], fp_size); + fp_arg_idx += 1; + } else { + // Integer args go in GP registers + self.emit_move(arg, int_arg_regs[int_arg_idx], arg_size); + int_arg_idx += 1; + } + } + + // For variadic function calls, set AL to the number of XMM registers used + // This is required by the System V AMD64 ABI for variadic functions + if insn.variadic_arg_start.is_some() { + self.push_lir(X86Inst::Mov { + size: OperandSize::B8, + src: GpOperand::Imm(fp_arg_idx as i64), + dst: GpOperand::Reg(Reg::Rax), + }); + } + + // Emit the call + self.push_lir(X86Inst::Call { + target: CallTarget::Direct(Symbol::global(func_name.clone())), + }); + + // Clean up stack arguments (including padding if any) + let stack_cleanup = stack_args * 8 + if needs_padding { 8 } else { 0 }; + if stack_cleanup > 0 { + self.push_lir(X86Inst::Add { + size: OperandSize::B64, + src: GpOperand::Imm(stack_cleanup as i64), + dst: Reg::Rsp, + }); + } + + // Handle return value + if let Some(target) = insn.target { + let dst_loc = self.get_location(target); + // Check if return value is floating-point based on its location or type + let is_fp_result = if let Some(ref typ) = insn.typ { + typ.is_float() + } else { + matches!(dst_loc, Loc::Xmm(_) | Loc::FImm(..)) + }; + + // Get return value size from type + let ret_size = insn.size.max(32); + + if is_fp_result { + // FP return value is in XMM0 + self.emit_fp_move_from_xmm(XmmReg::Xmm0, &dst_loc, ret_size); + } else { + // Integer return value is in RAX + self.emit_move_to_loc(Reg::Rax, &dst_loc, ret_size); + } + } + } + + fn emit_select(&mut self, insn: &Instruction) { + let (cond, then_val, else_val) = match (insn.src.first(), insn.src.get(1), insn.src.get(2)) + { + (Some(&c), Some(&t), Some(&e)) => (c, t, e), + _ => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let size = insn.size.max(32); + let op_size = OperandSize::from_bits(size); + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::Rax, + }; + self.emit_move(else_val, dst_reg, size); + let cond_loc = self.get_location(cond); + match &cond_loc { + Loc::Reg(r) => { + // LIR: test register with itself + self.push_lir(X86Inst::Test { + size: OperandSize::B64, + src: GpOperand::Reg(*r), + dst: GpOperand::Reg(*r), + }); + } + Loc::Imm(v) => { + if *v != 0 { + self.emit_move(then_val, dst_reg, size); + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, size); + } + return; + } + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, size); + } + return; + } + _ => { + self.emit_move(cond, Reg::R11, 64); + // LIR: test R11 with itself + self.push_lir(X86Inst::Test { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::R11), + dst: GpOperand::Reg(Reg::R11), + }); + } + } + self.emit_move(then_val, Reg::R10, size); + // LIR: conditional move if not equal (non-zero) + self.push_lir(X86Inst::CMov { + cc: IntCC::Ne, + size: op_size, + src: GpOperand::Reg(Reg::R10), + dst: dst_reg, + }); + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, size); + } + } + + fn emit_extend(&mut self, insn: &Instruction) { + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::Rax, + }; + match insn.op { + Opcode::Zext => { + self.emit_move(src, dst_reg, insn.size); + } + Opcode::Sext => { + // Move source at its original size, then sign-extend + self.emit_move(src, dst_reg, insn.src_size.max(32)); + match insn.src_size { + 8 => { + // LIR: sign-extend byte to qword + self.push_lir(X86Inst::Movsx { + src_size: OperandSize::B8, + dst_size: OperandSize::B64, + src: GpOperand::Reg(dst_reg), + dst: dst_reg, + }); + } + 16 => { + // LIR: sign-extend word to qword + self.push_lir(X86Inst::Movsx { + src_size: OperandSize::B16, + dst_size: OperandSize::B64, + src: GpOperand::Reg(dst_reg), + dst: dst_reg, + }); + } + 32 => { + // LIR: sign-extend dword to qword + self.push_lir(X86Inst::Movsx { + src_size: OperandSize::B32, + dst_size: OperandSize::B64, + src: GpOperand::Reg(dst_reg), + dst: dst_reg, + }); + } + _ => {} + } + } + Opcode::Trunc => { + // Move source value to register, then mask to target size + self.emit_move(src, dst_reg, 64); + // Truncate by masking to the target size + match insn.size { + 8 => { + // LIR: zero-extend byte to dword (masks to 8 bits) + self.push_lir(X86Inst::Movzx { + src_size: OperandSize::B8, + dst_size: OperandSize::B32, + src: GpOperand::Reg(dst_reg), + dst: dst_reg, + }); + } + 16 => { + // LIR: zero-extend word to dword (masks to 16 bits) + self.push_lir(X86Inst::Movzx { + src_size: OperandSize::B16, + dst_size: OperandSize::B32, + src: GpOperand::Reg(dst_reg), + dst: dst_reg, + }); + } + 32 => { + // LIR: mov dword to self (32-bit ops zero upper 32 bits) + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(dst_reg), + dst: GpOperand::Reg(dst_reg), + }); + } + _ => {} + } + } + _ => {} + } + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, insn.size); + } + } + + fn emit_copy_with_type(&mut self, src: PseudoId, dst: PseudoId, size: u32, typ: Option<&Type>) { + // Keep actual size for handling narrow types + let actual_size = size; + let reg_size = size.max(32); + let dst_loc = self.get_location(dst); + let src_loc = self.get_location(src); + + // Check if this is a FP copy (source or dest is in XMM or is FImm) + let is_fp_copy = + matches!(&src_loc, Loc::Xmm(_) | Loc::FImm(..)) || matches!(&dst_loc, Loc::Xmm(_)); + + // Determine if the type is unsigned (for proper sign/zero extension) + // For plain char, use target.char_signed to determine signedness + let is_unsigned = typ.is_some_and(|t| { + if t.is_unsigned() { + true + } else if t.is_plain_char() { + // Plain char: unsigned if target says char is not signed + !self.target.char_signed + } else { + false + } + }); + + if is_fp_copy { + // Handle FP copy + let dst_xmm = match &dst_loc { + Loc::Xmm(x) => *x, + _ => XmmReg::Xmm0, + }; + + self.emit_fp_move(src, dst_xmm, reg_size); + + if !matches!(&dst_loc, Loc::Xmm(x) if *x == dst_xmm) { + self.emit_fp_move_from_xmm(dst_xmm, &dst_loc, reg_size); + } + } else { + // Integer copy + match &dst_loc { + Loc::Reg(r) => { + self.emit_move(src, *r, reg_size); + // For narrow types (8 or 16 bits), truncate to correct width + // Unsigned: zero-extend (AND mask) + // Signed: sign-extend (shift left then arithmetic shift right) + if actual_size == 8 { + if is_unsigned { + // LIR: zero-extend with AND mask + self.push_lir(X86Inst::And { + size: OperandSize::B32, + src: GpOperand::Imm(0xFF), + dst: *r, + }); + } else { + // Sign-extend: shift left 24 bits then arithmetic shift right 24 bits + // LIR: shift left + self.push_lir(X86Inst::Shl { + size: OperandSize::B32, + count: ShiftCount::Imm(24), + dst: *r, + }); + // LIR: arithmetic shift right + self.push_lir(X86Inst::Sar { + size: OperandSize::B32, + count: ShiftCount::Imm(24), + dst: *r, + }); + } + } else if actual_size == 16 { + if is_unsigned { + // LIR: zero-extend with AND mask + self.push_lir(X86Inst::And { + size: OperandSize::B32, + src: GpOperand::Imm(0xFFFF), + dst: *r, + }); + } else { + // Sign-extend: shift left 16 bits then arithmetic shift right 16 bits + // LIR: shift left + self.push_lir(X86Inst::Shl { + size: OperandSize::B32, + count: ShiftCount::Imm(16), + dst: *r, + }); + // LIR: arithmetic shift right + self.push_lir(X86Inst::Sar { + size: OperandSize::B32, + count: ShiftCount::Imm(16), + dst: *r, + }); + } + } + } + Loc::Stack(_) => { + self.emit_move(src, Reg::Rax, reg_size); + // For narrow types stored to stack, use the actual size + if actual_size <= 16 { + self.emit_move_to_loc(Reg::Rax, &dst_loc, actual_size); + } else { + self.emit_move_to_loc(Reg::Rax, &dst_loc, reg_size); + } + } + _ => {} + } + } + } + + // ======================================================================== + // Variadic function support (va_* builtins) + // ======================================================================== + // + // On x86-64 System V ABI, va_list is a 24-byte struct: + // struct { + // unsigned int gp_offset; // offset to next GP reg in save area + // unsigned int fp_offset; // offset to next FP reg in save area + // void *overflow_arg_area; // pointer to stack arguments + // void *reg_save_area; // pointer to register save area + // }; + // + // This implementation provides a simplified version that works with + // stack-based arguments. Full register save area support would require + // function prologue changes. + + /// Emit va_start: Initialize va_list + fn emit_va_start(&mut self, insn: &Instruction) { + let ap_addr = match insn.src.first() { + Some(&s) => s, + None => return, + }; + + let ap_loc = self.get_location(ap_addr); + + // For x86-64 System V ABI: + // va_list is a 24-byte struct. We initialize: + // - gp_offset = fixed_gp_params * 8 (offset to first variadic GP arg in save area) + // - fp_offset = 176 (skip all 8 FP regs, not saving them in this impl) + // - overflow_arg_area = rbp + 16 (where stack args start, for overflow) + // - reg_save_area = pointer to where we saved the argument registers + + let gp_offset = (self.num_fixed_gp_params * 8) as i32; + let reg_save_base = self.reg_save_area_offset; + + match ap_loc { + Loc::Stack(offset) => { + // gp_offset = offset to next variadic GP arg + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Imm(gp_offset as i64), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset, + }), + }); + // fp_offset = 176 (all FP args consumed, use stack for FP varargs) + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Imm(176), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: offset + 4, + }), + }); + // overflow_arg_area = rbp + 16 + self.push_lir(X86Inst::Lea { + addr: MemAddr::BaseOffset { + base: Reg::Rbp, + offset: 16, + }, + dst: Reg::Rax, + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: offset + 8, + }), + }); + // reg_save_area = pointer to saved registers + self.push_lir(X86Inst::Lea { + addr: MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -reg_save_base, + }, + dst: Reg::Rax, + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: offset + 16, + }), + }); + } + Loc::Reg(r) => { + // Register contains the address of the va_list struct + // gp_offset = offset to next variadic GP arg + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Imm(gp_offset as i64), + dst: GpOperand::Mem(MemAddr::BaseOffset { base: r, offset: 0 }), + }); + // fp_offset = 176 (all FP args consumed) + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Imm(176), + dst: GpOperand::Mem(MemAddr::BaseOffset { base: r, offset: 4 }), + }); + // overflow_arg_area = rbp + 16 + self.push_lir(X86Inst::Lea { + addr: MemAddr::BaseOffset { + base: Reg::Rbp, + offset: 16, + }, + dst: Reg::Rax, + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { base: r, offset: 8 }), + }); + // reg_save_area = pointer to saved registers + self.push_lir(X86Inst::Lea { + addr: MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -reg_save_base, + }, + dst: Reg::Rax, + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: r, + offset: 16, + }), + }); + } + _ => {} + } + } + + /// Emit va_arg: Get the next variadic argument + fn emit_va_arg(&mut self, insn: &Instruction) { + let ap_addr = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + + let default_type = Type::basic(crate::types::TypeKind::Int); + let arg_type = insn.typ.as_ref().unwrap_or(&default_type); + let arg_size = arg_type.size_bits().max(32); + let arg_bytes = (arg_size / 8).max(8) as i32; // Minimum 8 bytes per slot + + let ap_loc = self.get_location(ap_addr); + let dst_loc = self.get_location(target); + + // Generate unique label for this va_arg call + let label_suffix = self.unique_label_counter; + self.unique_label_counter += 1; + + match &ap_loc { + Loc::Stack(ap_offset) => { + if arg_type.is_float() { + // For float args, use overflow_arg_area (FP register save not implemented) + // LIR: load overflow_arg_area pointer + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: ap_offset + 8, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + + let fp_size = arg_type.size_bits(); + let lir_fp_size = if fp_size <= 32 { + FpSize::Single + } else { + FpSize::Double + }; + // LIR: load float value from overflow area + self.push_lir(X86Inst::MovFp { + size: lir_fp_size, + src: XmmOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rax, + offset: 0, + }), + dst: XmmOperand::Reg(XmmReg::Xmm15), + }); + + // Store to destination + match &dst_loc { + Loc::Xmm(x) => { + // LIR: move to destination XMM register + self.push_lir(X86Inst::MovFp { + size: lir_fp_size, + src: XmmOperand::Reg(XmmReg::Xmm15), + dst: XmmOperand::Reg(*x), + }); + } + Loc::Stack(dst_offset) => { + // LIR: store to stack + self.push_lir(X86Inst::MovFp { + size: lir_fp_size, + src: XmmOperand::Reg(XmmReg::Xmm15), + dst: XmmOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: *dst_offset, + }), + }); + } + _ => {} + } + + // Advance overflow_arg_area + // LIR: add bytes to pointer + self.push_lir(X86Inst::Add { + size: OperandSize::B64, + src: GpOperand::Imm(arg_bytes as i64), + dst: Reg::Rax, + }); + // LIR: store updated pointer + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: ap_offset + 8, + }), + }); + } else { + // For integer args, check gp_offset to see if we use reg_save_area or overflow + // gp_offset < 48 means we have saved registers to read + let overflow_label = Label::new("va_overflow", label_suffix); + let done_label = Label::new("va_done", label_suffix); + + // LIR: load gp_offset + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: *ap_offset, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + // LIR: compare with 48 + self.push_lir(X86Inst::Cmp { + size: OperandSize::B32, + src: GpOperand::Imm(48), + dst: GpOperand::Reg(Reg::Rax), + }); + // LIR: jump if above or equal + self.push_lir(X86Inst::Jcc { + cc: IntCC::Ae, + target: overflow_label.clone(), + }); + + // Use register save area: load from reg_save_area + gp_offset + // LIR: load reg_save_area + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: ap_offset + 16, + }), + dst: GpOperand::Reg(Reg::R10), + }); + // LIR: sign-extend gp_offset + self.push_lir(X86Inst::Movsx { + src_size: OperandSize::B32, + dst_size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: Reg::R11, + }); + // LIR: calculate reg_save_area + gp_offset + self.push_lir(X86Inst::Add { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::R11), + dst: Reg::R10, + }); + + // Load value from save area + let lir_arg_size = OperandSize::from_bits(arg_size); + match &dst_loc { + Loc::Reg(r) => { + // LIR: load from save area to destination register + self.push_lir(X86Inst::Mov { + size: lir_arg_size, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::R10, + offset: 0, + }), + dst: GpOperand::Reg(*r), + }); + } + Loc::Stack(dst_offset) => { + // LIR: load to temp then store to stack + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::R10, + offset: 0, + }), + dst: GpOperand::Reg(Reg::R11), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(Reg::R11), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: *dst_offset, + }), + }); + } + _ => {} + } + + // Increment gp_offset by 8 + // LIR: add 8 to gp_offset + self.push_lir(X86Inst::Add { + size: OperandSize::B32, + src: GpOperand::Imm(8), + dst: Reg::Rax, + }); + // LIR: store updated gp_offset + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: *ap_offset, + }), + }); + // LIR: jump to done + self.push_lir(X86Inst::Jmp { + target: done_label.clone(), + }); + + // Overflow path: use overflow_arg_area + // LIR: overflow label + self.push_lir(X86Inst::Directive(Directive::BlockLabel(overflow_label))); + // LIR: load overflow_arg_area + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: ap_offset + 8, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + + match &dst_loc { + Loc::Reg(r) => { + // LIR: load from overflow area + self.push_lir(X86Inst::Mov { + size: lir_arg_size, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rax, + offset: 0, + }), + dst: GpOperand::Reg(*r), + }); + } + Loc::Stack(dst_offset) => { + // LIR: load to temp then store + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rax, + offset: 0, + }), + dst: GpOperand::Reg(Reg::R11), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(Reg::R11), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: *dst_offset, + }), + }); + } + _ => {} + } + + // Advance overflow_arg_area + // LIR: add arg_bytes + self.push_lir(X86Inst::Add { + size: OperandSize::B64, + src: GpOperand::Imm(arg_bytes as i64), + dst: Reg::Rax, + }); + // LIR: store updated pointer + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: ap_offset + 8, + }), + }); + + // Done label + self.push_lir(X86Inst::Directive(Directive::BlockLabel(done_label))); + } + } + Loc::Reg(ap_reg) => { + // Similar logic for when va_list is in a register + if arg_type.is_float() { + // LIR: load overflow_arg_area pointer (at offset 8 from va_list base) + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: *ap_reg, + offset: 8, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + + let fp_size = arg_type.size_bits(); + let lir_fp_size = if fp_size <= 32 { + FpSize::Single + } else { + FpSize::Double + }; + // LIR: load float value + self.push_lir(X86Inst::MovFp { + size: lir_fp_size, + src: XmmOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rax, + offset: 0, + }), + dst: XmmOperand::Reg(XmmReg::Xmm15), + }); + + match &dst_loc { + Loc::Xmm(x) => { + // LIR: move to destination XMM + self.push_lir(X86Inst::MovFp { + size: lir_fp_size, + src: XmmOperand::Reg(XmmReg::Xmm15), + dst: XmmOperand::Reg(*x), + }); + } + Loc::Stack(dst_offset) => { + // LIR: store to stack + self.push_lir(X86Inst::MovFp { + size: lir_fp_size, + src: XmmOperand::Reg(XmmReg::Xmm15), + dst: XmmOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: *dst_offset, + }), + }); + } + _ => {} + } + + // LIR: advance overflow_arg_area + self.push_lir(X86Inst::Add { + size: OperandSize::B64, + src: GpOperand::Imm(arg_bytes as i64), + dst: Reg::Rax, + }); + // LIR: store updated pointer + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: *ap_reg, + offset: 8, + }), + }); + } else { + // Check gp_offset + let overflow_label = Label::new("va_overflow", label_suffix); + let done_label = Label::new("va_done", label_suffix); + + // LIR: load gp_offset + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: *ap_reg, + offset: 0, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + // LIR: compare with 48 + self.push_lir(X86Inst::Cmp { + size: OperandSize::B32, + src: GpOperand::Imm(48), + dst: GpOperand::Reg(Reg::Rax), + }); + // LIR: conditional jump + self.push_lir(X86Inst::Jcc { + cc: IntCC::Ae, + target: overflow_label.clone(), + }); + + // Use register save area + // LIR: load reg_save_area (at offset 16) + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: *ap_reg, + offset: 16, + }), + dst: GpOperand::Reg(Reg::R10), + }); + // LIR: sign-extend gp_offset + self.push_lir(X86Inst::Movsx { + src_size: OperandSize::B32, + dst_size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: Reg::R11, + }); + // LIR: add offset to base + self.push_lir(X86Inst::Add { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::R11), + dst: Reg::R10, + }); + + let lir_arg_size = OperandSize::from_bits(arg_size); + match &dst_loc { + Loc::Reg(r) => { + // LIR: load from save area + self.push_lir(X86Inst::Mov { + size: lir_arg_size, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::R10, + offset: 0, + }), + dst: GpOperand::Reg(*r), + }); + } + Loc::Stack(dst_offset) => { + // LIR: load to temp then store + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::R10, + offset: 0, + }), + dst: GpOperand::Reg(Reg::R11), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(Reg::R11), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: *dst_offset, + }), + }); + } + _ => {} + } + + // Increment gp_offset by 8 + // LIR: add 8 + self.push_lir(X86Inst::Add { + size: OperandSize::B32, + src: GpOperand::Imm(8), + dst: Reg::Rax, + }); + // LIR: store updated gp_offset + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: *ap_reg, + offset: 0, + }), + }); + // LIR: jump to done + self.push_lir(X86Inst::Jmp { + target: done_label.clone(), + }); + + // Overflow path: use overflow_arg_area + // LIR: overflow label + self.push_lir(X86Inst::Directive(Directive::BlockLabel(overflow_label))); + // LIR: load overflow_arg_area + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: *ap_reg, + offset: 8, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + + match &dst_loc { + Loc::Reg(r) => { + // LIR: load from overflow area + self.push_lir(X86Inst::Mov { + size: lir_arg_size, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rax, + offset: 0, + }), + dst: GpOperand::Reg(*r), + }); + } + Loc::Stack(dst_offset) => { + // LIR: load to temp then store + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rax, + offset: 0, + }), + dst: GpOperand::Reg(Reg::R11), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(Reg::R11), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: *dst_offset, + }), + }); + } + _ => {} + } + + // Advance overflow_arg_area + // LIR: add arg_bytes + self.push_lir(X86Inst::Add { + size: OperandSize::B64, + src: GpOperand::Imm(arg_bytes as i64), + dst: Reg::Rax, + }); + // LIR: store updated pointer + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: *ap_reg, + offset: 8, + }), + }); + + // Done label + self.push_lir(X86Inst::Directive(Directive::BlockLabel(done_label))); + } + } + _ => {} + } + } + + /// Emit va_copy: Copy a va_list (24 bytes) + fn emit_va_copy(&mut self, insn: &Instruction) { + let dest_addr = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let src_addr = match insn.src.get(1) { + Some(&s) => s, + None => return, + }; + + let dest_loc = self.get_location(dest_addr); + let src_loc = self.get_location(src_addr); + + // Copy 24 bytes from src to dest + // Both src_loc and dest_loc contain addresses of va_list structs + match (&src_loc, &dest_loc) { + (Loc::Stack(src_off), Loc::Stack(dst_off)) => { + // Copy gp_offset (4 bytes) + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: *src_off, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: *dst_off, + }), + }); + // Copy fp_offset (4 bytes) + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: src_off + 4, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: dst_off + 4, + }), + }); + // Copy overflow_arg_area (8 bytes) + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: src_off + 8, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: dst_off + 8, + }), + }); + // Copy reg_save_area (8 bytes) + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: src_off + 16, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: dst_off + 16, + }), + }); + } + (Loc::Reg(src_reg), Loc::Reg(dst_reg)) => { + // Both src and dest are in registers (containing addresses) + // Copy gp_offset (4 bytes) + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: *src_reg, + offset: 0, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: *dst_reg, + offset: 0, + }), + }); + // Copy fp_offset (4 bytes) + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: *src_reg, + offset: 4, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: *dst_reg, + offset: 4, + }), + }); + // Copy overflow_arg_area (8 bytes) + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: *src_reg, + offset: 8, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: *dst_reg, + offset: 8, + }), + }); + // Copy reg_save_area (8 bytes) + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: *src_reg, + offset: 16, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: *dst_reg, + offset: 16, + }), + }); + } + (Loc::Reg(src_reg), Loc::Stack(dst_off)) => { + // Src in register, dest on stack + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: *src_reg, + offset: 0, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: *dst_off, + }), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: *src_reg, + offset: 4, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: dst_off + 4, + }), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: *src_reg, + offset: 8, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: dst_off + 8, + }), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: *src_reg, + offset: 16, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: dst_off + 16, + }), + }); + } + (Loc::Stack(src_off), Loc::Reg(dst_reg)) => { + // Src on stack, dest in register + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: *src_off, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: *dst_reg, + offset: 0, + }), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: src_off + 4, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: *dst_reg, + offset: 4, + }), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: src_off + 8, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: *dst_reg, + offset: 8, + }), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: src_off + 16, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: *dst_reg, + offset: 16, + }), + }); + } + _ => {} + } + } + + // ========================================================================= + // Byte-swapping builtins + // ========================================================================= + + /// Emit bswap16: Byte-swap a 16-bit value + /// x86-64 doesn't have bswap for 16-bit, use ror $8 (rotate right 8 bits) + fn emit_bswap16(&mut self, insn: &Instruction) { + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let dst = match insn.target { + Some(t) => t, + None => return, + }; + + let src_loc = self.get_location(src); + let dst_loc = self.get_location(dst); + + // Load source into %ax + match src_loc { + Loc::Reg(r) => { + if r != Reg::Rax { + // LIR: zero-extend 16-bit to 32-bit + self.push_lir(X86Inst::Movzx { + src_size: OperandSize::B16, + dst_size: OperandSize::B32, + src: GpOperand::Reg(r), + dst: Reg::Rax, + }); + } + } + Loc::Stack(off) => { + // LIR: zero-extend load from stack + self.push_lir(X86Inst::Movzx { + src_size: OperandSize::B16, + dst_size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: off, + }), + dst: Reg::Rax, + }); + } + Loc::Imm(v) => { + // LIR: load immediate + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Imm(v), + dst: GpOperand::Reg(Reg::Rax), + }); + } + _ => return, + } + + // Rotate 8 bits right to swap bytes + // LIR: ror + self.push_lir(X86Inst::Ror { + size: OperandSize::B16, + count: ShiftCount::Imm(8), + dst: Reg::Rax, + }); + + // Store result + match dst_loc { + Loc::Reg(r) => { + if r != Reg::Rax { + // LIR: zero-extend result + self.push_lir(X86Inst::Movzx { + src_size: OperandSize::B16, + dst_size: OperandSize::B32, + src: GpOperand::Reg(Reg::Rax), + dst: r, + }); + } + } + Loc::Stack(off) => { + // LIR: store 16-bit result + self.push_lir(X86Inst::Mov { + size: OperandSize::B16, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: off, + }), + }); + } + _ => {} + } + } + + /// Emit bswap32: Byte-swap a 32-bit value + fn emit_bswap32(&mut self, insn: &Instruction) { + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let dst = match insn.target { + Some(t) => t, + None => return, + }; + + let src_loc = self.get_location(src); + let dst_loc = self.get_location(dst); + + // Load source into %eax + match src_loc { + Loc::Reg(r) => { + if r != Reg::Rax { + // LIR: move 32-bit value to rax + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(r), + dst: GpOperand::Reg(Reg::Rax), + }); + } + } + Loc::Stack(off) => { + // LIR: load 32-bit from stack + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: off, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + } + Loc::Imm(v) => { + // LIR: load immediate + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Imm(v), + dst: GpOperand::Reg(Reg::Rax), + }); + } + _ => return, + } + + // Byte-swap 32-bit value + // LIR: bswap + self.push_lir(X86Inst::Bswap { + size: OperandSize::B32, + reg: Reg::Rax, + }); + + // Store result + match dst_loc { + Loc::Reg(r) => { + if r != Reg::Rax { + // LIR: move result + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Reg(r), + }); + } + } + Loc::Stack(off) => { + // LIR: store to stack + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: off, + }), + }); + } + _ => {} + } + } + + /// Emit bswap64: Byte-swap a 64-bit value + fn emit_bswap64(&mut self, insn: &Instruction) { + let src = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let dst = match insn.target { + Some(t) => t, + None => return, + }; + + let src_loc = self.get_location(src); + let dst_loc = self.get_location(dst); + + // Load source into %rax + match src_loc { + Loc::Reg(r) => { + if r != Reg::Rax { + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(r), + dst: GpOperand::Reg(Reg::Rax), + }); + } + } + Loc::Stack(off) => { + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: off, + }), + dst: GpOperand::Reg(Reg::Rax), + }); + } + Loc::Imm(v) => { + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Imm(v), + dst: GpOperand::Reg(Reg::Rax), + }); + } + _ => return, + } + + // Byte-swap 64-bit value + self.push_lir(X86Inst::Bswap { + size: OperandSize::B64, + reg: Reg::Rax, + }); + + // Store result + match dst_loc { + Loc::Reg(r) => { + if r != Reg::Rax { + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Reg(r), + }); + } + } + Loc::Stack(off) => { + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::Rbp, + offset: off, + }), + }); + } + _ => {} + } + } + + /// Emit __builtin_alloca - dynamic stack allocation + fn emit_alloca(&mut self, insn: &Instruction) { + let size = match insn.src.first() { + Some(&s) => s, + None => return, + }; + let target = match insn.target { + Some(t) => t, + None => return, + }; + + // Load size into RAX + self.emit_move(size, Reg::Rax, 64); + + // Round up to 16-byte alignment: (size + 15) & ~15 + self.push_lir(X86Inst::Add { + size: OperandSize::B64, + src: GpOperand::Imm(15), + dst: Reg::Rax, + }); + self.push_lir(X86Inst::And { + size: OperandSize::B64, + src: GpOperand::Imm(-16), + dst: Reg::Rax, + }); + + // Subtract from stack pointer + self.push_lir(X86Inst::Sub { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: Reg::Rsp, + }); + + // Return new stack pointer + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rsp), + dst: GpOperand::Reg(Reg::Rax), + }); + + // Store result + let dst_loc = self.get_location(target); + self.emit_move_to_loc(Reg::Rax, &dst_loc, 64); + } +} + +// ============================================================================ +// CodeGenerator trait implementation +// ============================================================================ + +impl CodeGenerator for X86_64CodeGen { + fn generate(&mut self, module: &Module) -> String { + self.output.clear(); + self.last_debug_line = 0; + self.last_debug_file = 0; + self.emit_debug = module.debug; + + // Emit file header + self.emit_header(); + + // Emit .file directives unconditionally (useful for diagnostics/profiling) + for (i, path) in module.source_files.iter().enumerate() { + // File indices in DWARF start at 1 + self.push_lir(X86Inst::Directive(Directive::file( + (i + 1) as u32, + path.clone(), + ))); + } + + // Emit globals + for (name, typ, init) in &module.globals { + self.emit_global(name, typ, init); + } + + // Emit string literals + if !module.strings.is_empty() { + self.emit_strings(&module.strings); + } + + // Emit functions + for func in &module.functions { + self.emit_function(func); + } + + // Emit all buffered LIR instructions to output string + self.emit_all(); + + self.output.clone() + } + + fn set_emit_unwind_tables(&mut self, emit: bool) { + self.emit_unwind_tables = emit; + } +} + +impl X86_64CodeGen { + fn emit_strings(&mut self, strings: &[(String, String)]) { + // Switch to rodata section (Directive handles OS differences) + self.push_lir(X86Inst::Directive(Directive::Rodata)); + + for (label, content) in strings { + // String labels are local (start with .) + self.push_lir(X86Inst::Directive(Directive::local_label(label.clone()))); + // Emit string with proper escaping + self.push_lir(X86Inst::Directive(Directive::Asciz(Self::escape_string( + content, + )))); + } + + // Switch back to text section for functions + self.push_lir(X86Inst::Directive(Directive::Text)); + } + + fn escape_string(s: &str) -> String { + let mut result = String::new(); + for c in s.chars() { + match c { + '\n' => result.push_str("\\n"), + '\r' => result.push_str("\\r"), + '\t' => result.push_str("\\t"), + '\\' => result.push_str("\\\\"), + '"' => result.push_str("\\\""), + c if c.is_ascii_graphic() || c == ' ' => result.push(c), + c => { + // Emit as octal escape + result.push_str(&format!("\\{:03o}", c as u32)); + } + } + } + result + } +} diff --git a/cc/arch/x86_64/lir.rs b/cc/arch/x86_64/lir.rs new file mode 100644 index 000000000..4d6413d72 --- /dev/null +++ b/cc/arch/x86_64/lir.rs @@ -0,0 +1,1280 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// x86-64 Low-level Intermediate Representation (LIR) +// +// Strongly-typed representation of x86-64 assembly instructions. +// Each instruction variant encodes all operands in typed fields, +// enabling peephole optimizations before final assembly emission. +// + +// Allow dead code - LIR infrastructure for future phases +#![allow(dead_code)] + +use super::codegen::{Reg, XmmReg}; +use crate::arch::lir::{Directive, EmitAsm, FpSize, Label, OperandSize, Symbol}; +use crate::target::{Os, Target}; +use std::fmt::{self, Write}; + +// ============================================================================ +// Memory Addressing Modes +// ============================================================================ + +/// x86-64 memory addressing mode +#[derive(Debug, Clone, PartialEq)] +pub enum MemAddr { + /// [base + offset] - Register indirect with displacement + BaseOffset { base: Reg, offset: i32 }, + + /// [rip + symbol] - PC-relative addressing for position-independent code + RipRelative(Symbol), + + /// [base + index * scale + offset] - Full SIB addressing + Sib { + base: Reg, + index: Reg, + scale: u8, // 1, 2, 4, or 8 + offset: i32, + }, + + /// [offset] - Absolute address (rare, mainly for globals on some platforms) + Absolute(i64), +} + +impl MemAddr { + /// Format memory operand in AT&T syntax + pub fn format(&self, target: &Target) -> String { + match self { + MemAddr::BaseOffset { base, offset } => { + if *offset == 0 { + format!("({})", base.name64()) + } else { + format!("{}({})", offset, base.name64()) + } + } + MemAddr::RipRelative(sym) => { + format!("{}(%rip)", sym.format_for_target(target)) + } + MemAddr::Sib { + base, + index, + scale, + offset, + } => { + if *offset == 0 { + format!("({},{},{})", base.name64(), index.name64(), scale) + } else { + format!("{}({},{},{})", offset, base.name64(), index.name64(), scale) + } + } + MemAddr::Absolute(addr) => { + format!("{:#x}", addr) + } + } + } +} + +// ============================================================================ +// General-Purpose Operands +// ============================================================================ + +/// x86-64 general-purpose operand (register, memory, or immediate) +#[derive(Debug, Clone, PartialEq)] +pub enum GpOperand { + /// Register operand + Reg(Reg), + /// Memory operand + Mem(MemAddr), + /// Immediate integer value + Imm(i64), +} + +impl GpOperand { + /// Format operand in AT&T syntax for given size + pub fn format(&self, size: OperandSize, target: &Target) -> String { + match self { + GpOperand::Reg(r) => r.name_for_size(size.bits()).to_string(), + GpOperand::Mem(addr) => addr.format(target), + GpOperand::Imm(v) => format!("${}", v), + } + } + + /// Check if this is a register operand + pub fn is_reg(&self) -> bool { + matches!(self, GpOperand::Reg(_)) + } + + /// Get register if this is a register operand + pub fn as_reg(&self) -> Option { + match self { + GpOperand::Reg(r) => Some(*r), + _ => None, + } + } +} + +// ============================================================================ +// XMM (Floating-Point) Operands +// ============================================================================ + +/// x86-64 XMM operand (register or memory) +#[derive(Debug, Clone, PartialEq)] +pub enum XmmOperand { + /// XMM register operand + Reg(XmmReg), + /// Memory operand + Mem(MemAddr), +} + +impl XmmOperand { + /// Format operand in AT&T syntax + pub fn format(&self, target: &Target) -> String { + match self { + XmmOperand::Reg(r) => r.name().to_string(), + XmmOperand::Mem(addr) => addr.format(target), + } + } +} + +// ============================================================================ +// Condition Codes +// ============================================================================ + +/// Integer condition codes for comparisons and conditional jumps/sets +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum IntCC { + /// Equal (ZF=1) + E, + /// Not equal (ZF=0) + Ne, + /// Less than (signed) (SF!=OF) + L, + /// Less than or equal (signed) (ZF=1 or SF!=OF) + Le, + /// Greater than (signed) (ZF=0 and SF=OF) + G, + /// Greater than or equal (signed) (SF=OF) + Ge, + /// Below (unsigned) (CF=1) + B, + /// Below or equal (unsigned) (CF=1 or ZF=1) + Be, + /// Above (unsigned) (CF=0 and ZF=0) + A, + /// Above or equal (unsigned) (CF=0) + Ae, + /// Sign (SF=1) + S, + /// Not sign (SF=0) + Ns, + /// Parity (PF=1) - for FP unordered + P, + /// Not parity (PF=0) - for FP ordered + Np, +} + +impl IntCC { + /// x86-64 condition code suffix + pub fn suffix(&self) -> &'static str { + match self { + IntCC::E => "e", + IntCC::Ne => "ne", + IntCC::L => "l", + IntCC::Le => "le", + IntCC::G => "g", + IntCC::Ge => "ge", + IntCC::B => "b", + IntCC::Be => "be", + IntCC::A => "a", + IntCC::Ae => "ae", + IntCC::S => "s", + IntCC::Ns => "ns", + IntCC::P => "p", + IntCC::Np => "np", + } + } + + /// Get the inverse condition + pub fn inverse(&self) -> IntCC { + match self { + IntCC::E => IntCC::Ne, + IntCC::Ne => IntCC::E, + IntCC::L => IntCC::Ge, + IntCC::Le => IntCC::G, + IntCC::G => IntCC::Le, + IntCC::Ge => IntCC::L, + IntCC::B => IntCC::Ae, + IntCC::Be => IntCC::A, + IntCC::A => IntCC::Be, + IntCC::Ae => IntCC::B, + IntCC::S => IntCC::Ns, + IntCC::Ns => IntCC::S, + IntCC::P => IntCC::Np, + IntCC::Np => IntCC::P, + } + } +} + +impl fmt::Display for IntCC { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.suffix()) + } +} + +// ============================================================================ +// Shift Count +// ============================================================================ + +/// Shift/rotate count specifier +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ShiftCount { + /// Immediate shift count (0-63) + Imm(u8), + /// Use %cl register for count + Cl, +} + +// ============================================================================ +// Call Target +// ============================================================================ + +/// Target for call instructions +#[derive(Debug, Clone, PartialEq)] +pub enum CallTarget { + /// Direct call to symbol + Direct(Symbol), + /// Indirect call through register + Indirect(Reg), + /// Indirect call through memory + IndirectMem(MemAddr), +} + +// ============================================================================ +// x86-64 LIR Instructions +// ============================================================================ + +/// x86-64 Low-level IR instruction +#[derive(Debug, Clone)] +pub enum X86Inst { + // ======================================================================== + // Data Movement + // ======================================================================== + /// MOV - Move data between registers/memory + Mov { + size: OperandSize, + src: GpOperand, + dst: GpOperand, + }, + + /// MOVABS - Move 64-bit immediate to register + MovAbs { imm: i64, dst: Reg }, + + /// MOVZX - Move with zero extension + Movzx { + src_size: OperandSize, + dst_size: OperandSize, + src: GpOperand, + dst: Reg, + }, + + /// MOVSX - Move with sign extension + Movsx { + src_size: OperandSize, + dst_size: OperandSize, + src: GpOperand, + dst: Reg, + }, + + /// LEA - Load effective address + Lea { addr: MemAddr, dst: Reg }, + + /// PUSH - Push value onto stack + Push { src: GpOperand }, + + /// POP - Pop value from stack + Pop { dst: Reg }, + + /// XCHG - Exchange register contents + Xchg { size: OperandSize, r1: Reg, r2: Reg }, + + // ======================================================================== + // Integer Arithmetic + // ======================================================================== + /// ADD - Integer addition + Add { + size: OperandSize, + src: GpOperand, + dst: Reg, + }, + + /// SUB - Integer subtraction + Sub { + size: OperandSize, + src: GpOperand, + dst: Reg, + }, + + /// IMUL - Signed multiply (2-operand form: dst *= src) + IMul2 { + size: OperandSize, + src: GpOperand, + dst: Reg, + }, + + /// IMUL - Signed multiply (1-operand form: RDX:RAX = RAX * src) + IMul1 { size: OperandSize, src: GpOperand }, + + /// MUL - Unsigned multiply (1-operand form: RDX:RAX = RAX * src) + Mul1 { size: OperandSize, src: GpOperand }, + + /// IDIV - Signed divide (RDX:RAX / src -> RAX quotient, RDX remainder) + IDiv { + size: OperandSize, + divisor: GpOperand, + }, + + /// DIV - Unsigned divide (RDX:RAX / src -> RAX quotient, RDX remainder) + Div { + size: OperandSize, + divisor: GpOperand, + }, + + /// NEG - Two's complement negation + Neg { size: OperandSize, dst: Reg }, + + /// INC - Increment by 1 + Inc { size: OperandSize, dst: GpOperand }, + + /// DEC - Decrement by 1 + Dec { size: OperandSize, dst: GpOperand }, + + // ======================================================================== + // Bitwise Operations + // ======================================================================== + /// NOT - One's complement (bitwise NOT) + Not { size: OperandSize, dst: Reg }, + + /// AND - Bitwise AND + And { + size: OperandSize, + src: GpOperand, + dst: Reg, + }, + + /// OR - Bitwise OR + Or { + size: OperandSize, + src: GpOperand, + dst: Reg, + }, + + /// XOR - Bitwise exclusive OR + Xor { + size: OperandSize, + src: GpOperand, + dst: Reg, + }, + + /// SHL/SAL - Shift left + Shl { + size: OperandSize, + count: ShiftCount, + dst: Reg, + }, + + /// SHR - Logical shift right + Shr { + size: OperandSize, + count: ShiftCount, + dst: Reg, + }, + + /// SAR - Arithmetic shift right + Sar { + size: OperandSize, + count: ShiftCount, + dst: Reg, + }, + + /// ROR - Rotate right + Ror { + size: OperandSize, + count: ShiftCount, + dst: Reg, + }, + + // ======================================================================== + // Comparison and Conditional + // ======================================================================== + /// CMP - Compare (sets flags based on dst - src) + Cmp { + size: OperandSize, + src: GpOperand, + dst: GpOperand, + }, + + /// TEST - Test (sets flags based on dst AND src) + Test { + size: OperandSize, + src: GpOperand, + dst: GpOperand, + }, + + /// SETcc - Set byte based on condition + SetCC { cc: IntCC, dst: Reg }, + + /// CMOVcc - Conditional move + CMov { + cc: IntCC, + size: OperandSize, + src: GpOperand, + dst: Reg, + }, + + // ======================================================================== + // Control Flow + // ======================================================================== + /// JMP - Unconditional jump + Jmp { target: Label }, + + /// Jcc - Conditional jump + Jcc { cc: IntCC, target: Label }, + + /// CALL - Function call + Call { target: CallTarget }, + + /// RET - Return from function + Ret, + + // ======================================================================== + // Floating-Point (SSE) + // ======================================================================== + /// MOVSS/MOVSD - Move scalar floating-point + MovFp { + size: FpSize, + src: XmmOperand, + dst: XmmOperand, + }, + + /// MOVD/MOVQ - Move between GP and XMM registers + MovGpXmm { + size: OperandSize, + src: Reg, + dst: XmmReg, + }, + + /// MOVD/MOVQ - Move from XMM to GP register + MovXmmGp { + size: OperandSize, + src: XmmReg, + dst: Reg, + }, + + /// ADDSS/ADDSD - Add scalar floating-point + AddFp { + size: FpSize, + src: XmmOperand, + dst: XmmReg, + }, + + /// SUBSS/SUBSD - Subtract scalar floating-point + SubFp { + size: FpSize, + src: XmmOperand, + dst: XmmReg, + }, + + /// MULSS/MULSD - Multiply scalar floating-point + MulFp { + size: FpSize, + src: XmmOperand, + dst: XmmReg, + }, + + /// DIVSS/DIVSD - Divide scalar floating-point + DivFp { + size: FpSize, + src: XmmOperand, + dst: XmmReg, + }, + + /// XORPS/XORPD - XOR packed floating-point (used for zeroing/negation) + XorFp { + size: FpSize, + src: XmmReg, + dst: XmmReg, + }, + + /// UCOMISS/UCOMISD - Unordered compare scalar floating-point + UComiFp { + size: FpSize, + src: XmmOperand, + dst: XmmReg, + }, + + /// CVTSI2SS/CVTSI2SD - Convert integer to scalar floating-point + CvtIntToFp { + int_size: OperandSize, + fp_size: FpSize, + src: GpOperand, + dst: XmmReg, + }, + + /// CVTTSS2SI/CVTTSD2SI - Convert scalar FP to integer (truncate toward zero) + CvtFpToInt { + fp_size: FpSize, + int_size: OperandSize, + src: XmmOperand, + dst: Reg, + }, + + /// CVTSS2SD/CVTSD2SS - Convert between FP sizes + CvtFpFp { + src_size: FpSize, + dst_size: FpSize, + src: XmmReg, + dst: XmmReg, + }, + + // ======================================================================== + // Special Instructions + // ======================================================================== + /// CLTD/CDQ - Sign extend EAX into EDX:EAX (32-bit) + Cltd, + + /// CQTO/CQO - Sign extend RAX into RDX:RAX (64-bit) + Cqto, + + /// BSWAP - Byte swap + Bswap { size: OperandSize, reg: Reg }, + + /// XORPS with same register - Fast zero XMM register + XorpsSelf { reg: XmmReg }, + + // ======================================================================== + // Directives (Architecture-Independent) + // ======================================================================== + /// Assembler directives (labels, sections, CFI, .loc, data, etc.) + /// These are architecture-independent and use the shared Directive type. + Directive(Directive), +} + +// ============================================================================ +// EmitAsm Implementation +// ============================================================================ + +impl EmitAsm for X86Inst { + fn emit(&self, target: &Target, out: &mut String) { + match self { + // Data Movement + X86Inst::Mov { size, src, dst } => { + let _ = writeln!( + out, + " mov{} {}, {}", + size.x86_suffix(), + src.format(*size, target), + dst.format(*size, target) + ); + } + + X86Inst::MovAbs { imm, dst } => { + let _ = writeln!(out, " movabsq ${}, {}", imm, dst.name64()); + } + + X86Inst::Movzx { + src_size, + dst_size, + src, + dst, + } => { + let op = match (src_size, dst_size) { + (OperandSize::B8, OperandSize::B16) => "movzbw", + (OperandSize::B8, OperandSize::B32) => "movzbl", + (OperandSize::B8, OperandSize::B64) => "movzbq", + (OperandSize::B16, OperandSize::B32) => "movzwl", + (OperandSize::B16, OperandSize::B64) => "movzwq", + _ => "movzbl", // fallback + }; + let _ = writeln!( + out, + " {} {}, {}", + op, + src.format(*src_size, target), + dst.name_for_size(dst_size.bits()) + ); + } + + X86Inst::Movsx { + src_size, + dst_size, + src, + dst, + } => { + let op = match (src_size, dst_size) { + (OperandSize::B8, OperandSize::B16) => "movsbw", + (OperandSize::B8, OperandSize::B32) => "movsbl", + (OperandSize::B8, OperandSize::B64) => "movsbq", + (OperandSize::B16, OperandSize::B32) => "movswl", + (OperandSize::B16, OperandSize::B64) => "movswq", + (OperandSize::B32, OperandSize::B64) => "movslq", + _ => "movsbl", // fallback + }; + let _ = writeln!( + out, + " {} {}, {}", + op, + src.format(*src_size, target), + dst.name_for_size(dst_size.bits()) + ); + } + + X86Inst::Lea { addr, dst } => { + let _ = writeln!(out, " leaq {}, {}", addr.format(target), dst.name64()); + } + + X86Inst::Push { src } => { + let _ = writeln!(out, " pushq {}", src.format(OperandSize::B64, target)); + } + + X86Inst::Pop { dst } => { + let _ = writeln!(out, " popq {}", dst.name64()); + } + + X86Inst::Xchg { size, r1, r2 } => { + let _ = writeln!( + out, + " xchg{} {}, {}", + size.x86_suffix(), + r1.name_for_size(size.bits()), + r2.name_for_size(size.bits()) + ); + } + + // Integer Arithmetic + X86Inst::Add { size, src, dst } => { + let _ = writeln!( + out, + " add{} {}, {}", + size.x86_suffix(), + src.format(*size, target), + dst.name_for_size(size.bits()) + ); + } + + X86Inst::Sub { size, src, dst } => { + let _ = writeln!( + out, + " sub{} {}, {}", + size.x86_suffix(), + src.format(*size, target), + dst.name_for_size(size.bits()) + ); + } + + X86Inst::IMul2 { size, src, dst } => { + let _ = writeln!( + out, + " imul{} {}, {}", + size.x86_suffix(), + src.format(*size, target), + dst.name_for_size(size.bits()) + ); + } + + X86Inst::IMul1 { size, src } => { + let _ = writeln!( + out, + " imul{} {}", + size.x86_suffix(), + src.format(*size, target) + ); + } + + X86Inst::Mul1 { size, src } => { + let _ = writeln!( + out, + " mul{} {}", + size.x86_suffix(), + src.format(*size, target) + ); + } + + X86Inst::IDiv { size, divisor } => { + let _ = writeln!( + out, + " idiv{} {}", + size.x86_suffix(), + divisor.format(*size, target) + ); + } + + X86Inst::Div { size, divisor } => { + let _ = writeln!( + out, + " div{} {}", + size.x86_suffix(), + divisor.format(*size, target) + ); + } + + X86Inst::Neg { size, dst } => { + let _ = writeln!( + out, + " neg{} {}", + size.x86_suffix(), + dst.name_for_size(size.bits()) + ); + } + + X86Inst::Inc { size, dst } => { + let _ = writeln!( + out, + " inc{} {}", + size.x86_suffix(), + dst.format(*size, target) + ); + } + + X86Inst::Dec { size, dst } => { + let _ = writeln!( + out, + " dec{} {}", + size.x86_suffix(), + dst.format(*size, target) + ); + } + + // Bitwise Operations + X86Inst::Not { size, dst } => { + let _ = writeln!( + out, + " not{} {}", + size.x86_suffix(), + dst.name_for_size(size.bits()) + ); + } + + X86Inst::And { size, src, dst } => { + let _ = writeln!( + out, + " and{} {}, {}", + size.x86_suffix(), + src.format(*size, target), + dst.name_for_size(size.bits()) + ); + } + + X86Inst::Or { size, src, dst } => { + let _ = writeln!( + out, + " or{} {}, {}", + size.x86_suffix(), + src.format(*size, target), + dst.name_for_size(size.bits()) + ); + } + + X86Inst::Xor { size, src, dst } => { + let _ = writeln!( + out, + " xor{} {}, {}", + size.x86_suffix(), + src.format(*size, target), + dst.name_for_size(size.bits()) + ); + } + + X86Inst::Shl { size, count, dst } => { + let count_str = match count { + ShiftCount::Imm(n) => format!("${}", n), + ShiftCount::Cl => "%cl".to_string(), + }; + let _ = writeln!( + out, + " shl{} {}, {}", + size.x86_suffix(), + count_str, + dst.name_for_size(size.bits()) + ); + } + + X86Inst::Shr { size, count, dst } => { + let count_str = match count { + ShiftCount::Imm(n) => format!("${}", n), + ShiftCount::Cl => "%cl".to_string(), + }; + let _ = writeln!( + out, + " shr{} {}, {}", + size.x86_suffix(), + count_str, + dst.name_for_size(size.bits()) + ); + } + + X86Inst::Sar { size, count, dst } => { + let count_str = match count { + ShiftCount::Imm(n) => format!("${}", n), + ShiftCount::Cl => "%cl".to_string(), + }; + let _ = writeln!( + out, + " sar{} {}, {}", + size.x86_suffix(), + count_str, + dst.name_for_size(size.bits()) + ); + } + + X86Inst::Ror { size, count, dst } => { + let count_str = match count { + ShiftCount::Imm(n) => format!("${}", n), + ShiftCount::Cl => "%cl".to_string(), + }; + let _ = writeln!( + out, + " ror{} {}, {}", + size.x86_suffix(), + count_str, + dst.name_for_size(size.bits()) + ); + } + + // Comparison and Conditional + X86Inst::Cmp { size, src, dst } => { + let _ = writeln!( + out, + " cmp{} {}, {}", + size.x86_suffix(), + src.format(*size, target), + dst.format(*size, target) + ); + } + + X86Inst::Test { size, src, dst } => { + let _ = writeln!( + out, + " test{} {}, {}", + size.x86_suffix(), + src.format(*size, target), + dst.format(*size, target) + ); + } + + X86Inst::SetCC { cc, dst } => { + let _ = writeln!(out, " set{} {}", cc.suffix(), dst.name8()); + } + + X86Inst::CMov { cc, size, src, dst } => { + let _ = writeln!( + out, + " cmov{}{} {}, {}", + cc.suffix(), + size.x86_suffix(), + src.format(*size, target), + dst.name_for_size(size.bits()) + ); + } + + // Control Flow + X86Inst::Jmp { target: lbl } => { + let _ = writeln!(out, " jmp {}", lbl.name()); + } + + X86Inst::Jcc { cc, target: lbl } => { + let _ = writeln!(out, " j{} {}", cc.suffix(), lbl.name()); + } + + X86Inst::Call { + target: call_target, + } => match call_target { + CallTarget::Direct(sym) => { + // On Linux/FreeBSD, external symbols need @PLT for PIC + let sym_name = sym.format_for_target(target); + match target.os { + Os::Linux | Os::FreeBSD if !sym.is_local => { + let _ = writeln!(out, " call {}@PLT", sym_name); + } + _ => { + let _ = writeln!(out, " call {}", sym_name); + } + } + } + CallTarget::Indirect(reg) => { + let _ = writeln!(out, " call *{}", reg.name64()); + } + CallTarget::IndirectMem(addr) => { + let _ = writeln!(out, " call *{}", addr.format(target)); + } + }, + + X86Inst::Ret => { + let _ = writeln!(out, " ret"); + } + + // Floating-Point + X86Inst::MovFp { size, src, dst } => { + let _ = writeln!( + out, + " mov{} {}, {}", + size.x86_suffix(), + src.format(target), + dst.format(target) + ); + } + + X86Inst::MovGpXmm { size, src, dst } => { + let op = if size.bits() <= 32 { "movd" } else { "movq" }; + let _ = writeln!( + out, + " {} {}, {}", + op, + src.name_for_size(size.bits()), + dst.name() + ); + } + + X86Inst::MovXmmGp { size, src, dst } => { + let op = if size.bits() <= 32 { "movd" } else { "movq" }; + let _ = writeln!( + out, + " {} {}, {}", + op, + src.name(), + dst.name_for_size(size.bits()) + ); + } + + X86Inst::AddFp { size, src, dst } => { + let _ = writeln!( + out, + " add{} {}, {}", + size.x86_suffix(), + src.format(target), + dst.name() + ); + } + + X86Inst::SubFp { size, src, dst } => { + let _ = writeln!( + out, + " sub{} {}, {}", + size.x86_suffix(), + src.format(target), + dst.name() + ); + } + + X86Inst::MulFp { size, src, dst } => { + let _ = writeln!( + out, + " mul{} {}, {}", + size.x86_suffix(), + src.format(target), + dst.name() + ); + } + + X86Inst::DivFp { size, src, dst } => { + let _ = writeln!( + out, + " div{} {}, {}", + size.x86_suffix(), + src.format(target), + dst.name() + ); + } + + X86Inst::XorFp { size, src, dst } => { + let _ = writeln!( + out, + " xor{} {}, {}", + size.x86_packed_suffix(), + src.name(), + dst.name() + ); + } + + X86Inst::UComiFp { size, src, dst } => { + let _ = writeln!( + out, + " ucomi{} {}, {}", + size.x86_suffix(), + src.format(target), + dst.name() + ); + } + + X86Inst::CvtIntToFp { + int_size, + fp_size, + src, + dst, + } => { + let int_suffix = if int_size.bits() <= 32 { "l" } else { "q" }; + let _ = writeln!( + out, + " cvtsi2{}{} {}, {}", + fp_size.x86_suffix(), + int_suffix, + src.format(*int_size, target), + dst.name() + ); + } + + X86Inst::CvtFpToInt { + fp_size, + int_size, + src, + dst, + } => { + let int_suffix = if int_size.bits() <= 32 { "l" } else { "q" }; + let _ = writeln!( + out, + " cvtt{}2si{} {}, {}", + fp_size.x86_suffix(), + int_suffix, + src.format(target), + dst.name_for_size(int_size.bits()) + ); + } + + X86Inst::CvtFpFp { + src_size, + dst_size, + src, + dst, + } => { + let _ = writeln!( + out, + " cvt{}2{} {}, {}", + src_size.x86_suffix(), + dst_size.x86_suffix(), + src.name(), + dst.name() + ); + } + + // Special Instructions + X86Inst::Cltd => { + let _ = writeln!(out, " cltd"); + } + + X86Inst::Cqto => { + let _ = writeln!(out, " cqto"); + } + + X86Inst::Bswap { size, reg } => { + if size.bits() == 16 { + // x86 bswap doesn't work for 16-bit, use xchg or rol + let _ = writeln!(out, " rolw $8, {}", reg.name16()); + } else { + let _ = writeln!( + out, + " bswap{}", + if size.bits() == 32 { + format!(" {}", reg.name32()) + } else { + format!(" {}", reg.name64()) + } + ); + } + } + + X86Inst::XorpsSelf { reg } => { + let _ = writeln!(out, " xorps {}, {}", reg.name(), reg.name()); + } + + // Directives - delegate to shared implementation + X86Inst::Directive(dir) => { + dir.emit(target, out); + } + } + } +} + +// ============================================================================ +// Utility Functions +// ============================================================================ + +/// Emit a sequence of LIR instructions to assembly text +pub fn emit_instrs(instrs: &[X86Inst], target: &Target) -> String { + let mut out = String::new(); + for inst in instrs { + inst.emit(target, &mut out); + } + out +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::target::{Arch, Os}; + + fn linux_target() -> Target { + Target::new(Arch::X86_64, Os::Linux) + } + + fn macos_target() -> Target { + Target::new(Arch::X86_64, Os::MacOS) + } + + #[test] + fn test_mov_emit() { + let target = linux_target(); + let mut out = String::new(); + + let inst = X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Reg(Reg::Rbx), + }; + inst.emit(&target, &mut out); + assert_eq!(out.trim(), "movq %rax, %rbx"); + } + + #[test] + fn test_mov_sizes() { + let target = linux_target(); + + // Test all sizes + for (size, expected_suffix) in [ + (OperandSize::B8, "b"), + (OperandSize::B16, "w"), + (OperandSize::B32, "l"), + (OperandSize::B64, "q"), + ] { + let mut out = String::new(); + let inst = X86Inst::Mov { + size, + src: GpOperand::Imm(42), + dst: GpOperand::Reg(Reg::Rax), + }; + inst.emit(&target, &mut out); + assert!( + out.contains(&format!("mov{}", expected_suffix)), + "Expected mov{}, got: {}", + expected_suffix, + out + ); + } + } + + #[test] + fn test_mem_addr_format() { + let target = linux_target(); + + let addr = MemAddr::BaseOffset { + base: Reg::Rbp, + offset: -8, + }; + assert_eq!(addr.format(&target), "-8(%rbp)"); + + let addr = MemAddr::BaseOffset { + base: Reg::Rsp, + offset: 0, + }; + assert_eq!(addr.format(&target), "(%rsp)"); + + let addr = MemAddr::RipRelative(Symbol::global("printf")); + assert_eq!(addr.format(&target), "printf(%rip)"); + } + + #[test] + fn test_symbol_macos_prefix() { + let linux = linux_target(); + let macos = macos_target(); + + let sym = Symbol::global("main"); + assert_eq!(sym.format_for_target(&linux), "main"); + assert_eq!(sym.format_for_target(&macos), "_main"); + + let mut out = String::new(); + let inst = X86Inst::Call { + target: CallTarget::Direct(Symbol::global("printf")), + }; + inst.emit(&macos, &mut out); + assert!(out.contains("_printf")); + } + + #[test] + fn test_conditional_jump() { + let target = linux_target(); + + for (cc, expected) in [ + (IntCC::E, "je"), + (IntCC::Ne, "jne"), + (IntCC::L, "jl"), + (IntCC::G, "jg"), + (IntCC::B, "jb"), + (IntCC::A, "ja"), + ] { + let mut out = String::new(); + let inst = X86Inst::Jcc { + cc, + target: Label::new("test", 1), + }; + inst.emit(&target, &mut out); + assert!( + out.contains(expected), + "Expected {}, got: {}", + expected, + out + ); + } + } + + #[test] + fn test_fp_instructions() { + let target = linux_target(); + + let mut out = String::new(); + let inst = X86Inst::AddFp { + size: FpSize::Double, + src: XmmOperand::Reg(XmmReg::Xmm1), + dst: XmmReg::Xmm0, + }; + inst.emit(&target, &mut out); + assert!(out.contains("addsd")); + + let mut out = String::new(); + let inst = X86Inst::AddFp { + size: FpSize::Single, + src: XmmOperand::Reg(XmmReg::Xmm1), + dst: XmmReg::Xmm0, + }; + inst.emit(&target, &mut out); + assert!(out.contains("addss")); + } + + #[test] + fn test_emit_instrs() { + let target = linux_target(); + + let instrs = vec![ + X86Inst::Push { + src: GpOperand::Reg(Reg::Rbp), + }, + X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rsp), + dst: GpOperand::Reg(Reg::Rbp), + }, + X86Inst::Ret, + ]; + + let out = emit_instrs(&instrs, &target); + assert!(out.contains("pushq %rbp")); + assert!(out.contains("movq %rsp, %rbp")); + assert!(out.contains("ret")); + } +} diff --git a/cc/arch/x86_64/macros.rs b/cc/arch/x86_64/macros.rs new file mode 100644 index 000000000..6997c0e1a --- /dev/null +++ b/cc/arch/x86_64/macros.rs @@ -0,0 +1,54 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// x86_64-specific predefined macros +// + +/// Get x86_64-specific predefined macros +pub fn get_macros() -> Vec<(&'static str, Option<&'static str>)> { + vec![ + // Architecture identification + ("__x86_64__", Some("1")), + ("__x86_64", Some("1")), + ("__amd64__", Some("1")), + ("__amd64", Some("1")), + // Byte order + ("__BYTE_ORDER__", Some("__ORDER_LITTLE_ENDIAN__")), + ("__ORDER_LITTLE_ENDIAN__", Some("1234")), + ("__ORDER_BIG_ENDIAN__", Some("4321")), + ("__ORDER_PDP_ENDIAN__", Some("3412")), + ("__LITTLE_ENDIAN__", Some("1")), + // Register size + ("__REGISTER_PREFIX__", Some("")), + ("__USER_LABEL_PREFIX__", Some("")), + // Long double is 80-bit extended precision (padded to 128 bits) + ("__SIZEOF_LONG_DOUBLE__", Some("16")), + ("__LDBL_MANT_DIG__", Some("64")), + ("__LDBL_DIG__", Some("18")), + // SSE is baseline for x86_64 + ("__SSE__", Some("1")), + ("__SSE2__", Some("1")), + ("__SSE_MATH__", Some("1")), + ("__SSE2_MATH__", Some("1")), + // MMX is implied by SSE + ("__MMX__", Some("1")), + // FPU type + ("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1", Some("1")), + ("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2", Some("1")), + ("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4", Some("1")), + ("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8", Some("1")), + // Atomic primitives + ("__GCC_ATOMIC_BOOL_LOCK_FREE", Some("2")), + ("__GCC_ATOMIC_CHAR_LOCK_FREE", Some("2")), + ("__GCC_ATOMIC_SHORT_LOCK_FREE", Some("2")), + ("__GCC_ATOMIC_INT_LOCK_FREE", Some("2")), + ("__GCC_ATOMIC_LONG_LOCK_FREE", Some("2")), + ("__GCC_ATOMIC_LLONG_LOCK_FREE", Some("2")), + ("__GCC_ATOMIC_POINTER_LOCK_FREE", Some("2")), + ] +} diff --git a/cc/arch/x86_64/mod.rs b/cc/arch/x86_64/mod.rs new file mode 100644 index 000000000..feb19551e --- /dev/null +++ b/cc/arch/x86_64/mod.rs @@ -0,0 +1,16 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// x86_64 architecture support +// + +pub mod codegen; +pub mod lir; +pub mod macros; + +pub use macros::get_macros; diff --git a/cc/diag.rs b/cc/diag.rs new file mode 100644 index 000000000..2cbe05e09 --- /dev/null +++ b/cc/diag.rs @@ -0,0 +1,488 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Diagnostic and stream management module for pcc C99 compiler +// + +use std::cell::RefCell; +use std::fmt; +use std::io::{self, Write}; +use std::sync::atomic::{AtomicU32, Ordering}; + +// ============================================================================ +// Source Position +// ============================================================================ + +/// Source position tracking (mirrors sparse's struct position) +/// +/// This is a compact structure that gets attached to every token. +/// Following sparse's design where position fields are bit-packed. +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub struct Position { + /// Stream/file index (which file this is in) + pub stream: u16, + /// Line number (1-based) + pub line: u32, + /// Column position (1-based, 0 means unknown) + pub col: u16, + /// Token preceded by newline + pub newline: bool, + /// Token preceded by whitespace + pub whitespace: bool, + /// Don't expand macros (for preprocessor) + pub noexpand: bool, +} + +impl Position { + /// Create a new position + pub fn new(stream: u16, line: u32, col: u16) -> Self { + Self { + stream, + line, + col, + newline: false, + whitespace: false, + noexpand: false, + } + } + + /// Create a "bad" position for error tokens + #[cfg(test)] + pub fn bad() -> Self { + Self { + stream: u16::MAX, + line: 0, + col: 0, + newline: false, + whitespace: false, + noexpand: false, + } + } + + /// Check if this is a bad/invalid position + pub fn is_bad(&self) -> bool { + self.stream == u16::MAX && self.line == 0 + } +} + +impl fmt::Display for Position { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Get filename from global registry + let name = STREAMS.with(|s| { + s.borrow() + .get_name(self.stream) + .map(|n| n.to_string()) + .unwrap_or_else(|| "".to_string()) + }); + + if self.col > 0 { + write!(f, "{}:{}:{}", name, self.line, self.col) + } else { + write!(f, "{}:{}", name, self.line) + } + } +} + +// ============================================================================ +// Stream (Input Source) +// ============================================================================ + +/// Input stream information (following sparse's struct stream) +#[derive(Debug, Clone)] +pub struct Stream { + /// Filename + pub name: String, + /// Position of #include directive that included this file + /// None for the main file + pub include_pos: Option, + /// Line number offset from #line directive + /// If Some((new_line, new_file)), positions should be adjusted + pub line_directive: Option<(u32, Option)>, +} + +impl Stream { + /// Create a new stream for a file + pub fn new(name: String) -> Self { + Self { + name, + include_pos: None, + line_directive: None, + } + } + + /// Create a stream for an included file + #[cfg(test)] + pub fn included(name: String, include_pos: Position) -> Self { + Self { + name, + include_pos: Some(include_pos), + line_directive: None, + } + } +} + +// ============================================================================ +// Stream Registry (Global) +// ============================================================================ + +/// Stream registry for managing all input files +#[derive(Debug, Default)] +pub struct StreamRegistry { + streams: Vec, +} + +impl StreamRegistry { + /// Create a new empty registry + pub fn new() -> Self { + Self::default() + } + + /// Add a new stream, returning its ID + pub fn add(&mut self, name: String) -> u16 { + let id = self.streams.len() as u16; + self.streams.push(Stream::new(name)); + id + } + + /// Add a stream for an included file + #[cfg(test)] + pub fn add_included(&mut self, name: String, include_pos: Position) -> u16 { + let id = self.streams.len() as u16; + self.streams.push(Stream::included(name, include_pos)); + id + } + + /// Get stream by ID + pub fn get(&self, id: u16) -> Option<&Stream> { + self.streams.get(id as usize) + } + + /// Get stream name by ID + pub fn get_name(&self, id: u16) -> Option<&str> { + self.streams.get(id as usize).map(|s| s.name.as_str()) + } + + /// Get the previous stream in the include chain + /// Returns None for the main file + pub fn prev_stream(&self, id: u16) -> Option { + self.streams + .get(id as usize) + .and_then(|s| s.include_pos) + .map(|pos| pos.stream) + } + + /// Get effective filename and line for a position + /// This handles #line directives + pub fn effective_position(&self, pos: Position) -> (String, u32, u16) { + if let Some(stream) = self.streams.get(pos.stream as usize) { + if let Some((line_offset, ref opt_file)) = stream.line_directive { + let name = opt_file + .as_ref() + .map(|s| s.as_str()) + .unwrap_or(&stream.name); + // Adjust line number based on #line directive + return (name.to_string(), pos.line + line_offset, pos.col); + } + (stream.name.clone(), pos.line, pos.col) + } else { + ("".to_string(), pos.line, pos.col) + } + } + + /// Clear all streams (for reuse between compilations) + pub fn clear(&mut self) { + self.streams.clear(); + } +} + +// Thread-local stream registry +thread_local! { + pub static STREAMS: RefCell = RefCell::new(StreamRegistry::new()); +} + +/// Initialize a new stream, returning its ID +pub fn init_stream(name: &str) -> u16 { + STREAMS.with(|s| s.borrow_mut().add(name.to_string())) +} + +/// Initialize a stream for an included file +#[cfg(test)] +pub fn init_included_stream(name: &str, include_pos: Position) -> u16 { + STREAMS.with(|s| s.borrow_mut().add_included(name.to_string(), include_pos)) +} + +/// Get stream name by ID (for tests) +#[cfg(test)] +pub fn stream_name(id: u16) -> String { + STREAMS.with(|s| { + s.borrow() + .get_name(id) + .map(|n| n.to_string()) + .unwrap_or_else(|| "".to_string()) + }) +} + +/// Get previous stream in include chain +#[cfg(test)] +pub fn stream_prev(id: u16) -> Option { + STREAMS.with(|s| s.borrow().prev_stream(id)) +} + +/// Clear all streams (call at start of new compilation) +pub fn clear_streams() { + STREAMS.with(|s| s.borrow_mut().clear()); +} + +// ============================================================================ +// Error Tracking +// ============================================================================ + +/// Error phase flag +pub const ERROR_CURR_PHASE: u32 = 1; + +// Global error state +static HAS_ERROR: AtomicU32 = AtomicU32::new(0); +static ERROR_COUNT: AtomicU32 = AtomicU32::new(0); +static WARNING_COUNT: AtomicU32 = AtomicU32::new(0); + +/// Get current error state +#[cfg(test)] +pub fn has_error() -> u32 { + HAS_ERROR.load(Ordering::Relaxed) +} + +/// Set error flag +fn set_error(flag: u32) { + HAS_ERROR.fetch_or(flag, Ordering::Relaxed); +} + +/// Get error count +#[cfg(test)] +pub fn error_count() -> u32 { + ERROR_COUNT.load(Ordering::Relaxed) +} + +/// Get warning count +#[cfg(test)] +pub fn warning_count() -> u32 { + WARNING_COUNT.load(Ordering::Relaxed) +} + +/// Reset error/warning counts +#[cfg(test)] +pub fn reset_counts() { + ERROR_COUNT.store(0, Ordering::Relaxed); + WARNING_COUNT.store(0, Ordering::Relaxed); + HAS_ERROR.store(0, Ordering::Relaxed); +} + +// ============================================================================ +// Diagnostic Output +// ============================================================================ + +/// Diagnostic severity level +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum DiagLevel { + Warning, + Error, +} + +impl DiagLevel { + fn prefix(&self) -> &'static str { + match self { + DiagLevel::Warning => "warning: ", + DiagLevel::Error => "error: ", + } + } +} + +/// Build include chain message for nested includes +fn show_include_chain(stream_id: u16) -> Option { + STREAMS.with(|s| { + let streams = s.borrow(); + let mut chain = Vec::new(); + let mut current = stream_id; + + // Walk up the include chain + while let Some(prev) = streams.prev_stream(current) { + if let Some(stream) = streams.get(prev) { + chain.push(prettify_path(&stream.name)); + } + current = prev; + } + + if chain.is_empty() { + None + } else { + Some(format!(" (through {})", chain.join(", "))) + } + }) +} + +/// Prettify a path by removing ./ prefix if present +fn prettify_path(path: &str) -> String { + path.strip_prefix("./") + .map(|s| s.to_string()) + .unwrap_or_else(|| path.to_string()) +} + +/// Output a diagnostic message +fn do_diag(level: DiagLevel, pos: Position, msg: &str) { + // Don't output if position is bad + if pos.is_bad() { + return; + } + + // Track errors/warnings + match level { + DiagLevel::Error => { + ERROR_COUNT.fetch_add(1, Ordering::Relaxed); + set_error(ERROR_CURR_PHASE); + } + DiagLevel::Warning => { + WARNING_COUNT.fetch_add(1, Ordering::Relaxed); + } + } + + // Format the message + let (filename, line, col) = STREAMS.with(|s| s.borrow().effective_position(pos)); + let filename = prettify_path(&filename); + + // Check for include chain (only on first occurrence of a file) + let include_note = show_include_chain(pos.stream); + + // Print include context if present + if let Some(chain) = include_note { + // Get base filename + let base = STREAMS.with(|s| { + s.borrow() + .get(0) + .map(|st| st.name.clone()) + .unwrap_or_else(|| "".to_string()) + }); + eprintln!("{}: note: in included file{}:", base, chain); + } + + // Print the diagnostic + let _ = if col > 0 { + writeln!( + io::stderr(), + "{}:{}:{}: {}{}", + filename, + line, + col, + level.prefix(), + msg + ) + } else { + writeln!( + io::stderr(), + "{}:{}: {}{}", + filename, + line, + level.prefix(), + msg + ) + }; +} + +// ============================================================================ +// Public Diagnostic Functions +// ============================================================================ + +/// Print a warning message +pub fn warning(pos: Position, msg: &str) { + do_diag(DiagLevel::Warning, pos, msg); +} + +/// Print an error message +pub fn error(pos: Position, msg: &str) { + do_diag(DiagLevel::Error, pos, msg); +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_position_display() { + clear_streams(); + let stream = init_stream("test.c"); + let pos = Position::new(stream, 10, 5); + let s = format!("{}", pos); + assert_eq!(s, "test.c:10:5"); + } + + #[test] + fn test_position_no_column() { + clear_streams(); + let stream = init_stream("test.c"); + let pos = Position::new(stream, 42, 0); + let s = format!("{}", pos); + assert_eq!(s, "test.c:42"); + } + + #[test] + fn test_bad_position() { + let pos = Position::bad(); + assert!(pos.is_bad()); + } + + #[test] + fn test_stream_registry() { + clear_streams(); + let s1 = init_stream("main.c"); + let s2 = init_stream("header.h"); + assert_eq!(stream_name(s1), "main.c"); + assert_eq!(stream_name(s2), "header.h"); + } + + #[test] + fn test_include_chain() { + clear_streams(); + let main_stream = init_stream("main.c"); + let main_pos = Position::new(main_stream, 5, 1); + + let header_stream = init_included_stream("header.h", main_pos); + + assert_eq!(stream_prev(header_stream), Some(main_stream)); + assert_eq!(stream_prev(main_stream), None); + } + + #[test] + fn test_prettify_path() { + assert_eq!(prettify_path("./test.c"), "test.c"); + assert_eq!(prettify_path("test.c"), "test.c"); + assert_eq!(prettify_path("./src/main.c"), "src/main.c"); + } + + #[test] + fn test_error_counting() { + reset_counts(); + assert_eq!(error_count(), 0); + assert_eq!(warning_count(), 0); + + clear_streams(); + let stream = init_stream("test.c"); + let pos = Position::new(stream, 1, 1); + + error(pos, "test error"); + assert_eq!(error_count(), 1); + assert!(has_error() & ERROR_CURR_PHASE != 0); + + warning(pos, "test warning"); + assert_eq!(warning_count(), 1); + + reset_counts(); + assert_eq!(error_count(), 0); + } +} diff --git a/cc/dominate.rs b/cc/dominate.rs new file mode 100644 index 000000000..fd15b6815 --- /dev/null +++ b/cc/dominate.rs @@ -0,0 +1,636 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Dominator tree computation for pcc C99 compiler +// +// Based on sparse's flowgraph.c and dominate.c: +// - Dominator tree: "A simple, fast dominance algorithm" by Cooper, Harvey, Kennedy +// - IDF computation: "A Linear Time Algorithm for Placing phi-nodes" by Sreedhar and Gao +// + +use crate::ir::{BasicBlockId, Function}; +use std::collections::{HashMap, HashSet}; + +// ============================================================================ +// Reverse Postorder Computation +// ============================================================================ + +/// Compute reverse postorder numbering for all blocks. +/// Returns a vector of block IDs in reverse postorder, and updates each block +/// with its postorder number (stored temporarily in dom_level for now). +fn compute_postorder(func: &mut Function) -> Vec { + let mut visited = HashSet::new(); + let mut postorder = Vec::new(); + + fn dfs( + func: &Function, + bb_id: BasicBlockId, + visited: &mut HashSet, + postorder: &mut Vec, + ) { + if visited.contains(&bb_id) { + return; + } + visited.insert(bb_id); + + if let Some(bb) = func.get_block(bb_id) { + // Visit children in reverse order (like sparse) + for &child in bb.children.iter().rev() { + dfs(func, child, visited, postorder); + } + } + postorder.push(bb_id); + } + + dfs(func, func.entry, &mut visited, &mut postorder); + + // Assign postorder numbers + let mut postorder_map = HashMap::new(); + for (i, &bb_id) in postorder.iter().enumerate() { + postorder_map.insert(bb_id, i as u32); + } + + // Store postorder numbers in blocks (temporarily in dom_level) + for bb in &mut func.blocks { + if let Some(&nr) = postorder_map.get(&bb.id) { + bb.dom_level = nr; + } + } + + // Reverse to get reverse postorder + postorder.reverse(); + postorder +} + +// ============================================================================ +// Dominator Tree Construction (Cooper et al.) +// ============================================================================ + +/// Build the dominator tree for a function. +/// +/// Uses the algorithm from: +/// "A simple, fast dominance algorithm" by K. D. Cooper, T. J. Harvey, and K. Kennedy +/// +/// This populates: +/// - `bb.idom` - immediate dominator for each block +/// - `bb.dom_level` - depth in dominator tree +/// - `bb.dom_children` - blocks immediately dominated by this block +pub fn domtree_build(func: &mut Function) { + if func.blocks.is_empty() { + return; + } + + // Step 1: Compute reverse postorder + let rpo = compute_postorder(func); + let size = rpo.len(); + if size == 0 { + return; + } + + // Create postorder number lookup + let mut postorder_nr: HashMap = HashMap::new(); + for (i, &bb_id) in rpo.iter().rev().enumerate() { + postorder_nr.insert(bb_id, i); + } + + // Entry block has highest postorder number + let entry = func.entry; + let entry_nr = size - 1; + + // Initialize dominators array + // doms[postorder_nr] = immediate dominator's postorder_nr + let mut doms: Vec> = vec![None; size]; + doms[entry_nr] = Some(entry_nr); // Entry dominates itself + + // Helper: intersect two dominators using postorder numbers + let intersect = |doms: &[Option], mut b1: usize, mut b2: usize| -> usize { + while b1 != b2 { + while b1 < b2 { + if let Some(d) = doms[b1] { + b1 = d; + } else { + break; + } + } + while b2 < b1 { + if let Some(d) = doms[b2] { + b2 = d; + } else { + break; + } + } + } + b1 + }; + + // Iterate until fixed point + let mut changed = true; + while changed { + changed = false; + + for &bb_id in &rpo { + if bb_id == entry { + continue; + } + + let bb_nr = postorder_nr[&bb_id]; + + // Get parents of this block + let parents: Vec = func + .get_block(bb_id) + .map(|bb| bb.parents.clone()) + .unwrap_or_default(); + + // Find new idom as intersection of all processed predecessors + let mut new_idom: Option = None; + for parent_id in parents { + let parent_nr = postorder_nr[&parent_id]; + if doms[parent_nr].is_none() { + continue; + } + new_idom = Some(match new_idom { + None => parent_nr, + Some(current) => intersect(&doms, parent_nr, current), + }); + } + + if let Some(idom) = new_idom { + if doms[bb_nr] != Some(idom) { + doms[bb_nr] = Some(idom); + changed = true; + } + } + } + } + + // Create reverse mapping: postorder_nr -> BasicBlockId + let mut nr_to_bb: HashMap = HashMap::new(); + for (&bb_id, &nr) in &postorder_nr { + nr_to_bb.insert(nr, bb_id); + } + + // Clear old dominator info + for bb in &mut func.blocks { + bb.idom = None; + bb.dom_children.clear(); + } + + // Set idom links + for bb in &mut func.blocks { + if bb.id == entry { + continue; + } + // Skip unreachable blocks (not in postorder) + let bb_nr = match postorder_nr.get(&bb.id) { + Some(&nr) => nr, + None => continue, // Unreachable block + }; + if let Some(idom_nr) = doms[bb_nr] { + if idom_nr != bb_nr { + // Map back to BasicBlockId + if let Some(&idom_id) = nr_to_bb.get(&idom_nr) { + bb.idom = Some(idom_id); + } + } + } + } + + // Build dom_children lists + let idom_pairs: Vec<(BasicBlockId, BasicBlockId)> = func + .blocks + .iter() + .filter_map(|bb| bb.idom.map(|idom| (idom, bb.id))) + .collect(); + + for (idom_id, child_id) in idom_pairs { + if let Some(idom_bb) = func.get_block_mut(idom_id) { + idom_bb.dom_children.push(child_id); + } + } + + // Compute dominator tree levels + // Entry is level 0, children are level+1 + let mut max_level = 0u32; + for &bb_id in &rpo { + let level = if bb_id == entry { + 0 + } else { + let idom_level = func + .get_block(bb_id) + .and_then(|bb| bb.idom) + .and_then(|idom_id| func.get_block(idom_id)) + .map(|bb| bb.dom_level) + .unwrap_or(0); + idom_level + 1 + }; + + if let Some(bb) = func.get_block_mut(bb_id) { + bb.dom_level = level; + } + + if level > max_level { + max_level = level; + } + } + + func.max_dom_level = max_level; +} + +// ============================================================================ +// Dominance Frontier Computation +// ============================================================================ + +/// Compute the dominance frontier for all blocks. +/// +/// The dominance frontier of a block B is the set of blocks where B's dominance ends. +/// More precisely: DF(B) = {D | B dominates a predecessor of D, but B does not strictly dominate D} +/// +/// Uses the algorithm from Cytron et al. +pub fn compute_dominance_frontiers(func: &mut Function) { + // Clear existing frontiers + for bb in &mut func.blocks { + bb.dom_frontier.clear(); + } + + // For each block + let block_ids: Vec = func.blocks.iter().map(|bb| bb.id).collect(); + + for &bb_id in &block_ids { + // Get block's parents + let parents: Vec = func + .get_block(bb_id) + .map(|bb| bb.parents.clone()) + .unwrap_or_default(); + + // If block has multiple predecessors (join point) + if parents.len() >= 2 { + // Get block's immediate dominator + let idom = func.get_block(bb_id).and_then(|bb| bb.idom); + + // For each predecessor + for pred_id in parents { + // Walk up the dominator tree from pred until we reach idom + let mut runner = Some(pred_id); + while runner.is_some() && runner != idom { + let runner_id = runner.unwrap(); + + // Add bb_id to runner's dominance frontier + if let Some(runner_bb) = func.get_block_mut(runner_id) { + if !runner_bb.dom_frontier.contains(&bb_id) { + runner_bb.dom_frontier.push(bb_id); + } + } + + // Move up to idom + runner = func.get_block(runner_id).and_then(|bb| bb.idom); + } + } + } + } +} + +// ============================================================================ +// Iterated Dominance Frontier (IDF) Computation +// ============================================================================ + +/// Priority queue based on dominator tree level (higher level = higher priority). +/// Used by the Sreedhar-Gao algorithm. +struct LevelQueue { + /// Buckets indexed by level + buckets: Vec>, + /// Current maximum non-empty level + max_level: usize, +} + +impl LevelQueue { + fn new(max_level: u32) -> Self { + Self { + buckets: vec![Vec::new(); max_level as usize + 1], + max_level: 0, + } + } + + fn push(&mut self, bb_id: BasicBlockId, level: u32) { + let level = level as usize; + self.buckets[level].push(bb_id); + if level > self.max_level { + self.max_level = level; + } + } + + fn pop(&mut self) -> Option { + loop { + if let Some(bb) = self.buckets[self.max_level].pop() { + return Some(bb); + } + if self.max_level == 0 { + return None; + } + self.max_level -= 1; + } + } +} + +/// Compute the iterated dominance frontier of a set of blocks. +/// +/// The IDF of a set S is DF*(S) = DF(S) ∪ DF(DF(S)) ∪ DF(DF(DF(S))) ∪ ... +/// until fixed point. +/// +/// Uses the linear time algorithm from: +/// "A Linear Time Algorithm for Placing phi-nodes" by Sreedhar and Gao +/// +/// # Arguments +/// * `func` - The function (must have dominator tree built) +/// * `alpha` - The set of defining blocks +/// +/// # Returns +/// * Vector of blocks in the IDF +pub fn idf_compute(func: &Function, alpha: &[BasicBlockId]) -> Vec { + if func.max_dom_level == 0 && func.blocks.len() > 1 { + // Dominator tree not built + return Vec::new(); + } + + let mut visited = HashSet::new(); + let mut in_idf = HashSet::new(); + let mut in_alpha: HashSet = alpha.iter().copied().collect(); + let mut idf = Vec::new(); + + let mut queue = LevelQueue::new(func.max_dom_level); + + // Initialize: put all alpha blocks in the queue + for &bb_id in alpha { + if let Some(bb) = func.get_block(bb_id) { + queue.push(bb_id, bb.dom_level); + } + } + + // Process queue + while let Some(x) = queue.pop() { + visited.insert(x); + + let x_level = func.get_block(x).map(|bb| bb.dom_level).unwrap_or(0); + + // Get children (successors) of x + let children: Vec = func + .get_block(x) + .map(|bb| bb.children.clone()) + .unwrap_or_default(); + + for y in children { + // Skip if y is dominated by x (not a J-edge) + let y_idom = func.get_block(y).and_then(|bb| bb.idom); + if y_idom == Some(x) { + continue; + } + + // y must be at same or lower level than x to be in DF + let y_level = func.get_block(y).map(|bb| bb.dom_level).unwrap_or(0); + if y_level > x_level { + continue; + } + + // Add y to IDF if not already there + if !in_idf.contains(&y) { + in_idf.insert(y); + idf.push(y); + + // If y is not in alpha, add it to the queue for further exploration + if !in_alpha.contains(&y) { + queue.push(y, y_level); + } + } + } + + // Visit dominator tree children + let dom_children: Vec = func + .get_block(x) + .map(|bb| bb.dom_children.clone()) + .unwrap_or_default(); + + for child in dom_children { + if !visited.contains(&child) { + // Recursively visit in dominator tree order + // For proper IDF, we need to visit subtree + visit_domtree( + func, + child, + x_level, + &mut visited, + &mut in_idf, + &mut in_alpha, + &mut idf, + &mut queue, + ); + } + } + } + + idf +} + +#[allow(clippy::too_many_arguments)] +fn visit_domtree( + func: &Function, + bb_id: BasicBlockId, + curr_level: u32, + visited: &mut HashSet, + in_idf: &mut HashSet, + in_alpha: &mut HashSet, + idf: &mut Vec, + queue: &mut LevelQueue, +) { + visited.insert(bb_id); + + // Check successors + let children: Vec = func + .get_block(bb_id) + .map(|bb| bb.children.clone()) + .unwrap_or_default(); + + for y in children { + // Skip if y is dominated by bb_id (not a J-edge) + let y_idom = func.get_block(y).and_then(|bb| bb.idom); + if y_idom == Some(bb_id) { + continue; + } + + // y must be at same or lower level + let y_level = func.get_block(y).map(|bb| bb.dom_level).unwrap_or(0); + if y_level > curr_level { + continue; + } + + if !in_idf.contains(&y) { + in_idf.insert(y); + idf.push(y); + + if !in_alpha.contains(&y) { + queue.push(y, y_level); + } + } + } + + // Recurse into dominator tree children + let dom_children: Vec = func + .get_block(bb_id) + .map(|bb| bb.dom_children.clone()) + .unwrap_or_default(); + + for child in dom_children { + if !visited.contains(&child) { + visit_domtree( + func, child, curr_level, visited, in_idf, in_alpha, idf, queue, + ); + } + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::ir::{BasicBlock, Instruction, Opcode}; + use crate::types::{Type, TypeKind}; + + fn make_test_cfg() -> Function { + // Create a simple CFG: + // entry(0) + // / \ + // v v + // bb1(1) bb2(2) + // \ / + // v v + // merge(3) + // | + // v + // exit(4) + + let mut func = Function::new("test", Type::basic(TypeKind::Void)); + + let mut entry = BasicBlock::new(BasicBlockId(0)); + entry.children = vec![BasicBlockId(1), BasicBlockId(2)]; + entry.add_insn(Instruction::new(Opcode::Entry)); + entry.add_insn(Instruction::cbr( + crate::ir::PseudoId(0), + BasicBlockId(1), + BasicBlockId(2), + )); + + let mut bb1 = BasicBlock::new(BasicBlockId(1)); + bb1.parents = vec![BasicBlockId(0)]; + bb1.children = vec![BasicBlockId(3)]; + bb1.add_insn(Instruction::br(BasicBlockId(3))); + + let mut bb2 = BasicBlock::new(BasicBlockId(2)); + bb2.parents = vec![BasicBlockId(0)]; + bb2.children = vec![BasicBlockId(3)]; + bb2.add_insn(Instruction::br(BasicBlockId(3))); + + let mut merge = BasicBlock::new(BasicBlockId(3)); + merge.parents = vec![BasicBlockId(1), BasicBlockId(2)]; + merge.children = vec![BasicBlockId(4)]; + merge.add_insn(Instruction::br(BasicBlockId(4))); + + let mut exit = BasicBlock::new(BasicBlockId(4)); + exit.parents = vec![BasicBlockId(3)]; + exit.add_insn(Instruction::ret(None)); + + func.entry = BasicBlockId(0); + func.blocks = vec![entry, bb1, bb2, merge, exit]; + func + } + + #[test] + fn test_domtree_build() { + let mut func = make_test_cfg(); + domtree_build(&mut func); + + // Entry should have no idom + let entry = func.get_block(BasicBlockId(0)).unwrap(); + assert!(entry.idom.is_none()); + assert_eq!(entry.dom_level, 0); + + // bb1 and bb2 should have entry as idom + let bb1 = func.get_block(BasicBlockId(1)).unwrap(); + assert_eq!(bb1.idom, Some(BasicBlockId(0))); + assert_eq!(bb1.dom_level, 1); + + let bb2 = func.get_block(BasicBlockId(2)).unwrap(); + assert_eq!(bb2.idom, Some(BasicBlockId(0))); + assert_eq!(bb2.dom_level, 1); + + // merge should have entry as idom (not bb1 or bb2) + let merge = func.get_block(BasicBlockId(3)).unwrap(); + assert_eq!(merge.idom, Some(BasicBlockId(0))); + assert_eq!(merge.dom_level, 1); + + // exit should have merge as idom + let exit = func.get_block(BasicBlockId(4)).unwrap(); + assert_eq!(exit.idom, Some(BasicBlockId(3))); + assert_eq!(exit.dom_level, 2); + } + + #[test] + fn test_dominance_frontiers() { + let mut func = make_test_cfg(); + domtree_build(&mut func); + compute_dominance_frontiers(&mut func); + + // bb1's DF should be {merge} - bb1 dominates itself but not merge + let bb1 = func.get_block(BasicBlockId(1)).unwrap(); + assert!(bb1.dom_frontier.contains(&BasicBlockId(3))); + + // bb2's DF should be {merge} + let bb2 = func.get_block(BasicBlockId(2)).unwrap(); + assert!(bb2.dom_frontier.contains(&BasicBlockId(3))); + + // merge's DF should be empty + let merge = func.get_block(BasicBlockId(3)).unwrap(); + assert!(merge.dom_frontier.is_empty()); + } + + #[test] + fn test_idf_compute() { + let mut func = make_test_cfg(); + domtree_build(&mut func); + + // IDF of {bb1} should be {merge} + let idf = idf_compute(&func, &[BasicBlockId(1)]); + assert!(idf.contains(&BasicBlockId(3))); + + // IDF of {bb1, bb2} should be {merge} + let idf2 = idf_compute(&func, &[BasicBlockId(1), BasicBlockId(2)]); + assert!(idf2.contains(&BasicBlockId(3))); + } + + #[test] + fn test_dominates() { + let mut func = make_test_cfg(); + domtree_build(&mut func); + + // Entry dominates everything + assert!(func.dominates(BasicBlockId(0), BasicBlockId(0))); + assert!(func.dominates(BasicBlockId(0), BasicBlockId(1))); + assert!(func.dominates(BasicBlockId(0), BasicBlockId(2))); + assert!(func.dominates(BasicBlockId(0), BasicBlockId(3))); + assert!(func.dominates(BasicBlockId(0), BasicBlockId(4))); + + // bb1 only dominates itself + assert!(func.dominates(BasicBlockId(1), BasicBlockId(1))); + assert!(!func.dominates(BasicBlockId(1), BasicBlockId(3))); + assert!(!func.dominates(BasicBlockId(1), BasicBlockId(4))); + + // merge dominates exit + assert!(func.dominates(BasicBlockId(3), BasicBlockId(4))); + assert!(!func.dominates(BasicBlockId(3), BasicBlockId(1))); + } +} diff --git a/cc/ir.rs b/cc/ir.rs new file mode 100644 index 000000000..405c76591 --- /dev/null +++ b/cc/ir.rs @@ -0,0 +1,1282 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Intermediate Representation (IR) for pcc C99 compiler +// Based on sparse's linearize.c SSA-style IR +// +// The IR uses Single Static Assignment (SSA) form where each variable +// is assigned exactly once. This simplifies dataflow analysis and +// optimization passes. +// + +use crate::diag::Position; +use crate::types::Type; +use std::collections::HashMap; +use std::fmt; + +// ============================================================================ +// Instruction Reference - for def-use chains +// ============================================================================ + +/// Reference to an instruction by (basic block id, instruction index) +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct InsnRef { + pub bb: BasicBlockId, + pub idx: usize, +} + +impl InsnRef { + pub fn new(bb: BasicBlockId, idx: usize) -> Self { + Self { bb, idx } + } +} + +// ============================================================================ +// Opcodes - Following sparse's opcode.def +// ============================================================================ + +/// IR opcodes following sparse's design +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Opcode { + // Function entry + Entry, + + // Terminators - end a basic block + Ret, // Return from function + Br, // Unconditional branch + Cbr, // Conditional branch + Switch, // Multi-way branch + + // Integer arithmetic binary ops + Add, + Sub, + Mul, + DivU, // Unsigned division + DivS, // Signed division + ModU, // Unsigned modulo + ModS, // Signed modulo + Shl, // Shift left + Lsr, // Logical shift right (unsigned) + Asr, // Arithmetic shift right (signed) + + // Floating-point binary ops + FAdd, + FSub, + FMul, + FDiv, + + // Bitwise/logical binary ops + And, + Or, + Xor, + + // Integer comparisons (result is 0 or 1) + SetEq, // == + SetNe, // != + SetLt, // < (signed) + SetLe, // <= (signed) + SetGt, // > (signed) + SetGe, // >= (signed) + SetB, // < (unsigned, "below") + SetBe, // <= (unsigned) + SetA, // > (unsigned, "above") + SetAe, // >= (unsigned) + + // Floating-point comparisons (ordered) + FCmpOEq, + FCmpONe, + FCmpOLt, + FCmpOLe, + FCmpOGt, + FCmpOGe, + + // Unary ops + Not, // Bitwise NOT + Neg, // Integer negation + FNeg, // Float negation + + // Type conversions + Trunc, // Truncate to smaller integer + Zext, // Zero-extend to larger integer + Sext, // Sign-extend to larger integer + FCvtU, // Float to unsigned int + FCvtS, // Float to signed int + UCvtF, // Unsigned int to float + SCvtF, // Signed int to float + FCvtF, // Float to float (different sizes) + + // Memory ops + Load, // Load from memory + Store, // Store to memory + + // SSA-specific + Phi, // Phi node for SSA + Copy, // Copy (for out-of-SSA) + + // Other + SymAddr, // Get address of symbol + Call, // Function call + Select, // Ternary select: cond ? a : b + SetVal, // Create pseudo for constant + Nop, // No operation + + // Variadic function support + VaStart, // Initialize va_list + VaArg, // Get next vararg + VaEnd, // Clean up va_list (usually no-op) + VaCopy, // Copy va_list + + // Byte-swapping builtins + Bswap16, // Byte-swap 16-bit value + Bswap32, // Byte-swap 32-bit value + Bswap64, // Byte-swap 64-bit value + + // Stack allocation builtin + Alloca, // Dynamic stack allocation +} + +impl Opcode { + /// Check if this opcode is a terminator (ends a basic block) + pub fn is_terminator(&self) -> bool { + matches!( + self, + Opcode::Ret | Opcode::Br | Opcode::Cbr | Opcode::Switch + ) + } + + /// Get the opcode name for display + pub fn name(&self) -> &'static str { + match self { + Opcode::Entry => "entry", + Opcode::Ret => "ret", + Opcode::Br => "br", + Opcode::Cbr => "cbr", + Opcode::Switch => "switch", + Opcode::Add => "add", + Opcode::Sub => "sub", + Opcode::Mul => "mul", + Opcode::DivU => "divu", + Opcode::DivS => "divs", + Opcode::ModU => "modu", + Opcode::ModS => "mods", + Opcode::Shl => "shl", + Opcode::Lsr => "lsr", + Opcode::Asr => "asr", + Opcode::FAdd => "fadd", + Opcode::FSub => "fsub", + Opcode::FMul => "fmul", + Opcode::FDiv => "fdiv", + Opcode::And => "and", + Opcode::Or => "or", + Opcode::Xor => "xor", + Opcode::SetEq => "seteq", + Opcode::SetNe => "setne", + Opcode::SetLt => "setlt", + Opcode::SetLe => "setle", + Opcode::SetGt => "setgt", + Opcode::SetGe => "setge", + Opcode::SetB => "setb", + Opcode::SetBe => "setbe", + Opcode::SetA => "seta", + Opcode::SetAe => "setae", + Opcode::FCmpOEq => "fcmp_oeq", + Opcode::FCmpONe => "fcmp_one", + Opcode::FCmpOLt => "fcmp_olt", + Opcode::FCmpOLe => "fcmp_ole", + Opcode::FCmpOGt => "fcmp_ogt", + Opcode::FCmpOGe => "fcmp_oge", + Opcode::Not => "not", + Opcode::Neg => "neg", + Opcode::FNeg => "fneg", + Opcode::Trunc => "trunc", + Opcode::Zext => "zext", + Opcode::Sext => "sext", + Opcode::FCvtU => "fcvtu", + Opcode::FCvtS => "fcvts", + Opcode::UCvtF => "ucvtf", + Opcode::SCvtF => "scvtf", + Opcode::FCvtF => "fcvtf", + Opcode::Load => "load", + Opcode::Store => "store", + Opcode::Phi => "phi", + Opcode::Copy => "copy", + Opcode::SymAddr => "symaddr", + Opcode::Call => "call", + Opcode::Select => "sel", + Opcode::SetVal => "setval", + Opcode::Nop => "nop", + Opcode::VaStart => "va_start", + Opcode::VaArg => "va_arg", + Opcode::VaEnd => "va_end", + Opcode::VaCopy => "va_copy", + Opcode::Bswap16 => "bswap16", + Opcode::Bswap32 => "bswap32", + Opcode::Bswap64 => "bswap64", + Opcode::Alloca => "alloca", + } + } +} + +impl fmt::Display for Opcode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.name()) + } +} + +// ============================================================================ +// Pseudo - Virtual registers / values in SSA form +// ============================================================================ + +/// Unique ID for a pseudo (virtual register) +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct PseudoId(pub u32); + +impl fmt::Display for PseudoId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "%{}", self.0) + } +} + +/// Type of pseudo value +#[derive(Debug, Clone, PartialEq)] +pub enum PseudoKind { + /// Void (no value) + Void, + /// Undefined value + Undef, + /// Virtual register (result of an instruction) + Reg(u32), + /// Function argument + Arg(u32), + /// Phi node result + Phi(u32), + /// Symbol reference (variable, function) + Sym(String), + /// Constant integer value + Val(i64), + /// Constant float value + FVal(f64), +} + +/// A pseudo (virtual register or value) in SSA form +#[derive(Debug, Clone)] +pub struct Pseudo { + pub id: PseudoId, + pub kind: PseudoKind, + /// Optional name for debugging (from source variable) + pub name: Option, +} + +impl PartialEq for Pseudo { + fn eq(&self, other: &Self) -> bool { + self.id == other.id && self.kind == other.kind + } +} + +impl Default for Pseudo { + fn default() -> Self { + Self { + id: PseudoId(0), + kind: PseudoKind::Void, + name: None, + } + } +} + +impl Pseudo { + /// Create an undefined pseudo + pub fn undef(id: PseudoId) -> Self { + Self { + id, + kind: PseudoKind::Undef, + name: None, + } + } + + /// Create a register pseudo + pub fn reg(id: PseudoId, nr: u32) -> Self { + Self { + id, + kind: PseudoKind::Reg(nr), + name: None, + } + } + + /// Create an argument pseudo + pub fn arg(id: PseudoId, nr: u32) -> Self { + Self { + id, + kind: PseudoKind::Arg(nr), + name: None, + } + } + + /// Create a phi pseudo + pub fn phi(id: PseudoId, nr: u32) -> Self { + Self { + id, + kind: PseudoKind::Phi(nr), + name: None, + } + } + + /// Create a symbol pseudo + pub fn sym(id: PseudoId, name: String) -> Self { + Self { + id, + kind: PseudoKind::Sym(name.clone()), + name: Some(name), + } + } + + /// Create a constant value pseudo + pub fn val(id: PseudoId, value: i64) -> Self { + Self { + id, + kind: PseudoKind::Val(value), + name: None, + } + } + + /// Create a constant float pseudo + pub fn fval(id: PseudoId, value: f64) -> Self { + Self { + id, + kind: PseudoKind::FVal(value), + name: None, + } + } + + /// With a name for debugging + pub fn with_name(mut self, name: impl Into) -> Self { + self.name = Some(name.into()); + self + } +} + +impl fmt::Display for Pseudo { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.kind { + PseudoKind::Void => write!(f, "VOID"), + PseudoKind::Undef => write!(f, "UNDEF"), + PseudoKind::Reg(nr) => { + if let Some(name) = &self.name { + write!(f, "%r{}({})", nr, name) + } else { + write!(f, "%r{}", nr) + } + } + PseudoKind::Arg(nr) => write!(f, "%arg{}", nr), + PseudoKind::Phi(nr) => { + if let Some(name) = &self.name { + write!(f, "%phi{}({})", nr, name) + } else { + write!(f, "%phi{}", nr) + } + } + PseudoKind::Sym(name) => write!(f, "{}", name), + PseudoKind::Val(v) => { + if *v > 1000 || *v < -1000 { + write!(f, "${:#x}", v) + } else { + write!(f, "${}", v) + } + } + PseudoKind::FVal(v) => write!(f, "${}", v), + } + } +} + +// ============================================================================ +// BasicBlock ID +// ============================================================================ + +/// Unique ID for a basic block +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct BasicBlockId(pub u32); + +impl fmt::Display for BasicBlockId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, ".L{}", self.0) + } +} + +// ============================================================================ +// Instruction +// ============================================================================ + +/// An IR instruction +#[derive(Debug, Clone)] +pub struct Instruction { + /// Opcode + pub op: Opcode, + /// Result pseudo (target) + pub target: Option, + /// Source operands + pub src: Vec, + /// Type of the result + pub typ: Option, + /// For branches: true target + pub bb_true: Option, + /// For conditional branches: false target + pub bb_false: Option, + /// For memory ops: offset + pub offset: i64, + /// For phi nodes: list of (bb, pseudo) pairs + pub phi_list: Vec<(BasicBlockId, PseudoId)>, + /// For calls: function name or pseudo + pub func_name: Option, + /// Bit size of the operation (target size for conversions) + pub size: u32, + /// Source size for extension/truncation operations + pub src_size: u32, + /// For switch: case value to target block mapping + pub switch_cases: Vec<(i64, BasicBlockId)>, + /// For switch: default block (if no case matches) + pub switch_default: Option, + /// For calls: argument types (parallel to src for Call instructions) + pub arg_types: Vec, + /// For variadic calls: index where variadic arguments start (0-based) + /// All arguments at this index and beyond are variadic (should be passed on stack) + pub variadic_arg_start: Option, + /// Source position for debug info + pub pos: Option, +} + +impl Default for Instruction { + fn default() -> Self { + Self { + op: Opcode::Nop, + target: None, + src: Vec::new(), + typ: None, + bb_true: None, + bb_false: None, + offset: 0, + phi_list: Vec::new(), + func_name: None, + size: 0, + src_size: 0, + switch_cases: Vec::new(), + switch_default: None, + arg_types: Vec::new(), + variadic_arg_start: None, + pos: None, + } + } +} + +impl Instruction { + /// Create a new instruction + pub fn new(op: Opcode) -> Self { + Self { + op, + ..Default::default() + } + } + + /// Set the target + pub fn with_target(mut self, target: PseudoId) -> Self { + self.target = Some(target); + self + } + + /// Add a source operand + pub fn with_src(mut self, src: PseudoId) -> Self { + self.src.push(src); + self + } + + /// Set src1 and src2 (for binary ops) + pub fn with_src2(mut self, src1: PseudoId, src2: PseudoId) -> Self { + self.src = vec![src1, src2]; + self + } + + /// Set src1, src2, src3 (for ternary ops like select) + pub fn with_src3(mut self, src1: PseudoId, src2: PseudoId, src3: PseudoId) -> Self { + self.src = vec![src1, src2, src3]; + self + } + + /// Set the type + pub fn with_type(mut self, typ: Type) -> Self { + self.size = typ.size_bits(); + self.typ = Some(typ); + self + } + + /// Set the true branch target + pub fn with_bb_true(mut self, bb: BasicBlockId) -> Self { + self.bb_true = Some(bb); + self + } + + /// Set the false branch target + pub fn with_bb_false(mut self, bb: BasicBlockId) -> Self { + self.bb_false = Some(bb); + self + } + + /// Set memory offset + pub fn with_offset(mut self, offset: i64) -> Self { + self.offset = offset; + self + } + + /// Set function name for calls + pub fn with_func(mut self, name: impl Into) -> Self { + self.func_name = Some(name.into()); + self + } + + /// Set bit size + pub fn with_size(mut self, size: u32) -> Self { + self.size = size; + self + } + + /// Set source position for debug info + pub fn with_pos(mut self, pos: Position) -> Self { + self.pos = Some(pos); + self + } + + /// Create a return instruction + pub fn ret(src: Option) -> Self { + let mut insn = Self::new(Opcode::Ret); + if let Some(s) = src { + insn.src.push(s); + } + insn + } + + /// Create a return instruction with type + pub fn ret_typed(src: Option, typ: Type) -> Self { + let mut insn = Self::ret(src).with_type(typ.clone()); + insn.size = typ.size_bits(); + insn + } + + /// Create an unconditional branch + pub fn br(target: BasicBlockId) -> Self { + Self::new(Opcode::Br).with_bb_true(target) + } + + /// Create a conditional branch + pub fn cbr(cond: PseudoId, bb_true: BasicBlockId, bb_false: BasicBlockId) -> Self { + Self::new(Opcode::Cbr) + .with_src(cond) + .with_bb_true(bb_true) + .with_bb_false(bb_false) + } + + /// Create a switch instruction + pub fn switch_insn( + value: PseudoId, + cases: Vec<(i64, BasicBlockId)>, + default: Option, + size: u32, + ) -> Self { + Self { + op: Opcode::Switch, + target: Some(value), + switch_cases: cases, + switch_default: default, + size, + ..Default::default() + } + } + + /// Create a binary operation + pub fn binop(op: Opcode, target: PseudoId, src1: PseudoId, src2: PseudoId, typ: Type) -> Self { + Self::new(op) + .with_target(target) + .with_src2(src1, src2) + .with_type(typ) + } + + /// Create a unary operation + pub fn unop(op: Opcode, target: PseudoId, src: PseudoId, typ: Type) -> Self { + Self::new(op) + .with_target(target) + .with_src(src) + .with_type(typ) + } + + /// Create a load instruction + pub fn load(target: PseudoId, addr: PseudoId, offset: i64, typ: Type) -> Self { + Self::new(Opcode::Load) + .with_target(target) + .with_src(addr) + .with_offset(offset) + .with_type(typ) + } + + /// Create a store instruction + pub fn store(value: PseudoId, addr: PseudoId, offset: i64, typ: Type) -> Self { + Self::new(Opcode::Store) + .with_src(addr) + .with_src(value) + .with_offset(offset) + .with_type(typ) + } + + /// Create a call instruction + pub fn call( + target: Option, + func: &str, + args: Vec, + arg_types: Vec, + ret_type: Type, + ) -> Self { + let mut insn = Self::new(Opcode::Call).with_func(func).with_type(ret_type); + if let Some(t) = target { + insn.target = Some(t); + } + insn.src = args; + insn.arg_types = arg_types; + insn + } + + /// Create a symbol address instruction (get address of a symbol like string literals) + pub fn sym_addr(target: PseudoId, sym: PseudoId, typ: Type) -> Self { + Self::new(Opcode::SymAddr) + .with_target(target) + .with_src(sym) + .with_type(typ) + } + + /// Create a phi node + pub fn phi(target: PseudoId, typ: Type) -> Self { + Self::new(Opcode::Phi).with_target(target).with_type(typ) + } + + /// Create a select (ternary) instruction + pub fn select( + target: PseudoId, + cond: PseudoId, + if_true: PseudoId, + if_false: PseudoId, + typ: Type, + ) -> Self { + Self::new(Opcode::Select) + .with_target(target) + .with_src3(cond, if_true, if_false) + .with_type(typ) + } +} + +impl fmt::Display for Instruction { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Format: target = op src1, src2 + if let Some(target) = &self.target { + write!(f, "{} = ", target)?; + } + + write!(f, "{}", self.op.name())?; + + // Size suffix + if self.size > 0 { + write!(f, ".{}", self.size)?; + } + + // Operands depend on opcode + match self.op { + Opcode::Br => { + if let Some(bb) = &self.bb_true { + write!(f, " {}", bb)?; + } + } + Opcode::Cbr => { + if let Some(cond) = self.src.first() { + write!(f, " {}", cond)?; + } + if let Some(bb) = &self.bb_true { + write!(f, ", {}", bb)?; + } + if let Some(bb) = &self.bb_false { + write!(f, ", {}", bb)?; + } + } + Opcode::Phi => { + for (i, (bb, pseudo)) in self.phi_list.iter().enumerate() { + if i > 0 { + write!(f, ",")?; + } + write!(f, " {} ({})", pseudo, bb)?; + } + } + Opcode::Call => { + if let Some(func) = &self.func_name { + write!(f, " {}", func)?; + } + write!(f, "(")?; + for (i, arg) in self.src.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", arg)?; + } + write!(f, ")")?; + } + Opcode::Load | Opcode::Store => { + for (i, src) in self.src.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } else { + write!(f, " ")?; + } + write!(f, "{}", src)?; + } + if self.offset != 0 { + write!(f, " + {}", self.offset)?; + } + } + _ => { + for (i, src) in self.src.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } else { + write!(f, " ")?; + } + write!(f, "{}", src)?; + } + } + } + + Ok(()) + } +} + +// ============================================================================ +// BasicBlock +// ============================================================================ + +/// A basic block - a sequence of instructions ending with a terminator +#[derive(Debug, Clone)] +pub struct BasicBlock { + /// Unique ID + pub id: BasicBlockId, + /// Instructions in this block + pub insns: Vec, + /// Predecessor blocks (CFG) + pub parents: Vec, + /// Successor blocks (CFG) + pub children: Vec, + /// Optional label name + pub label: Option, + + // ======================================================================== + // Dominator tree fields (computed by dominate.rs) + // ======================================================================== + /// Immediate dominator (idom) - the closest dominator in the dominator tree + pub idom: Option, + /// Depth in dominator tree (entry is level 0) + pub dom_level: u32, + /// Blocks immediately dominated by this one (dominator tree children) + pub dom_children: Vec, + /// Dominance frontier - blocks where this block's dominance ends + pub dom_frontier: Vec, + + // ======================================================================== + // SSA construction fields (used during SSA conversion) + // ======================================================================== + /// Phi nodes at the beginning of this block (variable name -> phi instruction) + /// Used during SSA construction to track inserted phi nodes + pub phi_map: HashMap, +} + +impl Default for BasicBlock { + fn default() -> Self { + Self { + id: BasicBlockId(0), + insns: Vec::new(), + parents: Vec::new(), + children: Vec::new(), + label: None, + idom: None, + dom_level: 0, + dom_children: Vec::new(), + dom_frontier: Vec::new(), + phi_map: HashMap::new(), + } + } +} + +impl BasicBlock { + /// Create a new basic block + pub fn new(id: BasicBlockId) -> Self { + Self { + id, + ..Default::default() + } + } + + /// Add an instruction to this block + pub fn add_insn(&mut self, insn: Instruction) { + self.insns.push(insn); + } + + /// Insert an instruction before the terminator + pub fn insert_before_terminator(&mut self, insn: Instruction) { + if self.is_terminated() { + let pos = self.insns.len() - 1; + self.insns.insert(pos, insn); + } else { + self.insns.push(insn); + } + } + + /// Check if the block is terminated + pub fn is_terminated(&self) -> bool { + self.insns + .last() + .map(|i| i.op.is_terminator()) + .unwrap_or(false) + } + + /// Add a predecessor + pub fn add_parent(&mut self, parent: BasicBlockId) { + if !self.parents.contains(&parent) { + self.parents.push(parent); + } + } + + /// Add a successor + pub fn add_child(&mut self, child: BasicBlockId) { + if !self.children.contains(&child) { + self.children.push(child); + } + } +} + +impl fmt::Display for BasicBlock { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Label + if let Some(label) = &self.label { + writeln!(f, "{}:", label)?; + } else { + writeln!(f, "{}:", self.id)?; + } + + // Instructions + for insn in &self.insns { + writeln!(f, " {}", insn)?; + } + + Ok(()) + } +} + +// ============================================================================ +// Function (Entrypoint) +// ============================================================================ + +/// Information about a local variable for SSA conversion +#[derive(Debug, Clone)] +pub struct LocalVar { + /// Symbol pseudo for this variable (address) + pub sym: PseudoId, + /// Type of the variable + pub typ: Type, + /// Is this variable volatile? + pub is_volatile: bool, + /// Block where this variable was declared (for scope-aware phi placement) + /// Phi nodes for this variable should only be placed at blocks dominated by this block. + pub decl_block: Option, +} + +/// A function in IR form +#[derive(Debug, Clone)] +pub struct Function { + /// Function name + pub name: String, + /// Return type + pub return_type: Type, + /// Parameter names and types + pub params: Vec<(String, Type)>, + /// All basic blocks + pub blocks: Vec, + /// Entry block ID + pub entry: BasicBlockId, + /// All pseudos indexed by PseudoId + pub pseudos: Vec, + /// Local variables (name -> info), used for SSA conversion + pub locals: HashMap, + /// Maximum dominator tree depth (computed by dominate.rs) + pub max_dom_level: u32, + /// Is this function static (internal linkage)? + pub is_static: bool, +} + +impl Default for Function { + fn default() -> Self { + Self { + name: String::new(), + return_type: Type::default(), + params: Vec::new(), + blocks: Vec::new(), + entry: BasicBlockId(0), + pseudos: Vec::new(), + locals: HashMap::new(), + max_dom_level: 0, + is_static: false, + } + } +} + +impl Function { + /// Create a new function + pub fn new(name: impl Into, return_type: Type) -> Self { + Self { + name: name.into(), + return_type, + ..Default::default() + } + } + + /// Add a parameter + pub fn add_param(&mut self, name: impl Into, typ: Type) { + self.params.push((name.into(), typ)); + } + + /// Add a basic block + pub fn add_block(&mut self, block: BasicBlock) { + self.blocks.push(block); + } + + /// Get a block by ID + pub fn get_block(&self, id: BasicBlockId) -> Option<&BasicBlock> { + self.blocks.iter().find(|b| b.id == id) + } + + /// Get a mutable block by ID + pub fn get_block_mut(&mut self, id: BasicBlockId) -> Option<&mut BasicBlock> { + self.blocks.iter_mut().find(|b| b.id == id) + } + + /// Add a pseudo for tracking + pub fn add_pseudo(&mut self, pseudo: Pseudo) { + self.pseudos.push(pseudo); + } + + /// Add a local variable + pub fn add_local( + &mut self, + name: impl Into, + sym: PseudoId, + typ: Type, + is_volatile: bool, + decl_block: Option, + ) { + self.locals.insert( + name.into(), + LocalVar { + sym, + typ, + is_volatile, + decl_block, + }, + ); + } + + /// Get a local variable + pub fn get_local(&self, name: &str) -> Option<&LocalVar> { + self.locals.get(name) + } + + /// Check if block a dominates block b + pub fn dominates(&self, a: BasicBlockId, b: BasicBlockId) -> bool { + if a == b { + return true; + } + + let mut current = b; + while let Some(bb) = self.get_block(current) { + if let Some(idom) = bb.idom { + if idom == a { + return true; + } + current = idom; + } else { + break; + } + } + false + } +} + +impl fmt::Display for Function { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Function header + write!(f, "define {} {}(", self.return_type, self.name)?; + for (i, (name, typ)) in self.params.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{} %{}", typ, name)?; + } + writeln!(f, ") {{")?; + + // Basic blocks + for block in &self.blocks { + write!(f, "{}", block)?; + } + + writeln!(f, "}}") + } +} + +// ============================================================================ +// Global Variable Initializer +// ============================================================================ + +/// Initializer for global variables +#[derive(Debug, Clone, PartialEq, Default)] +pub enum Initializer { + /// No initializer (zero-initialized) + #[default] + None, + /// Integer initializer + Int(i64), + /// Float/double initializer + Float(f64), +} + +impl fmt::Display for Initializer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Initializer::None => write!(f, "0"), + Initializer::Int(v) => write!(f, "{}", v), + Initializer::Float(v) => write!(f, "{}", v), + } + } +} + +// ============================================================================ +// Module (Translation Unit) +// ============================================================================ + +/// A module containing multiple functions +#[derive(Debug, Clone)] +pub struct Module { + /// Functions + pub functions: Vec, + /// Global variables (name, type, initializer) + pub globals: Vec<(String, Type, Initializer)>, + /// String literals (label, content) + pub strings: Vec<(String, String)>, + /// Generate debug info + pub debug: bool, + /// Source file paths (stream id -> path) for .file directives + pub source_files: Vec, +} + +impl Module { + /// Create a new module + pub fn new() -> Self { + Self { + functions: Vec::new(), + globals: Vec::new(), + strings: Vec::new(), + debug: false, + source_files: Vec::new(), + } + } + + /// Add a function + pub fn add_function(&mut self, func: Function) { + self.functions.push(func); + } + + /// Add a global variable + pub fn add_global(&mut self, name: impl Into, typ: Type, init: Initializer) { + self.globals.push((name.into(), typ, init)); + } + + /// Add a string literal and return its label + pub fn add_string(&mut self, content: String) -> String { + let label = format!(".LC{}", self.strings.len()); + self.strings.push((label.clone(), content)); + label + } +} + +impl Default for Module { + fn default() -> Self { + Self::new() + } +} + +impl fmt::Display for Module { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Globals + for (name, typ, init) in &self.globals { + match init { + Initializer::None => writeln!(f, "@{}: {}", name, typ)?, + _ => writeln!(f, "@{}: {} = {}", name, typ, init)?, + } + } + + if !self.globals.is_empty() { + writeln!(f)?; + } + + // Functions + for func in &self.functions { + writeln!(f, "{}", func)?; + } + + Ok(()) + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::TypeKind; + + #[test] + fn test_opcode_is_terminator() { + assert!(Opcode::Ret.is_terminator()); + assert!(Opcode::Br.is_terminator()); + assert!(Opcode::Cbr.is_terminator()); + assert!(!Opcode::Add.is_terminator()); + assert!(!Opcode::Load.is_terminator()); + } + + #[test] + fn test_pseudo_display() { + let reg = Pseudo::reg(PseudoId(1), 1); + assert_eq!(format!("{}", reg), "%r1"); + + let reg_named = Pseudo::reg(PseudoId(2), 2).with_name("x"); + assert_eq!(format!("{}", reg_named), "%r2(x)"); + + let val = Pseudo::val(PseudoId(3), 42); + assert_eq!(format!("{}", val), "$42"); + + let big_val = Pseudo::val(PseudoId(4), 0x1000); + assert_eq!(format!("{}", big_val), "$0x1000"); + + let arg = Pseudo::arg(PseudoId(5), 0); + assert_eq!(format!("{}", arg), "%arg0"); + } + + #[test] + fn test_instruction_binop() { + let typ = Type::basic(TypeKind::Int); + let insn = Instruction::binop(Opcode::Add, PseudoId(3), PseudoId(1), PseudoId(2), typ); + assert_eq!(insn.op, Opcode::Add); + assert_eq!(insn.target, Some(PseudoId(3))); + assert_eq!(insn.src.len(), 2); + } + + #[test] + fn test_instruction_display() { + let typ = Type::basic(TypeKind::Int); + let insn = Instruction::binop(Opcode::Add, PseudoId(3), PseudoId(1), PseudoId(2), typ); + let s = format!("{}", insn); + assert!(s.contains("add")); + assert!(s.contains("%3")); + assert!(s.contains("%1")); + assert!(s.contains("%2")); + } + + #[test] + fn test_basic_block() { + let mut bb = BasicBlock::new(BasicBlockId(0)); + assert!(!bb.is_terminated()); + + bb.add_insn(Instruction::new(Opcode::Nop)); + assert!(!bb.is_terminated()); + + bb.add_insn(Instruction::ret(None)); + assert!(bb.is_terminated()); + } + + #[test] + fn test_function_display() { + let mut func = Function::new("main", Type::basic(TypeKind::Int)); + func.add_param("argc", Type::basic(TypeKind::Int)); + + let mut entry = BasicBlock::new(BasicBlockId(0)); + entry.add_insn(Instruction::ret(Some(PseudoId(0)))); + func.add_block(entry); + + let s = format!("{}", func); + assert!(s.contains("define")); + assert!(s.contains("main")); + assert!(s.contains("argc")); + assert!(s.contains("ret")); + } + + #[test] + fn test_branch_instruction() { + let br = Instruction::br(BasicBlockId(1)); + assert_eq!(br.op, Opcode::Br); + assert_eq!(br.bb_true, Some(BasicBlockId(1))); + + let cbr = Instruction::cbr(PseudoId(0), BasicBlockId(1), BasicBlockId(2)); + assert_eq!(cbr.op, Opcode::Cbr); + assert_eq!(cbr.src.len(), 1); + assert_eq!(cbr.bb_true, Some(BasicBlockId(1))); + assert_eq!(cbr.bb_false, Some(BasicBlockId(2))); + } + + #[test] + fn test_call_instruction() { + let typ = Type::basic(TypeKind::Int); + let arg_types = vec![ + Type::pointer(Type::basic(TypeKind::Char)), + Type::basic(TypeKind::Int), + ]; + let call = Instruction::call( + Some(PseudoId(1)), + "printf", + vec![PseudoId(2), PseudoId(3)], + arg_types.clone(), + typ, + ); + assert_eq!(call.op, Opcode::Call); + assert_eq!(call.func_name, Some("printf".to_string())); + assert_eq!(call.src.len(), 2); + assert_eq!(call.arg_types.len(), 2); + } + + #[test] + fn test_load_store() { + let typ = Type::basic(TypeKind::Int); + + let load = Instruction::load(PseudoId(1), PseudoId(2), 8, typ.clone()); + assert_eq!(load.op, Opcode::Load); + assert_eq!(load.offset, 8); + + let store = Instruction::store(PseudoId(1), PseudoId(2), 0, typ); + assert_eq!(store.op, Opcode::Store); + assert_eq!(store.src.len(), 2); + } + + #[test] + fn test_module() { + let mut module = Module::new(); + + module.add_global("counter", Type::basic(TypeKind::Int), Initializer::Int(0)); + + let func = Function::new("main", Type::basic(TypeKind::Int)); + module.add_function(func); + + assert_eq!(module.globals.len(), 1); + assert_eq!(module.functions.len(), 1); + } +} diff --git a/cc/linearize.rs b/cc/linearize.rs new file mode 100644 index 000000000..9628d59ab --- /dev/null +++ b/cc/linearize.rs @@ -0,0 +1,4174 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Linearizer for pcc C99 compiler +// Converts AST to SSA-style IR following sparse's design +// + +use crate::diag::Position; +use crate::ir::{ + BasicBlock, BasicBlockId, Function, Initializer, Instruction, Module, Opcode, Pseudo, PseudoId, +}; +use crate::parse::ast::{ + AssignOp, BinaryOp, BlockItem, Declaration, Designator, Expr, ExprKind, ExternalDecl, ForInit, + FunctionDef, InitElement, Stmt, TranslationUnit, UnaryOp, +}; +use crate::ssa::ssa_convert; +use crate::symbol::SymbolTable; +use crate::types::{MemberInfo, Type, TypeKind, TypeModifiers}; +use std::collections::HashMap; + +/// Maximum size (in bits) for aggregate types (struct/union) to be passed or +/// returned by value in registers. Aggregates larger than this require +/// indirect passing (pointer) or sret (struct return pointer). +pub const MAX_REGISTER_AGGREGATE_BITS: u32 = 64; + +/// Information about a local variable +#[derive(Clone)] +struct LocalVarInfo { + /// Symbol pseudo (address of the variable) + sym: PseudoId, + /// Type of the variable + typ: Type, +} + +/// Information about a static local variable +#[derive(Clone)] +struct StaticLocalInfo { + /// Global symbol name (unique across translation unit) + global_name: String, + /// Type of the variable + typ: Type, +} + +// ============================================================================ +// Linearizer +// ============================================================================ + +/// Linearizer context for converting AST to IR +pub struct Linearizer<'a> { + /// The module being built + module: Module, + /// Current function being linearized + current_func: Option, + /// Current basic block being built + current_bb: Option, + /// Next pseudo ID + next_pseudo: u32, + /// Next basic block ID + next_bb: u32, + /// Parameter -> pseudo mapping (parameters are already SSA values) + var_map: HashMap, + /// Local variables (use Load/Store, converted to SSA later) + locals: HashMap, + /// Label -> basic block mapping + label_map: HashMap, + /// Break target stack (for loops) + break_targets: Vec, + /// Continue target stack (for loops) + continue_targets: Vec, + /// Whether to run SSA conversion after linearization + run_ssa: bool, + /// Symbol table for looking up enum constants, etc. + symbols: &'a SymbolTable, + /// Hidden struct return pointer (for functions returning large structs) + struct_return_ptr: Option, + /// Size of struct being returned (for functions returning large structs) + struct_return_size: u32, + /// Current function name (for generating unique static local names) + current_func_name: String, + /// Counter for generating unique static local names + static_local_counter: u32, + /// Static local variables (local name -> static local info) + /// This is persistent across function calls (not cleared per function) + static_locals: HashMap, + /// Current source position for debug info + current_pos: Option, +} + +impl<'a> Linearizer<'a> { + /// Create a new linearizer + pub fn new(symbols: &'a SymbolTable) -> Self { + Self { + module: Module::new(), + current_func: None, + current_bb: None, + next_pseudo: 0, + next_bb: 0, + var_map: HashMap::new(), + locals: HashMap::new(), + label_map: HashMap::new(), + break_targets: Vec::new(), + continue_targets: Vec::new(), + run_ssa: true, // Enable SSA conversion by default + symbols, + struct_return_ptr: None, + struct_return_size: 0, + current_func_name: String::new(), + static_local_counter: 0, + static_locals: HashMap::new(), + current_pos: None, + } + } + + /// Create a linearizer with SSA conversion disabled (for testing) + #[cfg(test)] + pub fn new_no_ssa(symbols: &'a SymbolTable) -> Self { + Self { + run_ssa: false, + ..Self::new(symbols) + } + } + + /// Linearize a translation unit + pub fn linearize(&mut self, tu: &TranslationUnit) -> Module { + for item in &tu.items { + match item { + ExternalDecl::FunctionDef(func) => { + self.linearize_function(func); + } + ExternalDecl::Declaration(decl) => { + self.linearize_global_decl(decl); + } + } + } + std::mem::take(&mut self.module) + } + + /// Allocate a new pseudo ID + fn alloc_pseudo(&mut self) -> PseudoId { + let id = PseudoId(self.next_pseudo); + self.next_pseudo += 1; + id + } + + /// Allocate a new basic block ID + fn alloc_bb(&mut self) -> BasicBlockId { + let id = BasicBlockId(self.next_bb); + self.next_bb += 1; + id + } + + /// Get or create a basic block + fn get_or_create_bb(&mut self, id: BasicBlockId) -> &mut BasicBlock { + let func = self.current_func.as_mut().unwrap(); + if func.get_block(id).is_none() { + func.add_block(BasicBlock::new(id)); + } + func.get_block_mut(id).unwrap() + } + + /// Add an instruction to the current basic block + fn emit(&mut self, insn: Instruction) { + if let Some(bb_id) = self.current_bb { + // Attach current source position for debug info + let insn = if let Some(pos) = self.current_pos { + insn.with_pos(pos) + } else { + insn + }; + let bb = self.get_or_create_bb(bb_id); + bb.add_insn(insn); + } + } + + /// Apply C99 integer promotions (6.3.1.1) + /// Types smaller than int are promoted to int (or unsigned int if int can't hold all values) + fn integer_promote(typ: &Type) -> Type { + // Integer promotions apply to _Bool, char, short (and their unsigned variants) + // They are promoted to int if int can represent all values, otherwise unsigned int + match typ.kind { + TypeKind::Bool | TypeKind::Char | TypeKind::Short => { + // int (32-bit signed) can represent all values of: + // - _Bool (0-1) + // - char/signed char (-128 to 127) + // - unsigned char (0 to 255) + // - short/signed short (-32768 to 32767) + // - unsigned short (0 to 65535) + // So always promote to int + Type::basic(TypeKind::Int) + } + _ => typ.clone(), + } + } + + /// Compute the common type for usual arithmetic conversions (C99 6.3.1.8) + /// Returns the wider type that both operands should be converted to + fn common_type(left: &Type, right: &Type) -> Type { + // C99 6.3.1.8 usual arithmetic conversions: + // 1. If either is long double, convert to long double + // 2. Else if either is double, convert to double + // 3. Else if either is float, convert to float + // 4. Otherwise apply integer promotions, then: + // a. If both have same type after promotion, done + // b. If both signed or both unsigned, convert narrower to wider + // c. If unsigned has rank >= signed, convert signed to unsigned + // d. If signed can represent all unsigned values, convert to signed + // e. Otherwise convert both to unsigned version of signed type + + // Check for floating point types + let left_float = left.is_float(); + let right_float = right.is_float(); + + if left_float || right_float { + // At least one operand is floating point + // Use the wider floating point type + if left.kind == TypeKind::LongDouble || right.kind == TypeKind::LongDouble { + return Type::basic(TypeKind::LongDouble); + } + if left.kind == TypeKind::Double || right.kind == TypeKind::Double { + return Type::basic(TypeKind::Double); + } + // Both are float or one is float and one is integer + return Type::basic(TypeKind::Float); + } + + // Apply integer promotions first (C99 6.3.1.1) + let left_promoted = Self::integer_promote(left); + let right_promoted = Self::integer_promote(right); + + let left_size = left_promoted.size_bits(); + let right_size = right_promoted.size_bits(); + let left_unsigned = left_promoted.is_unsigned(); + let right_unsigned = right_promoted.is_unsigned(); + + // If both have same type after promotion, use that type + if left_promoted.kind == right_promoted.kind + && left_unsigned == right_unsigned + && left_size == right_size + { + return left_promoted; + } + + // If both signed or both unsigned, convert narrower to wider + if left_unsigned == right_unsigned { + return if left_size >= right_size { + left_promoted + } else { + right_promoted + }; + } + + // Mixed signedness case + let (signed_typ, unsigned_typ) = if left_unsigned { + (&right_promoted, &left_promoted) + } else { + (&left_promoted, &right_promoted) + }; + + let signed_size = signed_typ.size_bits(); + let unsigned_size = unsigned_typ.size_bits(); + + // If unsigned has rank >= signed, convert to unsigned + if unsigned_size >= signed_size { + return unsigned_typ.clone(); + } + + // If signed type can represent all values of unsigned type, use signed + // (This is true when signed_size > unsigned_size on our platforms) + if signed_size > unsigned_size { + return signed_typ.clone(); + } + + // Otherwise convert both to unsigned version of signed type + // (This case shouldn't happen on LP64 since we already handled size comparisons) + Type::with_modifiers(signed_typ.kind, TypeModifiers::UNSIGNED) + } + + /// Emit a type conversion if needed + /// Returns the (possibly converted) pseudo ID + fn emit_convert(&mut self, val: PseudoId, from_typ: &Type, to_typ: &Type) -> PseudoId { + let from_size = from_typ.size_bits(); + let to_size = to_typ.size_bits(); + let from_float = from_typ.is_float(); + let to_float = to_typ.is_float(); + + // Same type and size - no conversion needed + if from_typ.kind == to_typ.kind && from_size == to_size { + return val; + } + + // Array to pointer conversion (decay) - no actual conversion needed + // The array value is already the address of the first element (64-bit) + if from_typ.kind == TypeKind::Array && to_typ.kind == TypeKind::Pointer { + return val; + } + + // Pointer to pointer conversion - no actual conversion needed + // All pointers are the same size (64-bit) + if from_typ.kind == TypeKind::Pointer && to_typ.kind == TypeKind::Pointer { + return val; + } + + // Special handling for _Bool conversion (C99 6.3.1.2) + // When any scalar value is converted to _Bool: + // - Result is 0 if the value compares equal to 0 + // - Result is 1 otherwise + if to_typ.kind == TypeKind::Bool && from_typ.kind != TypeKind::Bool { + let result = self.alloc_pseudo(); + let pseudo = Pseudo::reg(result, result.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + + // Create a zero constant for comparison + let zero = self.emit_const(0, from_typ.clone()); + + // Compare val != 0 + let opcode = if from_float { + Opcode::FCmpONe + } else { + Opcode::SetNe + }; + + let mut insn = Instruction::binop(opcode, result, val, zero, to_typ.clone()); + insn.size = to_size; + insn.src_size = from_size; + self.emit(insn); + + return result; + } + + // Handle floating point conversions + if from_float || to_float { + let result = self.alloc_pseudo(); + let pseudo = Pseudo::reg(result, result.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + + let opcode = if from_float && to_float { + // Float to float (e.g., float to double or double to float) + Opcode::FCvtF + } else if from_float { + // Float to integer + if to_typ.is_unsigned() { + Opcode::FCvtU + } else { + Opcode::FCvtS + } + } else { + // Integer to float + if from_typ.is_unsigned() { + Opcode::UCvtF + } else { + Opcode::SCvtF + } + }; + + let mut insn = Instruction::unop(opcode, result, val, to_typ.clone()); + insn.size = to_size; + insn.src_size = from_size; + self.emit(insn); + return result; + } + + // Integer to integer conversion + if from_size == to_size { + // Same size integers (e.g., signed to unsigned) - no actual conversion needed + return val; + } + + let result = self.alloc_pseudo(); + let pseudo = Pseudo::reg(result, result.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + + if to_size > from_size { + // Extending - use sign or zero extension based on source type + let opcode = if from_typ.is_unsigned() { + Opcode::Zext + } else { + Opcode::Sext + }; + let mut insn = Instruction::unop(opcode, result, val, to_typ.clone()); + insn.size = to_size; + insn.src_size = from_size; + self.emit(insn); + } else { + // Truncating + let mut insn = Instruction::unop(Opcode::Trunc, result, val, to_typ.clone()); + insn.size = to_size; + insn.src_size = from_size; + self.emit(insn); + } + + result + } + + /// Check if current basic block is terminated + fn is_terminated(&self) -> bool { + if let Some(bb_id) = self.current_bb { + if let Some(func) = &self.current_func { + if let Some(bb) = func.get_block(bb_id) { + return bb.is_terminated(); + } + } + } + false + } + + /// Link two basic blocks (parent -> child) + fn link_bb(&mut self, from: BasicBlockId, to: BasicBlockId) { + let func = self.current_func.as_mut().unwrap(); + + // Add child to parent + if let Some(from_bb) = func.get_block_mut(from) { + from_bb.add_child(to); + } + + // Add parent to child - need to get it separately + // First ensure it exists + if func.get_block(to).is_none() { + func.add_block(BasicBlock::new(to)); + } + if let Some(to_bb) = func.get_block_mut(to) { + to_bb.add_parent(from); + } + } + + /// Switch to a new basic block + fn switch_bb(&mut self, id: BasicBlockId) { + self.current_bb = Some(id); + self.get_or_create_bb(id); + } + + // ======================================================================== + // Global declarations + // ======================================================================== + + fn linearize_global_decl(&mut self, decl: &Declaration) { + for declarator in &decl.declarators { + // Skip function declarations (they're just forward declarations for external functions) + if declarator.typ.kind == TypeKind::Function { + continue; + } + + // Skip extern declarations - they don't define storage + if declarator.typ.modifiers.contains(TypeModifiers::EXTERN) { + continue; + } + + let init = declarator + .init + .as_ref() + .map_or(Initializer::None, |e| match &e.kind { + ExprKind::IntLit(v) => Initializer::Int(*v), + ExprKind::FloatLit(v) => Initializer::Float(*v), + _ => Initializer::None, + }); + + self.module + .add_global(&declarator.name, declarator.typ.clone(), init); + } + } + + // ======================================================================== + // Function linearization + // ======================================================================== + + fn linearize_function(&mut self, func: &FunctionDef) { + // Set current position for debug info (function definition location) + self.current_pos = Some(func.pos); + + // Reset per-function state + self.next_pseudo = 0; + self.next_bb = 0; + self.var_map.clear(); + self.locals.clear(); + self.label_map.clear(); + self.break_targets.clear(); + self.continue_targets.clear(); + self.struct_return_ptr = None; + self.struct_return_size = 0; + self.current_func_name = func.name.clone(); + // Note: static_locals is NOT cleared - it persists across functions + + // Create function + let is_static = func.return_type.modifiers.contains(TypeModifiers::STATIC); + let mut ir_func = Function::new(&func.name, func.return_type.clone()); + ir_func.is_static = is_static; + + // Check if function returns a large struct + // Large structs are returned via a hidden first parameter (sret) + // that points to caller-allocated space + let returns_large_struct = (func.return_type.kind == TypeKind::Struct + || func.return_type.kind == TypeKind::Union) + && func.return_type.size_bits() > MAX_REGISTER_AGGREGATE_BITS; + + // Argument index offset: if returning large struct, first arg is hidden return pointer + let arg_offset: u32 = if returns_large_struct { 1 } else { 0 }; + + // Add hidden return pointer parameter if needed + if returns_large_struct { + let sret_id = self.alloc_pseudo(); + let sret_pseudo = Pseudo::arg(sret_id, 0).with_name("__sret"); + ir_func.add_pseudo(sret_pseudo); + self.struct_return_ptr = Some(sret_id); + self.struct_return_size = func.return_type.size_bits(); + } + + // Add parameters + // For struct/union parameters, we need to copy them to local storage + // so member access works properly + let mut struct_params: Vec<(String, Type, PseudoId)> = Vec::new(); + + for (i, param) in func.params.iter().enumerate() { + let name = param.name.clone().unwrap_or_else(|| format!("arg{}", i)); + ir_func.add_param(&name, param.typ.clone()); + + // Create argument pseudo (offset by 1 if there's a hidden return pointer) + let pseudo_id = self.alloc_pseudo(); + let pseudo = Pseudo::arg(pseudo_id, i as u32 + arg_offset).with_name(&name); + ir_func.add_pseudo(pseudo); + + // For struct/union types, we'll copy to a local later + if param.typ.kind == TypeKind::Struct || param.typ.kind == TypeKind::Union { + struct_params.push((name, param.typ.clone(), pseudo_id)); + } else { + self.var_map.insert(name, pseudo_id); + } + } + + self.current_func = Some(ir_func); + + // Create entry block + let entry_bb = self.alloc_bb(); + self.switch_bb(entry_bb); + + // Entry instruction + self.emit(Instruction::new(Opcode::Entry)); + + // Copy struct parameters to local storage so member access works + for (name, typ, arg_pseudo) in struct_params { + // Create a symbol pseudo for this local variable (its address) + let local_sym = self.alloc_pseudo(); + let sym = Pseudo::sym(local_sym, name.clone()); + if let Some(func) = &mut self.current_func { + func.add_pseudo(sym); + let is_volatile = typ.modifiers.contains(TypeModifiers::VOLATILE); + func.add_local(&name, local_sym, typ.clone(), is_volatile, None); + } + + // For large structs, arg_pseudo is a pointer to the struct + // We need to copy the data from that pointer to local storage + if typ.size_bits() > MAX_REGISTER_AGGREGATE_BITS { + // arg_pseudo is a pointer - copy each 8-byte chunk + let struct_size = typ.size_bits() / 8; + let mut offset = 0i64; + while offset < struct_size as i64 { + // Load 8 bytes from arg_pseudo + offset + let temp = self.alloc_pseudo(); + let temp_pseudo = Pseudo::reg(temp, temp.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(temp_pseudo); + } + self.emit(Instruction::load( + temp, + arg_pseudo, + offset, + Type::basic(TypeKind::Long), + )); + // Store to local_sym + offset + self.emit(Instruction::store( + temp, + local_sym, + offset, + Type::basic(TypeKind::Long), + )); + offset += 8; + } + } else { + // Small struct: arg_pseudo contains the value directly + self.emit(Instruction::store(arg_pseudo, local_sym, 0, typ.clone())); + } + + // Register as a local variable + self.locals.insert( + name, + LocalVarInfo { + sym: local_sym, + typ, + }, + ); + } + + // Linearize body + self.linearize_stmt(&func.body); + + // Ensure function ends with a return + if !self.is_terminated() { + if func.return_type.kind == TypeKind::Void { + self.emit(Instruction::ret(None)); + } else { + // Return 0 as default + let zero = self.emit_const(0, Type::basic(TypeKind::Int)); + self.emit(Instruction::ret(Some(zero))); + } + } + + // Run SSA conversion if enabled + if self.run_ssa { + if let Some(ref mut ir_func) = self.current_func { + ssa_convert(ir_func); + } + } + + // Add function to module + if let Some(ir_func) = self.current_func.take() { + self.module.add_function(ir_func); + } + } + + // ======================================================================== + // Statement linearization + // ======================================================================== + + fn linearize_stmt(&mut self, stmt: &Stmt) { + match stmt { + Stmt::Empty => {} + + Stmt::Expr(expr) => { + self.linearize_expr(expr); + } + + Stmt::Block(items) => { + for item in items { + match item { + BlockItem::Declaration(decl) => self.linearize_local_decl(decl), + BlockItem::Statement(s) => self.linearize_stmt(s), + } + } + } + + Stmt::If { + cond, + then_stmt, + else_stmt, + } => { + self.linearize_if(cond, then_stmt, else_stmt.as_deref()); + } + + Stmt::While { cond, body } => { + self.linearize_while(cond, body); + } + + Stmt::DoWhile { body, cond } => { + self.linearize_do_while(body, cond); + } + + Stmt::For { + init, + cond, + post, + body, + } => { + self.linearize_for(init.as_ref(), cond.as_ref(), post.as_ref(), body); + } + + Stmt::Return(expr) => { + if let Some(e) = expr { + let typ = self.expr_type(e); + + // Check if this is a large struct return with hidden pointer + if let Some(sret_ptr) = self.struct_return_ptr { + let struct_size = self.struct_return_size; + // Get address of the struct being returned + let src_addr = self.linearize_lvalue(e); + + // Copy the struct to the hidden return pointer + // struct_size is in bits, so /8 gives bytes + let struct_bytes = struct_size as i64 / 8; + let mut byte_offset = 0i64; + while byte_offset < struct_bytes { + // Load 8 bytes from source + let temp = self.alloc_pseudo(); + let temp_pseudo = Pseudo::reg(temp, temp.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(temp_pseudo); + } + self.emit(Instruction::load( + temp, + src_addr, + byte_offset, + Type::basic(TypeKind::Long), + )); + // Store to destination + self.emit(Instruction::store( + temp, + sret_ptr, + byte_offset, + Type::basic(TypeKind::Long), + )); + byte_offset += 8; + } + + // Return the hidden pointer (ABI requirement) + self.emit(Instruction::ret_typed(Some(sret_ptr), Type::pointer(typ))); + } else { + let val = self.linearize_expr(e); + self.emit(Instruction::ret_typed(Some(val), typ)); + } + } else { + self.emit(Instruction::ret(None)); + } + } + + Stmt::Break => { + if let Some(&target) = self.break_targets.last() { + if let Some(current) = self.current_bb { + self.emit(Instruction::br(target)); + self.link_bb(current, target); + } + } + } + + Stmt::Continue => { + if let Some(&target) = self.continue_targets.last() { + if let Some(current) = self.current_bb { + self.emit(Instruction::br(target)); + self.link_bb(current, target); + } + } + } + + Stmt::Goto(label) => { + let target = self.get_or_create_label(label); + self.emit(Instruction::br(target)); + } + + Stmt::Label { name, stmt } => { + let label_bb = self.get_or_create_label(name); + + // If current block is not terminated, branch to label + if !self.is_terminated() { + if let Some(current) = self.current_bb { + self.emit(Instruction::br(label_bb)); + self.link_bb(current, label_bb); + } + } + + self.switch_bb(label_bb); + self.linearize_stmt(stmt); + } + + Stmt::Switch { expr, body } => { + self.linearize_switch(expr, body); + } + + Stmt::Case(_) | Stmt::Default => { + // Case/Default labels are handled by linearize_switch + // If we encounter them outside a switch, ignore them + } + } + } + + fn linearize_local_decl(&mut self, decl: &Declaration) { + for declarator in &decl.declarators { + let typ = declarator.typ.clone(); + + // Check if this is a static local variable + if typ.modifiers.contains(TypeModifiers::STATIC) { + // Static local: create a global with unique name + self.linearize_static_local(declarator); + continue; + } + + // Create a symbol pseudo for this local variable (its address) + // Use unique name (name#id) to distinguish shadowed variables for SSA + let sym_id = self.alloc_pseudo(); + let unique_name = format!("{}#{}", declarator.name, sym_id.0); + let sym = Pseudo::sym(sym_id, unique_name.clone()); + if let Some(func) = &mut self.current_func { + func.add_pseudo(sym); + // Register with function's local variable tracking for SSA + // Pass the current basic block as the declaration block for scope-aware phi placement + let is_volatile = typ.modifiers.contains(TypeModifiers::VOLATILE); + func.add_local( + &unique_name, + sym_id, + typ.clone(), + is_volatile, + self.current_bb, + ); + } + + // Track in linearizer's locals map + self.locals.insert( + declarator.name.clone(), + LocalVarInfo { + sym: sym_id, + typ: typ.clone(), + }, + ); + + // If there's an initializer, emit Store(s) + if let Some(init) = &declarator.init { + if let ExprKind::InitList { elements } = &init.kind { + // Handle initializer list for arrays and structs + self.linearize_init_list(sym_id, &typ, elements); + } else { + // Simple scalar initializer + let val = self.linearize_expr(init); + // Convert the value to the target type (important for _Bool normalization) + let init_type = self.expr_type(init); + let converted = self.emit_convert(val, &init_type, &typ); + self.emit(Instruction::store(converted, sym_id, 0, typ)); + } + } + } + } + + /// Linearize a static local variable declaration + /// + /// Static locals have static storage duration but no linkage. + /// They are implemented as globals with unique names like `funcname.varname.N`. + /// Initialization happens once at program start (compile-time). + fn linearize_static_local(&mut self, declarator: &crate::parse::ast::InitDeclarator) { + // Generate unique global name: funcname.varname.counter + let global_name = format!( + "{}.{}.{}", + self.current_func_name, declarator.name, self.static_local_counter + ); + self.static_local_counter += 1; + + // Track mapping from local name to global name for this function's scope + // Use a key that includes function name to handle same-named statics in different functions + let key = format!("{}.{}", self.current_func_name, declarator.name); + self.static_locals.insert( + key, + StaticLocalInfo { + global_name: global_name.clone(), + typ: declarator.typ.clone(), + }, + ); + + // Also insert with just the local name for the current function scope + // This is used during expression linearization + self.locals.insert( + declarator.name.clone(), + LocalVarInfo { + // Use a sentinel value - we'll handle static locals specially + sym: PseudoId(u32::MAX), + typ: declarator.typ.clone(), + }, + ); + + // Determine initializer (static locals are initialized at compile time) + let init = declarator + .init + .as_ref() + .map_or(Initializer::None, |e| match &e.kind { + ExprKind::IntLit(v) => Initializer::Int(*v), + ExprKind::FloatLit(v) => Initializer::Float(*v), + ExprKind::CharLit(c) => Initializer::Int(*c as i64), + ExprKind::Unary { + op: UnaryOp::Neg, + operand, + } => { + // Handle negative literals + match &operand.kind { + ExprKind::IntLit(v) => Initializer::Int(-*v), + ExprKind::FloatLit(v) => Initializer::Float(-*v), + _ => Initializer::None, + } + } + _ => Initializer::None, + }); + + // Add as a global (type already has STATIC modifier which codegen uses) + self.module + .add_global(&global_name, declarator.typ.clone(), init); + } + + /// Linearize an initializer list for arrays or structs + fn linearize_init_list(&mut self, base_sym: PseudoId, typ: &Type, elements: &[InitElement]) { + self.linearize_init_list_at_offset(base_sym, 0, typ, elements); + } + + /// Linearize an initializer list at a given base offset + fn linearize_init_list_at_offset( + &mut self, + base_sym: PseudoId, + base_offset: i64, + typ: &Type, + elements: &[InitElement], + ) { + match typ.kind { + TypeKind::Array => { + let elem_type = typ + .base + .as_ref() + .map(|b| (**b).clone()) + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + let elem_size = elem_type.size_bits() / 8; + + for (idx, element) in elements.iter().enumerate() { + // Calculate the actual index considering designators + let actual_idx = if element.designators.is_empty() { + idx as i64 + } else { + // Use the first designator (should be Index for arrays) + match &element.designators[0] { + Designator::Index(i) => *i, + Designator::Field(_) => idx as i64, // Fall back for mismatched designator + } + }; + + let offset = base_offset + actual_idx * elem_size as i64; + + // Handle nested initializer lists or scalar values + if let ExprKind::InitList { + elements: nested_elems, + } = &element.value.kind + { + // Nested array/struct initialization - recurse with accumulated offset + self.linearize_init_list_at_offset( + base_sym, + offset, + &elem_type, + nested_elems, + ); + } else { + // Scalar value + let val = self.linearize_expr(&element.value); + let val_type = self.expr_type(&element.value); + let converted = self.emit_convert(val, &val_type, &elem_type); + self.emit(Instruction::store( + converted, + base_sym, + offset, + elem_type.clone(), + )); + } + } + } + TypeKind::Struct | TypeKind::Union => { + // Get struct fields from the type's composite data + if let Some(composite) = &typ.composite { + let members = &composite.members; + + for (idx, element) in elements.iter().enumerate() { + // Find the field (by designator or position) + let member = + if let Some(Designator::Field(name)) = element.designators.first() { + // Designated initializer: .field = value + members.iter().find(|m| &m.name == name) + } else if idx < members.len() { + // Positional initializer + Some(&members[idx]) + } else { + None // Too many initializers + }; + + let Some(member) = member else { + continue; + }; + + let offset = base_offset + member.offset as i64; + let field_type = member.typ.clone(); + + // Handle nested initializer lists or scalar values + if let ExprKind::InitList { + elements: nested_elems, + } = &element.value.kind + { + // Nested struct/array initialization - recurse with accumulated offset + self.linearize_init_list_at_offset( + base_sym, + offset, + &field_type, + nested_elems, + ); + } else { + // Scalar value + let val = self.linearize_expr(&element.value); + let val_type = self.expr_type(&element.value); + let converted = self.emit_convert(val, &val_type, &field_type); + self.emit(Instruction::store(converted, base_sym, offset, field_type)); + } + } + } + } + _ => { + // For other types, just use the first element if present + if let Some(element) = elements.first() { + let val = self.linearize_expr(&element.value); + let val_type = self.expr_type(&element.value); + let converted = self.emit_convert(val, &val_type, typ); + self.emit(Instruction::store( + converted, + base_sym, + base_offset, + typ.clone(), + )); + } + } + } + } + + fn linearize_if(&mut self, cond: &Expr, then_stmt: &Stmt, else_stmt: Option<&Stmt>) { + let cond_val = self.linearize_expr(cond); + + let then_bb = self.alloc_bb(); + let else_bb = self.alloc_bb(); + let merge_bb = self.alloc_bb(); + + // Conditional branch + if let Some(current) = self.current_bb { + if else_stmt.is_some() { + self.emit(Instruction::cbr(cond_val, then_bb, else_bb)); + } else { + self.emit(Instruction::cbr(cond_val, then_bb, merge_bb)); + } + self.link_bb(current, then_bb); + if else_stmt.is_some() { + self.link_bb(current, else_bb); + } else { + self.link_bb(current, merge_bb); + } + } + + // Then block + self.switch_bb(then_bb); + self.linearize_stmt(then_stmt); + if !self.is_terminated() { + self.emit(Instruction::br(merge_bb)); + self.link_bb(then_bb, merge_bb); + } + + // Else block + if let Some(else_s) = else_stmt { + self.switch_bb(else_bb); + self.linearize_stmt(else_s); + if !self.is_terminated() { + self.emit(Instruction::br(merge_bb)); + self.link_bb(else_bb, merge_bb); + } + } + + // Merge block + self.switch_bb(merge_bb); + } + + fn linearize_while(&mut self, cond: &Expr, body: &Stmt) { + let cond_bb = self.alloc_bb(); + let body_bb = self.alloc_bb(); + let exit_bb = self.alloc_bb(); + + // Jump to condition + if let Some(current) = self.current_bb { + if !self.is_terminated() { + self.emit(Instruction::br(cond_bb)); + self.link_bb(current, cond_bb); + } + } + + // Condition block + self.switch_bb(cond_bb); + let cond_val = self.linearize_expr(cond); + self.emit(Instruction::cbr(cond_val, body_bb, exit_bb)); + self.link_bb(cond_bb, body_bb); + self.link_bb(cond_bb, exit_bb); + + // Body block + self.break_targets.push(exit_bb); + self.continue_targets.push(cond_bb); + + self.switch_bb(body_bb); + self.linearize_stmt(body); + if !self.is_terminated() { + // After linearizing body, current_bb may be different from body_bb + // (e.g., if body contains nested loops). Link the CURRENT block to cond_bb. + if let Some(current) = self.current_bb { + self.emit(Instruction::br(cond_bb)); + self.link_bb(current, cond_bb); + } + } + + self.break_targets.pop(); + self.continue_targets.pop(); + + // Exit block + self.switch_bb(exit_bb); + } + + fn linearize_do_while(&mut self, body: &Stmt, cond: &Expr) { + let body_bb = self.alloc_bb(); + let cond_bb = self.alloc_bb(); + let exit_bb = self.alloc_bb(); + + // Jump to body + if let Some(current) = self.current_bb { + if !self.is_terminated() { + self.emit(Instruction::br(body_bb)); + self.link_bb(current, body_bb); + } + } + + // Body block + self.break_targets.push(exit_bb); + self.continue_targets.push(cond_bb); + + self.switch_bb(body_bb); + self.linearize_stmt(body); + if !self.is_terminated() { + // After linearizing body, current_bb may be different from body_bb + if let Some(current) = self.current_bb { + self.emit(Instruction::br(cond_bb)); + self.link_bb(current, cond_bb); + } + } + + self.break_targets.pop(); + self.continue_targets.pop(); + + // Condition block + self.switch_bb(cond_bb); + let cond_val = self.linearize_expr(cond); + self.emit(Instruction::cbr(cond_val, body_bb, exit_bb)); + self.link_bb(cond_bb, body_bb); + self.link_bb(cond_bb, exit_bb); + + // Exit block + self.switch_bb(exit_bb); + } + + fn linearize_for( + &mut self, + init: Option<&ForInit>, + cond: Option<&Expr>, + post: Option<&Expr>, + body: &Stmt, + ) { + // Init + if let Some(init) = init { + match init { + ForInit::Declaration(decl) => self.linearize_local_decl(decl), + ForInit::Expression(expr) => { + self.linearize_expr(expr); + } + } + } + + let cond_bb = self.alloc_bb(); + let body_bb = self.alloc_bb(); + let post_bb = self.alloc_bb(); + let exit_bb = self.alloc_bb(); + + // Jump to condition + if let Some(current) = self.current_bb { + if !self.is_terminated() { + self.emit(Instruction::br(cond_bb)); + self.link_bb(current, cond_bb); + } + } + + // Condition block + self.switch_bb(cond_bb); + if let Some(cond_expr) = cond { + let cond_val = self.linearize_expr(cond_expr); + self.emit(Instruction::cbr(cond_val, body_bb, exit_bb)); + } else { + // No condition = always true + self.emit(Instruction::br(body_bb)); + } + self.link_bb(cond_bb, body_bb); + self.link_bb(cond_bb, exit_bb); + + // Body block + self.break_targets.push(exit_bb); + self.continue_targets.push(post_bb); + + self.switch_bb(body_bb); + self.linearize_stmt(body); + if !self.is_terminated() { + // After linearizing body, current_bb may be different from body_bb + if let Some(current) = self.current_bb { + self.emit(Instruction::br(post_bb)); + self.link_bb(current, post_bb); + } + } + + self.break_targets.pop(); + self.continue_targets.pop(); + + // Post block + self.switch_bb(post_bb); + if let Some(post_expr) = post { + self.linearize_expr(post_expr); + } + self.emit(Instruction::br(cond_bb)); + self.link_bb(post_bb, cond_bb); + + // Exit block + self.switch_bb(exit_bb); + } + + fn linearize_switch(&mut self, expr: &Expr, body: &Stmt) { + // Linearize the switch expression + let switch_val = self.linearize_expr(expr); + let expr_type = self.expr_type(expr); + let size = expr_type.size_bits(); + + let exit_bb = self.alloc_bb(); + + // Push exit block for break handling + self.break_targets.push(exit_bb); + + // Collect case labels and create basic blocks for each + let (case_values, has_default) = self.collect_switch_cases(body); + let case_bbs: Vec = case_values.iter().map(|_| self.alloc_bb()).collect(); + let default_bb = if has_default { + Some(self.alloc_bb()) + } else { + None + }; + + // Build switch instruction with case -> block mapping + let switch_cases: Vec<(i64, BasicBlockId)> = case_values + .iter() + .zip(case_bbs.iter()) + .map(|(val, bb)| (*val, *bb)) + .collect(); + + // Default goes to default_bb if present, otherwise exit_bb + let default_target = default_bb.unwrap_or(exit_bb); + + // Emit switch instruction + self.emit(Instruction::switch_insn( + switch_val, + switch_cases.clone(), + Some(default_target), + size, + )); + + // Link CFG edges from current block to all case/default/exit blocks + if let Some(current) = self.current_bb { + for &(_, bb) in &switch_cases { + self.link_bb(current, bb); + } + self.link_bb(current, default_target); + if default_bb.is_none() { + self.link_bb(current, exit_bb); + } + } + + // Linearize body with case block switching + self.linearize_switch_body(body, &case_values, &case_bbs, default_bb); + + // If not terminated after body, jump to exit + if !self.is_terminated() { + if let Some(current) = self.current_bb { + self.emit(Instruction::br(exit_bb)); + self.link_bb(current, exit_bb); + } + } + + self.break_targets.pop(); + + // Exit block + self.switch_bb(exit_bb); + } + + /// Collect case values from switch body (must be a Block) + /// Returns (case_values, has_default) + fn collect_switch_cases(&self, body: &Stmt) -> (Vec, bool) { + let mut case_values = Vec::new(); + let mut has_default = false; + + if let Stmt::Block(items) = body { + for item in items { + if let BlockItem::Statement(stmt) = item { + self.collect_cases_from_stmt(stmt, &mut case_values, &mut has_default); + } + } + } + + (case_values, has_default) + } + + fn collect_cases_from_stmt( + &self, + stmt: &Stmt, + case_values: &mut Vec, + has_default: &mut bool, + ) { + match stmt { + Stmt::Case(expr) => { + // Extract constant value from case expression + if let Some(val) = self.eval_const_expr(expr) { + case_values.push(val); + } + } + Stmt::Default => { + *has_default = true; + } + // Recursively check labeled statements + Stmt::Label { stmt, .. } => { + self.collect_cases_from_stmt(stmt, case_values, has_default); + } + _ => {} + } + } + + /// Evaluate a constant expression (for case labels) + fn eval_const_expr(&self, expr: &Expr) -> Option { + match &expr.kind { + ExprKind::IntLit(val) => Some(*val), + ExprKind::CharLit(c) => Some(*c as i64), + ExprKind::Ident { name, .. } => { + // Check if it's an enum constant + self.symbols.get_enum_value(name) + } + ExprKind::Unary { op, operand } => { + let val = self.eval_const_expr(operand)?; + match op { + UnaryOp::Neg => Some(-val), + UnaryOp::Not => Some(if val == 0 { 1 } else { 0 }), + UnaryOp::BitNot => Some(!val), + _ => None, + } + } + ExprKind::Binary { op, left, right } => { + let l = self.eval_const_expr(left)?; + let r = self.eval_const_expr(right)?; + match op { + BinaryOp::Add => Some(l + r), + BinaryOp::Sub => Some(l - r), + BinaryOp::Mul => Some(l * r), + BinaryOp::Div => Some(l / r), + BinaryOp::Mod => Some(l % r), + BinaryOp::BitAnd => Some(l & r), + BinaryOp::BitOr => Some(l | r), + BinaryOp::BitXor => Some(l ^ r), + BinaryOp::Shl => Some(l << r), + BinaryOp::Shr => Some(l >> r), + _ => None, + } + } + _ => None, + } + } + + /// Linearize switch body, switching basic blocks at case/default labels + fn linearize_switch_body( + &mut self, + body: &Stmt, + case_values: &[i64], + case_bbs: &[BasicBlockId], + default_bb: Option, + ) { + let mut case_idx = 0; + + if let Stmt::Block(items) = body { + for item in items { + match item { + BlockItem::Declaration(decl) => self.linearize_local_decl(decl), + BlockItem::Statement(stmt) => { + self.linearize_switch_stmt( + stmt, + case_values, + case_bbs, + default_bb, + &mut case_idx, + ); + } + } + } + } + } + + fn linearize_switch_stmt( + &mut self, + stmt: &Stmt, + case_values: &[i64], + case_bbs: &[BasicBlockId], + default_bb: Option, + case_idx: &mut usize, + ) { + match stmt { + Stmt::Case(expr) => { + // Find the matching case block + if let Some(val) = self.eval_const_expr(expr) { + if let Some(idx) = case_values.iter().position(|v| *v == val) { + let case_bb = case_bbs[idx]; + + // Fall through from previous case if not terminated + if !self.is_terminated() { + if let Some(current) = self.current_bb { + self.emit(Instruction::br(case_bb)); + self.link_bb(current, case_bb); + } + } + + self.switch_bb(case_bb); + *case_idx = idx + 1; + } + } + } + Stmt::Default => { + if let Some(def_bb) = default_bb { + // Fall through from previous case if not terminated + if !self.is_terminated() { + if let Some(current) = self.current_bb { + self.emit(Instruction::br(def_bb)); + self.link_bb(current, def_bb); + } + } + + self.switch_bb(def_bb); + } + } + _ => { + // Regular statement - linearize it + self.linearize_stmt(stmt); + } + } + } + + fn get_or_create_label(&mut self, name: &str) -> BasicBlockId { + if let Some(&bb) = self.label_map.get(name) { + bb + } else { + let bb = self.alloc_bb(); + self.label_map.insert(name.to_string(), bb); + // Set label name on the block + let block = self.get_or_create_bb(bb); + block.label = Some(name.to_string()); + bb + } + } + + // ======================================================================== + // Expression linearization + // ======================================================================== + + /// Get the type of an expression. + /// PANICS if expression has no type - type evaluation pass must run first. + /// Following sparse's design: the IR should ALWAYS receive fully typed input. + fn expr_type(&self, expr: &Expr) -> Type { + expr.typ.clone().expect( + "BUG: expression has no type. Type evaluation pass must run before linearization.", + ) + } + + /// Linearize an expression as an lvalue (get its address) + fn linearize_lvalue(&mut self, expr: &Expr) -> PseudoId { + match &expr.kind { + ExprKind::Ident { name, .. } => { + // For local variables, emit SymAddr to get the stack address + if let Some(local) = self.locals.get(name).cloned() { + // Check if this is a static local (sentinel value) + if local.sym.0 == u32::MAX { + // Static local - look up the global name + let key = format!("{}.{}", self.current_func_name, name); + if let Some(static_info) = self.static_locals.get(&key).cloned() { + let sym_id = self.alloc_pseudo(); + let pseudo = Pseudo::sym(sym_id, static_info.global_name); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + let result = self.alloc_pseudo(); + self.emit(Instruction::sym_addr( + result, + sym_id, + Type::pointer(static_info.typ), + )); + return result; + } + } + let result = self.alloc_pseudo(); + self.emit(Instruction::sym_addr( + result, + local.sym, + Type::pointer(local.typ), + )); + result + } else { + // Global variable - emit SymAddr to get its address + let sym_id = self.alloc_pseudo(); + let pseudo = Pseudo::sym(sym_id, name.clone()); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + let result = self.alloc_pseudo(); + let typ = self.expr_type(expr); + self.emit(Instruction::sym_addr(result, sym_id, Type::pointer(typ))); + result + } + } + ExprKind::Unary { + op: UnaryOp::Deref, + operand, + } => { + // *ptr as lvalue = ptr itself + self.linearize_expr(operand) + } + ExprKind::Member { + expr: inner, + member, + } => { + // s.m as lvalue = &s + offset(m) + let base = self.linearize_lvalue(inner); + let struct_type = self.expr_type(inner); + let member_info = struct_type + .find_member(member) + .unwrap_or_else(|| MemberInfo { + offset: 0, + typ: self.expr_type(expr), + bit_offset: None, + bit_width: None, + storage_unit_size: None, + }); + + if member_info.offset == 0 { + base + } else { + let offset_val = + self.emit_const(member_info.offset as i64, Type::basic(TypeKind::Long)); + let result = self.alloc_pseudo(); + self.emit(Instruction::binop( + Opcode::Add, + result, + base, + offset_val, + Type::basic(TypeKind::Long), + )); + result + } + } + ExprKind::Arrow { + expr: inner, + member, + } => { + // ptr->m as lvalue = ptr + offset(m) + let ptr = self.linearize_expr(inner); + let ptr_type = self.expr_type(inner); + let struct_type = ptr_type + .base + .as_ref() + .map(|b| b.as_ref().clone()) + .unwrap_or_else(|| self.expr_type(expr)); + let member_info = struct_type + .find_member(member) + .unwrap_or_else(|| MemberInfo { + offset: 0, + typ: self.expr_type(expr), + bit_offset: None, + bit_width: None, + storage_unit_size: None, + }); + + if member_info.offset == 0 { + ptr + } else { + let offset_val = + self.emit_const(member_info.offset as i64, Type::basic(TypeKind::Long)); + let result = self.alloc_pseudo(); + self.emit(Instruction::binop( + Opcode::Add, + result, + ptr, + offset_val, + Type::basic(TypeKind::Long), + )); + result + } + } + ExprKind::Index { array, index } => { + // arr[idx] as lvalue = arr + idx * sizeof(elem) + // Handle commutative form: 0[arr] is equivalent to arr[0] + let array_type = self.expr_type(array); + let index_type = self.expr_type(index); + + let (ptr_expr, idx_expr, idx_type) = + if array_type.kind == TypeKind::Pointer || array_type.kind == TypeKind::Array { + (array, index, index_type) + } else { + // Swap: index is actually the pointer/array + (index, array, array_type) + }; + + let arr = self.linearize_expr(ptr_expr); + let idx = self.linearize_expr(idx_expr); + let elem_type = self.expr_type(expr); + let elem_size = elem_type.size_bits() / 8; + let elem_size_val = self.emit_const(elem_size as i64, Type::basic(TypeKind::Long)); + + // Sign-extend index to 64-bit for proper pointer arithmetic (negative indices) + let idx_extended = self.emit_convert(idx, &idx_type, &Type::basic(TypeKind::Long)); + + let offset = self.alloc_pseudo(); + self.emit(Instruction::binop( + Opcode::Mul, + offset, + idx_extended, + elem_size_val, + Type::basic(TypeKind::Long), + )); + + let addr = self.alloc_pseudo(); + self.emit(Instruction::binop( + Opcode::Add, + addr, + arr, + offset, + Type::basic(TypeKind::Long), + )); + addr + } + _ => { + // Fallback: just evaluate the expression (shouldn't happen for valid lvalues) + self.linearize_expr(expr) + } + } + } + + fn linearize_expr(&mut self, expr: &Expr) -> PseudoId { + // Set current position for debug info + self.current_pos = Some(expr.pos); + + match &expr.kind { + ExprKind::IntLit(val) => { + let typ = self.expr_type(expr); + self.emit_const(*val, typ) + } + + ExprKind::FloatLit(val) => { + let typ = self.expr_type(expr); + self.emit_fconst(*val, typ) + } + + ExprKind::CharLit(c) => { + let typ = self.expr_type(expr); + self.emit_const(*c as i64, typ) + } + + ExprKind::StringLit(s) => { + // Add string to module and get its label + let label = self.module.add_string(s.clone()); + + // Create a symbol pseudo for the string label + let sym_id = self.alloc_pseudo(); + let sym_pseudo = Pseudo::sym(sym_id, label.clone()); + if let Some(func) = &mut self.current_func { + func.add_pseudo(sym_pseudo); + } + + // Create result pseudo for the address + let result = self.alloc_pseudo(); + let typ = self.expr_type(expr); + let pseudo = Pseudo::reg(result, result.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + + // Emit SymAddr to load the address of the string + self.emit(Instruction::sym_addr(result, sym_id, typ)); + result + } + + ExprKind::Ident { name, .. } => { + // First check if it's an enum constant + if let Some(value) = self.symbols.get_enum_value(name) { + self.emit_const(value, Type::basic(TypeKind::Int)) + } + // Check if it's a local variable + else if let Some(local) = self.locals.get(name).cloned() { + // Check if this is a static local (sentinel value) + if local.sym.0 == u32::MAX { + // Static local - look up the global name and treat as global + let key = format!("{}.{}", self.current_func_name, name); + if let Some(static_info) = self.static_locals.get(&key).cloned() { + let sym_id = self.alloc_pseudo(); + let pseudo = Pseudo::sym(sym_id, static_info.global_name); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + let typ = static_info.typ; + // Arrays decay to pointers - get address, not value + if typ.kind == TypeKind::Array { + let result = self.alloc_pseudo(); + self.emit(Instruction::sym_addr( + result, + sym_id, + Type::pointer( + typ.base + .as_ref() + .map(|b| (**b).clone()) + .unwrap_or_else(|| Type::basic(TypeKind::Int)), + ), + )); + return result; + } else { + let result = self.alloc_pseudo(); + self.emit(Instruction::load(result, sym_id, 0, typ)); + return result; + } + } + } + let result = self.alloc_pseudo(); + let pseudo = Pseudo::reg(result, result.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + // Arrays decay to pointers - get address, not value + if local.typ.kind == TypeKind::Array { + self.emit(Instruction::sym_addr( + result, + local.sym, + Type::pointer( + local + .typ + .base + .as_ref() + .map(|b| (**b).clone()) + .unwrap_or_else(|| Type::basic(TypeKind::Int)), + ), + )); + } else { + self.emit(Instruction::load(result, local.sym, 0, local.typ)); + } + result + } + // Check if it's a parameter (already SSA value) + else if let Some(&pseudo) = self.var_map.get(name) { + pseudo + } + // Global variable - create symbol reference and load + else { + let sym_id = self.alloc_pseudo(); + let pseudo = Pseudo::sym(sym_id, name.clone()); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + let typ = self.expr_type(expr); + // Arrays decay to pointers - get address, not value + if typ.kind == TypeKind::Array { + let result = self.alloc_pseudo(); + self.emit(Instruction::sym_addr( + result, + sym_id, + Type::pointer( + typ.base + .as_ref() + .map(|b| (**b).clone()) + .unwrap_or_else(|| Type::basic(TypeKind::Int)), + ), + )); + result + } else { + let result = self.alloc_pseudo(); + self.emit(Instruction::load(result, sym_id, 0, typ)); + result + } + } + } + + ExprKind::Unary { op, operand } => { + // Handle AddrOf specially - we need the lvalue address, not the value + if *op == UnaryOp::AddrOf { + return self.linearize_lvalue(operand); + } + + // Handle PreInc/PreDec specially - they need store-back + if *op == UnaryOp::PreInc || *op == UnaryOp::PreDec { + let val = self.linearize_expr(operand); + let typ = self.expr_type(operand); + let is_float = typ.is_float(); + let is_ptr = typ.kind == TypeKind::Pointer; + + // Compute new value - for pointers, scale by element size + let increment = if is_ptr { + let elem_type = typ + .base + .as_ref() + .map(|b| (**b).clone()) + .unwrap_or_else(|| Type::basic(TypeKind::Char)); + let elem_size = elem_type.size_bits() / 8; + self.emit_const(elem_size as i64, Type::basic(TypeKind::Long)) + } else if is_float { + self.emit_fconst(1.0, typ.clone()) + } else { + self.emit_const(1, typ.clone()) + }; + let result = self.alloc_pseudo(); + let pseudo = Pseudo::reg(result, result.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + let opcode = if is_float { + if *op == UnaryOp::PreInc { + Opcode::FAdd + } else { + Opcode::FSub + } + } else if *op == UnaryOp::PreInc { + Opcode::Add + } else { + Opcode::Sub + }; + self.emit(Instruction::binop( + opcode, + result, + val, + increment, + typ.clone(), + )); + + // For _Bool, normalize the result (any non-zero -> 1) + let final_result = if typ.kind == TypeKind::Bool { + self.emit_convert(result, &Type::basic(TypeKind::Int), &typ) + } else { + result + }; + + // Store back to the variable + if let ExprKind::Ident { name, .. } = &operand.kind { + if let Some(local) = self.locals.get(name).cloned() { + self.emit(Instruction::store(final_result, local.sym, 0, typ)); + } else if self.var_map.contains_key(name) { + self.var_map.insert(name.clone(), final_result); + } else { + // Global variable - emit store + let sym_id = self.alloc_pseudo(); + let pseudo = Pseudo::sym(sym_id, name.clone()); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + self.emit(Instruction::store(final_result, sym_id, 0, typ)); + } + } + + return final_result; + } + + let src = self.linearize_expr(operand); + let result_typ = self.expr_type(expr); + let operand_typ = self.expr_type(operand); + // For logical NOT, use operand type for comparison size + let typ = if *op == UnaryOp::Not { + operand_typ + } else { + result_typ + }; + self.emit_unary(*op, src, typ) + } + + ExprKind::Binary { op, left, right } => { + // Handle short-circuit operators before linearizing both operands + // C99 requires that && and || only evaluate the RHS if needed + if *op == BinaryOp::LogAnd { + return self.emit_logical_and(left, right); + } + if *op == BinaryOp::LogOr { + return self.emit_logical_or(left, right); + } + + let left_typ = self.expr_type(left); + let right_typ = self.expr_type(right); + let result_typ = self.expr_type(expr); + + // Check for pointer arithmetic: ptr +/- int or int + ptr + let left_is_ptr_or_arr = + left_typ.kind == TypeKind::Pointer || left_typ.kind == TypeKind::Array; + let right_is_ptr_or_arr = + right_typ.kind == TypeKind::Pointer || right_typ.kind == TypeKind::Array; + let is_ptr_arith = (*op == BinaryOp::Add || *op == BinaryOp::Sub) + && ((left_is_ptr_or_arr && right_typ.is_integer()) + || (left_typ.is_integer() && right_is_ptr_or_arr)); + + // Check for pointer difference: ptr - ptr + let is_ptr_diff = *op == BinaryOp::Sub && left_is_ptr_or_arr && right_is_ptr_or_arr; + + if is_ptr_diff { + // Pointer difference: (ptr1 - ptr2) / element_size + let left_val = self.linearize_expr(left); + let right_val = self.linearize_expr(right); + + // Compute byte difference + let byte_diff = self.alloc_pseudo(); + self.emit(Instruction::binop( + Opcode::Sub, + byte_diff, + left_val, + right_val, + Type::basic(TypeKind::Long), + )); + + // Get element size from the pointer type + let elem_type = left_typ + .base + .as_ref() + .map(|b| (**b).clone()) + .unwrap_or_else(|| Type::basic(TypeKind::Char)); + let elem_size = elem_type.size_bits() / 8; + + // Divide by element size + let scale = self.emit_const(elem_size as i64, Type::basic(TypeKind::Long)); + let result = self.alloc_pseudo(); + self.emit(Instruction::binop( + Opcode::DivS, + result, + byte_diff, + scale, + Type::basic(TypeKind::Long), + )); + result + } else if is_ptr_arith { + // Pointer arithmetic: scale integer operand by element size + let (ptr_val, ptr_typ, int_val) = if left_is_ptr_or_arr { + let ptr = self.linearize_expr(left); + let int = self.linearize_expr(right); + (ptr, left_typ.clone(), int) + } else { + // int + ptr case + let int = self.linearize_expr(left); + let ptr = self.linearize_expr(right); + (ptr, right_typ.clone(), int) + }; + + // Get element size + let elem_type = ptr_typ + .base + .as_ref() + .map(|b| (**b).clone()) + .unwrap_or_else(|| Type::basic(TypeKind::Char)); + let elem_size = elem_type.size_bits() / 8; + + // Scale the integer by element size + let scale = self.emit_const(elem_size as i64, Type::basic(TypeKind::Long)); + let scaled_offset = self.alloc_pseudo(); + // Extend int_val to 64-bit for proper address arithmetic + let int_val_extended = self.emit_convert( + int_val, + &Type::basic(TypeKind::Int), + &Type::basic(TypeKind::Long), + ); + self.emit(Instruction::binop( + Opcode::Mul, + scaled_offset, + int_val_extended, + scale, + Type::basic(TypeKind::Long), + )); + + // Add (or subtract) to pointer + let result = self.alloc_pseudo(); + let opcode = if *op == BinaryOp::Sub { + Opcode::Sub + } else { + Opcode::Add + }; + self.emit(Instruction::binop( + opcode, + result, + ptr_val, + scaled_offset, + Type::basic(TypeKind::Long), + )); + result + } else { + // For comparisons, compute common type for both operands + // (usual arithmetic conversions) + let operand_typ = if op.is_comparison() { + Self::common_type(&left_typ, &right_typ) + } else { + result_typ.clone() + }; + + // Linearize operands + let left_val = self.linearize_expr(left); + let right_val = self.linearize_expr(right); + + // Emit type conversions if needed + let left_val = self.emit_convert(left_val, &left_typ, &operand_typ); + let right_val = self.emit_convert(right_val, &right_typ, &operand_typ); + + self.emit_binary(*op, left_val, right_val, result_typ, operand_typ) + } + } + + ExprKind::Assign { op, target, value } => self.emit_assign(*op, target, value), + + ExprKind::PostInc(operand) => { + let val = self.linearize_expr(operand); + let typ = self.expr_type(operand); + let is_float = typ.is_float(); + let is_ptr = typ.kind == TypeKind::Pointer; + + // For locals, we need to save the old value before updating + // because the pseudo will be reloaded from stack which gets overwritten + let is_local = if let ExprKind::Ident { name, .. } = &operand.kind { + self.locals.contains_key(name) + } else { + false + }; + + let old_val = if is_local { + // Copy the old value to a temp + let temp = self.alloc_pseudo(); + let pseudo = Pseudo::reg(temp, temp.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + self.emit( + Instruction::new(Opcode::Copy) + .with_target(temp) + .with_src(val) + .with_size(typ.size_bits()), + ); + temp + } else { + val + }; + + // For pointers, increment by element size; for others, increment by 1 + let increment = if is_ptr { + let elem_type = typ + .base + .as_ref() + .map(|b| (**b).clone()) + .unwrap_or_else(|| Type::basic(TypeKind::Char)); + let elem_size = elem_type.size_bits() / 8; + self.emit_const(elem_size as i64, Type::basic(TypeKind::Long)) + } else if is_float { + self.emit_fconst(1.0, typ.clone()) + } else { + self.emit_const(1, typ.clone()) + }; + let result = self.alloc_pseudo(); + let pseudo = Pseudo::reg(result, result.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + let opcode = if is_float { Opcode::FAdd } else { Opcode::Add }; + let arith_type = if is_ptr { + Type::basic(TypeKind::Long) + } else { + typ.clone() + }; + self.emit(Instruction::binop( + opcode, result, val, increment, arith_type, + )); + + // For _Bool, normalize the result (any non-zero -> 1) + let final_result = if typ.kind == TypeKind::Bool { + self.emit_convert(result, &Type::basic(TypeKind::Int), &typ) + } else { + result + }; + + // Store to local, update parameter mapping, or store through pointer + match &operand.kind { + ExprKind::Ident { name, .. } => { + if let Some(local) = self.locals.get(name).cloned() { + self.emit(Instruction::store(final_result, local.sym, 0, typ)); + } else if self.var_map.contains_key(name) { + self.var_map.insert(name.clone(), final_result); + } else { + // Global variable - emit store + let sym_id = self.alloc_pseudo(); + let pseudo = Pseudo::sym(sym_id, name.clone()); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + self.emit(Instruction::store(final_result, sym_id, 0, typ)); + } + } + ExprKind::Unary { + op: UnaryOp::Deref, + operand: ptr_expr, + } => { + // (*p)++ - store back through the pointer + let addr = self.linearize_expr(ptr_expr); + self.emit(Instruction::store(final_result, addr, 0, typ)); + } + _ => {} + } + + old_val // Return old value + } + + ExprKind::PostDec(operand) => { + let val = self.linearize_expr(operand); + let typ = self.expr_type(operand); + let is_float = typ.is_float(); + let is_ptr = typ.kind == TypeKind::Pointer; + + // For locals, we need to save the old value before updating + // because the pseudo will be reloaded from stack which gets overwritten + let is_local = if let ExprKind::Ident { name, .. } = &operand.kind { + self.locals.contains_key(name) + } else { + false + }; + + let old_val = if is_local { + // Copy the old value to a temp + let temp = self.alloc_pseudo(); + let pseudo = Pseudo::reg(temp, temp.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + self.emit( + Instruction::new(Opcode::Copy) + .with_target(temp) + .with_src(val) + .with_size(typ.size_bits()), + ); + temp + } else { + val + }; + + // For pointers, decrement by element size; for others, decrement by 1 + let decrement = if is_ptr { + let elem_type = typ + .base + .as_ref() + .map(|b| (**b).clone()) + .unwrap_or_else(|| Type::basic(TypeKind::Char)); + let elem_size = elem_type.size_bits() / 8; + self.emit_const(elem_size as i64, Type::basic(TypeKind::Long)) + } else if is_float { + self.emit_fconst(1.0, typ.clone()) + } else { + self.emit_const(1, typ.clone()) + }; + let result = self.alloc_pseudo(); + let pseudo = Pseudo::reg(result, result.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + let opcode = if is_float { Opcode::FSub } else { Opcode::Sub }; + let arith_type = if is_ptr { + Type::basic(TypeKind::Long) + } else { + typ.clone() + }; + self.emit(Instruction::binop( + opcode, result, val, decrement, arith_type, + )); + + // For _Bool, normalize the result (any non-zero -> 1) + let final_result = if typ.kind == TypeKind::Bool { + self.emit_convert(result, &Type::basic(TypeKind::Int), &typ) + } else { + result + }; + + // Store to local, update parameter mapping, or store through pointer + match &operand.kind { + ExprKind::Ident { name, .. } => { + if let Some(local) = self.locals.get(name).cloned() { + self.emit(Instruction::store(final_result, local.sym, 0, typ)); + } else if self.var_map.contains_key(name) { + self.var_map.insert(name.clone(), final_result); + } else { + // Global variable - emit store + let sym_id = self.alloc_pseudo(); + let pseudo = Pseudo::sym(sym_id, name.clone()); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + self.emit(Instruction::store(final_result, sym_id, 0, typ)); + } + } + ExprKind::Unary { + op: UnaryOp::Deref, + operand: ptr_expr, + } => { + // (*p)-- - store back through the pointer + let addr = self.linearize_expr(ptr_expr); + self.emit(Instruction::store(final_result, addr, 0, typ)); + } + _ => {} + } + + old_val // Return old value + } + + ExprKind::Conditional { + cond, + then_expr, + else_expr, + } => { + let cond_val = self.linearize_expr(cond); + let then_val = self.linearize_expr(then_expr); + let else_val = self.linearize_expr(else_expr); + + let result = self.alloc_pseudo(); + let typ = self.expr_type(expr); // Use evaluated type + + self.emit(Instruction::select( + result, cond_val, then_val, else_val, typ, + )); + result + } + + ExprKind::Call { func, args } => { + // Get function name + let func_name = match &func.kind { + ExprKind::Ident { name, .. } => name.clone(), + _ => "".to_string(), + }; + + let typ = self.expr_type(expr); // Use evaluated type (function return type) + + // Check if this is a variadic function call + // If the function expression has a type, check its variadic flag + let variadic_arg_start = if let Some(ref func_type) = func.typ { + if func_type.variadic { + // Variadic args start after the fixed parameters + func_type.params.as_ref().map(|p| p.len()) + } else { + None + } + } else { + None // No type info, assume non-variadic + }; + + // Check if function returns a large struct + // If so, allocate space and pass address as hidden first argument + let returns_large_struct = (typ.kind == TypeKind::Struct + || typ.kind == TypeKind::Union) + && typ.size_bits() > MAX_REGISTER_AGGREGATE_BITS; + + let (result_sym, mut arg_vals, mut arg_types_vec) = if returns_large_struct { + // Allocate local storage for the return value + let sret_sym = self.alloc_pseudo(); + let sret_pseudo = Pseudo::sym(sret_sym, format!("__sret_{}", sret_sym.0)); + if let Some(func) = &mut self.current_func { + func.add_pseudo(sret_pseudo); + // Internal sret storage is never volatile + func.add_local( + format!("__sret_{}", sret_sym.0), + sret_sym, + typ.clone(), + false, + self.current_bb, + ); + } + + // Get address of the allocated space + let sret_addr = self.alloc_pseudo(); + let addr_pseudo = Pseudo::reg(sret_addr, sret_addr.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(addr_pseudo); + } + self.emit(Instruction::sym_addr( + sret_addr, + sret_sym, + Type::pointer(typ.clone()), + )); + + // Hidden return pointer is the first argument (pointer type) + (sret_sym, vec![sret_addr], vec![Type::pointer(typ.clone())]) + } else { + let result = self.alloc_pseudo(); + (result, Vec::new(), Vec::new()) + }; + + // Linearize regular arguments + // For large structs, pass by reference (address) instead of by value + for a in args.iter() { + let arg_type = self.expr_type(a); + let arg_val = if (arg_type.kind == TypeKind::Struct + || arg_type.kind == TypeKind::Union) + && arg_type.size_bits() > MAX_REGISTER_AGGREGATE_BITS + { + // Large struct: pass address instead of value + // The argument type becomes a pointer + arg_types_vec.push(Type::pointer(arg_type)); + self.linearize_lvalue(a) + } else { + arg_types_vec.push(arg_type); + self.linearize_expr(a) + }; + arg_vals.push(arg_val); + } + + if returns_large_struct { + // For large struct returns, the return value is the address + // stored in result_sym (which is a local symbol containing the struct) + let result = self.alloc_pseudo(); + let result_pseudo = Pseudo::reg(result, result.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(result_pseudo); + } + let mut call_insn = Instruction::call( + Some(result), + &func_name, + arg_vals, + arg_types_vec, + Type::pointer(typ), + ); + call_insn.variadic_arg_start = variadic_arg_start; + self.emit(call_insn); + // Return the symbol (address) where struct is stored + result_sym + } else { + let mut call_insn = Instruction::call( + Some(result_sym), + &func_name, + arg_vals, + arg_types_vec, + typ, + ); + call_insn.variadic_arg_start = variadic_arg_start; + self.emit(call_insn); + result_sym + } + } + + ExprKind::Member { + expr: inner_expr, + member, + } => { + // Get address of the struct base + let base = self.linearize_lvalue(inner_expr); + let struct_type = self.expr_type(inner_expr); + + // Look up member offset and type + let member_info = struct_type + .find_member(member) + .unwrap_or_else(|| MemberInfo { + offset: 0, + typ: self.expr_type(expr), + bit_offset: None, + bit_width: None, + storage_unit_size: None, + }); + + // If member type is an array, return the address (arrays decay to pointers) + if member_info.typ.kind == TypeKind::Array { + if member_info.offset == 0 { + base + } else { + let result = self.alloc_pseudo(); + let offset_val = + self.emit_const(member_info.offset as i64, Type::basic(TypeKind::Long)); + self.emit(Instruction::binop( + Opcode::Add, + result, + base, + offset_val, + Type::basic(TypeKind::Long), + )); + result + } + } else if let (Some(bit_offset), Some(bit_width), Some(storage_size)) = ( + member_info.bit_offset, + member_info.bit_width, + member_info.storage_unit_size, + ) { + // Bitfield read + self.emit_bitfield_load( + base, + member_info.offset, + bit_offset, + bit_width, + storage_size, + &member_info.typ, + ) + } else { + let result = self.alloc_pseudo(); + self.emit(Instruction::load( + result, + base, + member_info.offset as i64, + member_info.typ, + )); + result + } + } + + ExprKind::Arrow { + expr: inner_expr, + member, + } => { + // Pointer already contains the struct address + let ptr = self.linearize_expr(inner_expr); + let ptr_type = self.expr_type(inner_expr); + + // Dereference pointer to get struct type + let struct_type = ptr_type + .base + .as_ref() + .map(|b| b.as_ref().clone()) + .unwrap_or_else(|| self.expr_type(expr)); + + // Look up member offset and type + let member_info = struct_type + .find_member(member) + .unwrap_or_else(|| MemberInfo { + offset: 0, + typ: self.expr_type(expr), + bit_offset: None, + bit_width: None, + storage_unit_size: None, + }); + + // If member type is an array, return the address (arrays decay to pointers) + if member_info.typ.kind == TypeKind::Array { + if member_info.offset == 0 { + ptr + } else { + let result = self.alloc_pseudo(); + let offset_val = + self.emit_const(member_info.offset as i64, Type::basic(TypeKind::Long)); + self.emit(Instruction::binop( + Opcode::Add, + result, + ptr, + offset_val, + Type::basic(TypeKind::Long), + )); + result + } + } else if let (Some(bit_offset), Some(bit_width), Some(storage_size)) = ( + member_info.bit_offset, + member_info.bit_width, + member_info.storage_unit_size, + ) { + // Bitfield read + self.emit_bitfield_load( + ptr, + member_info.offset, + bit_offset, + bit_width, + storage_size, + &member_info.typ, + ) + } else { + let result = self.alloc_pseudo(); + self.emit(Instruction::load( + result, + ptr, + member_info.offset as i64, + member_info.typ, + )); + result + } + } + + ExprKind::Index { array, index } => { + // In C, a[b] is defined as *(a + b), so either operand can be the pointer + // Handle commutative form: 0[arr] is equivalent to arr[0] + let array_type = self.expr_type(array); + let index_type = self.expr_type(index); + + let (ptr_expr, idx_expr, idx_type) = + if array_type.kind == TypeKind::Pointer || array_type.kind == TypeKind::Array { + (array, index, index_type) + } else { + // Swap: index is actually the pointer/array + (index, array, array_type) + }; + + let arr = self.linearize_expr(ptr_expr); + let idx = self.linearize_expr(idx_expr); + + // Get element type from the expression type + let elem_type = self.expr_type(expr); + let elem_size = elem_type.size_bits() / 8; + let elem_size_val = self.emit_const(elem_size as i64, Type::basic(TypeKind::Long)); + + // Sign-extend index to 64-bit for proper pointer arithmetic (negative indices) + let idx_extended = self.emit_convert(idx, &idx_type, &Type::basic(TypeKind::Long)); + + let offset = self.alloc_pseudo(); + let ptr_typ = Type::basic(TypeKind::Long); + self.emit(Instruction::binop( + Opcode::Mul, + offset, + idx_extended, + elem_size_val, + ptr_typ.clone(), + )); + + let addr = self.alloc_pseudo(); + self.emit(Instruction::binop(Opcode::Add, addr, arr, offset, ptr_typ)); + + // If element type is an array, just return the address (arrays decay to pointers) + if elem_type.kind == TypeKind::Array { + addr + } else { + let result = self.alloc_pseudo(); + self.emit(Instruction::load(result, addr, 0, elem_type)); + result + } + } + + ExprKind::Cast { + cast_type, + expr: inner_expr, + } => { + let src = self.linearize_expr(inner_expr); + let src_type = self.expr_type(inner_expr); + + // Emit conversion if needed + let src_is_float = src_type.is_float(); + let dst_is_float = cast_type.is_float(); + + if src_is_float && !dst_is_float { + // Float to integer conversion + let result = self.alloc_pseudo(); + let pseudo = Pseudo::reg(result, result.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + // FCvtS for signed int, FCvtU for unsigned + let opcode = if cast_type.is_unsigned() { + Opcode::FCvtU + } else { + Opcode::FCvtS + }; + let mut insn = Instruction::new(opcode) + .with_target(result) + .with_src(src) + .with_type(cast_type.clone()); + insn.src_size = src_type.size_bits(); + self.emit(insn); + result + } else if !src_is_float && dst_is_float { + // Integer to float conversion + let result = self.alloc_pseudo(); + let pseudo = Pseudo::reg(result, result.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + // SCvtF for signed int, UCvtF for unsigned + let opcode = if src_type.is_unsigned() { + Opcode::UCvtF + } else { + Opcode::SCvtF + }; + let mut insn = Instruction::new(opcode) + .with_target(result) + .with_src(src) + .with_type(cast_type.clone()); + insn.src_size = src_type.size_bits(); + self.emit(insn); + result + } else if src_is_float && dst_is_float { + // Float to float conversion (e.g., float to double) + if src_type.size_bits() != cast_type.size_bits() { + let result = self.alloc_pseudo(); + let pseudo = Pseudo::reg(result, result.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + let mut insn = Instruction::new(Opcode::FCvtF) + .with_target(result) + .with_src(src) + .with_type(cast_type.clone()); + insn.src_size = src_type.size_bits(); + self.emit(insn); + result + } else { + src // Same size, no conversion needed + } + } else { + // Integer to integer conversion + // Use emit_convert for proper type conversions including _Bool + self.emit_convert(src, &src_type, cast_type) + } + } + + ExprKind::SizeofType(typ) => { + let size = typ.size_bits() / 8; + // sizeof returns size_t, which is unsigned long in our implementation + let result_typ = Type::with_modifiers(TypeKind::Long, TypeModifiers::UNSIGNED); + self.emit_const(size as i64, result_typ) + } + + ExprKind::SizeofExpr(inner_expr) => { + // Get type from expression and compute size + let inner_typ = self.expr_type(inner_expr); + let size = inner_typ.size_bits() / 8; + // sizeof returns size_t, which is unsigned long in our implementation + let result_typ = Type::with_modifiers(TypeKind::Long, TypeModifiers::UNSIGNED); + self.emit_const(size as i64, result_typ) + } + + ExprKind::Comma(exprs) => { + let mut result = self.emit_const(0, Type::basic(TypeKind::Int)); + for e in exprs { + result = self.linearize_expr(e); + } + result + } + + ExprKind::InitList { .. } => { + // InitList is handled specially in linearize_local_decl and linearize_global_decl + // It shouldn't be reached here during normal expression evaluation + panic!("InitList should be handled in declaration context, not as standalone expression") + } + + // ================================================================ + // Variadic function support (va_* builtins) + // ================================================================ + ExprKind::VaStart { ap, last_param } => { + // va_start(ap, last_param) + // Get address of ap (it's an lvalue) + let ap_addr = self.linearize_lvalue(ap); + let result = self.alloc_pseudo(); + + // Create instruction with last_param stored in func_name field + let insn = Instruction::new(Opcode::VaStart) + .with_target(result) + .with_src(ap_addr) + .with_func(last_param.clone()) + .with_type(Type::basic(TypeKind::Void)); + self.emit(insn); + result + } + + ExprKind::VaArg { ap, arg_type } => { + // va_arg(ap, type) + // Get address of ap (it's an lvalue) + let ap_addr = self.linearize_lvalue(ap); + let result = self.alloc_pseudo(); + + let insn = Instruction::new(Opcode::VaArg) + .with_target(result) + .with_src(ap_addr) + .with_type(arg_type.clone()); + self.emit(insn); + result + } + + ExprKind::VaEnd { ap } => { + // va_end(ap) - usually a no-op + let ap_addr = self.linearize_lvalue(ap); + let result = self.alloc_pseudo(); + + let insn = Instruction::new(Opcode::VaEnd) + .with_target(result) + .with_src(ap_addr) + .with_type(Type::basic(TypeKind::Void)); + self.emit(insn); + result + } + + ExprKind::VaCopy { dest, src } => { + // va_copy(dest, src) + let dest_addr = self.linearize_lvalue(dest); + let src_addr = self.linearize_lvalue(src); + let result = self.alloc_pseudo(); + + let insn = Instruction::new(Opcode::VaCopy) + .with_target(result) + .with_src(dest_addr) + .with_src(src_addr) + .with_type(Type::basic(TypeKind::Void)); + self.emit(insn); + result + } + + // ================================================================ + // Byte-swapping builtins + // ================================================================ + ExprKind::Bswap16 { arg } => { + let arg_val = self.linearize_expr(arg); + let result = self.alloc_pseudo(); + + let insn = Instruction::new(Opcode::Bswap16) + .with_target(result) + .with_src(arg_val) + .with_size(16) + .with_type(Type::with_modifiers( + TypeKind::Short, + TypeModifiers::UNSIGNED, + )); + self.emit(insn); + result + } + + ExprKind::Bswap32 { arg } => { + let arg_val = self.linearize_expr(arg); + let result = self.alloc_pseudo(); + + let insn = Instruction::new(Opcode::Bswap32) + .with_target(result) + .with_src(arg_val) + .with_size(32) + .with_type(Type::with_modifiers(TypeKind::Int, TypeModifiers::UNSIGNED)); + self.emit(insn); + result + } + + ExprKind::Bswap64 { arg } => { + let arg_val = self.linearize_expr(arg); + let result = self.alloc_pseudo(); + + let insn = Instruction::new(Opcode::Bswap64) + .with_target(result) + .with_src(arg_val) + .with_size(64) + .with_type(Type::with_modifiers( + TypeKind::LongLong, + TypeModifiers::UNSIGNED, + )); + self.emit(insn); + result + } + + ExprKind::Alloca { size } => { + let size_val = self.linearize_expr(size); + let result = self.alloc_pseudo(); + + let insn = Instruction::new(Opcode::Alloca) + .with_target(result) + .with_src(size_val) + .with_size(64) // Returns pointer (64-bit) + .with_type(Type::pointer(Type::basic(TypeKind::Void))); + self.emit(insn); + result + } + } + } + + fn emit_const(&mut self, val: i64, typ: Type) -> PseudoId { + let id = self.alloc_pseudo(); + let pseudo = Pseudo::val(id, val); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + + // Emit setval instruction + let insn = Instruction::new(Opcode::SetVal) + .with_target(id) + .with_type(typ); + self.emit(insn); + + id + } + + fn emit_fconst(&mut self, val: f64, typ: Type) -> PseudoId { + let id = self.alloc_pseudo(); + let pseudo = Pseudo::fval(id, val); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + + // Emit setval instruction + let insn = Instruction::new(Opcode::SetVal) + .with_target(id) + .with_type(typ); + self.emit(insn); + + id + } + + /// Emit code to load a bitfield value + /// Returns the loaded value as a PseudoId + fn emit_bitfield_load( + &mut self, + base: PseudoId, + byte_offset: usize, + bit_offset: u32, + bit_width: u32, + storage_size: u32, + typ: &Type, + ) -> PseudoId { + // Determine storage type based on storage unit size + let storage_type = match storage_size { + 1 => Type::with_modifiers(TypeKind::Char, TypeModifiers::UNSIGNED), + 2 => Type::with_modifiers(TypeKind::Short, TypeModifiers::UNSIGNED), + 4 => Type::with_modifiers(TypeKind::Int, TypeModifiers::UNSIGNED), + 8 => Type::with_modifiers(TypeKind::Long, TypeModifiers::UNSIGNED), + _ => Type::with_modifiers(TypeKind::Int, TypeModifiers::UNSIGNED), + }; + + // 1. Load the entire storage unit + let storage_val = self.alloc_pseudo(); + self.emit(Instruction::load( + storage_val, + base, + byte_offset as i64, + storage_type.clone(), + )); + + // 2. Shift right by bit_offset (using logical shift for unsigned extraction) + let shifted = if bit_offset > 0 { + let shift_amount = self.emit_const(bit_offset as i64, Type::basic(TypeKind::Int)); + let shifted = self.alloc_pseudo(); + self.emit(Instruction::binop( + Opcode::Lsr, + shifted, + storage_val, + shift_amount, + storage_type.clone(), + )); + shifted + } else { + storage_val + }; + + // 3. Mask to bit_width bits + let mask = (1u64 << bit_width) - 1; + let mask_val = self.emit_const(mask as i64, storage_type.clone()); + let masked = self.alloc_pseudo(); + self.emit(Instruction::binop( + Opcode::And, + masked, + shifted, + mask_val, + storage_type.clone(), + )); + + // 4. Sign extend if this is a signed bitfield + if !typ.is_unsigned() && bit_width < storage_size * 8 { + self.emit_sign_extend_bitfield(masked, bit_width, storage_size * 8) + } else { + masked + } + } + + /// Sign-extend a bitfield value from bit_width to target_bits + fn emit_sign_extend_bitfield( + &mut self, + value: PseudoId, + bit_width: u32, + target_bits: u32, + ) -> PseudoId { + // Sign extend by shifting left then arithmetic shifting right + let shift_amount = target_bits - bit_width; + let typ = Type::basic(TypeKind::Int); + + let shift_val = self.emit_const(shift_amount as i64, typ.clone()); + let shifted_left = self.alloc_pseudo(); + self.emit(Instruction::binop( + Opcode::Shl, + shifted_left, + value, + shift_val, + typ.clone(), + )); + + let result = self.alloc_pseudo(); + self.emit(Instruction::binop( + Opcode::Asr, + result, + shifted_left, + shift_val, + typ, + )); + result + } + + /// Emit code to store a value into a bitfield + fn emit_bitfield_store( + &mut self, + base: PseudoId, + byte_offset: usize, + bit_offset: u32, + bit_width: u32, + storage_size: u32, + new_value: PseudoId, + ) { + // Determine storage type based on storage unit size + let storage_type = match storage_size { + 1 => Type::with_modifiers(TypeKind::Char, TypeModifiers::UNSIGNED), + 2 => Type::with_modifiers(TypeKind::Short, TypeModifiers::UNSIGNED), + 4 => Type::with_modifiers(TypeKind::Int, TypeModifiers::UNSIGNED), + 8 => Type::with_modifiers(TypeKind::Long, TypeModifiers::UNSIGNED), + _ => Type::with_modifiers(TypeKind::Int, TypeModifiers::UNSIGNED), + }; + + // 1. Load current storage unit value + let old_val = self.alloc_pseudo(); + self.emit(Instruction::load( + old_val, + base, + byte_offset as i64, + storage_type.clone(), + )); + + // 2. Create mask for the bitfield bits: ~(((1 << width) - 1) << offset) + let field_mask = ((1u64 << bit_width) - 1) << bit_offset; + let clear_mask = !field_mask; + let clear_mask_val = self.emit_const(clear_mask as i64, storage_type.clone()); + + // 3. Clear the bitfield bits in old value + let cleared = self.alloc_pseudo(); + self.emit(Instruction::binop( + Opcode::And, + cleared, + old_val, + clear_mask_val, + storage_type.clone(), + )); + + // 4. Mask new value to bit_width and shift to position + let value_mask = (1u64 << bit_width) - 1; + let value_mask_val = self.emit_const(value_mask as i64, storage_type.clone()); + let masked_new = self.alloc_pseudo(); + self.emit(Instruction::binop( + Opcode::And, + masked_new, + new_value, + value_mask_val, + storage_type.clone(), + )); + + let positioned = if bit_offset > 0 { + let shift_val = self.emit_const(bit_offset as i64, Type::basic(TypeKind::Int)); + let positioned = self.alloc_pseudo(); + self.emit(Instruction::binop( + Opcode::Shl, + positioned, + masked_new, + shift_val, + storage_type.clone(), + )); + positioned + } else { + masked_new + }; + + // 5. OR cleared value with positioned new value + let combined = self.alloc_pseudo(); + self.emit(Instruction::binop( + Opcode::Or, + combined, + cleared, + positioned, + storage_type.clone(), + )); + + // 6. Store back + self.emit(Instruction::store( + combined, + base, + byte_offset as i64, + storage_type, + )); + } + + fn emit_unary(&mut self, op: UnaryOp, src: PseudoId, typ: Type) -> PseudoId { + let result = self.alloc_pseudo(); + let is_float = typ.is_float(); + + let opcode = match op { + UnaryOp::Neg => { + if is_float { + Opcode::FNeg + } else { + Opcode::Neg + } + } + UnaryOp::Not => { + // Logical not: compare with 0 + if is_float { + let zero = self.emit_fconst(0.0, typ.clone()); + self.emit(Instruction::binop(Opcode::FCmpOEq, result, src, zero, typ)); + } else { + let zero = self.emit_const(0, typ.clone()); + self.emit(Instruction::binop(Opcode::SetEq, result, src, zero, typ)); + } + return result; + } + UnaryOp::BitNot => Opcode::Not, + UnaryOp::AddrOf => { + return src; + } + UnaryOp::Deref => { + // Dereferencing a pointer-to-array gives an array, which is just an address + // (arrays decay to their first element's address) + if typ.kind == TypeKind::Array { + return src; + } + self.emit(Instruction::load(result, src, 0, typ)); + return result; + } + UnaryOp::PreInc => { + let is_ptr = typ.kind == TypeKind::Pointer; + let increment = if is_ptr { + let elem_type = typ + .base + .as_ref() + .map(|b| (**b).clone()) + .unwrap_or_else(|| Type::basic(TypeKind::Char)); + let elem_size = elem_type.size_bits() / 8; + self.emit_const(elem_size as i64, Type::basic(TypeKind::Long)) + } else if is_float { + self.emit_fconst(1.0, typ.clone()) + } else { + self.emit_const(1, typ.clone()) + }; + let opcode = if is_float { Opcode::FAdd } else { Opcode::Add }; + self.emit(Instruction::binop(opcode, result, src, increment, typ)); + return result; + } + UnaryOp::PreDec => { + let is_ptr = typ.kind == TypeKind::Pointer; + let decrement = if is_ptr { + let elem_type = typ + .base + .as_ref() + .map(|b| (**b).clone()) + .unwrap_or_else(|| Type::basic(TypeKind::Char)); + let elem_size = elem_type.size_bits() / 8; + self.emit_const(elem_size as i64, Type::basic(TypeKind::Long)) + } else if is_float { + self.emit_fconst(1.0, typ.clone()) + } else { + self.emit_const(1, typ.clone()) + }; + let opcode = if is_float { Opcode::FSub } else { Opcode::Sub }; + self.emit(Instruction::binop(opcode, result, src, decrement, typ)); + return result; + } + }; + + self.emit(Instruction::unop(opcode, result, src, typ)); + result + } + + fn emit_binary( + &mut self, + op: BinaryOp, + left: PseudoId, + right: PseudoId, + result_typ: Type, + operand_typ: Type, + ) -> PseudoId { + let result = self.alloc_pseudo(); + + let is_float = operand_typ.is_float(); + + let opcode = match op { + BinaryOp::Add => { + if is_float { + Opcode::FAdd + } else { + Opcode::Add + } + } + BinaryOp::Sub => { + if is_float { + Opcode::FSub + } else { + Opcode::Sub + } + } + BinaryOp::Mul => { + if is_float { + Opcode::FMul + } else { + Opcode::Mul + } + } + BinaryOp::Div => { + if is_float { + Opcode::FDiv + } else if operand_typ.is_unsigned() { + Opcode::DivU + } else { + Opcode::DivS + } + } + BinaryOp::Mod => { + // Modulo is not supported for floats in hardware - use fmod() library call + // For now, use integer modulo (semantic analysis should catch float % float) + if operand_typ.is_unsigned() { + Opcode::ModU + } else { + Opcode::ModS + } + } + BinaryOp::Lt => { + if is_float { + Opcode::FCmpOLt + } else if operand_typ.is_unsigned() { + Opcode::SetB + } else { + Opcode::SetLt + } + } + BinaryOp::Gt => { + if is_float { + Opcode::FCmpOGt + } else if operand_typ.is_unsigned() { + Opcode::SetA + } else { + Opcode::SetGt + } + } + BinaryOp::Le => { + if is_float { + Opcode::FCmpOLe + } else if operand_typ.is_unsigned() { + Opcode::SetBe + } else { + Opcode::SetLe + } + } + BinaryOp::Ge => { + if is_float { + Opcode::FCmpOGe + } else if operand_typ.is_unsigned() { + Opcode::SetAe + } else { + Opcode::SetGe + } + } + BinaryOp::Eq => { + if is_float { + Opcode::FCmpOEq + } else { + Opcode::SetEq + } + } + BinaryOp::Ne => { + if is_float { + Opcode::FCmpONe + } else { + Opcode::SetNe + } + } + // LogAnd and LogOr are handled earlier in linearize_expr via + // emit_logical_and/emit_logical_or for proper short-circuit evaluation + BinaryOp::LogAnd | BinaryOp::LogOr => { + unreachable!("LogAnd/LogOr should be handled in ExprKind::Binary") + } + BinaryOp::BitAnd => Opcode::And, + BinaryOp::BitOr => Opcode::Or, + BinaryOp::BitXor => Opcode::Xor, + BinaryOp::Shl => Opcode::Shl, + BinaryOp::Shr => { + // Logical shift for unsigned, arithmetic for signed + if operand_typ.is_unsigned() { + Opcode::Lsr + } else { + Opcode::Asr + } + } + }; + + // For comparison operations, use operand_typ to ensure correct size + // (comparisons produce int result but must operate at operand size) + let insn_typ = match opcode { + Opcode::SetEq + | Opcode::SetNe + | Opcode::SetLt + | Opcode::SetLe + | Opcode::SetGt + | Opcode::SetGe + | Opcode::SetB + | Opcode::SetBe + | Opcode::SetA + | Opcode::SetAe + | Opcode::FCmpOEq + | Opcode::FCmpONe + | Opcode::FCmpOLt + | Opcode::FCmpOLe + | Opcode::FCmpOGt + | Opcode::FCmpOGe => operand_typ, + _ => result_typ, + }; + self.emit(Instruction::binop(opcode, result, left, right, insn_typ)); + result + } + + fn emit_compare_zero(&mut self, val: PseudoId, operand_typ: &Type) -> PseudoId { + let result = self.alloc_pseudo(); + let zero = self.emit_const(0, operand_typ.clone()); + self.emit(Instruction::binop( + Opcode::SetNe, + result, + val, + zero, + operand_typ.clone(), + )); + result + } + + /// Emit short-circuit logical AND: a && b + /// If a is false, skip evaluation of b and return 0. + /// Otherwise, evaluate b and return (b != 0). + fn emit_logical_and(&mut self, left: &Expr, right: &Expr) -> PseudoId { + let result_typ = Type::basic(TypeKind::Int); + + // Create basic blocks + let eval_b_bb = self.alloc_bb(); + let merge_bb = self.alloc_bb(); + + // Evaluate LHS + let left_typ = self.expr_type(left); + let left_val = self.linearize_expr(left); + let left_bool = self.emit_compare_zero(left_val, &left_typ); + + // Emit the short-circuit value (0) BEFORE the branch, while still in LHS block + // This value will be used if we short-circuit (LHS is false) + let zero = self.emit_const(0, result_typ.clone()); + + // Get the block where LHS evaluation ended (may differ from initial block + // if LHS contains nested control flow) + let lhs_end_bb = self.current_bb.unwrap(); + + // Branch: if LHS is false, go to merge (result = 0); else evaluate RHS + self.emit(Instruction::cbr(left_bool, eval_b_bb, merge_bb)); + self.link_bb(lhs_end_bb, eval_b_bb); + self.link_bb(lhs_end_bb, merge_bb); + + // eval_b_bb: Evaluate RHS + self.switch_bb(eval_b_bb); + let right_typ = self.expr_type(right); + let right_val = self.linearize_expr(right); + let right_bool = self.emit_compare_zero(right_val, &right_typ); + + // Get the actual block where RHS evaluation ended (may differ from eval_b_bb + // if RHS contains nested control flow like another &&/||) + let rhs_end_bb = self.current_bb.unwrap(); + + // Branch to merge + self.emit(Instruction::br(merge_bb)); + self.link_bb(rhs_end_bb, merge_bb); + + // merge_bb: Create phi node to merge results + self.switch_bb(merge_bb); + + // Result is 0 if we came from lhs_end_bb (LHS was false), + // or right_bool if we came from rhs_end_bb (LHS was true) + let result = self.alloc_pseudo(); + let mut phi_insn = Instruction::phi(result, result_typ); + phi_insn.phi_list.push((lhs_end_bb, zero)); + phi_insn.phi_list.push((rhs_end_bb, right_bool)); + self.emit(phi_insn); + + result + } + + /// Emit short-circuit logical OR: a || b + /// If a is true, skip evaluation of b and return 1. + /// Otherwise, evaluate b and return (b != 0). + fn emit_logical_or(&mut self, left: &Expr, right: &Expr) -> PseudoId { + let result_typ = Type::basic(TypeKind::Int); + + // Create basic blocks + let eval_b_bb = self.alloc_bb(); + let merge_bb = self.alloc_bb(); + + // Evaluate LHS + let left_typ = self.expr_type(left); + let left_val = self.linearize_expr(left); + let left_bool = self.emit_compare_zero(left_val, &left_typ); + + // Emit the short-circuit value (1) BEFORE the branch, while still in LHS block + // This value will be used if we short-circuit (LHS is true) + let one = self.emit_const(1, result_typ.clone()); + + // Get the block where LHS evaluation ended (may differ from initial block + // if LHS contains nested control flow) + let lhs_end_bb = self.current_bb.unwrap(); + + // Branch: if LHS is true, go to merge (result = 1); else evaluate RHS + self.emit(Instruction::cbr(left_bool, merge_bb, eval_b_bb)); + self.link_bb(lhs_end_bb, merge_bb); + self.link_bb(lhs_end_bb, eval_b_bb); + + // eval_b_bb: Evaluate RHS + self.switch_bb(eval_b_bb); + let right_typ = self.expr_type(right); + let right_val = self.linearize_expr(right); + let right_bool = self.emit_compare_zero(right_val, &right_typ); + + // Get the actual block where RHS evaluation ended (may differ from eval_b_bb + // if RHS contains nested control flow like another &&/||) + let rhs_end_bb = self.current_bb.unwrap(); + + // Branch to merge + self.emit(Instruction::br(merge_bb)); + self.link_bb(rhs_end_bb, merge_bb); + + // merge_bb: Create phi node to merge results + self.switch_bb(merge_bb); + + // Result is 1 if we came from lhs_end_bb (LHS was true), + // or right_bool if we came from rhs_end_bb (LHS was false) + let result = self.alloc_pseudo(); + let mut phi_insn = Instruction::phi(result, result_typ); + phi_insn.phi_list.push((lhs_end_bb, one)); + phi_insn.phi_list.push((rhs_end_bb, right_bool)); + self.emit(phi_insn); + + result + } + + fn emit_assign(&mut self, op: AssignOp, target: &Expr, value: &Expr) -> PseudoId { + let rhs = self.linearize_expr(value); + let target_typ = self.expr_type(target); + let value_typ = self.expr_type(value); + + // Check for pointer compound assignment (p += n or p -= n) + let is_ptr_arith = target_typ.kind == TypeKind::Pointer + && value_typ.is_integer() + && (op == AssignOp::AddAssign || op == AssignOp::SubAssign); + + // Convert RHS to target type if needed (but not for pointer arithmetic) + let rhs = if is_ptr_arith { + // For pointer arithmetic, scale the integer by element size + let elem_type = target_typ + .base + .as_ref() + .map(|b| (**b).clone()) + .unwrap_or_else(|| Type::basic(TypeKind::Char)); + let elem_size = elem_type.size_bits() / 8; + let scale = self.emit_const(elem_size as i64, Type::basic(TypeKind::Long)); + + // Extend the integer to 64-bit for proper arithmetic + let rhs_extended = self.emit_convert(rhs, &value_typ, &Type::basic(TypeKind::Long)); + + let scaled = self.alloc_pseudo(); + let pseudo = Pseudo::reg(scaled, scaled.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + self.emit(Instruction::binop( + Opcode::Mul, + scaled, + rhs_extended, + scale, + Type::basic(TypeKind::Long), + )); + scaled + } else { + self.emit_convert(rhs, &value_typ, &target_typ) + }; + + let final_val = match op { + AssignOp::Assign => rhs, + _ => { + // Compound assignment - get current value and apply operation + let lhs = self.linearize_expr(target); + let result = self.alloc_pseudo(); + let pseudo = Pseudo::reg(result, result.0); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + + let is_float = target_typ.is_float(); + let opcode = match op { + AssignOp::AddAssign => { + if is_float { + Opcode::FAdd + } else { + Opcode::Add + } + } + AssignOp::SubAssign => { + if is_float { + Opcode::FSub + } else { + Opcode::Sub + } + } + AssignOp::MulAssign => { + if is_float { + Opcode::FMul + } else { + Opcode::Mul + } + } + AssignOp::DivAssign => { + if is_float { + Opcode::FDiv + } else if target_typ.is_unsigned() { + Opcode::DivU + } else { + Opcode::DivS + } + } + AssignOp::ModAssign => { + // Modulo not supported for floats + if target_typ.is_unsigned() { + Opcode::ModU + } else { + Opcode::ModS + } + } + AssignOp::AndAssign => Opcode::And, + AssignOp::OrAssign => Opcode::Or, + AssignOp::XorAssign => Opcode::Xor, + AssignOp::ShlAssign => Opcode::Shl, + AssignOp::ShrAssign => { + if target_typ.is_unsigned() { + Opcode::Lsr + } else { + Opcode::Asr + } + } + AssignOp::Assign => unreachable!(), + }; + + // For pointer arithmetic, use Long type for the operation + let arith_type = if is_ptr_arith { + Type::basic(TypeKind::Long) + } else { + target_typ.clone() + }; + self.emit(Instruction::binop(opcode, result, lhs, rhs, arith_type)); + result + } + }; + + // Store based on target expression type + match &target.kind { + ExprKind::Ident { name, .. } => { + if let Some(local) = self.locals.get(name).cloned() { + // Check if this is a static local (sentinel value) + if local.sym.0 == u32::MAX { + // Static local - look up the global name and emit store to global + let key = format!("{}.{}", self.current_func_name, name); + if let Some(static_info) = self.static_locals.get(&key).cloned() { + let sym_id = self.alloc_pseudo(); + let pseudo = Pseudo::sym(sym_id, static_info.global_name); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + self.emit(Instruction::store(final_val, sym_id, 0, target_typ.clone())); + } + } else { + // Regular local variable: emit Store + self.emit(Instruction::store( + final_val, + local.sym, + 0, + target_typ.clone(), + )); + } + } else if self.var_map.contains_key(name) { + // Parameter: this is not SSA-correct but parameters + // shouldn't be reassigned. If they are, we'd need to + // demote them to locals. For now, just update the mapping. + self.var_map.insert(name.clone(), final_val); + } else { + // Global variable - emit store + let sym_id = self.alloc_pseudo(); + let pseudo = Pseudo::sym(sym_id, name.clone()); + if let Some(func) = &mut self.current_func { + func.add_pseudo(pseudo); + } + self.emit(Instruction::store(final_val, sym_id, 0, target_typ.clone())); + } + } + ExprKind::Member { expr, member } => { + // Struct member: get address and store with offset + let base = self.linearize_lvalue(expr); + let struct_type = self.expr_type(expr); + let member_info = struct_type + .find_member(member) + .unwrap_or_else(|| MemberInfo { + offset: 0, + typ: target_typ.clone(), + bit_offset: None, + bit_width: None, + storage_unit_size: None, + }); + if let (Some(bit_offset), Some(bit_width), Some(storage_size)) = ( + member_info.bit_offset, + member_info.bit_width, + member_info.storage_unit_size, + ) { + // Bitfield store + self.emit_bitfield_store( + base, + member_info.offset, + bit_offset, + bit_width, + storage_size, + final_val, + ); + } else { + self.emit(Instruction::store( + final_val, + base, + member_info.offset as i64, + member_info.typ, + )); + } + } + ExprKind::Arrow { expr, member } => { + // Pointer member: pointer value is the base address + let ptr = self.linearize_expr(expr); + let ptr_type = self.expr_type(expr); + let struct_type = ptr_type + .base + .as_ref() + .map(|b| b.as_ref().clone()) + .unwrap_or_else(|| target_typ.clone()); + let member_info = struct_type + .find_member(member) + .unwrap_or_else(|| MemberInfo { + offset: 0, + typ: target_typ.clone(), + bit_offset: None, + bit_width: None, + storage_unit_size: None, + }); + if let (Some(bit_offset), Some(bit_width), Some(storage_size)) = ( + member_info.bit_offset, + member_info.bit_width, + member_info.storage_unit_size, + ) { + // Bitfield store + self.emit_bitfield_store( + ptr, + member_info.offset, + bit_offset, + bit_width, + storage_size, + final_val, + ); + } else { + self.emit(Instruction::store( + final_val, + ptr, + member_info.offset as i64, + member_info.typ, + )); + } + } + ExprKind::Unary { + op: UnaryOp::Deref, + operand, + } => { + // Dereference: store to the pointer address + let ptr = self.linearize_expr(operand); + self.emit(Instruction::store(final_val, ptr, 0, target_typ.clone())); + } + ExprKind::Index { array, index } => { + // Array subscript: compute address and store + // Handle commutative form: 0[arr] is equivalent to arr[0] + let array_type = self.expr_type(array); + let index_type = self.expr_type(index); + + let (ptr_expr, idx_expr, idx_type) = + if array_type.kind == TypeKind::Pointer || array_type.kind == TypeKind::Array { + (array, index, index_type) + } else { + // Swap: index is actually the pointer/array + (index, array, array_type) + }; + + let arr = self.linearize_expr(ptr_expr); + let idx = self.linearize_expr(idx_expr); + let elem_size = target_typ.size_bits() / 8; + let elem_size_val = self.emit_const(elem_size as i64, Type::basic(TypeKind::Long)); + + // Sign-extend index to 64-bit for proper pointer arithmetic (negative indices) + let idx_extended = self.emit_convert(idx, &idx_type, &Type::basic(TypeKind::Long)); + + let offset = self.alloc_pseudo(); + let ptr_typ = Type::basic(TypeKind::Long); + self.emit(Instruction::binop( + Opcode::Mul, + offset, + idx_extended, + elem_size_val, + ptr_typ.clone(), + )); + + let addr = self.alloc_pseudo(); + self.emit(Instruction::binop(Opcode::Add, addr, arr, offset, ptr_typ)); + + self.emit(Instruction::store(final_val, addr, 0, target_typ.clone())); + } + _ => { + // Other lvalues - should not happen for valid C code + } + } + + final_val + } +} + +// ============================================================================ +// Public API +// ============================================================================ + +/// Linearize an AST to IR (convenience wrapper for tests) +#[cfg(test)] +pub fn linearize(tu: &TranslationUnit, symbols: &SymbolTable) -> Module { + linearize_with_debug(tu, symbols, false, None) +} + +/// Linearize an AST to IR with debug info support +pub fn linearize_with_debug( + tu: &TranslationUnit, + symbols: &SymbolTable, + debug: bool, + source_file: Option<&str>, +) -> Module { + let mut linearizer = Linearizer::new(symbols); + let mut module = linearizer.linearize(tu); + module.debug = debug; + if let Some(path) = source_file { + module.source_files.push(path.to_string()); + } + module +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::parse::ast::{ExternalDecl, FunctionDef, Parameter}; + + /// Create a default position for test code + fn test_pos() -> Position { + Position { + stream: 0, + line: 1, + col: 1, + newline: false, + whitespace: false, + noexpand: false, + } + } + + fn test_linearize(tu: &TranslationUnit) -> Module { + let symbols = SymbolTable::new(); + linearize(tu, &symbols) + } + + fn make_simple_func(name: &str, body: Stmt) -> FunctionDef { + FunctionDef { + return_type: Type::basic(TypeKind::Int), + name: name.to_string(), + params: vec![], + body, + pos: test_pos(), + } + } + + #[test] + fn test_linearize_empty_function() { + let func = make_simple_func("test", Stmt::Block(vec![])); + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = test_linearize(&tu); + assert_eq!(module.functions.len(), 1); + assert_eq!(module.functions[0].name, "test"); + assert!(!module.functions[0].blocks.is_empty()); + } + + #[test] + fn test_linearize_return() { + let func = make_simple_func("test", Stmt::Return(Some(Expr::int(42)))); + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = test_linearize(&tu); + let ir = format!("{}", module); + assert!(ir.contains("ret")); + } + + #[test] + fn test_linearize_if() { + let func = make_simple_func( + "test", + Stmt::If { + cond: Expr::int(1), + then_stmt: Box::new(Stmt::Return(Some(Expr::int(1)))), + else_stmt: Some(Box::new(Stmt::Return(Some(Expr::int(0))))), + }, + ); + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = test_linearize(&tu); + let ir = format!("{}", module); + assert!(ir.contains("cbr")); // Conditional branch + } + + #[test] + fn test_linearize_while() { + let func = make_simple_func( + "test", + Stmt::While { + cond: Expr::int(1), + body: Box::new(Stmt::Break), + }, + ); + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = test_linearize(&tu); + assert!(module.functions[0].blocks.len() >= 3); // cond, body, exit + } + + #[test] + fn test_linearize_for() { + // for (int i = 0; i < 10; i++) { } + let int_type = Type::basic(TypeKind::Int); + let i_var = Expr::var_typed("i", int_type.clone()); + let func = make_simple_func( + "test", + Stmt::For { + init: Some(ForInit::Declaration(Declaration { + declarators: vec![crate::parse::ast::InitDeclarator { + name: "i".to_string(), + typ: int_type.clone(), + init: Some(Expr::int(0)), + }], + })), + cond: Some(Expr::binary(BinaryOp::Lt, i_var.clone(), Expr::int(10))), + post: Some(Expr::typed( + ExprKind::PostInc(Box::new(i_var)), + int_type, + test_pos(), + )), + body: Box::new(Stmt::Empty), + }, + ); + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = test_linearize(&tu); + assert!(module.functions[0].blocks.len() >= 4); // entry, cond, body, post, exit + } + + #[test] + fn test_linearize_binary_expr() { + // return 1 + 2 * 3; + let func = make_simple_func( + "test", + Stmt::Return(Some(Expr::binary( + BinaryOp::Add, + Expr::int(1), + Expr::binary(BinaryOp::Mul, Expr::int(2), Expr::int(3)), + ))), + ); + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = test_linearize(&tu); + let ir = format!("{}", module); + assert!(ir.contains("mul")); + assert!(ir.contains("add")); + } + + #[test] + fn test_linearize_function_with_params() { + let int_type = Type::basic(TypeKind::Int); + let func = FunctionDef { + return_type: int_type.clone(), + name: "add".to_string(), + params: vec![ + Parameter { + name: Some("a".to_string()), + typ: int_type.clone(), + }, + Parameter { + name: Some("b".to_string()), + typ: int_type.clone(), + }, + ], + body: Stmt::Return(Some(Expr::binary( + BinaryOp::Add, + Expr::var_typed("a", int_type.clone()), + Expr::var_typed("b", int_type), + ))), + pos: test_pos(), + }; + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = test_linearize(&tu); + let ir = format!("{}", module); + assert!(ir.contains("add")); + assert!(ir.contains("%a")); + assert!(ir.contains("%b")); + } + + #[test] + fn test_linearize_call() { + let func = make_simple_func( + "test", + Stmt::Return(Some(Expr::call( + Expr::var("foo"), + vec![Expr::int(1), Expr::int(2)], + ))), + ); + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = test_linearize(&tu); + let ir = format!("{}", module); + assert!(ir.contains("call")); + assert!(ir.contains("foo")); + } + + #[test] + fn test_linearize_comparison() { + let func = make_simple_func( + "test", + Stmt::Return(Some(Expr::binary(BinaryOp::Lt, Expr::int(1), Expr::int(2)))), + ); + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = test_linearize(&tu); + let ir = format!("{}", module); + assert!(ir.contains("setlt")); + } + + #[test] + fn test_linearize_unsigned_comparison() { + use crate::types::TypeModifiers; + + // Create unsigned comparison: (unsigned)1 < (unsigned)2 + let uint_type = Type::with_modifiers(TypeKind::Int, TypeModifiers::UNSIGNED); + let mut left = Expr::int(1); + left.typ = Some(uint_type.clone()); + let mut right = Expr::int(2); + right.typ = Some(uint_type); + let mut cmp = Expr::binary(BinaryOp::Lt, left, right); + cmp.typ = Some(Type::basic(TypeKind::Int)); + + let func = make_simple_func("test", Stmt::Return(Some(cmp))); + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = test_linearize(&tu); + let ir = format!("{}", module); + // Should use unsigned comparison opcode (setb = set if below) + assert!( + ir.contains("setb"), + "Expected 'setb' for unsigned comparison, got:\n{}", + ir + ); + } + + #[test] + fn test_display_module() { + let func = make_simple_func("main", Stmt::Return(Some(Expr::int(0)))); + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = test_linearize(&tu); + let ir = format!("{}", module); + + // Should have proper structure + assert!(ir.contains("define")); + assert!(ir.contains("main")); + assert!(ir.contains(".L0:")); // Entry block label + assert!(ir.contains("ret")); + } + + #[test] + fn test_type_propagation_expr_type() { + use crate::types::TypeModifiers; + + // Create an expression with a type annotation + let mut expr = Expr::int(42); + // Simulate type evaluation having set the type + expr.typ = Some(Type::basic(TypeKind::Int)); + + // Create linearizer and test that expr_type reads from the expression + let symbols = SymbolTable::new(); + let linearizer = Linearizer::new(&symbols); + let typ = linearizer.expr_type(&expr); + assert_eq!(typ.kind, TypeKind::Int); + + // Test with unsigned type + let mut unsigned_expr = Expr::int(42); + unsigned_expr.typ = Some(Type::with_modifiers(TypeKind::Int, TypeModifiers::UNSIGNED)); + let typ = linearizer.expr_type(&unsigned_expr); + assert!(typ.is_unsigned()); + } + + #[test] + fn test_type_propagation_double_literal() { + // Create a double literal + let mut expr = Expr::new(ExprKind::FloatLit(3.14), test_pos()); + expr.typ = Some(Type::basic(TypeKind::Double)); + + let symbols = SymbolTable::new(); + let linearizer = Linearizer::new(&symbols); + let typ = linearizer.expr_type(&expr); + assert_eq!(typ.kind, TypeKind::Double); + } + + // ======================================================================== + // SSA Conversion Tests + // ======================================================================== + + /// Helper to linearize without SSA conversion (for comparing before/after) + fn linearize_no_ssa(tu: &TranslationUnit) -> Module { + let symbols = SymbolTable::new(); + let mut linearizer = Linearizer::new_no_ssa(&symbols); + linearizer.linearize(tu) + } + + #[test] + fn test_local_var_emits_load_store() { + // int test() { int x = 1; return x; } + let int_type = Type::basic(TypeKind::Int); + let func = FunctionDef { + return_type: int_type.clone(), + name: "test".to_string(), + params: vec![], + body: Stmt::Block(vec![ + BlockItem::Declaration(Declaration { + declarators: vec![crate::parse::ast::InitDeclarator { + name: "x".to_string(), + typ: int_type.clone(), + init: Some(Expr::int(1)), + }], + }), + BlockItem::Statement(Stmt::Return(Some(Expr::var_typed("x", int_type)))), + ]), + pos: test_pos(), + }; + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + // Without SSA, should have store and load + let module = linearize_no_ssa(&tu); + let ir = format!("{}", module); + assert!( + ir.contains("store"), + "Should have store instruction before SSA: {}", + ir + ); + assert!( + ir.contains("load"), + "Should have load instruction before SSA: {}", + ir + ); + } + + #[test] + fn test_ssa_converts_local_to_phi() { + // int test(int cond) { + // int x = 1; + // if (cond) x = 2; + // return x; + // } + let int_type = Type::basic(TypeKind::Int); + + let func = FunctionDef { + return_type: int_type.clone(), + name: "test".to_string(), + params: vec![Parameter { + name: Some("cond".to_string()), + typ: int_type.clone(), + }], + body: Stmt::Block(vec![ + // int x = 1; + BlockItem::Declaration(Declaration { + declarators: vec![crate::parse::ast::InitDeclarator { + name: "x".to_string(), + typ: int_type.clone(), + init: Some(Expr::int(1)), + }], + }), + // if (cond) x = 2; + BlockItem::Statement(Stmt::If { + cond: Expr::var_typed("cond", int_type.clone()), + then_stmt: Box::new(Stmt::Expr(Expr::assign( + Expr::var_typed("x", int_type.clone()), + Expr::int(2), + ))), + else_stmt: None, + }), + // return x; + BlockItem::Statement(Stmt::Return(Some(Expr::var_typed("x", int_type)))), + ]), + pos: test_pos(), + }; + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + // With SSA, should have phi node at merge point + let module = test_linearize(&tu); + let ir = format!("{}", module); + + // Should have a phi instruction + assert!( + ir.contains("phi"), + "SSA should insert phi node at merge point: {}", + ir + ); + } + + #[test] + fn test_ssa_loop_variable() { + // int test() { + // int i = 0; + // while (i < 10) { i = i + 1; } + // return i; + // } + let int_type = Type::basic(TypeKind::Int); + + let i_var = || Expr::var_typed("i", int_type.clone()); + + let func = FunctionDef { + return_type: int_type.clone(), + name: "test".to_string(), + params: vec![], + body: Stmt::Block(vec![ + // int i = 0; + BlockItem::Declaration(Declaration { + declarators: vec![crate::parse::ast::InitDeclarator { + name: "i".to_string(), + typ: int_type.clone(), + init: Some(Expr::int(0)), + }], + }), + // while (i < 10) { i = i + 1; } + BlockItem::Statement(Stmt::While { + cond: Expr::binary(BinaryOp::Lt, i_var(), Expr::int(10)), + body: Box::new(Stmt::Expr(Expr::assign( + i_var(), + Expr::binary(BinaryOp::Add, i_var(), Expr::int(1)), + ))), + }), + // return i; + BlockItem::Statement(Stmt::Return(Some(i_var()))), + ]), + pos: test_pos(), + }; + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + // With SSA, should have phi node at loop header + let module = test_linearize(&tu); + let ir = format!("{}", module); + + // Loop should have a phi at the condition block + assert!(ir.contains("phi"), "Loop should have phi node: {}", ir); + } + + #[test] + fn test_short_circuit_and() { + // int test(int a, int b) { + // return a && b; + // } + // Short-circuit: if a is false, don't evaluate b + let int_type = Type::basic(TypeKind::Int); + + let func = FunctionDef { + return_type: int_type.clone(), + name: "test".to_string(), + params: vec![ + Parameter { + name: Some("a".to_string()), + typ: int_type.clone(), + }, + Parameter { + name: Some("b".to_string()), + typ: int_type.clone(), + }, + ], + body: Stmt::Return(Some(Expr::binary( + BinaryOp::LogAnd, + Expr::var_typed("a", int_type.clone()), + Expr::var_typed("b", int_type), + ))), + pos: test_pos(), + }; + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = test_linearize(&tu); + let ir = format!("{}", module); + + // Short-circuit AND should have: + // 1. A conditional branch (cbr) to skip evaluation of b if a is false + // 2. A phi node to merge the result + assert!( + ir.contains("cbr"), + "Short-circuit AND should have conditional branch: {}", + ir + ); + assert!( + ir.contains("phi"), + "Short-circuit AND should have phi node: {}", + ir + ); + } + + #[test] + fn test_short_circuit_or() { + // int test(int a, int b) { + // return a || b; + // } + // Short-circuit: if a is true, don't evaluate b + let int_type = Type::basic(TypeKind::Int); + + let func = FunctionDef { + return_type: int_type.clone(), + name: "test".to_string(), + params: vec![ + Parameter { + name: Some("a".to_string()), + typ: int_type.clone(), + }, + Parameter { + name: Some("b".to_string()), + typ: int_type.clone(), + }, + ], + body: Stmt::Return(Some(Expr::binary( + BinaryOp::LogOr, + Expr::var_typed("a", int_type.clone()), + Expr::var_typed("b", int_type), + ))), + pos: test_pos(), + }; + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = test_linearize(&tu); + let ir = format!("{}", module); + + // Short-circuit OR should have: + // 1. A conditional branch (cbr) to skip evaluation of b if a is true + // 2. A phi node to merge the result + assert!( + ir.contains("cbr"), + "Short-circuit OR should have conditional branch: {}", + ir + ); + assert!( + ir.contains("phi"), + "Short-circuit OR should have phi node: {}", + ir + ); + } +} diff --git a/cc/lower.rs b/cc/lower.rs new file mode 100644 index 000000000..f079bd572 --- /dev/null +++ b/cc/lower.rs @@ -0,0 +1,300 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Lowering passes for pcc C99 compiler +// +// This module contains IR-to-IR transformation passes that lower +// high-level IR constructs to forms suitable for code generation. +// +// Key passes: +// - Phi elimination: Converts SSA phi nodes to copy instructions +// + +use crate::ir::{BasicBlockId, Function, Instruction, Module, Opcode}; +use std::collections::HashMap; + +// ============================================================================ +// Phi Elimination +// ============================================================================ + +/// Eliminate phi nodes by inserting copy instructions in predecessor blocks. +/// +/// SSA phi nodes like: +/// %9 = phi.32 [.L0: %1], [.L2: %7] +/// +/// Are converted to: +/// - At end of .L0: %9 = copy %1 +/// - At end of .L2: %9 = copy %7 +/// - Original phi is converted to Nop +/// +/// This is a standard "naive" phi elimination. For better code quality, +/// a more sophisticated approach with parallel copies and coalescing +/// could be used, but this works correctly for all cases. +pub fn eliminate_phi_nodes(func: &mut Function) { + // Collect all phi information first to avoid borrowing issues + // Map: predecessor_bb -> Vec<(target, source, size)> + let mut copies_to_insert: HashMap> = HashMap::new(); + let mut phi_positions: Vec<(BasicBlockId, usize)> = Vec::new(); + + // Scan all blocks for phi nodes + for bb in &func.blocks { + for (insn_idx, insn) in bb.insns.iter().enumerate() { + if insn.op == Opcode::Phi { + if let Some(target) = insn.target { + let size = insn.size; + + // For each incoming edge in the phi + for (pred_bb, src_pseudo) in &insn.phi_list { + let copy_info = CopyInfo { + target, + source: *src_pseudo, + size, + }; + + copies_to_insert + .entry(*pred_bb) + .or_default() + .push(copy_info); + } + + // Record phi position for later removal + phi_positions.push((bb.id, insn_idx)); + } + } + } + } + + // Insert copy instructions at the end of predecessor blocks + for (pred_bb_id, copies) in copies_to_insert { + if let Some(pred_bb) = func.get_block_mut(pred_bb_id) { + // Insert copies before the terminator + // Note: If there are multiple copies, they should logically execute + // in parallel (this is the "lost copy" problem). For now, we insert + // them sequentially which works for non-overlapping cases. + // A proper solution would use parallel copy sequentialization. + for copy_info in copies { + let copy_insn = Instruction::new(Opcode::Copy) + .with_target(copy_info.target) + .with_src(copy_info.source) + .with_size(copy_info.size); + + pred_bb.insert_before_terminator(copy_insn); + } + } + } + + // Convert phi nodes to Nop (instead of removing to preserve indices) + // IMPORTANT: Keep phi_list AND target intact so codegen can use them for live interval computation. + // - phi_list contains which blocks phi sources come from (needed for back edges) + // - target is the phi target pseudo, which needs extended live intervals + for (bb_id, insn_idx) in phi_positions { + if let Some(bb) = func.get_block_mut(bb_id) { + if insn_idx < bb.insns.len() { + bb.insns[insn_idx].op = Opcode::Nop; + // Note: target and phi_list are intentionally NOT cleared - codegen needs them + bb.insns[insn_idx].src.clear(); + } + } + } +} + +/// Information needed to insert a copy instruction +#[derive(Debug, Clone)] +struct CopyInfo { + target: crate::ir::PseudoId, + source: crate::ir::PseudoId, + size: u32, +} + +// ============================================================================ +// Module-level lowering +// ============================================================================ + +/// Lower all functions in a module. +/// +/// This runs all lowering passes to prepare the IR for code generation. +pub fn lower_module(module: &mut Module) { + for func in &mut module.functions { + lower_function(func); + } +} + +/// Lower a single function. +/// +/// Runs: +/// 1. Phi elimination +pub fn lower_function(func: &mut Function) { + eliminate_phi_nodes(func); +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::ir::{BasicBlock, Instruction, Opcode, Pseudo, PseudoId}; + use crate::types::{Type, TypeKind}; + + fn make_loop_cfg() -> Function { + // Create a simple loop CFG: + // + // entry(0) + // | + // v + // cond(1) <--+ + // / \ | + // v v | + // body(2) exit(3) + // | + // +----------+ + // + // With phi node in cond block: + // %3 = phi [.L0: %1], [.L2: %2] + + let mut func = Function::new("test", Type::basic(TypeKind::Int)); + + // Entry block + let mut entry = BasicBlock::new(BasicBlockId(0)); + entry.children = vec![BasicBlockId(1)]; + entry.add_insn(Instruction::new(Opcode::Entry)); + // %1 = setval 0 (initial value) + let mut setval = Instruction::new(Opcode::SetVal); + setval.target = Some(PseudoId(1)); + setval.size = 32; + entry.add_insn(setval); + entry.add_insn(Instruction::br(BasicBlockId(1))); + + // Condition block with phi + let mut cond = BasicBlock::new(BasicBlockId(1)); + cond.parents = vec![BasicBlockId(0), BasicBlockId(2)]; + cond.children = vec![BasicBlockId(2), BasicBlockId(3)]; + + // Phi node: %3 = phi [.L0: %1], [.L2: %2] + let mut phi = Instruction::phi(PseudoId(3), Type::basic(TypeKind::Int)); + phi.phi_list = vec![ + (BasicBlockId(0), PseudoId(1)), + (BasicBlockId(2), PseudoId(2)), + ]; + cond.add_insn(phi); + cond.add_insn(Instruction::cbr( + PseudoId(3), + BasicBlockId(2), + BasicBlockId(3), + )); + + // Body block + let mut body = BasicBlock::new(BasicBlockId(2)); + body.parents = vec![BasicBlockId(1)]; + body.children = vec![BasicBlockId(1)]; + // %2 = add %3, 1 + let add = Instruction::binop( + Opcode::Add, + PseudoId(2), + PseudoId(3), + PseudoId(4), // constant 1 + Type::basic(TypeKind::Int), + ); + body.add_insn(add); + body.add_insn(Instruction::br(BasicBlockId(1))); + + // Exit block + let mut exit = BasicBlock::new(BasicBlockId(3)); + exit.parents = vec![BasicBlockId(1)]; + exit.add_insn(Instruction::ret(Some(PseudoId(3)))); + + func.entry = BasicBlockId(0); + func.blocks = vec![entry, cond, body, exit]; + + // Add pseudos + func.add_pseudo(Pseudo::reg(PseudoId(1), 1)); + func.add_pseudo(Pseudo::reg(PseudoId(2), 2)); + func.add_pseudo(Pseudo::phi(PseudoId(3), 3)); + func.add_pseudo(Pseudo::val(PseudoId(4), 1)); // constant 1 + + func + } + + #[test] + fn test_phi_elimination() { + let mut func = make_loop_cfg(); + + // Verify phi exists before elimination + let cond_before = func.get_block(BasicBlockId(1)).unwrap(); + assert!( + cond_before.insns.iter().any(|i| i.op == Opcode::Phi), + "Should have phi before elimination" + ); + + // Run phi elimination + eliminate_phi_nodes(&mut func); + + // Verify phi is now Nop + let cond_after = func.get_block(BasicBlockId(1)).unwrap(); + assert!( + !cond_after.insns.iter().any(|i| i.op == Opcode::Phi), + "Should not have phi after elimination" + ); + assert!( + cond_after.insns.iter().any(|i| i.op == Opcode::Nop), + "Should have Nop where phi was" + ); + + // Verify copies were inserted in predecessor blocks + let entry = func.get_block(BasicBlockId(0)).unwrap(); + let entry_copies: Vec<_> = entry + .insns + .iter() + .filter(|i| i.op == Opcode::Copy) + .collect(); + assert_eq!(entry_copies.len(), 1, "Entry should have 1 copy"); + assert_eq!(entry_copies[0].target, Some(PseudoId(3))); + assert_eq!(entry_copies[0].src, vec![PseudoId(1)]); + + let body = func.get_block(BasicBlockId(2)).unwrap(); + let body_copies: Vec<_> = body.insns.iter().filter(|i| i.op == Opcode::Copy).collect(); + assert_eq!(body_copies.len(), 1, "Body should have 1 copy"); + assert_eq!(body_copies[0].target, Some(PseudoId(3))); + assert_eq!(body_copies[0].src, vec![PseudoId(2)]); + } + + #[test] + fn test_copy_before_terminator() { + let mut func = make_loop_cfg(); + eliminate_phi_nodes(&mut func); + + // In entry block, copy should be before the branch + let entry = func.get_block(BasicBlockId(0)).unwrap(); + let last_idx = entry.insns.len() - 1; + assert_eq!( + entry.insns[last_idx].op, + Opcode::Br, + "Last instruction should be branch" + ); + assert_eq!( + entry.insns[last_idx - 1].op, + Opcode::Copy, + "Copy should be before branch" + ); + + // In body block, copy should be before the branch + let body = func.get_block(BasicBlockId(2)).unwrap(); + let last_idx = body.insns.len() - 1; + assert_eq!( + body.insns[last_idx].op, + Opcode::Br, + "Last instruction should be branch" + ); + assert_eq!( + body.insns[last_idx - 1].op, + Opcode::Copy, + "Copy should be before branch" + ); + } +} diff --git a/cc/main.rs b/cc/main.rs new file mode 100644 index 000000000..aa3d79770 --- /dev/null +++ b/cc/main.rs @@ -0,0 +1,369 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// pcc - A POSIX C99 compiler +// + +mod arch; +mod diag; +mod dominate; +mod ir; +mod linearize; +mod lower; +mod os; +mod parse; +mod ssa; +mod symbol; +mod target; +mod token; +mod types; + +use clap::Parser; +use gettextrs::{bind_textdomain_codeset, gettext, setlocale, textdomain, LocaleCategory}; +use std::fs::File; +use std::io::{self, BufReader, Read, Write}; +use std::path::Path; +use std::process::Command; + +use parse::Parser as CParser; +use symbol::SymbolTable; +use target::Target; +use token::{preprocess, show_token, token_type_name, StreamTable, Tokenizer}; + +// ============================================================================ +// CLI +// ============================================================================ + +#[derive(Parser)] +#[command(version, about = gettext("pcc - compile standard C programs"))] +struct Args { + /// Input files + #[arg(required_unless_present = "print_targets")] + files: Vec, + + /// Print registered targets + #[arg(long = "print-targets", help = gettext("Display available target architectures"))] + print_targets: bool, + + /// Dump tokens (for debugging tokenizer) + #[arg( + short = 'T', + long = "dump-tokens", + help = gettext("Dump tokens to stdout") + )] + dump_tokens: bool, + + /// Run preprocessor and dump result + #[arg(short = 'E', help = gettext("Preprocess only, output to stdout"))] + preprocess_only: bool, + + /// Dump AST (for debugging parser) + #[arg( + short = 'A', + long = "dump-ast", + help = gettext("Parse and dump AST to stdout") + )] + dump_ast: bool, + + /// Dump IR (for debugging linearizer) + #[arg(long = "dump-ir", help = gettext("Linearize and dump IR to stdout"))] + dump_ir: bool, + + /// Verbose output (include position info) + #[arg( + short = 'v', + long = "verbose", + help = gettext("Verbose output with position info") + )] + verbose: bool, + + /// Define a macro (-D name or -D name=value) + #[arg(short = 'D', action = clap::ArgAction::Append, value_name = "macro")] + defines: Vec, + + /// Undefine a macro + #[arg(short = 'U', action = clap::ArgAction::Append, value_name = "macro")] + undefines: Vec, + + /// Add include path + #[arg(short = 'I', action = clap::ArgAction::Append, value_name = "dir")] + include_paths: Vec, + + /// Compile and assemble, but do not link + #[arg(short = 'c', help = gettext("Compile and assemble, but do not link"))] + compile_only: bool, + + /// Compile only; do not assemble or link (output assembly) + #[arg(short = 'S', help = gettext("Compile only; output assembly"))] + asm_only: bool, + + /// Place output in file + #[arg(short = 'o', value_name = "file", help = gettext("Place output in file"))] + output: Option, + + /// Dump generated assembly (for debugging codegen) + #[arg(long = "dump-asm", help = gettext("Dump generated assembly to stdout"))] + dump_asm: bool, + + /// Generate debug information (DWARF) + #[arg(short = 'g', help = gettext("Generate debug information"))] + debug: bool, + + /// Disable CFI unwind tables (enabled by default) + #[arg(long = "fno-unwind-tables", help = gettext("Disable CFI unwind table generation"))] + no_unwind_tables: bool, + + /// Target triple (e.g., aarch64-apple-darwin, x86_64-unknown-linux-gnu) + #[arg(long = "target", value_name = "triple", help = gettext("Target triple for cross-compilation"))] + target: Option, +} + +fn process_file( + path: &str, + streams: &mut StreamTable, + args: &Args, + target: &Target, +) -> io::Result<()> { + // Read file (or stdin if path is "-") + let mut buffer = Vec::new(); + let display_path = if path == "-" { + io::stdin().read_to_end(&mut buffer)?; + "" + } else { + let file = File::open(path)?; + let mut reader = BufReader::new(file); + reader.read_to_end(&mut buffer)?; + path + }; + + // Create stream + let stream_id = streams.add(display_path.to_string()); + + // Tokenize + let mut tokenizer = Tokenizer::new(&buffer, stream_id); + let tokens = tokenizer.tokenize(); + + // Dump raw tokens if requested + if args.dump_tokens && !args.preprocess_only { + let idents = tokenizer.ident_table(); + for token in &tokens { + if args.verbose { + println!( + "{:>4}:{:<3} {:12} {}", + token.pos.line, + token.pos.col, + token_type_name(token.typ), + show_token(token, idents) + ); + } else { + let text = show_token(token, idents); + if !text.starts_with('<') { + print!("{} ", text); + } + } + } + if !args.verbose { + println!(); + } + return Ok(()); + } + + // Preprocess (may add new identifiers from included files) + let preprocessed = preprocess(tokens, target, tokenizer.ident_table_mut(), path); + let idents = tokenizer.ident_table(); + + if args.preprocess_only { + // Output preprocessed tokens + for token in &preprocessed { + if args.verbose { + println!( + "{:>4}:{:<3} {:12} {}", + token.pos.line, + token.pos.col, + token_type_name(token.typ), + show_token(token, idents) + ); + } else { + let text = show_token(token, idents); + if !text.starts_with('<') { + print!("{} ", text); + } + } + } + if !args.verbose { + println!(); + } + return Ok(()); + } + + // Create symbol table BEFORE parsing + // symbols are bound during parsing + let mut symbols = SymbolTable::new(); + + // Parse (this also binds symbols to the symbol table) + let mut parser = CParser::new(&preprocessed, idents, &mut symbols); + let ast = parser + .parse_translation_unit() + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, format!("parse error: {}", e)))?; + + if args.dump_ast { + println!("{:#?}", ast); + return Ok(()); + } + + // Linearize to IR + let mut module = + linearize::linearize_with_debug(&ast, &symbols, args.debug, Some(display_path)); + + if args.dump_ir { + print!("{}", module); + return Ok(()); + } + + // Lower IR (phi elimination, etc.) + lower::lower_module(&mut module); + + // Generate assembly + let emit_unwind_tables = !args.no_unwind_tables; + let mut codegen = + arch::codegen::create_codegen_with_options(target.clone(), emit_unwind_tables); + let asm = codegen.generate(&module); + + if args.dump_asm { + print!("{}", asm); + return Ok(()); + } + + // Determine output file names + // For stdin ("-"), use "stdin" as the default stem + let stem = if path == "-" { + "stdin" + } else { + let input_path = Path::new(path); + input_path + .file_stem() + .unwrap_or_default() + .to_str() + .unwrap_or("a") + }; + + if args.asm_only { + // Output assembly + let asm_file = args.output.clone().unwrap_or_else(|| format!("{}.s", stem)); + let mut file = File::create(&asm_file)?; + file.write_all(asm.as_bytes())?; + if args.verbose { + eprintln!("Wrote assembly to {}", asm_file); + } + return Ok(()); + } + + // Write temporary assembly file + let temp_asm = format!("/tmp/pcc_{}.s", std::process::id()); + { + let mut file = File::create(&temp_asm)?; + file.write_all(asm.as_bytes())?; + } + + if args.compile_only { + // Assemble to object file + let obj_file = args.output.clone().unwrap_or_else(|| format!("{}.o", stem)); + + let mut as_cmd = Command::new("as"); + if args.debug { + as_cmd.arg("-g"); + } + let status = as_cmd.args(["-o", &obj_file, &temp_asm]).status()?; + + // Clean up temp file + let _ = std::fs::remove_file(&temp_asm); + + if !status.success() { + return Err(io::Error::new(io::ErrorKind::Other, "assembler failed")); + } + + if args.verbose { + eprintln!("Wrote object file to {}", obj_file); + } + return Ok(()); + } + + // Full compile: assemble and link to executable + let exe_file = args.output.clone().unwrap_or_else(|| "a.out".to_string()); + + // Assemble + let temp_obj = format!("/tmp/pcc_{}.o", std::process::id()); + let mut as_cmd = Command::new("as"); + if args.debug { + as_cmd.arg("-g"); + } + let status = as_cmd.args(["-o", &temp_obj, &temp_asm]).status()?; + + let _ = std::fs::remove_file(&temp_asm); + + if !status.success() { + return Err(io::Error::new(io::ErrorKind::Other, "assembler failed")); + } + + // Link + let status = Command::new("cc") + .args(["-o", &exe_file, &temp_obj]) + .status()?; + + let _ = std::fs::remove_file(&temp_obj); + + if !status.success() { + return Err(io::Error::new(io::ErrorKind::Other, "linker failed")); + } + + if args.verbose { + eprintln!("Wrote executable to {}", exe_file); + } + + Ok(()) +} + +fn main() -> Result<(), Box> { + setlocale(LocaleCategory::LcAll, ""); + textdomain("posixutils-rs")?; + bind_textdomain_codeset("posixutils-rs", "UTF-8")?; + + let args = Args::parse(); + + // Handle --print-targets + if args.print_targets { + println!(" Registered Targets:"); + println!(" aarch64 - AArch64 (little endian)"); + println!(" x86-64 - 64-bit X86: EM64T and AMD64"); + return Ok(()); + } + + // Detect target (use --target if specified, otherwise detect host) + let target = if let Some(ref triple) = args.target { + match Target::from_triple(triple) { + Some(t) => t, + None => { + eprintln!("pcc: unsupported target: {}", triple); + std::process::exit(1); + } + } + } else { + Target::host() + }; + + let mut streams = StreamTable::new(); + + for path in &args.files { + if let Err(e) = process_file(path, &mut streams, &args, &target) { + eprintln!("pcc: {}: {}", path, e); + std::process::exit(1); + } + } + + Ok(()) +} diff --git a/cc/os/freebsd.rs b/cc/os/freebsd.rs new file mode 100644 index 000000000..db77c592c --- /dev/null +++ b/cc/os/freebsd.rs @@ -0,0 +1,31 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// FreeBSD-specific predefined macros +// + +/// Get FreeBSD-specific predefined macros +pub fn get_macros() -> Vec<(&'static str, Option<&'static str>)> { + vec![ + // FreeBSD identification + ("__FreeBSD__", Some("13")), // Conservative version + ("__FreeBSD_kernel__", Some("1")), + // ELF binary format + ("__ELF__", Some("1")), + // BSD compatibility + ("BSD", Some("199506")), + ("__BSD_VISIBLE", Some("1")), + // POSIX threads + ("_REENTRANT", Some("1")), + ] +} + +/// Get standard include paths for FreeBSD +pub fn get_include_paths() -> Vec<&'static str> { + vec!["/usr/local/include", "/usr/include"] +} diff --git a/cc/os/linux.rs b/cc/os/linux.rs new file mode 100644 index 000000000..196879886 --- /dev/null +++ b/cc/os/linux.rs @@ -0,0 +1,42 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Linux-specific predefined macros +// + +/// Get Linux-specific predefined macros +pub fn get_macros() -> Vec<(&'static str, Option<&'static str>)> { + vec![ + // Linux identification + ("__linux__", Some("1")), + ("__linux", Some("1")), + ("linux", Some("1")), + ("__gnu_linux__", Some("1")), + // ELF binary format + ("__ELF__", Some("1")), + // GNU C library (glibc) compatibility + ("__GLIBC__", Some("2")), + ("__GLIBC_MINOR__", Some("17")), // Conservative baseline + // Thread model + ("_REENTRANT", Some("1")), + // Feature test macros + ("_GNU_SOURCE", Some("1")), + ("_DEFAULT_SOURCE", Some("1")), + ("_XOPEN_SOURCE", Some("700")), + ("_XOPEN_SOURCE_EXTENDED", Some("1")), + ] +} + +/// Get standard include paths for Linux +pub fn get_include_paths() -> Vec<&'static str> { + vec![ + "/usr/local/include", + "/usr/include", + // Architecture-specific paths will be added based on target + ] +} diff --git a/cc/os/macos.rs b/cc/os/macos.rs new file mode 100644 index 000000000..078ce6bd6 --- /dev/null +++ b/cc/os/macos.rs @@ -0,0 +1,44 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// macOS-specific predefined macros +// + +/// Get macOS-specific predefined macros +pub fn get_macros() -> Vec<(&'static str, Option<&'static str>)> { + vec![ + // Darwin/macOS identification + ("__APPLE__", Some("1")), + ("__MACH__", Some("1")), + ("__DARWIN__", Some("1")), + // Mach-O binary format + ("__MACH_O__", Some("1")), + // BSD compatibility + ("__FreeBSD__", None), // Not defined + ("__NetBSD__", None), // Not defined + // Apple extensions + ("__APPLE_CC__", Some("1")), + // POSIX threads + ("_REENTRANT", Some("1")), + // Version - match current macOS version (26.0.0 = macOS Tahoe) + ( + "__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__", + Some("260000"), + ), + ("__ENVIRONMENT_OS_VERSION_MIN_REQUIRED__", Some("260000")), + ] +} + +/// Get standard include paths for macOS +pub fn get_include_paths() -> Vec<&'static str> { + vec![ + "/usr/local/include", + "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include", + "/usr/include", + ] +} diff --git a/cc/os/mod.rs b/cc/os/mod.rs new file mode 100644 index 000000000..1cae3b9e3 --- /dev/null +++ b/cc/os/mod.rs @@ -0,0 +1,53 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// OS-specific predefined macros +// + +pub mod freebsd; +pub mod linux; +pub mod macos; + +use crate::target::{Os, Target}; + +/// Get OS-specific predefined macros +pub fn get_os_macros(target: &Target) -> Vec<(&'static str, Option<&'static str>)> { + let mut macros = vec![ + // POSIX compliance + ("__STDC_HOSTED__", Some("1")), + ("_POSIX_SOURCE", Some("1")), + ("_POSIX_C_SOURCE", Some("200809L")), + // Unix-like + ("__unix__", Some("1")), + ("__unix", Some("1")), + ("unix", Some("1")), + ]; + + match target.os { + Os::Linux => { + macros.extend(linux::get_macros()); + } + Os::MacOS => { + macros.extend(macos::get_macros()); + } + Os::FreeBSD => { + macros.extend(freebsd::get_macros()); + } + } + + macros +} + +/// Get standard include paths for the OS +pub fn get_include_paths(target: &Target) -> Vec<&'static str> { + match target.os { + Os::Linux => linux::get_include_paths(), + Os::MacOS => macos::get_include_paths(), + Os::FreeBSD => freebsd::get_include_paths(), + } +} diff --git a/cc/parse/ast.rs b/cc/parse/ast.rs new file mode 100644 index 000000000..3ad5e5883 --- /dev/null +++ b/cc/parse/ast.rs @@ -0,0 +1,870 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Abstract Syntax Tree for pcc C99 compiler +// Based on sparse's AST representation +// + +use crate::diag::Position; +use crate::types::Type; + +// ============================================================================ +// Operators +// ============================================================================ + +/// Unary operators +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum UnaryOp { + /// Negation: -x + Neg, + /// Logical not: !x + Not, + /// Bitwise not: ~x + BitNot, + /// Address-of: &x + AddrOf, + /// Dereference: *x + Deref, + /// Pre-increment: ++x + PreInc, + /// Pre-decrement: --x + PreDec, +} + +/// Binary operators +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BinaryOp { + // Arithmetic + Add, + Sub, + Mul, + Div, + Mod, + + // Comparison + Lt, + Gt, + Le, + Ge, + Eq, + Ne, + + // Logical + LogAnd, + LogOr, + + // Bitwise + BitAnd, + BitOr, + BitXor, + Shl, + Shr, +} + +impl BinaryOp { + /// Check if this is a comparison operator (result type differs from operand type) + pub fn is_comparison(&self) -> bool { + matches!( + self, + BinaryOp::Lt | BinaryOp::Gt | BinaryOp::Le | BinaryOp::Ge | BinaryOp::Eq | BinaryOp::Ne + ) + } +} + +/// Assignment operators +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum AssignOp { + /// Simple assignment: = + Assign, + /// Add-assign: += + AddAssign, + /// Sub-assign: -= + SubAssign, + /// Mul-assign: *= + MulAssign, + /// Div-assign: /= + DivAssign, + /// Mod-assign: %= + ModAssign, + /// And-assign: &= + AndAssign, + /// Or-assign: |= + OrAssign, + /// Xor-assign: ^= + XorAssign, + /// Left-shift-assign: <<= + ShlAssign, + /// Right-shift-assign: >>= + ShrAssign, +} + +// ============================================================================ +// Expressions +// ============================================================================ + +/// An expression with type annotation (like sparse's expr->ctype) +/// +/// Following sparse's design, every expression carries its computed type. +/// The type is filled in during type evaluation (after parsing, before linearization). +#[derive(Debug, Clone)] +pub struct Expr { + /// The expression kind/variant + pub kind: ExprKind, + /// The computed type of this expression (like sparse's expr->ctype) + /// None before type evaluation, Some after + pub typ: Option, + /// Source position for debug info + pub pos: Position, +} + +impl Expr { + /// Create a new untyped expression with position + pub fn new(kind: ExprKind, pos: Position) -> Self { + Self { + kind, + typ: None, + pos, + } + } + + /// Create an expression with a known type and position + pub fn typed(kind: ExprKind, typ: Type, pos: Position) -> Self { + Self { + kind, + typ: Some(typ), + pos, + } + } + + /// Create a new untyped expression with default position (for tests) + #[cfg(test)] + pub fn new_unpositioned(kind: ExprKind) -> Self { + Self { + kind, + typ: None, + pos: Position::default(), + } + } + + /// Create an expression with a known type but default position (for tests) + #[cfg(test)] + pub fn typed_unpositioned(kind: ExprKind, typ: Type) -> Self { + Self { + kind, + typ: Some(typ), + pos: Position::default(), + } + } +} + +/// Expression kinds (variants) +#[derive(Debug, Clone)] +pub enum ExprKind { + /// Integer literal + IntLit(i64), + + /// Floating-point literal + FloatLit(f64), + + /// Character literal + CharLit(char), + + /// String literal + StringLit(String), + + /// Identifier (variable reference) + Ident { + name: String, + }, + + /// Unary operation + Unary { + op: UnaryOp, + operand: Box, + }, + + /// Binary operation + Binary { + op: BinaryOp, + left: Box, + right: Box, + }, + + /// Assignment operation + Assign { + op: AssignOp, + target: Box, + value: Box, + }, + + /// Postfix increment/decrement + PostInc(Box), + PostDec(Box), + + /// Conditional (ternary) expression: cond ? then : else + Conditional { + cond: Box, + then_expr: Box, + else_expr: Box, + }, + + /// Function call + Call { + func: Box, + args: Vec, + }, + + /// Member access: expr.member + Member { + expr: Box, + member: String, + }, + + /// Pointer member access: expr->member + Arrow { + expr: Box, + member: String, + }, + + /// Array subscript: array[index] + Index { + array: Box, + index: Box, + }, + + /// Type cast: (type)expr + Cast { + cast_type: Type, + expr: Box, + }, + + /// sizeof type: sizeof(int) + SizeofType(Type), + + /// sizeof expression: sizeof expr + SizeofExpr(Box), + + /// Comma expression: expr1, expr2 + Comma(Vec), + + /// Initializer list: {1, 2, 3} or {.x = 1, [0] = 2} + InitList { + elements: Vec, + }, + + // ========================================================================= + // Variadic function support (va_* builtins) + // ========================================================================= + /// __builtin_va_start(ap, last_param) + /// Initializes a va_list for use with variadic arguments + VaStart { + /// The va_list to initialize (lvalue) + ap: Box, + /// Name of the last named parameter before ... + last_param: String, + }, + + /// __builtin_va_arg(ap, type) + /// Retrieves the next argument from a va_list + VaArg { + /// The va_list to read from + ap: Box, + /// The type of argument to retrieve + arg_type: Type, + }, + + /// __builtin_va_end(ap) + /// Cleans up a va_list (often a no-op) + VaEnd { + /// The va_list to clean up + ap: Box, + }, + + /// __builtin_va_copy(dest, src) + /// Copies a va_list + VaCopy { + /// Destination va_list + dest: Box, + /// Source va_list + src: Box, + }, + + // ========================================================================= + // Byte-swapping builtins + // ========================================================================= + /// __builtin_bswap16(x) + /// Returns x with the order of bytes reversed (16-bit) + Bswap16 { + /// The value to byte-swap + arg: Box, + }, + + /// __builtin_bswap32(x) + /// Returns x with the order of bytes reversed (32-bit) + Bswap32 { + /// The value to byte-swap + arg: Box, + }, + + /// __builtin_bswap64(x) + /// Returns x with the order of bytes reversed (64-bit) + Bswap64 { + /// The value to byte-swap + arg: Box, + }, + + /// __builtin_alloca(size) + /// Allocates size bytes on the stack, returns pointer + Alloca { + /// The size to allocate in bytes + size: Box, + }, +} + +// ============================================================================ +// Initializer List Support (C99) +// ============================================================================ + +/// A designator for struct field or array index in an initializer +#[derive(Debug, Clone)] +pub enum Designator { + /// Field designator: .field_name + Field(String), + /// Index designator: [constant_expr] - evaluated at parse time + Index(i64), +} + +/// A single element in an initializer list +#[derive(Debug, Clone)] +pub struct InitElement { + /// Optional designator chain: .x, [0], .x[1].y, etc. + /// Empty for positional initialization + pub designators: Vec, + /// The initializer value (can be another InitList for nested) + pub value: Box, +} + +// Test-only helper constructors for AST nodes +#[cfg(test)] +use crate::types::TypeKind; + +#[cfg(test)] +impl Expr { + /// Create an integer literal (typed as int) - no position (for tests/internal use) + pub fn int(value: i64) -> Self { + Expr::typed_unpositioned(ExprKind::IntLit(value), Type::basic(TypeKind::Int)) + } + + /// Create a variable reference (untyped - needs type evaluation) - no position + pub fn var(name: &str) -> Self { + Expr::new_unpositioned(ExprKind::Ident { + name: name.to_string(), + }) + } + + /// Create a variable reference with a known type - no position + pub fn var_typed(name: &str, typ: Type) -> Self { + Expr::typed_unpositioned( + ExprKind::Ident { + name: name.to_string(), + }, + typ, + ) + } + + /// Create a binary expression (result type matches left operand for arithmetic) + pub fn binary(op: BinaryOp, left: Expr, right: Expr) -> Self { + // Derive type from operands - comparisons return int, arithmetic uses left type + let result_type = match op { + BinaryOp::Lt + | BinaryOp::Gt + | BinaryOp::Le + | BinaryOp::Ge + | BinaryOp::Eq + | BinaryOp::Ne + | BinaryOp::LogAnd + | BinaryOp::LogOr => Type::basic(TypeKind::Int), + _ => left + .typ + .clone() + .unwrap_or_else(|| Type::basic(TypeKind::Int)), + }; + let pos = left.pos; + Expr::typed( + ExprKind::Binary { + op, + left: Box::new(left), + right: Box::new(right), + }, + result_type, + pos, + ) + } + + /// Create a unary expression + pub fn unary(op: UnaryOp, operand: Expr) -> Self { + let result_type = match op { + UnaryOp::Not => Type::basic(TypeKind::Int), + _ => operand + .typ + .clone() + .unwrap_or_else(|| Type::basic(TypeKind::Int)), + }; + let pos = operand.pos; + Expr::typed( + ExprKind::Unary { + op, + operand: Box::new(operand), + }, + result_type, + pos, + ) + } + + /// Create an assignment (result type is target type) + pub fn assign(target: Expr, value: Expr) -> Self { + let result_type = target + .typ + .clone() + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + let pos = target.pos; + Expr::typed( + ExprKind::Assign { + op: AssignOp::Assign, + target: Box::new(target), + value: Box::new(value), + }, + result_type, + pos, + ) + } + + /// Create a function call (returns int by default - proper type needs evaluation) + pub fn call(func: Expr, args: Vec) -> Self { + let pos = func.pos; + Expr::typed( + ExprKind::Call { + func: Box::new(func), + args, + }, + Type::basic(TypeKind::Int), + pos, + ) + } +} + +// ============================================================================ +// Statements +// ============================================================================ + +/// A statement in the AST +#[derive(Debug, Clone)] +pub enum Stmt { + /// Empty statement: ; + Empty, + + /// Expression statement: expr; + Expr(Expr), + + /// Compound statement (block): { ... } + Block(Vec), + + /// If statement: if (cond) then_stmt [else else_stmt] + If { + cond: Expr, + then_stmt: Box, + else_stmt: Option>, + }, + + /// While loop: while (cond) body + While { cond: Expr, body: Box }, + + /// Do-while loop: do body while (cond); + DoWhile { body: Box, cond: Expr }, + + /// For loop: for (init; cond; post) body + For { + init: Option, + cond: Option, + post: Option, + body: Box, + }, + + /// Return statement: return [expr]; + Return(Option), + + /// Break statement + Break, + + /// Continue statement + Continue, + + /// Goto statement: goto label; + Goto(String), + + /// Labeled statement: label: stmt + Label { name: String, stmt: Box }, + + /// Switch statement: switch (expr) { cases } + Switch { expr: Expr, body: Box }, + + /// Case label: case expr: (within switch body) + Case(Expr), + + /// Default label (within switch body) + Default, +} + +/// Initializer for a for loop (can be declaration or expression) +#[derive(Debug, Clone)] +pub enum ForInit { + /// Declaration: for (int i = 0; ...) + Declaration(Declaration), + /// Expression: for (i = 0; ...) + Expression(Expr), +} + +/// An item in a compound statement (block) +#[derive(Debug, Clone)] +pub enum BlockItem { + Declaration(Declaration), + Statement(Stmt), +} + +// ============================================================================ +// Declarations +// ============================================================================ + +/// A declaration +#[derive(Debug, Clone)] +pub struct Declaration { + /// List of declarators + pub declarators: Vec, +} + +/// A single declarator with optional initializer +#[derive(Debug, Clone)] +pub struct InitDeclarator { + /// The name being declared + pub name: String, + /// The complete type (after applying declarator modifiers) + pub typ: Type, + /// Optional initializer + pub init: Option, +} + +#[cfg(test)] +impl Declaration { + /// Create a simple declaration with one variable + pub fn simple(name: &str, typ: Type, init: Option) -> Self { + Declaration { + declarators: vec![InitDeclarator { + name: name.to_string(), + typ, + init, + }], + } + } +} + +// ============================================================================ +// Function Definition +// ============================================================================ + +/// A function parameter +#[derive(Debug, Clone)] +pub struct Parameter { + pub name: Option, + pub typ: Type, +} + +/// A function definition +#[derive(Debug, Clone)] +pub struct FunctionDef { + /// Return type + pub return_type: Type, + /// Function name + pub name: String, + /// Parameters + pub params: Vec, + /// Function body + pub body: Stmt, + /// Source position of function definition (for debug info) + pub pos: Position, +} + +// ============================================================================ +// Translation Unit +// ============================================================================ + +/// An external declaration (top-level item) +#[derive(Debug, Clone)] +pub enum ExternalDecl { + /// Function definition + FunctionDef(FunctionDef), + /// Variable/type declaration + Declaration(Declaration), +} + +/// A translation unit (entire source file) +#[derive(Debug, Clone)] +pub struct TranslationUnit { + pub items: Vec, +} + +impl TranslationUnit { + pub fn new() -> Self { + TranslationUnit { items: Vec::new() } + } + + pub fn add(&mut self, item: ExternalDecl) { + self.items.push(item); + } +} + +impl Default for TranslationUnit { + fn default() -> Self { + Self::new() + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::TypeKind; + + #[test] + fn test_int_literal() { + let expr = Expr::int(42); + match expr.kind { + ExprKind::IntLit(v) => assert_eq!(v, 42), + _ => panic!("Expected IntLit"), + } + } + + #[test] + fn test_binary_expr() { + // 1 + 2 + let expr = Expr::binary(BinaryOp::Add, Expr::int(1), Expr::int(2)); + + match expr.kind { + ExprKind::Binary { op, left, right } => { + assert_eq!(op, BinaryOp::Add); + match left.kind { + ExprKind::IntLit(v) => assert_eq!(v, 1), + _ => panic!("Expected IntLit"), + } + match right.kind { + ExprKind::IntLit(v) => assert_eq!(v, 2), + _ => panic!("Expected IntLit"), + } + } + _ => panic!("Expected Binary"), + } + } + + #[test] + fn test_nested_binary() { + // 1 + 2 * 3 (represented as 1 + (2 * 3)) + let mul = Expr::binary(BinaryOp::Mul, Expr::int(2), Expr::int(3)); + let add = Expr::binary(BinaryOp::Add, Expr::int(1), mul); + + match add.kind { + ExprKind::Binary { op, left, right } => { + assert_eq!(op, BinaryOp::Add); + match left.kind { + ExprKind::IntLit(v) => assert_eq!(v, 1), + _ => panic!("Expected IntLit"), + } + match right.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Mul), + _ => panic!("Expected Binary"), + } + } + _ => panic!("Expected Binary"), + } + } + + #[test] + fn test_unary_expr() { + // -x + let expr = Expr::unary(UnaryOp::Neg, Expr::var("x")); + + match expr.kind { + ExprKind::Unary { op, operand } => { + assert_eq!(op, UnaryOp::Neg); + match operand.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "x"), + _ => panic!("Expected Ident"), + } + } + _ => panic!("Expected Unary"), + } + } + + #[test] + fn test_assignment() { + // x = 5 + let expr = Expr::assign(Expr::var("x"), Expr::int(5)); + + match expr.kind { + ExprKind::Assign { op, target, value } => { + assert_eq!(op, AssignOp::Assign); + match target.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "x"), + _ => panic!("Expected Ident"), + } + match value.kind { + ExprKind::IntLit(v) => assert_eq!(v, 5), + _ => panic!("Expected IntLit"), + } + } + _ => panic!("Expected Assign"), + } + } + + #[test] + fn test_function_call() { + // foo(1, 2) + let expr = Expr::call(Expr::var("foo"), vec![Expr::int(1), Expr::int(2)]); + + match expr.kind { + ExprKind::Call { func, args } => { + match func.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "foo"), + _ => panic!("Expected Ident"), + } + assert_eq!(args.len(), 2); + } + _ => panic!("Expected Call"), + } + } + + #[test] + fn test_if_stmt() { + // if (x) return 1; + let stmt = Stmt::If { + cond: Expr::var("x"), + then_stmt: Box::new(Stmt::Return(Some(Expr::int(1)))), + else_stmt: None, + }; + + match stmt { + Stmt::If { + cond, + then_stmt, + else_stmt, + } => { + match cond.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "x"), + _ => panic!("Expected Ident"), + } + match *then_stmt { + Stmt::Return(Some(ref e)) => match e.kind { + ExprKind::IntLit(1) => {} + _ => panic!("Expected IntLit(1)"), + }, + _ => panic!("Expected Return"), + } + assert!(else_stmt.is_none()); + } + _ => panic!("Expected If"), + } + } + + #[test] + fn test_while_stmt() { + // while (x) x--; + let stmt = Stmt::While { + cond: Expr::var("x"), + body: Box::new(Stmt::Expr(Expr::new_unpositioned(ExprKind::PostDec( + Box::new(Expr::var("x")), + )))), + }; + + match stmt { + Stmt::While { cond, body } => { + match cond.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "x"), + _ => panic!("Expected Ident"), + } + match *body { + Stmt::Expr(ref e) => match e.kind { + ExprKind::PostDec(_) => {} + _ => panic!("Expected PostDec"), + }, + _ => panic!("Expected Expr stmt"), + } + } + _ => panic!("Expected While"), + } + } + + #[test] + fn test_declaration() { + // int x = 5; + let decl = Declaration::simple("x", Type::basic(TypeKind::Int), Some(Expr::int(5))); + + assert_eq!(decl.declarators.len(), 1); + assert_eq!(decl.declarators[0].name, "x"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Int); + assert!(decl.declarators[0].init.is_some()); + } + + #[test] + fn test_translation_unit() { + let mut tu = TranslationUnit::new(); + + // Add a declaration + let decl = Declaration::simple("x", Type::basic(TypeKind::Int), None); + tu.add(ExternalDecl::Declaration(decl)); + + assert_eq!(tu.items.len(), 1); + } + + #[test] + fn test_for_loop() { + // for (int i = 0; i < 10; i++) {} + let init = ForInit::Declaration(Declaration::simple( + "i", + Type::basic(TypeKind::Int), + Some(Expr::int(0)), + )); + let cond = Expr::binary(BinaryOp::Lt, Expr::var("i"), Expr::int(10)); + let post = Expr::new_unpositioned(ExprKind::PostInc(Box::new(Expr::var("i")))); + + let stmt = Stmt::For { + init: Some(init), + cond: Some(cond), + post: Some(post), + body: Box::new(Stmt::Block(vec![])), + }; + + match stmt { + Stmt::For { + init, + cond, + post, + body, + } => { + assert!(init.is_some()); + assert!(cond.is_some()); + assert!(post.is_some()); + match *body { + Stmt::Block(items) => assert!(items.is_empty()), + _ => panic!("Expected Block"), + } + } + _ => panic!("Expected For"), + } + } +} diff --git a/cc/parse/mod.rs b/cc/parse/mod.rs new file mode 100644 index 000000000..9c17cb2b8 --- /dev/null +++ b/cc/parse/mod.rs @@ -0,0 +1,16 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Parse module - AST and parser +// + +pub mod ast; +pub mod parser; + +// Re-export parser used by main.rs +pub use parser::Parser; diff --git a/cc/parse/parser.rs b/cc/parse/parser.rs new file mode 100644 index 000000000..09bc005f0 --- /dev/null +++ b/cc/parse/parser.rs @@ -0,0 +1,6599 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Parser for pcc C99 compiler +// Based on sparse's expression.c recursive descent design +// + +use super::ast::{ + AssignOp, BinaryOp, BlockItem, Declaration, Designator, Expr, ExprKind, ExternalDecl, ForInit, + FunctionDef, InitDeclarator, InitElement, Parameter, Stmt, TranslationUnit, UnaryOp, +}; +use crate::diag; +use crate::symbol::{Namespace, Symbol, SymbolTable}; +use crate::token::lexer::{IdentTable, Position, SpecialToken, Token, TokenType, TokenValue}; +use crate::types::{CompositeType, EnumConstant, StructMember, Type, TypeKind, TypeModifiers}; +use std::fmt; + +// ============================================================================ +// Parse Error +// ============================================================================ + +/// Parse error type +#[derive(Debug, Clone)] +pub struct ParseError { + pub message: String, + pub pos: Position, +} + +impl ParseError { + pub fn new(message: impl Into, pos: Position) -> Self { + Self { + message: message.into(), + pos, + } + } +} + +impl fmt::Display for ParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}:{}: {}", self.pos.line, self.pos.col, self.message) + } +} + +impl std::error::Error for ParseError {} + +pub type ParseResult = Result; + +// ============================================================================ +// GCC __attribute__ Support +// ============================================================================ + +/// An argument to a GCC __attribute__ +#[derive(Debug, Clone, PartialEq)] +pub enum AttributeArg { + /// Identifier argument (e.g., `noreturn`, `__printf__`) + Ident(String), + /// String literal argument (e.g., `"default"`) + String(String), + /// Integer argument (e.g., `16` in `aligned(16)`) + Int(i64), + /// Nested arguments (e.g., `__format__(__printf__, 1, 2)`) + Nested(Vec), +} + +/// A single GCC __attribute__ +#[derive(Debug, Clone)] +pub struct Attribute { + /// Attribute name (e.g., `packed`, `aligned`, `visibility`) + pub name: String, + /// Arguments to the attribute (may be empty) + pub args: Vec, +} + +impl Attribute { + pub fn new(name: impl Into) -> Self { + Self { + name: name.into(), + args: Vec::new(), + } + } + + pub fn with_args(name: impl Into, args: Vec) -> Self { + Self { + name: name.into(), + args, + } + } +} + +impl fmt::Display for Attribute { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.name)?; + if !self.args.is_empty() { + write!(f, "(")?; + for (i, arg) in self.args.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + match arg { + AttributeArg::Ident(s) => write!(f, "{}", s)?, + AttributeArg::String(s) => write!(f, "\"{}\"", s)?, + AttributeArg::Int(n) => write!(f, "{}", n)?, + AttributeArg::Nested(args) => { + for (j, a) in args.iter().enumerate() { + if j > 0 { + write!(f, ", ")?; + } + match a { + AttributeArg::Ident(s) => write!(f, "{}", s)?, + AttributeArg::String(s) => write!(f, "\"{}\"", s)?, + AttributeArg::Int(n) => write!(f, "{}", n)?, + AttributeArg::Nested(_) => write!(f, "...")?, + } + } + } + } + } + write!(f, ")")?; + } + Ok(()) + } +} + +/// A list of GCC __attribute__ declarations +#[derive(Debug, Clone, Default)] +pub struct AttributeList { + pub attrs: Vec, +} + +impl AttributeList { + pub fn new() -> Self { + Self { attrs: Vec::new() } + } + + pub fn push(&mut self, attr: Attribute) { + self.attrs.push(attr); + } +} + +impl fmt::Display for AttributeList { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.attrs.is_empty() { + return Ok(()); + } + write!(f, "__attribute__((")?; + for (i, attr) in self.attrs.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", attr)?; + } + write!(f, "))") + } +} + +// ============================================================================ +// Parser +// ============================================================================ + +/// C expression parser using recursive descent with precedence climbing +/// +/// Following sparse's design, the parser binds symbols to the symbol table +/// during parsing. This means that by the time parsing is complete, all +/// declared symbols are in the table with their types. +pub struct Parser<'a> { + /// Token stream + tokens: &'a [Token], + /// Identifier table for looking up names + idents: &'a IdentTable, + /// Symbol table for binding declarations (like sparse's bind_symbol) + symbols: &'a mut SymbolTable, + /// Current position in token stream + pos: usize, +} + +impl<'a> Parser<'a> { + /// Create a new parser with a symbol table + pub fn new(tokens: &'a [Token], idents: &'a IdentTable, symbols: &'a mut SymbolTable) -> Self { + Self { + tokens, + idents, + symbols, + pos: 0, + } + } + + // ======================================================================== + // Token Navigation + // ======================================================================== + + /// Get the current token + fn current(&self) -> &Token { + self.tokens + .get(self.pos) + .unwrap_or(&self.tokens[self.tokens.len() - 1]) + } + + /// Peek at the current token type + fn peek(&self) -> TokenType { + self.current().typ + } + + /// Peek at the current token's special value (if it's a Special token) + fn peek_special(&self) -> Option { + let token = self.current(); + if token.typ == TokenType::Special { + if let TokenValue::Special(v) = &token.value { + return Some(*v); + } + } + None + } + + /// Check if current token is a specific special character + fn is_special(&self, c: u8) -> bool { + self.peek_special() == Some(c as u32) + } + + /// Check if current token is a specific multi-char special token + fn is_special_token(&self, tok: SpecialToken) -> bool { + self.peek_special() == Some(tok as u32) + } + + /// Get current position for error messages + fn current_pos(&self) -> Position { + self.current().pos + } + + /// Advance to the next token + fn advance(&mut self) { + if self.pos < self.tokens.len() - 1 { + self.pos += 1; + } + } + + /// Consume a token and advance, returning a clone + fn consume(&mut self) -> Token { + let token = self.current().clone(); + self.advance(); + token + } + + /// Expect a specific special character, return error if not found + fn expect_special(&mut self, c: u8) -> ParseResult<()> { + if self.is_special(c) { + self.advance(); + Ok(()) + } else { + Err(ParseError::new( + format!("expected '{}'", c as char), + self.current_pos(), + )) + } + } + + /// Get the identifier name from an Ident token value + fn get_ident_name(&self, token: &Token) -> Option { + if let TokenValue::Ident(id) = &token.value { + self.idents.get(*id).map(|s| s.to_string()) + } else { + None + } + } + + /// Skip StreamBegin/StreamEnd tokens + pub fn skip_stream_tokens(&mut self) { + while self.peek() == TokenType::StreamBegin || self.peek() == TokenType::StreamEnd { + self.advance(); + } + } + + /// Check if we're at end of input + fn is_eof(&self) -> bool { + matches!(self.peek(), TokenType::Eof | TokenType::StreamEnd) + } + + /// Check if current token is __attribute__ or __attribute + fn is_attribute_keyword(&self) -> bool { + if self.peek() != TokenType::Ident { + return false; + } + if let Some(name) = self.get_ident_name(self.current()) { + name == "__attribute__" || name == "__attribute" + } else { + false + } + } + + /// Parse a single attribute argument + /// Returns None if not a recognizable argument + fn parse_attribute_arg(&mut self) -> Option { + match self.peek() { + TokenType::Ident => { + let name = self.get_ident_name(self.current())?; + self.advance(); + + // Check if this identifier has nested arguments + if self.is_special(b'(') { + self.advance(); + let mut nested = Vec::new(); + while !self.is_special(b')') && !self.is_eof() { + if let Some(arg) = self.parse_attribute_arg() { + nested.push(arg); + } + if self.is_special(b',') { + self.advance(); + } else if !self.is_special(b')') { + // Skip unknown tokens + self.advance(); + } + } + if self.is_special(b')') { + self.advance(); + } + // Return as nested with first element being the function name + let mut all_args = vec![AttributeArg::Ident(name)]; + all_args.extend(nested); + Some(AttributeArg::Nested(all_args)) + } else { + Some(AttributeArg::Ident(name)) + } + } + TokenType::String => { + if let TokenValue::String(s) = &self.current().value { + let s = s.clone(); + self.advance(); + Some(AttributeArg::String(s)) + } else { + None + } + } + TokenType::Number => { + if let TokenValue::Number(s) = &self.current().value { + // Parse the number string to i64 + let n = s.parse::().unwrap_or(0); + self.advance(); + Some(AttributeArg::Int(n)) + } else { + None + } + } + _ => None, + } + } + + /// Parse a single attribute: name or name(args) + fn parse_single_attribute(&mut self) -> Option { + if self.peek() != TokenType::Ident { + return None; + } + + let name = self.get_ident_name(self.current())?; + self.advance(); + + // Check for arguments + if self.is_special(b'(') { + self.advance(); + let mut args = Vec::new(); + + while !self.is_special(b')') && !self.is_eof() { + if let Some(arg) = self.parse_attribute_arg() { + args.push(arg); + } + if self.is_special(b',') { + self.advance(); + } else if !self.is_special(b')') { + // Skip unknown tokens + self.advance(); + } + } + + if self.is_special(b')') { + self.advance(); + } + + Some(Attribute::with_args(name, args)) + } else { + Some(Attribute::new(name)) + } + } + + /// Parse __attribute__((...)) declarations (GCC extension) + /// + /// Syntax: __attribute__((attr1, attr2(args), ...)) + /// Returns the parsed attributes. Currently a no-op for code generation, + /// but attributes are captured for diagnostics. + fn parse_attributes(&mut self) -> AttributeList { + let mut result = AttributeList::new(); + + while self.is_attribute_keyword() { + self.advance(); // consume __attribute__ + + // Expect first '(' + if !self.is_special(b'(') { + return result; + } + self.advance(); + + // Expect second '(' + if !self.is_special(b'(') { + return result; + } + self.advance(); + + // Parse comma-separated list of attributes + while !self.is_special(b')') && !self.is_eof() { + if let Some(attr) = self.parse_single_attribute() { + result.push(attr); + } + if self.is_special(b',') { + self.advance(); + } else if !self.is_special(b')') { + // Skip unknown tokens within attributes + self.advance(); + } + } + + // Consume first ')' + if self.is_special(b')') { + self.advance(); + } + + // Consume second ')' + if self.is_special(b')') { + self.advance(); + } + } + + result + } + + /// Skip __attribute__((...)) declarations (GCC extension, no-op) + /// Wrapper around parse_attributes that discards the result. + fn skip_attributes(&mut self) { + let _ = self.parse_attributes(); + } + + /// Check if current token is __asm or __asm__ + fn is_asm_keyword(&self) -> bool { + if self.peek() != TokenType::Ident { + return false; + } + if let Some(name) = self.get_ident_name(self.current()) { + name == "__asm__" || name == "__asm" || name == "asm" + } else { + false + } + } + + /// Skip __asm("...") or __asm__("...") declarations (GCC extension for symbol aliasing) + /// Used in declarations like: FILE *fopen(...) __asm("_fopen$DARWIN_EXTSN"); + fn skip_asm(&mut self) { + while self.is_asm_keyword() { + self.advance(); // consume __asm/__asm__ + + // Expect '(' + if !self.is_special(b'(') { + return; + } + self.advance(); // consume '(' + + // Skip contents until matching ')' + let mut depth = 1; + while depth > 0 && !self.is_eof() { + if self.is_special(b'(') { + depth += 1; + } else if self.is_special(b')') { + depth -= 1; + } + self.advance(); + } + } + } + + /// Skip both __attribute__ and __asm extensions + fn skip_extensions(&mut self) { + loop { + if self.is_attribute_keyword() { + self.skip_attributes(); + } else if self.is_asm_keyword() { + self.skip_asm(); + } else { + break; + } + } + } + + // ======================================================================== + // Expression Parsing - Precedence Chain + // + // From lowest to highest precedence: + // 1. comma (left-to-right) + // 2. assignment (right-to-left) + // 3. ternary/conditional (right-to-left) + // 4. logical-or (left-to-right) + // 5. logical-and (left-to-right) + // 6. bitwise-or (left-to-right) + // 7. bitwise-xor (left-to-right) + // 8. bitwise-and (left-to-right) + // 9. equality (left-to-right) + // 10. relational (left-to-right) + // 11. shift (left-to-right) + // 12. additive (left-to-right) + // 13. multiplicative (left-to-right) + // 14. unary (right-to-left) + // 15. postfix (left-to-right) + // 16. primary + // ======================================================================== + + /// Parse an expression (comma expression, lowest precedence) + pub fn parse_expression(&mut self) -> ParseResult { + self.parse_comma_expr() + } + + /// Parse a comma expression: expr, expr, ... + /// Result type is the type of the rightmost expression + fn parse_comma_expr(&mut self) -> ParseResult { + let mut expr = self.parse_assignment_expr()?; + + while self.is_special(b',') { + self.advance(); + let right = self.parse_assignment_expr()?; + // Comma expression type is the type of the rightmost expression + let result_typ = right.typ.clone(); + + // Build comma expression + let Expr { kind, typ, pos } = expr; + expr = match kind { + ExprKind::Comma(mut exprs) => { + exprs.push(right); + Expr { + kind: ExprKind::Comma(exprs), + typ: result_typ, + pos, + } + } + other => Expr { + kind: ExprKind::Comma(vec![ + Expr { + kind: other, + typ, + pos, + }, + right, + ]), + typ: result_typ, + pos, + }, + }; + } + + Ok(expr) + } + + /// Parse an assignment expression (right-to-left associative) + fn parse_assignment_expr(&mut self) -> ParseResult { + // Parse left side (could be lvalue for assignment) + let left = self.parse_conditional_expr()?; + + // Check for assignment operators + let op = match self.peek_special() { + Some(v) if v == b'=' as u32 => Some(AssignOp::Assign), + Some(v) if v == SpecialToken::AddAssign as u32 => Some(AssignOp::AddAssign), + Some(v) if v == SpecialToken::SubAssign as u32 => Some(AssignOp::SubAssign), + Some(v) if v == SpecialToken::MulAssign as u32 => Some(AssignOp::MulAssign), + Some(v) if v == SpecialToken::DivAssign as u32 => Some(AssignOp::DivAssign), + Some(v) if v == SpecialToken::ModAssign as u32 => Some(AssignOp::ModAssign), + Some(v) if v == SpecialToken::AndAssign as u32 => Some(AssignOp::AndAssign), + Some(v) if v == SpecialToken::OrAssign as u32 => Some(AssignOp::OrAssign), + Some(v) if v == SpecialToken::XorAssign as u32 => Some(AssignOp::XorAssign), + Some(v) if v == SpecialToken::ShlAssign as u32 => Some(AssignOp::ShlAssign), + Some(v) if v == SpecialToken::ShrAssign as u32 => Some(AssignOp::ShrAssign), + _ => None, + }; + + if let Some(assign_op) = op { + let assign_pos = self.current_pos(); + self.advance(); + + // Check if target is const (assignment to const is an error) + self.check_const_assignment(&left, assign_pos); + + // Right-to-left associativity: parse the right side as another assignment + let right = self.parse_assignment_expr()?; + Ok(Expr::new( + ExprKind::Assign { + op: assign_op, + target: Box::new(left), + value: Box::new(right), + }, + assign_pos, + )) + } else { + Ok(left) + } + } + + /// Parse an initializer (C99 6.7.8) + /// Can be either: + /// - assignment-expression + /// - { initializer-list } + /// - { initializer-list , } + fn parse_initializer(&mut self) -> ParseResult { + if self.is_special(b'{') { + self.parse_initializer_list() + } else { + self.parse_assignment_expr() + } + } + + /// Parse a brace-enclosed initializer list + fn parse_initializer_list(&mut self) -> ParseResult { + let list_pos = self.current_pos(); + self.expect_special(b'{')?; + + let mut elements = Vec::new(); + + // Handle empty initializer list: {} + if self.is_special(b'}') { + self.advance(); + return Ok(Expr::new(ExprKind::InitList { elements }, list_pos)); + } + + loop { + // Parse one initializer element (with optional designators) + let element = self.parse_init_element()?; + elements.push(element); + + // Check for comma or end + if self.is_special(b',') { + self.advance(); + // Trailing comma is allowed + if self.is_special(b'}') { + break; + } + } else { + break; + } + } + + self.expect_special(b'}')?; + Ok(Expr::new(ExprKind::InitList { elements }, list_pos)) + } + + /// Parse a single element of an initializer list + /// Can have designators: .field = value, [index] = value, or just value + fn parse_init_element(&mut self) -> ParseResult { + let mut designators = Vec::new(); + + // Parse designator chain: .field, [index], can be chained like .x[0].y + loop { + if self.is_special(b'.') { + // Field designator: .fieldname + self.advance(); + let name = self.expect_identifier()?; + designators.push(Designator::Field(name)); + } else if self.is_special(b'[') { + // Array index designator: [constant-expression] + self.advance(); + let index_expr = self.parse_conditional_expr()?; + self.expect_special(b']')?; + + // Evaluate to constant + let index = self.eval_const_expr(&index_expr).ok_or_else(|| { + ParseError::new( + "array designator index must be constant", + self.current_pos(), + ) + })?; + designators.push(Designator::Index(index)); + } else { + break; + } + } + + // If we had designators, expect '=' + if !designators.is_empty() { + self.expect_special(b'=')?; + } + + // Parse the initializer value (can be nested initializer list) + let value = self.parse_initializer()?; + + Ok(InitElement { + designators, + value: Box::new(value), + }) + } + + /// Parse a conditional (ternary) expression: cond ? then : else + fn parse_conditional_expr(&mut self) -> ParseResult { + let cond = self.parse_logical_or_expr()?; + + if self.is_special(b'?') { + self.advance(); + let then_expr = self.parse_expression()?; + self.expect_special(b':')?; + // Right-to-left: parse else as another conditional + let else_expr = self.parse_conditional_expr()?; + + // The result type is the common type of then and else branches + // For simplicity, use then_expr's type (proper impl would need type promotion) + let typ = then_expr + .typ + .clone() + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + + let pos = cond.pos; + Ok(Self::typed_expr( + ExprKind::Conditional { + cond: Box::new(cond), + then_expr: Box::new(then_expr), + else_expr: Box::new(else_expr), + }, + typ, + pos, + )) + } else { + Ok(cond) + } + } + + /// Parse logical-or: expr || expr + fn parse_logical_or_expr(&mut self) -> ParseResult { + let mut left = self.parse_logical_and_expr()?; + + while self.is_special_token(SpecialToken::LogicalOr) { + self.advance(); + let right = self.parse_logical_and_expr()?; + left = Self::make_binary(BinaryOp::LogOr, left, right); + } + + Ok(left) + } + + /// Parse logical-and: expr && expr + fn parse_logical_and_expr(&mut self) -> ParseResult { + let mut left = self.parse_bitwise_or_expr()?; + + while self.is_special_token(SpecialToken::LogicalAnd) { + self.advance(); + let right = self.parse_bitwise_or_expr()?; + left = Self::make_binary(BinaryOp::LogAnd, left, right); + } + + Ok(left) + } + + /// Parse bitwise-or: expr | expr + fn parse_bitwise_or_expr(&mut self) -> ParseResult { + let mut left = self.parse_bitwise_xor_expr()?; + + // | but not || + while self.is_special(b'|') && !self.is_special_token(SpecialToken::LogicalOr) { + self.advance(); + let right = self.parse_bitwise_xor_expr()?; + left = Self::make_binary(BinaryOp::BitOr, left, right); + } + + Ok(left) + } + + /// Parse bitwise-xor: expr ^ expr + fn parse_bitwise_xor_expr(&mut self) -> ParseResult { + let mut left = self.parse_bitwise_and_expr()?; + + while self.is_special(b'^') && !self.is_special_token(SpecialToken::XorAssign) { + self.advance(); + let right = self.parse_bitwise_and_expr()?; + left = Self::make_binary(BinaryOp::BitXor, left, right); + } + + Ok(left) + } + + /// Parse bitwise-and: expr & expr + fn parse_bitwise_and_expr(&mut self) -> ParseResult { + let mut left = self.parse_equality_expr()?; + + // & but not && + while self.is_special(b'&') && !self.is_special_token(SpecialToken::LogicalAnd) { + self.advance(); + let right = self.parse_equality_expr()?; + left = Self::make_binary(BinaryOp::BitAnd, left, right); + } + + Ok(left) + } + + /// Parse equality: expr == expr, expr != expr + fn parse_equality_expr(&mut self) -> ParseResult { + let mut left = self.parse_relational_expr()?; + + loop { + let op = if self.is_special_token(SpecialToken::Equal) { + Some(BinaryOp::Eq) + } else if self.is_special_token(SpecialToken::NotEqual) { + Some(BinaryOp::Ne) + } else { + None + }; + + if let Some(binary_op) = op { + self.advance(); + let right = self.parse_relational_expr()?; + left = Self::make_binary(binary_op, left, right); + } else { + break; + } + } + + Ok(left) + } + + /// Parse relational: expr < expr, expr > expr, expr <= expr, expr >= expr + fn parse_relational_expr(&mut self) -> ParseResult { + let mut left = self.parse_shift_expr()?; + + loop { + let op = if self.is_special_token(SpecialToken::Lte) { + Some(BinaryOp::Le) + } else if self.is_special_token(SpecialToken::Gte) { + Some(BinaryOp::Ge) + } else if self.is_special(b'<') && !self.is_special_token(SpecialToken::LeftShift) { + Some(BinaryOp::Lt) + } else if self.is_special(b'>') && !self.is_special_token(SpecialToken::RightShift) { + Some(BinaryOp::Gt) + } else { + None + }; + + if let Some(binary_op) = op { + self.advance(); + let right = self.parse_shift_expr()?; + left = Self::make_binary(binary_op, left, right); + } else { + break; + } + } + + Ok(left) + } + + /// Parse shift: expr << expr, expr >> expr + fn parse_shift_expr(&mut self) -> ParseResult { + let mut left = self.parse_additive_expr()?; + + loop { + let op = if self.is_special_token(SpecialToken::LeftShift) { + Some(BinaryOp::Shl) + } else if self.is_special_token(SpecialToken::RightShift) { + Some(BinaryOp::Shr) + } else { + None + }; + + if let Some(binary_op) = op { + self.advance(); + let right = self.parse_additive_expr()?; + left = Self::make_binary(binary_op, left, right); + } else { + break; + } + } + + Ok(left) + } + + /// Parse additive: expr + expr, expr - expr + fn parse_additive_expr(&mut self) -> ParseResult { + let mut left = self.parse_multiplicative_expr()?; + + loop { + let op = if self.is_special(b'+') && !self.is_special_token(SpecialToken::Increment) { + Some(BinaryOp::Add) + } else if self.is_special(b'-') && !self.is_special_token(SpecialToken::Decrement) { + Some(BinaryOp::Sub) + } else { + None + }; + + if let Some(binary_op) = op { + self.advance(); + let right = self.parse_multiplicative_expr()?; + left = Self::make_binary(binary_op, left, right); + } else { + break; + } + } + + Ok(left) + } + + /// Parse multiplicative: expr * expr, expr / expr, expr % expr + fn parse_multiplicative_expr(&mut self) -> ParseResult { + let mut left = self.parse_unary_expr()?; + + loop { + let op = if self.is_special(b'*') && !self.is_special_token(SpecialToken::MulAssign) { + Some(BinaryOp::Mul) + } else if self.is_special(b'/') && !self.is_special_token(SpecialToken::DivAssign) { + Some(BinaryOp::Div) + } else if self.is_special(b'%') && !self.is_special_token(SpecialToken::ModAssign) { + Some(BinaryOp::Mod) + } else { + None + }; + + if let Some(binary_op) = op { + self.advance(); + let right = self.parse_unary_expr()?; + left = Self::make_binary(binary_op, left, right); + } else { + break; + } + } + + Ok(left) + } + + /// Parse unary expression: ++x, --x, &x, *x, +x, -x, ~x, !x, sizeof + fn parse_unary_expr(&mut self) -> ParseResult { + // Check for prefix operators + if self.is_special_token(SpecialToken::Increment) { + let op_pos = self.current_pos(); + self.advance(); + let operand = self.parse_unary_expr()?; + // Check for const modification + self.check_const_assignment(&operand, op_pos); + // PreInc has same type as operand + let typ = operand + .typ + .clone() + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + return Ok(Self::typed_expr( + ExprKind::Unary { + op: UnaryOp::PreInc, + operand: Box::new(operand), + }, + typ, + op_pos, + )); + } + + if self.is_special_token(SpecialToken::Decrement) { + let op_pos = self.current_pos(); + self.advance(); + let operand = self.parse_unary_expr()?; + // Check for const modification + self.check_const_assignment(&operand, op_pos); + // PreDec has same type as operand + let typ = operand + .typ + .clone() + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + return Ok(Self::typed_expr( + ExprKind::Unary { + op: UnaryOp::PreDec, + operand: Box::new(operand), + }, + typ, + op_pos, + )); + } + + if self.is_special(b'&') && !self.is_special_token(SpecialToken::LogicalAnd) { + let op_pos = self.current_pos(); + self.advance(); + let operand = self.parse_unary_expr()?; + // AddrOf produces pointer to operand's type + let base_type = operand + .typ + .clone() + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + let ptr_type = Type::pointer(base_type); + return Ok(Self::typed_expr( + ExprKind::Unary { + op: UnaryOp::AddrOf, + operand: Box::new(operand), + }, + ptr_type, + op_pos, + )); + } + + if self.is_special(b'*') { + let op_pos = self.current_pos(); + self.advance(); + let operand = self.parse_unary_expr()?; + // Deref produces the base type of the pointer + let typ = operand + .typ + .as_ref() + .and_then(|t| t.base.as_ref().map(|b| (**b).clone())) + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + return Ok(Self::typed_expr( + ExprKind::Unary { + op: UnaryOp::Deref, + operand: Box::new(operand), + }, + typ, + op_pos, + )); + } + + if self.is_special(b'+') && !self.is_special_token(SpecialToken::Increment) { + self.advance(); + // Unary + is a no-op for numeric types, but we need to parse it + return self.parse_unary_expr(); + } + + if self.is_special(b'-') && !self.is_special_token(SpecialToken::Decrement) { + let op_pos = self.current_pos(); + self.advance(); + let operand = self.parse_unary_expr()?; + // Neg has same type as operand + let typ = operand + .typ + .clone() + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + return Ok(Self::typed_expr( + ExprKind::Unary { + op: UnaryOp::Neg, + operand: Box::new(operand), + }, + typ, + op_pos, + )); + } + + if self.is_special(b'~') { + let op_pos = self.current_pos(); + self.advance(); + let operand = self.parse_unary_expr()?; + // BitNot: C99 integer promotion - types smaller than int promote to int + let op_typ = operand + .typ + .clone() + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + // Apply integer promotion: _Bool, char, short -> int + let typ = if matches!( + op_typ.kind, + TypeKind::Bool | TypeKind::Char | TypeKind::Short + ) { + Type::basic(TypeKind::Int) + } else { + op_typ + }; + return Ok(Self::typed_expr( + ExprKind::Unary { + op: UnaryOp::BitNot, + operand: Box::new(operand), + }, + typ, + op_pos, + )); + } + + if self.is_special(b'!') { + let op_pos = self.current_pos(); + self.advance(); + let operand = self.parse_unary_expr()?; + // Logical not always produces int (0 or 1) + return Ok(Self::typed_expr( + ExprKind::Unary { + op: UnaryOp::Not, + operand: Box::new(operand), + }, + Type::basic(TypeKind::Int), + op_pos, + )); + } + + // sizeof + if self.peek() == TokenType::Ident { + if let Some(name) = self.get_ident_name(self.current()) { + if name == "sizeof" { + self.advance(); + return self.parse_sizeof(); + } + } + } + + // No unary operator, parse postfix + self.parse_postfix_expr() + } + + /// Parse sizeof expression + fn parse_sizeof(&mut self) -> ParseResult { + let sizeof_pos = self.current_pos(); + // sizeof returns size_t, which is unsigned long in our implementation + let size_t = Type::with_modifiers(TypeKind::Long, TypeModifiers::UNSIGNED); + + if self.is_special(b'(') { + // Could be sizeof(type) or sizeof(expr) + // For now, try to detect if it's a type + // This is a simplified check - full implementation needs type lookahead + self.advance(); // consume '(' + + // Try to parse as type first + if let Some(typ) = self.try_parse_type_name() { + self.expect_special(b')')?; + return Ok(Expr::typed(ExprKind::SizeofType(typ), size_t, sizeof_pos)); + } + + // Not a type, parse as expression + let expr = self.parse_expression()?; + self.expect_special(b')')?; + Ok(Expr::typed( + ExprKind::SizeofExpr(Box::new(expr)), + size_t, + sizeof_pos, + )) + } else { + // sizeof without parens - must be expression + let expr = self.parse_unary_expr()?; + Ok(Expr::typed( + ExprKind::SizeofExpr(Box::new(expr)), + size_t, + sizeof_pos, + )) + } + } + + /// Check if identifier is a type-starting keyword (for cast/sizeof disambiguation) + fn is_type_keyword(name: &str) -> bool { + matches!( + name, + "void" + | "_Bool" + | "char" + | "short" + | "int" + | "long" + | "float" + | "double" + | "signed" + | "unsigned" + | "const" + | "volatile" + | "struct" + | "union" + | "enum" + | "__builtin_va_list" + ) + } + + /// Parse a type name (required, returns error if not a type) + fn parse_type_name(&mut self) -> ParseResult { + self.try_parse_type_name() + .ok_or_else(|| ParseError::new("expected type name".to_string(), self.current_pos())) + } + + /// Try to parse a type name for casts and sizeof + /// Supports compound types like `unsigned char`, `long long`, pointers, etc. + fn try_parse_type_name(&mut self) -> Option { + if self.peek() != TokenType::Ident { + return None; + } + + // Check if this looks like a type name (keyword or typedef) + let name = self.get_ident_name(self.current())?; + if !Self::is_type_keyword(&name) && self.symbols.lookup_typedef(&name).is_none() { + // Not a type keyword and not a typedef + return None; + } + + // Parse type specifiers (similar to parse_type_specifier) + let mut modifiers = TypeModifiers::empty(); + let mut base_kind: Option = None; + let mut parsed_something = false; + + loop { + if self.peek() != TokenType::Ident { + break; + } + + let name = match self.get_ident_name(self.current()) { + Some(n) => n, + None => break, + }; + + match name.as_str() { + "const" => { + self.advance(); + modifiers |= TypeModifiers::CONST; + parsed_something = true; + } + "volatile" => { + self.advance(); + modifiers |= TypeModifiers::VOLATILE; + parsed_something = true; + } + "signed" => { + self.advance(); + modifiers |= TypeModifiers::SIGNED; + parsed_something = true; + } + "unsigned" => { + self.advance(); + modifiers |= TypeModifiers::UNSIGNED; + parsed_something = true; + } + "short" => { + self.advance(); + modifiers |= TypeModifiers::SHORT; + if base_kind.is_none() { + base_kind = Some(TypeKind::Short); + } + parsed_something = true; + } + "long" => { + self.advance(); + if modifiers.contains(TypeModifiers::LONG) { + modifiers |= TypeModifiers::LONGLONG; + base_kind = Some(TypeKind::LongLong); + } else { + modifiers |= TypeModifiers::LONG; + // long double case + if base_kind == Some(TypeKind::Double) { + base_kind = Some(TypeKind::LongDouble); + } else if base_kind.is_none() { + base_kind = Some(TypeKind::Long); + } + } + parsed_something = true; + } + "void" => { + self.advance(); + base_kind = Some(TypeKind::Void); + parsed_something = true; + } + "char" => { + self.advance(); + base_kind = Some(TypeKind::Char); + parsed_something = true; + } + "int" => { + self.advance(); + if base_kind.is_none() + || !matches!( + base_kind, + Some(TypeKind::Short) | Some(TypeKind::Long) | Some(TypeKind::LongLong) + ) + { + base_kind = Some(TypeKind::Int); + } + parsed_something = true; + } + "float" => { + self.advance(); + base_kind = Some(TypeKind::Float); + parsed_something = true; + } + "double" => { + self.advance(); + // Handle long double + if modifiers.contains(TypeModifiers::LONG) { + base_kind = Some(TypeKind::LongDouble); + } else { + base_kind = Some(TypeKind::Double); + } + parsed_something = true; + } + "_Bool" => { + self.advance(); + base_kind = Some(TypeKind::Bool); + parsed_something = true; + } + "__builtin_va_list" => { + self.advance(); + base_kind = Some(TypeKind::VaList); + parsed_something = true; + } + "struct" => { + // For cast to struct, parse struct tag + if let Ok(struct_type) = self.parse_struct_or_union_specifier(false) { + let mut typ = struct_type; + typ.modifiers |= modifiers; + // Handle pointer + let mut result = typ; + while self.is_special(b'*') { + self.advance(); + result = Type::pointer(result); + } + // Handle array declarators + while self.is_special(b'[') { + self.advance(); + if let Ok(size_expr) = self.parse_conditional_expr() { + if let Some(size) = self.eval_const_expr(&size_expr) { + result.array_size = Some(size as usize); + } + } + if !self.is_special(b']') { + return None; + } + self.advance(); + } + return Some(result); + } + return None; + } + "union" => { + if let Ok(union_type) = self.parse_struct_or_union_specifier(true) { + let mut typ = union_type; + typ.modifiers |= modifiers; + let mut result = typ; + while self.is_special(b'*') { + self.advance(); + result = Type::pointer(result); + } + // Handle array declarators + while self.is_special(b'[') { + self.advance(); + if let Ok(size_expr) = self.parse_conditional_expr() { + if let Some(size) = self.eval_const_expr(&size_expr) { + result.array_size = Some(size as usize); + } + } + if !self.is_special(b']') { + return None; + } + self.advance(); + } + return Some(result); + } + return None; + } + "enum" => { + if let Ok(enum_type) = self.parse_enum_specifier() { + let mut typ = enum_type; + typ.modifiers |= modifiers; + let mut result = typ; + while self.is_special(b'*') { + self.advance(); + result = Type::pointer(result); + } + // Handle array declarators + while self.is_special(b'[') { + self.advance(); + if let Ok(size_expr) = self.parse_conditional_expr() { + if let Some(size) = self.eval_const_expr(&size_expr) { + result.array_size = Some(size as usize); + } + } + if !self.is_special(b']') { + return None; + } + self.advance(); + } + return Some(result); + } + return None; + } + _ => { + // Check if it's a typedef name + if base_kind.is_none() { + if let Some(typedef_type) = self.symbols.lookup_typedef(&name).cloned() { + self.advance(); + let mut result = typedef_type; + result.modifiers |= modifiers; + // Handle pointer declarators + while self.is_special(b'*') { + self.advance(); + result = Type::pointer(result); + } + // Handle array declarators + while self.is_special(b'[') { + self.advance(); + if let Ok(size_expr) = self.parse_conditional_expr() { + if let Some(size) = self.eval_const_expr(&size_expr) { + result.array_size = Some(size as usize); + } + } + if !self.is_special(b']') { + return None; + } + self.advance(); + } + return Some(result); + } + } + break; + } + } + } + + if !parsed_something { + return None; + } + + // If we only have modifiers like `unsigned` without a base type, default to int + let kind = base_kind.unwrap_or(TypeKind::Int); + let mut typ = Type::with_modifiers(kind, modifiers); + + // Handle pointer declarators + while self.is_special(b'*') { + self.advance(); + typ = Type::pointer(typ); + } + + // Handle array declarators: int[10], char[20], etc. + while self.is_special(b'[') { + self.advance(); + if let Ok(size_expr) = self.parse_conditional_expr() { + if let Some(size) = self.eval_const_expr(&size_expr) { + typ.array_size = Some(size as usize); + } + } + if !self.is_special(b']') { + return None; + } + self.advance(); + } + + Some(typ) + } + + /// Parse postfix expression: x++, x--, x[i], x.member, x->member, x(args) + fn parse_postfix_expr(&mut self) -> ParseResult { + let mut expr = self.parse_primary_expr()?; + + loop { + // Preserve the position of the base expression for all postfix ops + let base_pos = expr.pos; + + if self.is_special_token(SpecialToken::Increment) { + let op_pos = self.current_pos(); + self.advance(); + // Check for const modification + self.check_const_assignment(&expr, op_pos); + // PostInc has same type as operand + let typ = expr + .typ + .clone() + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + expr = Self::typed_expr(ExprKind::PostInc(Box::new(expr)), typ, base_pos); + } else if self.is_special_token(SpecialToken::Decrement) { + let op_pos = self.current_pos(); + self.advance(); + // Check for const modification + self.check_const_assignment(&expr, op_pos); + // PostDec has same type as operand + let typ = expr + .typ + .clone() + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + expr = Self::typed_expr(ExprKind::PostDec(Box::new(expr)), typ, base_pos); + } else if self.is_special(b'[') { + // Array subscript + self.advance(); + let index = self.parse_expression()?; + self.expect_special(b']')?; + // Get element type from array/pointer type + let elem_type = expr + .typ + .as_ref() + .and_then(|t| t.base.as_ref()) + .map(|b| (**b).clone()) + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + expr = Self::typed_expr( + ExprKind::Index { + array: Box::new(expr), + index: Box::new(index), + }, + elem_type, + base_pos, + ); + } else if self.is_special(b'.') { + // Member access + self.advance(); + let member = self.expect_identifier()?; + // Get member type from struct type + let member_type = expr + .typ + .as_ref() + .and_then(|t| t.find_member(&member)) + .map(|info| info.typ) + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + expr = Self::typed_expr( + ExprKind::Member { + expr: Box::new(expr), + member, + }, + member_type, + base_pos, + ); + } else if self.is_special_token(SpecialToken::Arrow) { + // Pointer member access + self.advance(); + let member = self.expect_identifier()?; + // Get member type: dereference pointer to get struct, then find member + let member_type = expr + .typ + .as_ref() + .and_then(|t| t.base.as_ref()) + .and_then(|struct_type| struct_type.find_member(&member)) + .map(|info| info.typ) + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + expr = Self::typed_expr( + ExprKind::Arrow { + expr: Box::new(expr), + member, + }, + member_type, + base_pos, + ); + } else if self.is_special(b'(') { + // Function call + self.advance(); + let args = self.parse_argument_list()?; + self.expect_special(b')')?; + + // Get the return type from the function type + // The func expression should have type TypeKind::Function + // and the return type is stored in base + let return_type = expr + .typ + .as_ref() + .and_then(|t| { + if t.kind == TypeKind::Function { + t.base.as_ref().map(|b| (**b).clone()) + } else { + None + } + }) + .unwrap_or_else(|| Type::basic(TypeKind::Int)); // Default to int + + expr = Self::typed_expr( + ExprKind::Call { + func: Box::new(expr), + args, + }, + return_type, + base_pos, + ); + } else { + break; + } + } + + Ok(expr) + } + + /// Parse function argument list + fn parse_argument_list(&mut self) -> ParseResult> { + let mut args = Vec::new(); + + if self.is_special(b')') { + return Ok(args); + } + + loop { + // Parse assignment expression (not comma, as comma separates args) + args.push(self.parse_assignment_expr()?); + + if self.is_special(b',') { + self.advance(); + } else { + break; + } + } + + Ok(args) + } + + /// Expect and consume an identifier, returning its name + fn expect_identifier(&mut self) -> ParseResult { + if self.peek() != TokenType::Ident { + return Err(ParseError::new("expected identifier", self.current_pos())); + } + + let name = self + .get_ident_name(self.current()) + .ok_or_else(|| ParseError::new("invalid identifier", self.current_pos()))?; + + self.advance(); + Ok(name) + } + + /// Check if an expression is const and report error if assigning to it + fn check_const_assignment(&self, target: &Expr, pos: Position) { + // Check for assignment through pointer to const first: *p where p is const T* + if let ExprKind::Unary { + op: UnaryOp::Deref, + operand, + } = &target.kind + { + if let Some(ptr_type) = &operand.typ { + if let Some(base_type) = &ptr_type.base { + if base_type.modifiers.contains(TypeModifiers::CONST) { + diag::error(pos, "assignment of read-only location"); + return; // Don't duplicate with the general const check + } + } + } + } + + // Check if target type has CONST modifier (direct const variable) + if let Some(typ) = &target.typ { + if typ.modifiers.contains(TypeModifiers::CONST) { + // Get variable name if it's an identifier + let var_name = match &target.kind { + ExprKind::Ident { name } => format!(" '{}'", name), + _ => String::new(), + }; + diag::error( + pos, + &format!("assignment of read-only variable{}", var_name), + ); + } + } + } + + /// Parse primary expression: literals, identifiers, parenthesized expressions + /// Create a typed expression with position + fn typed_expr(kind: ExprKind, typ: Type, pos: Position) -> Expr { + Expr { + kind, + typ: Some(typ), + pos, + } + } + + /// Create a typed binary expression, computing result type from operands + fn make_binary(op: BinaryOp, left: Expr, right: Expr) -> Expr { + // Compute result type based on operator and operand types + let left_type = left + .typ + .clone() + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + let right_type = right + .typ + .clone() + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + + let result_type = match op { + // Comparison and logical operators always return int + BinaryOp::Eq + | BinaryOp::Ne + | BinaryOp::Lt + | BinaryOp::Gt + | BinaryOp::Le + | BinaryOp::Ge + | BinaryOp::LogAnd + | BinaryOp::LogOr => Type::basic(TypeKind::Int), + + // Arithmetic operators use usual arithmetic conversions + // But Add/Sub with pointers/arrays need special handling + BinaryOp::Add | BinaryOp::Sub => { + let left_is_ptr_or_arr = + left_type.kind == TypeKind::Pointer || left_type.kind == TypeKind::Array; + let right_is_ptr_or_arr = + right_type.kind == TypeKind::Pointer || right_type.kind == TypeKind::Array; + + if left_is_ptr_or_arr && right_type.is_integer() { + // ptr + int or arr + int -> pointer to element type + if left_type.kind == TypeKind::Array { + // Array decays to pointer + let elem_type = left_type + .base + .as_ref() + .map(|b| (**b).clone()) + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + Type::pointer(elem_type) + } else { + left_type.clone() + } + } else if left_type.is_integer() && right_is_ptr_or_arr && op == BinaryOp::Add { + // int + ptr or int + arr -> pointer to element type + if right_type.kind == TypeKind::Array { + let elem_type = right_type + .base + .as_ref() + .map(|b| (**b).clone()) + .unwrap_or_else(|| Type::basic(TypeKind::Int)); + Type::pointer(elem_type) + } else { + right_type.clone() + } + } else if left_is_ptr_or_arr && right_is_ptr_or_arr && op == BinaryOp::Sub { + // ptr - ptr -> ptrdiff_t (long) + Type::basic(TypeKind::Long) + } else { + Self::usual_arithmetic_conversions(&left_type, &right_type) + } + } + BinaryOp::Mul | BinaryOp::Div | BinaryOp::Mod => { + Self::usual_arithmetic_conversions(&left_type, &right_type) + } + + // Bitwise and shift operators use integer promotions + BinaryOp::BitAnd + | BinaryOp::BitOr + | BinaryOp::BitXor + | BinaryOp::Shl + | BinaryOp::Shr => Self::usual_arithmetic_conversions(&left_type, &right_type), + }; + + let pos = left.pos; + Self::typed_expr( + ExprKind::Binary { + op, + left: Box::new(left), + right: Box::new(right), + }, + result_type, + pos, + ) + } + + /// Compute usual arithmetic conversions (C99 6.3.1.8) + fn usual_arithmetic_conversions(left: &Type, right: &Type) -> Type { + use crate::types::TypeModifiers; + + // C99 6.3.1.8: Usual arithmetic conversions + // 1. If either is long double, result is long double + // 2. If either is double, result is double + // 3. If either is float, result is float + // 4. Otherwise, integer promotions apply + + if left.kind == TypeKind::LongDouble || right.kind == TypeKind::LongDouble { + Type::basic(TypeKind::LongDouble) + } else if left.kind == TypeKind::Double || right.kind == TypeKind::Double { + Type::basic(TypeKind::Double) + } else if left.kind == TypeKind::Float || right.kind == TypeKind::Float { + Type::basic(TypeKind::Float) + } else if left.kind == TypeKind::LongLong || right.kind == TypeKind::LongLong { + // If either is unsigned long long, result is unsigned long long + if left.is_unsigned() || right.is_unsigned() { + Type::with_modifiers(TypeKind::LongLong, TypeModifiers::UNSIGNED) + } else { + Type::basic(TypeKind::LongLong) + } + } else if left.kind == TypeKind::Long || right.kind == TypeKind::Long { + // If either is unsigned long, result is unsigned long + if left.is_unsigned() || right.is_unsigned() { + Type::with_modifiers(TypeKind::Long, TypeModifiers::UNSIGNED) + } else { + Type::basic(TypeKind::Long) + } + } else if left.is_unsigned() || right.is_unsigned() { + Type::with_modifiers(TypeKind::Int, TypeModifiers::UNSIGNED) + } else { + Type::basic(TypeKind::Int) + } + } + + fn parse_primary_expr(&mut self) -> ParseResult { + match self.peek() { + TokenType::Number => { + let token = self.consume(); + if let TokenValue::Number(s) = &token.value { + // Parse the number literal (returns typed expression) + self.parse_number_literal(s, token.pos) + } else { + Err(ParseError::new("invalid number token", token.pos)) + } + } + + TokenType::Ident => { + let token = self.consume(); + let token_pos = token.pos; + if let TokenValue::Ident(id) = &token.value { + let name = self.idents.get(*id).unwrap_or("").to_string(); + + // Check for varargs builtins that need special parsing + match name.as_str() { + "__builtin_va_start" => { + // __builtin_va_start(ap, last_param) + self.expect_special(b'(')?; + let ap = self.parse_assignment_expr()?; + self.expect_special(b',')?; + // Second arg is a parameter name + let last_param = self.expect_identifier()?; + self.expect_special(b')')?; + return Ok(Self::typed_expr( + ExprKind::VaStart { + ap: Box::new(ap), + last_param, + }, + Type::basic(TypeKind::Void), + token_pos, + )); + } + "__builtin_va_arg" => { + // __builtin_va_arg(ap, type) + self.expect_special(b'(')?; + let ap = self.parse_assignment_expr()?; + self.expect_special(b',')?; + // Second arg is a type + let arg_type = self.parse_type_name()?; + self.expect_special(b')')?; + return Ok(Self::typed_expr( + ExprKind::VaArg { + ap: Box::new(ap), + arg_type: arg_type.clone(), + }, + arg_type, + token_pos, + )); + } + "__builtin_va_end" => { + // __builtin_va_end(ap) + self.expect_special(b'(')?; + let ap = self.parse_assignment_expr()?; + self.expect_special(b')')?; + return Ok(Self::typed_expr( + ExprKind::VaEnd { ap: Box::new(ap) }, + Type::basic(TypeKind::Void), + token_pos, + )); + } + "__builtin_va_copy" => { + // __builtin_va_copy(dest, src) + self.expect_special(b'(')?; + let dest = self.parse_assignment_expr()?; + self.expect_special(b',')?; + let src = self.parse_assignment_expr()?; + self.expect_special(b')')?; + return Ok(Self::typed_expr( + ExprKind::VaCopy { + dest: Box::new(dest), + src: Box::new(src), + }, + Type::basic(TypeKind::Void), + token_pos, + )); + } + "__builtin_bswap16" => { + // __builtin_bswap16(x) - returns uint16_t + self.expect_special(b'(')?; + let arg = self.parse_assignment_expr()?; + self.expect_special(b')')?; + return Ok(Self::typed_expr( + ExprKind::Bswap16 { arg: Box::new(arg) }, + Type::with_modifiers(TypeKind::Short, TypeModifiers::UNSIGNED), + token_pos, + )); + } + "__builtin_bswap32" => { + // __builtin_bswap32(x) - returns uint32_t + self.expect_special(b'(')?; + let arg = self.parse_assignment_expr()?; + self.expect_special(b')')?; + return Ok(Self::typed_expr( + ExprKind::Bswap32 { arg: Box::new(arg) }, + Type::with_modifiers(TypeKind::Int, TypeModifiers::UNSIGNED), + token_pos, + )); + } + "__builtin_bswap64" => { + // __builtin_bswap64(x) - returns uint64_t + self.expect_special(b'(')?; + let arg = self.parse_assignment_expr()?; + self.expect_special(b')')?; + return Ok(Self::typed_expr( + ExprKind::Bswap64 { arg: Box::new(arg) }, + Type::with_modifiers(TypeKind::LongLong, TypeModifiers::UNSIGNED), + token_pos, + )); + } + "__builtin_alloca" => { + // __builtin_alloca(size) - returns void* + self.expect_special(b'(')?; + let size = self.parse_assignment_expr()?; + self.expect_special(b')')?; + return Ok(Self::typed_expr( + ExprKind::Alloca { + size: Box::new(size), + }, + Type::pointer(Type::basic(TypeKind::Void)), + token_pos, + )); + } + "__builtin_constant_p" => { + // __builtin_constant_p(expr) - returns 1 if expr is a constant, 0 otherwise + // This is evaluated at compile time, not runtime + self.expect_special(b'(')?; + let arg = self.parse_assignment_expr()?; + self.expect_special(b')')?; + // Check if the argument is a constant expression + let is_constant = self.eval_const_expr(&arg).is_some(); + return Ok(Self::typed_expr( + ExprKind::IntLit(if is_constant { 1 } else { 0 }), + Type::basic(TypeKind::Int), + token_pos, + )); + } + "__builtin_types_compatible_p" => { + // __builtin_types_compatible_p(type1, type2) - returns 1 if types are compatible + // This is evaluated at compile time, ignoring top-level qualifiers + self.expect_special(b'(')?; + let type1 = self.parse_type_name()?; + self.expect_special(b',')?; + let type2 = self.parse_type_name()?; + self.expect_special(b')')?; + // Check type compatibility (ignoring qualifiers) + let compatible = type1.types_compatible(&type2); + return Ok(Self::typed_expr( + ExprKind::IntLit(if compatible { 1 } else { 0 }), + Type::basic(TypeKind::Int), + token_pos, + )); + } + _ => {} + } + + // Look up symbol to get type (during parsing, symbol is in scope) + let typ = self + .symbols + .lookup(&name, Namespace::Ordinary) + .map(|s| s.typ.clone()) + .unwrap_or_else(|| Type::basic(TypeKind::Int)); // Default to int if not found + Ok(Self::typed_expr(ExprKind::Ident { name }, typ, token_pos)) + } else { + Err(ParseError::new("invalid identifier token", token.pos)) + } + } + + TokenType::Char => { + let token = self.consume(); + let token_pos = token.pos; + if let TokenValue::Char(s) = &token.value { + // Parse character literal - type is int (C promotes char to int) + let c = self.parse_char_literal(s); + Ok(Self::typed_expr( + ExprKind::CharLit(c), + Type::basic(TypeKind::Int), + token_pos, + )) + } else { + Err(ParseError::new("invalid char token", token.pos)) + } + } + + TokenType::String => { + let token = self.consume(); + let token_pos = token.pos; + if let TokenValue::String(s) = &token.value { + // Parse string literal - convert escape sequences + let parsed = Self::parse_string_literal(s); + // String literal type is char* + Ok(Self::typed_expr( + ExprKind::StringLit(parsed), + Type::pointer(Type::basic(TypeKind::Char)), + token_pos, + )) + } else { + Err(ParseError::new("invalid string token", token.pos)) + } + } + + TokenType::Special => { + if self.is_special(b'(') { + // Parenthesized expression or cast + let paren_pos = self.current_pos(); + self.advance(); + + // Try to detect cast (type) - simplified check + if let Some(typ) = self.try_parse_type_name() { + self.expect_special(b')')?; + let expr = self.parse_unary_expr()?; + // Cast expression has the cast type + return Ok(Self::typed_expr( + ExprKind::Cast { + cast_type: typ.clone(), + expr: Box::new(expr), + }, + typ, + paren_pos, + )); + } + + // Regular parenthesized expression + let expr = self.parse_expression()?; + self.expect_special(b')')?; + Ok(expr) + } else { + Err(ParseError::new( + "unexpected token in expression".to_string(), + self.current_pos(), + )) + } + } + + _ => Err(ParseError::new( + format!("unexpected token {:?}", self.peek()), + self.current_pos(), + )), + } + } + + /// Parse a number literal string into an expression + fn parse_number_literal(&self, s: &str, pos: Position) -> ParseResult { + let s_lower = s.to_lowercase(); + + // Check if it's a hex number (must check before suffix trimming) + let is_hex = s_lower.starts_with("0x"); + + // Check if it's a floating point number + let is_float = s_lower.contains('.') + || (s_lower.contains('e') && !is_hex) + || (s_lower.contains('p') && is_hex); + + // Remove suffixes - but for hex numbers, don't strip a-f as they're digits + let num_str = if is_hex { + // For hex, only strip u/l suffixes (not f which is a hex digit) + s_lower.trim_end_matches(['u', 'l']) + } else { + // For decimal/octal, strip u/l/f suffixes + s_lower.trim_end_matches(['u', 'l', 'f']) + }; + + if is_float { + // Float - type is double by default, float if 'f' suffix, long double if 'l' suffix + // C99: floating-suffix is f, F, l, or L + let is_float_suffix = s_lower.ends_with('f'); + let is_longdouble_suffix = s_lower.ends_with('l'); + let value: f64 = if is_hex { + // Hex float parsing: 0x[hex-digits].[hex-digits]p[±exponent] + // Value = significand × 2^exponent + Self::parse_hex_float(num_str).map_err(|_| { + ParseError::new(format!("invalid hex float literal: {}", s), pos) + })? + } else { + num_str + .parse() + .map_err(|_| ParseError::new(format!("invalid float literal: {}", s), pos))? + }; + let typ = if is_float_suffix { + Type::basic(TypeKind::Float) + } else if is_longdouble_suffix { + Type::basic(TypeKind::LongDouble) + } else { + Type::basic(TypeKind::Double) + }; + Ok(Self::typed_expr(ExprKind::FloatLit(value), typ, pos)) + } else { + // Integer - determine type from suffix + // Check for long long first (ll, ull, llu) before checking for long (l, ul, lu) + let is_longlong = + s_lower.ends_with("ll") || s_lower.ends_with("ull") || s_lower.ends_with("llu"); + let is_long = !is_longlong + && (s_lower.ends_with('l') || s_lower.ends_with("ul") || s_lower.ends_with("lu")); + let is_unsigned = s_lower.contains('u'); + + // Parse as u64 first to handle large unsigned values, then reinterpret as i64 + let value_u64: u64 = if is_hex { + // Strip 0x or 0X prefix + let hex_part = num_str + .strip_prefix("0x") + .or_else(|| num_str.strip_prefix("0X")) + .unwrap_or(num_str); + u64::from_str_radix(hex_part, 16) + } else if let Some(bin_part) = num_str.strip_prefix("0b") { + u64::from_str_radix(bin_part, 2) + } else if num_str.starts_with('0') && num_str.len() > 1 { + u64::from_str_radix(num_str, 8) + } else { + num_str.parse() + } + .map_err(|_| ParseError::new(format!("invalid integer literal: {}", s), pos))?; + + // Reinterpret bits as i64 (preserves bit pattern for unsigned values) + let value = value_u64 as i64; + + let typ = match (is_longlong, is_long, is_unsigned) { + (true, _, false) => Type::basic(TypeKind::LongLong), + (true, _, true) => { + Type::with_modifiers(TypeKind::LongLong, TypeModifiers::UNSIGNED) + } + (false, true, false) => Type::basic(TypeKind::Long), + (false, true, true) => { + Type::with_modifiers(TypeKind::Long, TypeModifiers::UNSIGNED) + } + (false, false, false) => Type::basic(TypeKind::Int), + (false, false, true) => { + Type::with_modifiers(TypeKind::Int, TypeModifiers::UNSIGNED) + } + }; + Ok(Self::typed_expr(ExprKind::IntLit(value), typ, pos)) + } + } + + /// Parse a hexadecimal floating-point literal (C99 feature) + /// Format: 0x[hex-mantissa]p[±exponent] where mantissa can have decimal point + /// Value = significand × 2^exponent + fn parse_hex_float(s: &str) -> Result { + // Strip 0x prefix + let s = s + .strip_prefix("0x") + .or_else(|| s.strip_prefix("0X")) + .ok_or(())?; + + // Find 'p' or 'P' separator for exponent + let p_pos = s.find(['p', 'P']).ok_or(())?; + let (mantissa_str, exp_str) = s.split_at(p_pos); + let exp_str = &exp_str[1..]; // Skip the 'p'/'P' + + // Parse mantissa (may have decimal point) + let (int_part, frac_part) = if let Some(dot_pos) = mantissa_str.find('.') { + (&mantissa_str[..dot_pos], &mantissa_str[dot_pos + 1..]) + } else { + (mantissa_str, "") + }; + + // Convert hex mantissa to f64 + let int_val = if int_part.is_empty() { + 0u64 + } else { + u64::from_str_radix(int_part, 16).map_err(|_| ())? + }; + + let mut mantissa = int_val as f64; + + // Add fractional part: each hex digit after dot is worth 1/16, 1/256, etc. + if !frac_part.is_empty() { + let frac_val = u64::from_str_radix(frac_part, 16).map_err(|_| ())?; + let frac_bits = frac_part.len() * 4; // 4 bits per hex digit + mantissa += frac_val as f64 / (1u64 << frac_bits) as f64; + } + + // Parse exponent (base 10 number representing power of 2) + let exponent: i32 = exp_str.parse().map_err(|_| ())?; + + // Calculate final value: mantissa × 2^exponent + Ok(mantissa * (2.0_f64).powi(exponent)) + } + + /// Parse an escape sequence starting at position i (after the backslash). + /// Returns (unescaped_char, number_of_chars_consumed_after_backslash). + fn parse_escape_sequence(chars: &[char], i: usize) -> (char, usize) { + if i >= chars.len() { + return ('\\', 0); + } + + match chars[i] { + 'n' => ('\n', 1), + 't' => ('\t', 1), + 'r' => ('\r', 1), + '\\' => ('\\', 1), + '\'' => ('\'', 1), + '"' => ('"', 1), + 'a' => ('\x07', 1), // bell + 'b' => ('\x08', 1), // backspace + 'f' => ('\x0C', 1), // form feed + 'v' => ('\x0B', 1), // vertical tab + 'x' => { + // Hex escape \xHH - consume all hex digits + let mut hex_chars = 0; + while i + 1 + hex_chars < chars.len() + && chars[i + 1 + hex_chars].is_ascii_hexdigit() + { + hex_chars += 1; + } + if hex_chars > 0 { + let hex: String = chars[i + 1..i + 1 + hex_chars].iter().collect(); + // C allows arbitrary-length hex escapes, but only low 8 bits matter + let val = u64::from_str_radix(&hex, 16).unwrap_or(0) as u8; + (val as char, 1 + hex_chars) + } else { + ('x', 1) // \x with no hex digits - just 'x' + } + } + c if c.is_ascii_digit() && c != '8' && c != '9' => { + // Octal escape \NNN (up to 3 digits) + let mut oct_chars = 1; + while oct_chars < 3 + && i + oct_chars < chars.len() + && chars[i + oct_chars].is_ascii_digit() + && chars[i + oct_chars] != '8' + && chars[i + oct_chars] != '9' + { + oct_chars += 1; + } + let oct: String = chars[i..i + oct_chars].iter().collect(); + let val = u8::from_str_radix(&oct, 8).unwrap_or(0); + (val as char, oct_chars) + } + c => (c, 1), // Unknown escape - just return the character + } + } + + /// Parse a character literal string into a char + fn parse_char_literal(&self, s: &str) -> char { + if s.is_empty() { + return '\0'; + } + + let chars: Vec = s.chars().collect(); + if chars[0] == '\\' && chars.len() > 1 { + let (c, _) = Self::parse_escape_sequence(&chars, 1); + c + } else { + chars[0] + } + } + + /// Parse a string literal, converting escape sequences to their actual values. + /// This implements C99 translation phase 5 for string literals. + fn parse_string_literal(s: &str) -> String { + let chars: Vec = s.chars().collect(); + let mut result = String::new(); + let mut i = 0; + + while i < chars.len() { + if chars[i] == '\\' && i + 1 < chars.len() { + let (c, consumed) = Self::parse_escape_sequence(&chars, i + 1); + result.push(c); + i += 1 + consumed; + } else { + result.push(chars[i]); + i += 1; + } + } + + result + } +} + +// ============================================================================ +// Statement Parsing +// ============================================================================ + +impl Parser<'_> { + /// Parse a statement + pub fn parse_statement(&mut self) -> ParseResult { + // Check for keywords + if self.peek() == TokenType::Ident { + if let Some(name) = self.get_ident_name(self.current()) { + match name.as_str() { + "if" => return self.parse_if_stmt(), + "while" => return self.parse_while_stmt(), + "do" => return self.parse_do_while_stmt(), + "for" => return self.parse_for_stmt(), + "return" => return self.parse_return_stmt(), + "break" => { + self.advance(); + self.expect_special(b';')?; + return Ok(Stmt::Break); + } + "continue" => { + self.advance(); + self.expect_special(b';')?; + return Ok(Stmt::Continue); + } + "goto" => { + self.advance(); + let label = self.expect_identifier()?; + self.expect_special(b';')?; + return Ok(Stmt::Goto(label)); + } + "switch" => return self.parse_switch_stmt(), + "case" => return self.parse_case_label(), + "default" => return self.parse_default_label(), + _ => {} + } + } + } + + // Check for compound statement + if self.is_special(b'{') { + return self.parse_block_stmt(); + } + + // Check for empty statement + if self.is_special(b';') { + self.advance(); + return Ok(Stmt::Empty); + } + + // Check for labeled statement + if self.peek() == TokenType::Ident { + // Save position for potential backtrack + let saved_pos = self.pos; + let name = self.expect_identifier()?; + if self.is_special(b':') { + self.advance(); + let stmt = self.parse_statement()?; + return Ok(Stmt::Label { + name, + stmt: Box::new(stmt), + }); + } + // Not a label, backtrack + self.pos = saved_pos; + } + + // Expression statement + let expr = self.parse_expression()?; + self.expect_special(b';')?; + Ok(Stmt::Expr(expr)) + } + + /// Parse an if statement + fn parse_if_stmt(&mut self) -> ParseResult { + self.advance(); // consume 'if' + self.expect_special(b'(')?; + let cond = self.parse_expression()?; + self.expect_special(b')')?; + let then_stmt = self.parse_statement()?; + + let else_stmt = if self.peek() == TokenType::Ident { + if let Some(name) = self.get_ident_name(self.current()) { + if name == "else" { + self.advance(); + Some(Box::new(self.parse_statement()?)) + } else { + None + } + } else { + None + } + } else { + None + }; + + Ok(Stmt::If { + cond, + then_stmt: Box::new(then_stmt), + else_stmt, + }) + } + + /// Parse a while statement + fn parse_while_stmt(&mut self) -> ParseResult { + self.advance(); // consume 'while' + self.expect_special(b'(')?; + let cond = self.parse_expression()?; + self.expect_special(b')')?; + let body = self.parse_statement()?; + + Ok(Stmt::While { + cond, + body: Box::new(body), + }) + } + + /// Parse a do-while statement + fn parse_do_while_stmt(&mut self) -> ParseResult { + self.advance(); // consume 'do' + let body = self.parse_statement()?; + + // Expect 'while' + if self.peek() != TokenType::Ident { + return Err(ParseError::new("expected 'while'", self.current_pos())); + } + if let Some(name) = self.get_ident_name(self.current()) { + if name != "while" { + return Err(ParseError::new("expected 'while'", self.current_pos())); + } + } + self.advance(); + + self.expect_special(b'(')?; + let cond = self.parse_expression()?; + self.expect_special(b')')?; + self.expect_special(b';')?; + + Ok(Stmt::DoWhile { + body: Box::new(body), + cond, + }) + } + + /// Parse a for statement + /// + /// C99 allows declarations in for-init: `for (int i = 0; i < n; i++)` + /// These declarations are scoped to the for loop (including body). + fn parse_for_stmt(&mut self) -> ParseResult { + self.advance(); // consume 'for' + self.expect_special(b'(')?; + + // Enter scope for for-loop declarations (C99) + // This scope includes init declaration + body + self.symbols.enter_scope(); + + // Parse init (can be declaration or expression) + let init = if self.is_special(b';') { + self.advance(); + None + } else if self.is_declaration_start() { + // C99: declaration in for-init, bind to for-scope + let decl = self.parse_declaration_and_bind()?; + // Declaration already consumed the semicolon + Some(ForInit::Declaration(decl)) + } else { + let expr = self.parse_expression()?; + self.expect_special(b';')?; + Some(ForInit::Expression(expr)) + }; + + // Parse condition + let cond = if self.is_special(b';') { + self.advance(); + None + } else { + let expr = self.parse_expression()?; + self.expect_special(b';')?; + Some(expr) + }; + + // Parse post + let post = if self.is_special(b')') { + None + } else { + Some(self.parse_expression()?) + }; + + self.expect_special(b')')?; + let body = self.parse_statement()?; + + // Leave for-scope + self.symbols.leave_scope(); + + Ok(Stmt::For { + init, + cond, + post, + body: Box::new(body), + }) + } + + /// Parse a return statement + fn parse_return_stmt(&mut self) -> ParseResult { + self.advance(); // consume 'return' + + if self.is_special(b';') { + self.advance(); + return Ok(Stmt::Return(None)); + } + + let expr = self.parse_expression()?; + self.expect_special(b';')?; + Ok(Stmt::Return(Some(expr))) + } + + /// Parse a switch statement + fn parse_switch_stmt(&mut self) -> ParseResult { + self.advance(); // consume 'switch' + self.expect_special(b'(')?; + let expr = self.parse_expression()?; + self.expect_special(b')')?; + let body = self.parse_statement()?; + Ok(Stmt::Switch { + expr, + body: Box::new(body), + }) + } + + /// Parse a case label + fn parse_case_label(&mut self) -> ParseResult { + self.advance(); // consume 'case' + let expr = self.parse_conditional_expr()?; + self.expect_special(b':')?; + Ok(Stmt::Case(expr)) + } + + /// Parse a default label + fn parse_default_label(&mut self) -> ParseResult { + self.advance(); // consume 'default' + self.expect_special(b':')?; + Ok(Stmt::Default) + } + + /// Parse a compound statement (block) with its own scope + /// + /// Like sparse, blocks create their own scope for local declarations. + /// This enters a new scope, parses the block, binds any declarations, + /// then leaves the scope. + fn parse_block_stmt(&mut self) -> ParseResult { + self.expect_special(b'{')?; + + // Enter block scope + self.symbols.enter_scope(); + + let mut items = Vec::new(); + while !self.is_special(b'}') && !self.is_eof() { + if self.is_declaration_start() { + let decl = self.parse_declaration_and_bind()?; + items.push(BlockItem::Declaration(decl)); + } else { + let stmt = self.parse_statement()?; + items.push(BlockItem::Statement(stmt)); + } + } + + // Leave block scope + self.symbols.leave_scope(); + + self.expect_special(b'}')?; + Ok(Stmt::Block(items)) + } + + /// Parse a compound statement without entering a new scope + /// + /// Used by function definitions where the scope is already entered + /// by the function parsing code (to include parameters in scope). + fn parse_block_stmt_no_scope(&mut self) -> ParseResult { + self.expect_special(b'{')?; + + let mut items = Vec::new(); + while !self.is_special(b'}') && !self.is_eof() { + if self.is_declaration_start() { + let decl = self.parse_declaration_and_bind()?; + items.push(BlockItem::Declaration(decl)); + } else { + let stmt = self.parse_statement()?; + items.push(BlockItem::Statement(stmt)); + } + } + + self.expect_special(b'}')?; + Ok(Stmt::Block(items)) + } + + /// Check if current position starts a declaration + fn is_declaration_start(&self) -> bool { + if self.peek() != TokenType::Ident { + return false; + } + + if let Some(name) = self.get_ident_name(self.current()) { + // Check for type keywords first + if matches!( + name.as_str(), + "void" + | "char" + | "short" + | "int" + | "long" + | "float" + | "double" + | "signed" + | "unsigned" + | "const" + | "volatile" + | "static" + | "extern" + | "auto" + | "register" + | "typedef" + | "struct" + | "union" + | "enum" + | "_Bool" + | "__attribute__" + | "__attribute" + | "__builtin_va_list" + ) { + return true; + } + // Also check for typedef names + self.symbols.lookup_typedef(&name).is_some() + } else { + false + } + } + + /// Parse a declaration (without binding to symbol table) + /// + /// Used for testing declarations in isolation. + #[cfg(test)] + fn parse_declaration(&mut self) -> ParseResult { + // Parse type specifiers + let base_type = self.parse_type_specifier()?; + + // Parse declarators + let mut declarators = Vec::new(); + + // Check for struct/union/enum-only declaration (no declarators) + // e.g., "struct point { int x; int y; };" + if !self.is_special(b';') { + loop { + let (name, typ) = self.parse_declarator(base_type.clone())?; + let init = if self.is_special(b'=') { + self.advance(); + Some(self.parse_initializer()?) + } else { + None + }; + + declarators.push(InitDeclarator { name, typ, init }); + + if self.is_special(b',') { + self.advance(); + } else { + break; + } + } + } + + self.expect_special(b';')?; + + Ok(Declaration { declarators }) + } + + /// Parse a declaration and bind variables to symbol table + /// + /// Following sparse's design, this binds each declared variable to the + /// symbol table immediately during parsing. Like sparse's bind_symbol(). + fn parse_declaration_and_bind(&mut self) -> ParseResult { + // Parse type specifiers + let base_type = self.parse_type_specifier()?; + + // Parse declarators + let mut declarators = Vec::new(); + + // Check for struct/union/enum-only declaration (no declarators) + // e.g., "struct point { int x; int y; };" + if !self.is_special(b';') { + loop { + let (name, typ) = self.parse_declarator(base_type.clone())?; + // Check if this is a typedef declaration + let is_typedef = base_type.modifiers.contains(TypeModifiers::TYPEDEF); + + let init = if self.is_special(b'=') { + if is_typedef { + return Err(ParseError::new( + "typedef cannot have initializer", + self.current_pos(), + )); + } + self.advance(); + Some(self.parse_initializer()?) + } else { + None + }; + + // Bind to symbol table (like sparse's bind_symbol) + if !name.is_empty() { + if is_typedef { + // Strip TYPEDEF modifier from the type being aliased + let mut aliased_type = typ.clone(); + aliased_type.modifiers.remove(TypeModifiers::TYPEDEF); + let sym = Symbol::typedef(name.clone(), aliased_type, self.symbols.depth()); + let _ = self.symbols.declare(sym); + } else { + let sym = Symbol::variable(name.clone(), typ.clone(), self.symbols.depth()); + let _ = self.symbols.declare(sym); + } + } + + declarators.push(InitDeclarator { name, typ, init }); + + if self.is_special(b',') { + self.advance(); + } else { + break; + } + } + } + + self.expect_special(b';')?; + + Ok(Declaration { declarators }) + } + + /// Parse a type specifier + fn parse_type_specifier(&mut self) -> ParseResult { + let mut modifiers = TypeModifiers::empty(); + let mut base_kind: Option = None; + + // Skip any leading __attribute__ + self.skip_extensions(); + + loop { + if self.peek() != TokenType::Ident { + break; + } + + let name = match self.get_ident_name(self.current()) { + Some(n) => n, + None => break, + }; + + match name.as_str() { + // Skip __attribute__ in the type specifier loop + "__attribute__" | "__attribute" => { + self.skip_extensions(); + continue; + } + "const" => { + self.advance(); + modifiers |= TypeModifiers::CONST; + } + "volatile" => { + self.advance(); + modifiers |= TypeModifiers::VOLATILE; + } + "static" => { + self.advance(); + modifiers |= TypeModifiers::STATIC; + } + "extern" => { + self.advance(); + modifiers |= TypeModifiers::EXTERN; + } + "register" => { + self.advance(); + modifiers |= TypeModifiers::REGISTER; + } + "auto" => { + self.advance(); + modifiers |= TypeModifiers::AUTO; + } + "typedef" => { + self.advance(); + modifiers |= TypeModifiers::TYPEDEF; + } + "inline" => { + self.advance(); + modifiers |= TypeModifiers::INLINE; + } + "signed" => { + self.advance(); + modifiers |= TypeModifiers::SIGNED; + } + "unsigned" => { + self.advance(); + modifiers |= TypeModifiers::UNSIGNED; + } + "short" => { + self.advance(); + modifiers |= TypeModifiers::SHORT; + if base_kind.is_none() { + base_kind = Some(TypeKind::Short); + } + } + "long" => { + self.advance(); + if modifiers.contains(TypeModifiers::LONG) { + modifiers |= TypeModifiers::LONGLONG; + base_kind = Some(TypeKind::LongLong); + } else { + modifiers |= TypeModifiers::LONG; + if base_kind.is_none() { + base_kind = Some(TypeKind::Long); + } + } + } + "void" => { + self.advance(); + base_kind = Some(TypeKind::Void); + } + "char" => { + self.advance(); + base_kind = Some(TypeKind::Char); + } + "int" => { + self.advance(); + if base_kind.is_none() + || !matches!( + base_kind, + Some(TypeKind::Short) | Some(TypeKind::Long) | Some(TypeKind::LongLong) + ) + { + base_kind = Some(TypeKind::Int); + } + } + "float" => { + self.advance(); + base_kind = Some(TypeKind::Float); + } + "double" => { + self.advance(); + base_kind = Some(TypeKind::Double); + } + "_Bool" => { + self.advance(); + base_kind = Some(TypeKind::Bool); + } + "__builtin_va_list" => { + self.advance(); + base_kind = Some(TypeKind::VaList); + } + "enum" => { + let enum_type = self.parse_enum_specifier()?; + // Apply any modifiers we collected + return Ok(Type { + modifiers, + ..enum_type + }); + } + "struct" => { + let struct_type = self.parse_struct_or_union_specifier(false)?; + return Ok(Type { + modifiers, + ..struct_type + }); + } + "union" => { + let union_type = self.parse_struct_or_union_specifier(true)?; + return Ok(Type { + modifiers, + ..union_type + }); + } + _ => { + // Check if it's a typedef name + // Only consume the typedef if we haven't already seen a base type + if base_kind.is_none() { + if let Some(typedef_type) = self.symbols.lookup_typedef(&name).cloned() { + self.advance(); + let mut result = typedef_type; + // Merge in any modifiers we collected (const, volatile, etc.) + result.modifiers |= modifiers; + return Ok(result); + } + } + break; + } + } + } + + let kind = base_kind.unwrap_or(TypeKind::Int); + Ok(Type::with_modifiers(kind, modifiers)) + } + + /// Parse an enum specifier + /// enum-specifier: 'enum' identifier? '{' enumerator-list? '}' | 'enum' identifier + fn parse_enum_specifier(&mut self) -> ParseResult { + self.advance(); // consume 'enum' + + // Optional tag name + let tag = if self.peek() == TokenType::Ident && !self.is_special(b'{') { + Some(self.expect_identifier()?) + } else { + None + }; + + // Check for definition vs forward reference + if self.is_special(b'{') { + self.advance(); // consume '{' + + let mut constants = Vec::new(); + let mut next_value = 0i64; + + while !self.is_special(b'}') && !self.is_eof() { + let name = self.expect_identifier()?; + + let value = if self.is_special(b'=') { + self.advance(); + let expr = self.parse_conditional_expr()?; + // Evaluate constant expression + self.eval_const_expr(&expr).ok_or_else(|| { + ParseError::new("enum value must be constant", self.current_pos()) + })? + } else { + next_value + }; + + constants.push(EnumConstant { + name: name.clone(), + value, + }); + next_value = value + 1; + + // Register enum constant in symbol table (Ordinary namespace) + let sym = Symbol::enum_constant(name, value, self.symbols.depth()); + let _ = self.symbols.declare(sym); + + if self.is_special(b',') { + self.advance(); + // Allow trailing comma before '}' + if self.is_special(b'}') { + break; + } + } else { + break; + } + } + + self.expect_special(b'}')?; + + let composite = CompositeType { + tag: tag.clone(), + members: Vec::new(), + enum_constants: constants, + size: 4, + align: 4, + is_complete: true, + }; + + // Register tag if present + if let Some(ref tag_name) = tag { + let typ = Type::enum_type(composite.clone()); + let sym = Symbol::tag(tag_name.clone(), typ, self.symbols.depth()); + let _ = self.symbols.declare(sym); + } + + Ok(Type::enum_type(composite)) + } else { + // Forward reference - look up existing tag + if let Some(ref tag_name) = tag { + // Look up or create incomplete type + if let Some(existing) = self.symbols.lookup_tag(tag_name) { + Ok(existing.typ.clone()) + } else { + Ok(Type::incomplete_enum(tag_name.clone())) + } + } else { + Err(ParseError::new( + "expected enum definition or tag name", + self.current_pos(), + )) + } + } + } + + /// Parse a struct or union specifier + /// struct-or-union-specifier: ('struct'|'union') identifier? '{' struct-declaration-list? '}' + /// | ('struct'|'union') identifier + fn parse_struct_or_union_specifier(&mut self, is_union: bool) -> ParseResult { + self.advance(); // consume 'struct' or 'union' + + // Skip any __attribute__ between 'struct' and tag name + self.skip_extensions(); + + // Optional tag name + let tag = if self.peek() == TokenType::Ident && !self.is_special(b'{') { + // Make sure it's not __attribute__ + if !self.is_attribute_keyword() { + Some(self.expect_identifier()?) + } else { + self.skip_extensions(); + if self.peek() == TokenType::Ident && !self.is_special(b'{') { + Some(self.expect_identifier()?) + } else { + None + } + } + } else { + None + }; + + // Skip any __attribute__ after tag name but before '{' + self.skip_extensions(); + + // Check for definition vs forward reference + if self.is_special(b'{') { + self.advance(); // consume '{' + + let mut members = Vec::new(); + + while !self.is_special(b'}') && !self.is_eof() { + // Parse member declaration + let member_base_type = self.parse_type_specifier()?; + + // Check for unnamed bitfield (starts with ':') + if self.is_special(b':') { + // Unnamed bitfield: parse width only + self.advance(); // consume ':' + let width = self.parse_bitfield_width()?; + + members.push(StructMember { + name: String::new(), + typ: member_base_type.clone(), + offset: 0, + bit_offset: None, + bit_width: Some(width), + storage_unit_size: None, + }); + + self.expect_special(b';')?; + continue; + } + + loop { + let (name, typ) = self.parse_declarator(member_base_type.clone())?; + + // Check for bitfield: name : width + let bit_width = if self.is_special(b':') { + self.advance(); // consume ':' + let width = self.parse_bitfield_width()?; + // Validate bitfield type and width + self.validate_bitfield(&typ, width)?; + Some(width) + } else { + None + }; + + members.push(StructMember { + name, + typ, + offset: 0, // Computed later + bit_offset: None, + bit_width, + storage_unit_size: None, + }); + + if self.is_special(b',') { + self.advance(); + } else { + break; + } + } + + self.expect_special(b';')?; + } + + self.expect_special(b'}')?; + + // Skip any trailing __attribute__ (e.g., __attribute__((packed))) + self.skip_extensions(); + + // Compute layout + let (size, align) = if is_union { + CompositeType::compute_union_layout(&mut members) + } else { + CompositeType::compute_struct_layout(&mut members) + }; + + let composite = CompositeType { + tag: tag.clone(), + members, + enum_constants: Vec::new(), + size, + align, + is_complete: true, + }; + + // Register tag if present + if let Some(ref tag_name) = tag { + let typ = if is_union { + Type::union_type(composite.clone()) + } else { + Type::struct_type(composite.clone()) + }; + let sym = Symbol::tag(tag_name.clone(), typ, self.symbols.depth()); + let _ = self.symbols.declare(sym); + } + + if is_union { + Ok(Type::union_type(composite)) + } else { + Ok(Type::struct_type(composite)) + } + } else { + // Forward reference + if let Some(ref tag_name) = tag { + // Look up existing tag + if let Some(existing) = self.symbols.lookup_tag(tag_name) { + Ok(existing.typ.clone()) + } else if is_union { + Ok(Type::incomplete_union(tag_name.clone())) + } else { + Ok(Type::incomplete_struct(tag_name.clone())) + } + } else { + Err(ParseError::new( + "expected struct/union definition or tag name", + self.current_pos(), + )) + } + } + } + + /// Parse a declarator (name and type modifiers) + /// + /// C declarators are parsed "inside-out". For example, `int (*p)[3]`: + /// - `int` is the base type + /// - `(*p)` means p is a pointer + /// - `[3]` after the parens means "to array of 3" + /// So p is "pointer to array of 3 ints" + fn parse_declarator(&mut self, base_type: Type) -> ParseResult<(String, Type)> { + // Collect pointer modifiers (they bind tighter than array/function) + let mut pointer_modifiers: Vec = Vec::new(); + while self.is_special(b'*') { + self.advance(); + let mut ptr_modifiers = TypeModifiers::empty(); + + // Parse pointer qualifiers (const, volatile, restrict) + while self.peek() == TokenType::Ident { + if let Some(name) = self.get_ident_name(self.current()) { + match name.as_str() { + "const" => { + self.advance(); + ptr_modifiers |= TypeModifiers::CONST; + } + "volatile" => { + self.advance(); + ptr_modifiers |= TypeModifiers::VOLATILE; + } + "restrict" => { + self.advance(); + ptr_modifiers |= TypeModifiers::RESTRICT; + } + _ => break, + } + } else { + break; + } + } + pointer_modifiers.push(ptr_modifiers); + } + + // Check for parenthesized declarator: int (*p)[3] + // The paren comes AFTER pointers, e.g. int *(*p)[3] = pointer to (pointer to array of 3 ints) + let (name, inner_type) = if self.is_special(b'(') { + // Check if this looks like a function parameter list or a grouped declarator + // A grouped declarator will have * or identifier immediately after ( + let saved_pos = self.pos; + self.advance(); // consume '(' + + // Check what follows - if it's *, it's likely a grouped declarator + // If it's a type or ), it's likely function params + let is_grouped = self.is_special(b'*') || self.peek() == TokenType::Ident; + + if is_grouped { + // Parse nested declarator recursively + // For int (*p)[3]: we're now at *p), base_type is int + // We need to parse *p as the declarator, but we don't know the full type yet + // because [3] comes after the ) + // So we use a placeholder and fix it up after + + // Parse the inner declarator with a placeholder type + let placeholder = Type::basic(TypeKind::Void); + let (inner_name, inner_decl_type) = self.parse_declarator(placeholder)?; + self.expect_special(b')')?; + + // Now parse any suffix modifiers (arrays, function params) + // These apply to the base type, not the inner declarator + (inner_name, Some(inner_decl_type)) + } else { + // Not a grouped declarator, restore position + self.pos = saved_pos; + (self.expect_identifier()?, None) + } + } else { + // Get the name directly + (self.expect_identifier()?, None) + }; + + // Handle array declarators - collect all dimensions first + let mut dimensions: Vec> = Vec::new(); + while self.is_special(b'[') { + self.advance(); + let size = if self.is_special(b']') { + None + } else { + // Parse constant expression for array size + let expr = self.parse_assignment_expr()?; + // For simplicity, only handle integer literals + match expr.kind { + ExprKind::IntLit(n) => Some(n as usize), + _ => None, // VLA or complex expression + } + }; + self.expect_special(b']')?; + dimensions.push(size); + } + + // Handle function declarators: void (*fp)(int, char) + // This parses the parameter list after a grouped declarator + let func_params: Option<(Vec, bool)> = if self.is_special(b'(') { + self.advance(); + let (params, variadic) = self.parse_parameter_list()?; + self.expect_special(b')')?; + Some((params.iter().map(|p| p.typ.clone()).collect(), variadic)) + } else { + None + }; + + // Build the type from the base type + let mut result_type = base_type; + + if inner_type.is_some() { + // Grouped declarator: int (*p)[3] or void (*fp)(int) + // Arrays/functions in suffix apply to the base type first + // Then we substitute into the inner declarator + + // Apply function parameters to base type first (if present) + // For void (*fp)(int): base is void, suffix (int) -> Function(void, [int]) + if let Some((param_types, variadic)) = func_params { + result_type = Type { + kind: TypeKind::Function, + modifiers: TypeModifiers::empty(), + base: Some(Box::new(result_type)), + array_size: None, + params: Some(param_types), + variadic, + composite: None, + }; + } + + // Apply array dimensions to base type + // For int (*p)[3]: base is int, suffix [3] -> Array(3, int) + for size in dimensions.into_iter().rev() { + result_type = Type { + kind: TypeKind::Array, + modifiers: TypeModifiers::empty(), + base: Some(Box::new(result_type)), + array_size: size, + params: None, + variadic: false, + composite: None, + }; + } + + // Apply any outer pointers (before the parens) + for modifiers in pointer_modifiers.into_iter().rev() { + result_type = Type { + kind: TypeKind::Pointer, + modifiers, + base: Some(Box::new(result_type)), + array_size: None, + params: None, + variadic: false, + composite: None, + }; + } + + // Substitute into inner declarator + // For int (*p)[3]: inner_decl is Pointer(Void), result_type is Array(3, int) + // -> Pointer(Array(3, int)) + // For void (*fp)(int): inner_decl is Pointer(Void), result_type is Function(void, [int]) + // -> Pointer(Function(void, [int])) + result_type = Self::substitute_base_type(inner_type.unwrap(), result_type); + } else { + // Simple declarator: char *arr[3] + // Pointers bind tighter than arrays: *arr[3] = array of pointers + + // Apply pointer modifiers to base type first + for modifiers in pointer_modifiers.into_iter().rev() { + result_type = Type { + kind: TypeKind::Pointer, + modifiers, + base: Some(Box::new(result_type)), + array_size: None, + params: None, + variadic: false, + composite: None, + }; + } + + // Then apply array dimensions + // For char *arr[3]: result_type is char*, suffix [3] -> Array(3, char*) + for size in dimensions.into_iter().rev() { + result_type = Type { + kind: TypeKind::Array, + modifiers: TypeModifiers::empty(), + base: Some(Box::new(result_type)), + array_size: size, + params: None, + variadic: false, + composite: None, + }; + } + } + + Ok((name, result_type)) + } + + /// Substitute the actual base type into a declarator parsed with a placeholder + /// For int (*p)[3]: inner_decl is Pointer(Void), actual_base is Array(3, int) + /// Result should be Pointer(Array(3, int)) + fn substitute_base_type(decl_type: Type, actual_base: Type) -> Type { + match decl_type.kind { + TypeKind::Void => actual_base, + TypeKind::Pointer => Type { + kind: TypeKind::Pointer, + modifiers: decl_type.modifiers, + base: Some(Box::new(Self::substitute_base_type( + *decl_type.base.unwrap(), + actual_base, + ))), + array_size: None, + params: None, + variadic: false, + composite: None, + }, + TypeKind::Array => Type { + kind: TypeKind::Array, + modifiers: decl_type.modifiers, + base: Some(Box::new(Self::substitute_base_type( + *decl_type.base.unwrap(), + actual_base, + ))), + array_size: decl_type.array_size, + params: None, + variadic: false, + composite: None, + }, + _ => decl_type, // Other types don't need substitution + } + } + + /// Parse a function definition + /// + /// Following sparse's design, this binds the function to the symbol table + /// at global scope, then enters a new scope for the function body and + /// binds all parameters in that scope. + #[cfg(test)] + fn parse_function_def(&mut self) -> ParseResult { + let func_pos = self.current_pos(); + // Parse return type + let return_type = self.parse_type_specifier()?; + + // Handle pointer in return type with qualifiers + let mut ret_type = return_type; + while self.is_special(b'*') { + self.advance(); + let mut ptr_modifiers = TypeModifiers::empty(); + + // Parse pointer qualifiers + while self.peek() == TokenType::Ident { + if let Some(name) = self.get_ident_name(self.current()) { + match name.as_str() { + "const" => { + self.advance(); + ptr_modifiers |= TypeModifiers::CONST; + } + "volatile" => { + self.advance(); + ptr_modifiers |= TypeModifiers::VOLATILE; + } + "restrict" => { + self.advance(); + ptr_modifiers |= TypeModifiers::RESTRICT; + } + _ => break, + } + } else { + break; + } + } + + ret_type = Type { + kind: TypeKind::Pointer, + modifiers: ptr_modifiers, + base: Some(Box::new(ret_type)), + array_size: None, + params: None, + variadic: false, + composite: None, + }; + } + + // Parse function name + let name = self.expect_identifier()?; + + // Parse parameter list + self.expect_special(b'(')?; + let (params, variadic) = self.parse_parameter_list()?; + self.expect_special(b')')?; + + // Build the function type + let param_types: Vec = params.iter().map(|p| p.typ.clone()).collect(); + let func_type = Type::function(ret_type.clone(), param_types, variadic); + + // Bind function to symbol table at current (global) scope + // Like sparse's bind_symbol() in parse.c + let func_sym = Symbol::function(name.clone(), func_type, self.symbols.depth()); + let _ = self.symbols.declare(func_sym); // Ignore redefinition errors for now + + // Enter function scope for parameters and body + self.symbols.enter_scope(); + + // Bind parameters in function scope + for param in ¶ms { + if let Some(param_name) = ¶m.name { + let param_sym = + Symbol::parameter(param_name.clone(), param.typ.clone(), self.symbols.depth()); + let _ = self.symbols.declare(param_sym); + } + } + + // Parse function body (block handles its own scope for locals) + let body = self.parse_block_stmt_no_scope()?; + + // Leave function scope + self.symbols.leave_scope(); + + Ok(FunctionDef { + return_type: ret_type, + name, + params, + body, + pos: func_pos, + }) + } + + /// Parse a parameter list + fn parse_parameter_list(&mut self) -> ParseResult<(Vec, bool)> { + let mut params = Vec::new(); + let mut variadic = false; + + if self.is_special(b')') { + return Ok((params, variadic)); + } + + // Check for (void) + if self.peek() == TokenType::Ident { + if let Some(name) = self.get_ident_name(self.current()) { + if name == "void" { + let saved_pos = self.pos; + self.advance(); + if self.is_special(b')') { + return Ok((params, variadic)); + } + // Not just void, backtrack + self.pos = saved_pos; + } + } + } + + loop { + // Check for ellipsis + if self.is_special_token(SpecialToken::Ellipsis) { + self.advance(); + variadic = true; + break; + } + + // Parse parameter type + let param_type = self.parse_type_specifier()?; + + // Handle pointer with qualifiers (const, volatile, restrict) + let mut typ = param_type; + while self.is_special(b'*') { + self.advance(); + let mut ptr_modifiers = TypeModifiers::empty(); + + // Parse pointer qualifiers + while self.peek() == TokenType::Ident { + if let Some(name) = self.get_ident_name(self.current()) { + match name.as_str() { + "const" => { + self.advance(); + ptr_modifiers |= TypeModifiers::CONST; + } + "volatile" => { + self.advance(); + ptr_modifiers |= TypeModifiers::VOLATILE; + } + "restrict" => { + self.advance(); + ptr_modifiers |= TypeModifiers::RESTRICT; + } + _ => break, + } + } else { + break; + } + } + + typ = Type { + kind: TypeKind::Pointer, + modifiers: ptr_modifiers, + base: Some(Box::new(typ)), + array_size: None, + params: None, + variadic: false, + composite: None, + }; + } + + // Parse optional parameter name + let param_name = if self.peek() == TokenType::Ident { + // Check if it's actually a name (not a type keyword) + if let Some(name) = self.get_ident_name(self.current()) { + if !self.is_declaration_start() { + self.advance(); + Some(name) + } else { + None + } + } else { + None + } + } else { + None + }; + + // Handle array declarator in parameter: int arr[] becomes int *arr + // C99 6.7.5.3: "A declaration of a parameter as 'array of type' shall be + // adjusted to 'qualified pointer to type'" + if self.is_special(b'[') { + self.advance(); + // Skip the array size if present (ignored in parameter context) + if !self.is_special(b']') { + let _ = self.parse_assignment_expr()?; + } + self.expect_special(b']')?; + + // Convert array to pointer + typ = Type { + kind: TypeKind::Pointer, + modifiers: TypeModifiers::empty(), + base: Some(Box::new(typ)), + array_size: None, + params: None, + variadic: false, + composite: None, + }; + } + + params.push(Parameter { + name: param_name, + typ, + }); + + if self.is_special(b',') { + self.advance(); + } else { + break; + } + } + + Ok((params, variadic)) + } + + /// Parse a translation unit (top-level) + pub fn parse_translation_unit(&mut self) -> ParseResult { + let mut tu = TranslationUnit::new(); + + self.skip_stream_tokens(); + + while !self.is_eof() { + // Try to determine if this is a function definition or a declaration + // Both start with type specifier + declarator + let external_decl = self.parse_external_decl()?; + tu.add(external_decl); + } + + Ok(tu) + } + + /// Parse an external declaration (function definition or declaration) + fn parse_external_decl(&mut self) -> ParseResult { + let decl_pos = self.current_pos(); + // Parse type specifier + let base_type = self.parse_type_specifier()?; + + // Check for standalone type definition (e.g., "enum Color { ... };") + // This happens when a composite type is defined but no variables are declared + if self.is_special(b';') { + self.advance(); + // Return empty declaration - the type was already registered in parse_*_specifier + return Ok(ExternalDecl::Declaration(Declaration { + declarators: vec![], + })); + } + + // Check for grouped declarator: void (*fp)(int) or int (*arr)[10] + // These are detected by '(' followed by '*' + if self.is_special(b'(') { + // Look ahead to see if this is a grouped declarator + let saved_pos = self.pos; + self.advance(); // consume '(' + if self.is_special(b'*') { + // This is a grouped declarator - use parse_declarator + self.pos = saved_pos; // restore position before '(' + let (name, typ) = self.parse_declarator(base_type.clone())?; + + // Skip any __attribute__ after declarator + self.skip_extensions(); + + // Handle initializer + let is_typedef = base_type.modifiers.contains(TypeModifiers::TYPEDEF); + let init = if self.is_special(b'=') { + if is_typedef { + return Err(ParseError::new( + "typedef cannot have initializer", + self.current_pos(), + )); + } + self.advance(); + Some(self.parse_initializer()?) + } else { + None + }; + + self.expect_special(b';')?; + + // Add to symbol table + if is_typedef { + let mut aliased_type = typ.clone(); + aliased_type.modifiers.remove(TypeModifiers::TYPEDEF); + let sym = Symbol::typedef(name.clone(), aliased_type, self.symbols.depth()); + let _ = self.symbols.declare(sym); + } else { + let var_sym = Symbol::variable(name.clone(), typ.clone(), self.symbols.depth()); + let _ = self.symbols.declare(var_sym); + } + + return Ok(ExternalDecl::Declaration(Declaration { + declarators: vec![InitDeclarator { name, typ, init }], + })); + } + // Not a grouped declarator, restore position + self.pos = saved_pos; + } + + // Handle pointer with qualifiers (const, volatile, restrict) + let mut typ = base_type.clone(); + while self.is_special(b'*') { + self.advance(); + let mut ptr_modifiers = TypeModifiers::empty(); + + // Parse pointer qualifiers + while self.peek() == TokenType::Ident { + if let Some(name) = self.get_ident_name(self.current()) { + match name.as_str() { + "const" => { + self.advance(); + ptr_modifiers |= TypeModifiers::CONST; + } + "volatile" => { + self.advance(); + ptr_modifiers |= TypeModifiers::VOLATILE; + } + "restrict" => { + self.advance(); + ptr_modifiers |= TypeModifiers::RESTRICT; + } + _ => break, + } + } else { + break; + } + } + + typ = Type { + kind: TypeKind::Pointer, + modifiers: ptr_modifiers, + base: Some(Box::new(typ)), + array_size: None, + params: None, + variadic: false, + composite: None, + }; + } + + // Check again for grouped declarator after pointer modifiers: char *(*fp)(int) + // At this point typ is char*, and we see (*fp)(int) + if self.is_special(b'(') { + let saved_pos = self.pos; + self.advance(); // consume '(' + if self.is_special(b'*') { + // This is a grouped declarator - use parse_declarator + self.pos = saved_pos; // restore position before '(' + let (name, full_typ) = self.parse_declarator(typ)?; + + // Skip any __attribute__ after declarator + self.skip_extensions(); + + // Handle initializer + let is_typedef = base_type.modifiers.contains(TypeModifiers::TYPEDEF); + let init = if self.is_special(b'=') { + if is_typedef { + return Err(ParseError::new( + "typedef cannot have initializer", + self.current_pos(), + )); + } + self.advance(); + Some(self.parse_initializer()?) + } else { + None + }; + + self.expect_special(b';')?; + + // Add to symbol table + if is_typedef { + let mut aliased_type = full_typ.clone(); + aliased_type.modifiers.remove(TypeModifiers::TYPEDEF); + let sym = Symbol::typedef(name.clone(), aliased_type, self.symbols.depth()); + let _ = self.symbols.declare(sym); + } else { + let var_sym = + Symbol::variable(name.clone(), full_typ.clone(), self.symbols.depth()); + let _ = self.symbols.declare(var_sym); + } + + return Ok(ExternalDecl::Declaration(Declaration { + declarators: vec![InitDeclarator { + name, + typ: full_typ, + init, + }], + })); + } + // Not a grouped declarator, restore position + self.pos = saved_pos; + } + + // Parse name + let name = self.expect_identifier()?; + + // Check for function definition vs declaration + if self.is_special(b'(') { + // Could be function definition or declaration + self.advance(); + let (params, variadic) = self.parse_parameter_list()?; + self.expect_special(b')')?; + + // Skip any __attribute__ after parameter list + self.skip_extensions(); + + if self.is_special(b'{') { + // Function definition + // Add function to symbol table so it can be called by other functions + let func_type = Type::function( + typ.clone(), + params.iter().map(|p| p.typ.clone()).collect(), + variadic, + ); + let func_sym = Symbol::function(name.clone(), func_type, self.symbols.depth()); + let _ = self.symbols.declare(func_sym); + + // Enter function scope for parameters + self.symbols.enter_scope(); + + // Bind parameters in function scope + for param in ¶ms { + if let Some(param_name) = ¶m.name { + let param_sym = Symbol::parameter( + param_name.clone(), + param.typ.clone(), + self.symbols.depth(), + ); + let _ = self.symbols.declare(param_sym); + } + } + + // Parse body without creating another scope + let body = self.parse_block_stmt_no_scope()?; + + // Leave function scope + self.symbols.leave_scope(); + + return Ok(ExternalDecl::FunctionDef(FunctionDef { + return_type: typ, + name, + params, + body, + pos: decl_pos, + })); + } else { + // Function declaration + self.expect_special(b';')?; + let func_type = Type::function( + typ, + params.iter().map(|p| p.typ.clone()).collect(), + variadic, + ); + // Add function declaration to symbol table so the variadic flag + // is available when the function is called + let func_sym = + Symbol::function(name.clone(), func_type.clone(), self.symbols.depth()); + let _ = self.symbols.declare(func_sym); + return Ok(ExternalDecl::Declaration(Declaration { + declarators: vec![InitDeclarator { + name, + typ: func_type, + init: None, + }], + })); + } + } + + // Check if this is a typedef declaration + let is_typedef = base_type.modifiers.contains(TypeModifiers::TYPEDEF); + + // Variable/typedef declaration + let mut declarators = Vec::new(); + + // Handle array - collect dimensions first, build type from right to left + let mut var_type = typ; + let mut dimensions: Vec> = Vec::new(); + while self.is_special(b'[') { + self.advance(); + let size = if self.is_special(b']') { + None + } else { + let arr_expr = self.parse_assignment_expr()?; + match arr_expr.kind { + ExprKind::IntLit(n) => Some(n as usize), + _ => None, + } + }; + self.expect_special(b']')?; + dimensions.push(size); + } + // Build type from right to left (innermost dimension first) + for size in dimensions.into_iter().rev() { + var_type = Type::array(var_type, size.unwrap_or(0)); + } + + // Skip any __attribute__ after variable name/array declarator + self.skip_extensions(); + + // Handle initializer (not allowed for typedef) + let init = if self.is_special(b'=') { + if is_typedef { + return Err(ParseError::new( + "typedef cannot have initializer", + self.current_pos(), + )); + } + self.advance(); + Some(self.parse_initializer()?) + } else { + None + }; + + // Add to symbol table + if is_typedef { + // Strip TYPEDEF modifier from the aliased type + let mut aliased_type = var_type.clone(); + aliased_type.modifiers.remove(TypeModifiers::TYPEDEF); + let sym = Symbol::typedef(name.clone(), aliased_type, self.symbols.depth()); + let _ = self.symbols.declare(sym); + } else { + // Add global variable to symbol table so it can be referenced by later code + let var_sym = Symbol::variable(name.clone(), var_type.clone(), self.symbols.depth()); + let _ = self.symbols.declare(var_sym); + } + + declarators.push(InitDeclarator { + name, + typ: var_type, + init, + }); + + // Handle additional declarators + while self.is_special(b',') { + self.advance(); + let (decl_name, decl_type) = self.parse_declarator(base_type.clone())?; + let decl_init = if self.is_special(b'=') { + if is_typedef { + return Err(ParseError::new( + "typedef cannot have initializer", + self.current_pos(), + )); + } + self.advance(); + Some(self.parse_initializer()?) + } else { + None + }; + // Add to symbol table + if is_typedef { + let mut aliased_type = decl_type.clone(); + aliased_type.modifiers.remove(TypeModifiers::TYPEDEF); + let sym = Symbol::typedef(decl_name.clone(), aliased_type, self.symbols.depth()); + let _ = self.symbols.declare(sym); + } else { + let var_sym = + Symbol::variable(decl_name.clone(), decl_type.clone(), self.symbols.depth()); + let _ = self.symbols.declare(var_sym); + } + declarators.push(InitDeclarator { + name: decl_name, + typ: decl_type, + init: decl_init, + }); + } + + self.expect_special(b';')?; + + Ok(ExternalDecl::Declaration(Declaration { declarators })) + } + + /// Evaluate a constant expression (for enum values and case labels) + fn eval_const_expr(&self, expr: &Expr) -> Option { + match &expr.kind { + ExprKind::IntLit(val) => Some(*val), + ExprKind::CharLit(c) => Some(*c as i64), + ExprKind::Unary { op, operand } => { + let val = self.eval_const_expr(operand)?; + match op { + UnaryOp::Neg => Some(-val), + UnaryOp::Not => Some(if val == 0 { 1 } else { 0 }), + UnaryOp::BitNot => Some(!val), + _ => None, + } + } + ExprKind::Binary { op, left, right } => { + let lval = self.eval_const_expr(left)?; + let rval = self.eval_const_expr(right)?; + match op { + BinaryOp::Add => Some(lval.wrapping_add(rval)), + BinaryOp::Sub => Some(lval.wrapping_sub(rval)), + BinaryOp::Mul => Some(lval.wrapping_mul(rval)), + BinaryOp::Div => { + if rval != 0 { + Some(lval / rval) + } else { + None + } + } + BinaryOp::Mod => { + if rval != 0 { + Some(lval % rval) + } else { + None + } + } + BinaryOp::BitAnd => Some(lval & rval), + BinaryOp::BitOr => Some(lval | rval), + BinaryOp::BitXor => Some(lval ^ rval), + BinaryOp::Shl => Some(lval << (rval as u32)), + BinaryOp::Shr => Some(lval >> (rval as u32)), + BinaryOp::Lt => Some(if lval < rval { 1 } else { 0 }), + BinaryOp::Le => Some(if lval <= rval { 1 } else { 0 }), + BinaryOp::Gt => Some(if lval > rval { 1 } else { 0 }), + BinaryOp::Ge => Some(if lval >= rval { 1 } else { 0 }), + BinaryOp::Eq => Some(if lval == rval { 1 } else { 0 }), + BinaryOp::Ne => Some(if lval != rval { 1 } else { 0 }), + BinaryOp::LogAnd => Some(if lval != 0 && rval != 0 { 1 } else { 0 }), + BinaryOp::LogOr => Some(if lval != 0 || rval != 0 { 1 } else { 0 }), + } + } + ExprKind::Ident { name } => { + // Check for enum constant + self.symbols.get_enum_value(name) + } + ExprKind::Conditional { + cond, + then_expr, + else_expr, + } => { + let cond_val = self.eval_const_expr(cond)?; + if cond_val != 0 { + self.eval_const_expr(then_expr) + } else { + self.eval_const_expr(else_expr) + } + } + _ => None, + } + } + + /// Parse a bitfield width (constant expression after ':') + fn parse_bitfield_width(&mut self) -> ParseResult { + let expr = self.parse_conditional_expr()?; + match self.eval_const_expr(&expr) { + Some(val) if val >= 0 => Ok(val as u32), + Some(_) => Err(ParseError::new( + "bitfield width must be non-negative", + self.current_pos(), + )), + None => Err(ParseError::new( + "bitfield width must be a constant expression", + self.current_pos(), + )), + } + } + + /// Validate a bitfield declaration + fn validate_bitfield(&self, typ: &Type, width: u32) -> ParseResult<()> { + // Check allowed types: _Bool, int, unsigned int (and their signed/unsigned variants) + let valid_type = matches!( + typ.kind, + TypeKind::Bool | TypeKind::Int | TypeKind::Char | TypeKind::Short | TypeKind::Long + ); + + if !valid_type { + return Err(ParseError::new( + "bitfield must have integer type", + self.current_pos(), + )); + } + + // Check that width doesn't exceed type size + let max_width = typ.size_bits(); + if width > max_width { + return Err(ParseError::new( + format!("bitfield width {} exceeds type size {}", width, max_width), + self.current_pos(), + )); + } + + Ok(()) + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::symbol::SymbolTable; + use crate::token::lexer::Tokenizer; + + fn parse_expr(input: &str) -> ParseResult { + let mut tokenizer = Tokenizer::new(input.as_bytes(), 0); + let tokens = tokenizer.tokenize(); + let idents = tokenizer.ident_table(); + let mut symbols = SymbolTable::new(); + let mut parser = Parser::new(&tokens, idents, &mut symbols); + parser.skip_stream_tokens(); + parser.parse_expression() + } + + // ======================================================================== + // Literal tests + // ======================================================================== + + #[test] + fn test_int_literal() { + let expr = parse_expr("42").unwrap(); + assert!(matches!(expr.kind, ExprKind::IntLit(42))); + } + + #[test] + fn test_hex_literal() { + let expr = parse_expr("0xFF").unwrap(); + assert!(matches!(expr.kind, ExprKind::IntLit(255))); + } + + #[test] + fn test_octal_literal() { + let expr = parse_expr("0777").unwrap(); + assert!(matches!(expr.kind, ExprKind::IntLit(511))); + } + + #[test] + fn test_float_literal() { + let expr = parse_expr("3.14").unwrap(); + match expr.kind { + ExprKind::FloatLit(v) => assert!((v - 3.14).abs() < 0.001), + _ => panic!("Expected FloatLit"), + } + } + + #[test] + fn test_char_literal() { + let expr = parse_expr("'a'").unwrap(); + assert!(matches!(expr.kind, ExprKind::CharLit('a'))); + } + + #[test] + fn test_char_escape() { + let expr = parse_expr("'\\n'").unwrap(); + assert!(matches!(expr.kind, ExprKind::CharLit('\n'))); + } + + #[test] + fn test_string_literal() { + let expr = parse_expr("\"hello\"").unwrap(); + match expr.kind { + ExprKind::StringLit(s) => assert_eq!(s, "hello"), + _ => panic!("Expected StringLit"), + } + } + + // ======================================================================== + // Character literal escape sequence tests + // ======================================================================== + + #[test] + fn test_char_escape_newline() { + let expr = parse_expr("'\\n'").unwrap(); + assert!(matches!(expr.kind, ExprKind::CharLit('\n'))); + } + + #[test] + fn test_char_escape_tab() { + let expr = parse_expr("'\\t'").unwrap(); + assert!(matches!(expr.kind, ExprKind::CharLit('\t'))); + } + + #[test] + fn test_char_escape_carriage_return() { + let expr = parse_expr("'\\r'").unwrap(); + assert!(matches!(expr.kind, ExprKind::CharLit('\r'))); + } + + #[test] + fn test_char_escape_backslash() { + let expr = parse_expr("'\\\\'").unwrap(); + assert!(matches!(expr.kind, ExprKind::CharLit('\\'))); + } + + #[test] + fn test_char_escape_single_quote() { + let expr = parse_expr("'\\''").unwrap(); + assert!(matches!(expr.kind, ExprKind::CharLit('\''))); + } + + #[test] + fn test_char_escape_double_quote() { + let expr = parse_expr("'\\\"'").unwrap(); + assert!(matches!(expr.kind, ExprKind::CharLit('"'))); + } + + #[test] + fn test_char_escape_bell() { + let expr = parse_expr("'\\a'").unwrap(); + assert!(matches!(expr.kind, ExprKind::CharLit('\x07'))); + } + + #[test] + fn test_char_escape_backspace() { + let expr = parse_expr("'\\b'").unwrap(); + assert!(matches!(expr.kind, ExprKind::CharLit('\x08'))); + } + + #[test] + fn test_char_escape_formfeed() { + let expr = parse_expr("'\\f'").unwrap(); + assert!(matches!(expr.kind, ExprKind::CharLit('\x0C'))); + } + + #[test] + fn test_char_escape_vertical_tab() { + let expr = parse_expr("'\\v'").unwrap(); + assert!(matches!(expr.kind, ExprKind::CharLit('\x0B'))); + } + + #[test] + fn test_char_escape_null() { + let expr = parse_expr("'\\0'").unwrap(); + assert!(matches!(expr.kind, ExprKind::CharLit('\0'))); + } + + #[test] + fn test_char_escape_hex() { + let expr = parse_expr("'\\x41'").unwrap(); + assert!(matches!(expr.kind, ExprKind::CharLit('A'))); + } + + #[test] + fn test_char_escape_hex_lowercase() { + let expr = parse_expr("'\\x0a'").unwrap(); + assert!(matches!(expr.kind, ExprKind::CharLit('\n'))); + } + + #[test] + fn test_char_escape_octal() { + let expr = parse_expr("'\\101'").unwrap(); + assert!(matches!(expr.kind, ExprKind::CharLit('A'))); // octal 101 = 65 = 'A' + } + + #[test] + fn test_char_escape_octal_012() { + let expr = parse_expr("'\\012'").unwrap(); + assert!(matches!(expr.kind, ExprKind::CharLit('\n'))); // octal 012 = 10 = '\n' + } + + // ======================================================================== + // String literal escape sequence tests + // ======================================================================== + + #[test] + fn test_string_escape_newline() { + let expr = parse_expr("\"hello\\nworld\"").unwrap(); + match expr.kind { + ExprKind::StringLit(s) => assert_eq!(s, "hello\nworld"), + _ => panic!("Expected StringLit"), + } + } + + #[test] + fn test_string_escape_tab() { + let expr = parse_expr("\"hello\\tworld\"").unwrap(); + match expr.kind { + ExprKind::StringLit(s) => assert_eq!(s, "hello\tworld"), + _ => panic!("Expected StringLit"), + } + } + + #[test] + fn test_string_escape_carriage_return() { + let expr = parse_expr("\"hello\\rworld\"").unwrap(); + match expr.kind { + ExprKind::StringLit(s) => assert_eq!(s, "hello\rworld"), + _ => panic!("Expected StringLit"), + } + } + + #[test] + fn test_string_escape_backslash() { + let expr = parse_expr("\"hello\\\\world\"").unwrap(); + match expr.kind { + ExprKind::StringLit(s) => assert_eq!(s, "hello\\world"), + _ => panic!("Expected StringLit"), + } + } + + #[test] + fn test_string_escape_double_quote() { + let expr = parse_expr("\"hello\\\"world\"").unwrap(); + match expr.kind { + ExprKind::StringLit(s) => assert_eq!(s, "hello\"world"), + _ => panic!("Expected StringLit"), + } + } + + #[test] + fn test_string_escape_bell() { + let expr = parse_expr("\"\\a\"").unwrap(); + match expr.kind { + ExprKind::StringLit(s) => assert_eq!(s, "\x07"), + _ => panic!("Expected StringLit"), + } + } + + #[test] + fn test_string_escape_backspace() { + let expr = parse_expr("\"\\b\"").unwrap(); + match expr.kind { + ExprKind::StringLit(s) => assert_eq!(s, "\x08"), + _ => panic!("Expected StringLit"), + } + } + + #[test] + fn test_string_escape_formfeed() { + let expr = parse_expr("\"\\f\"").unwrap(); + match expr.kind { + ExprKind::StringLit(s) => assert_eq!(s, "\x0C"), + _ => panic!("Expected StringLit"), + } + } + + #[test] + fn test_string_escape_vertical_tab() { + let expr = parse_expr("\"\\v\"").unwrap(); + match expr.kind { + ExprKind::StringLit(s) => assert_eq!(s, "\x0B"), + _ => panic!("Expected StringLit"), + } + } + + #[test] + fn test_string_escape_null() { + let expr = parse_expr("\"hello\\0world\"").unwrap(); + match expr.kind { + ExprKind::StringLit(s) => { + assert_eq!(s.len(), 11); + assert_eq!(s.as_bytes()[5], 0); + } + _ => panic!("Expected StringLit"), + } + } + + #[test] + fn test_string_escape_hex() { + let expr = parse_expr("\"\\x41\\x42\\x43\"").unwrap(); + match expr.kind { + ExprKind::StringLit(s) => assert_eq!(s, "ABC"), + _ => panic!("Expected StringLit"), + } + } + + #[test] + fn test_string_escape_octal() { + let expr = parse_expr("\"\\101\\102\\103\"").unwrap(); + match expr.kind { + ExprKind::StringLit(s) => assert_eq!(s, "ABC"), // octal 101,102,103 = A,B,C + _ => panic!("Expected StringLit"), + } + } + + #[test] + fn test_string_escape_octal_012() { + let expr = parse_expr("\"line1\\012line2\"").unwrap(); + match expr.kind { + ExprKind::StringLit(s) => assert_eq!(s, "line1\nline2"), // octal 012 = newline + _ => panic!("Expected StringLit"), + } + } + + #[test] + fn test_string_multiple_escapes() { + let expr = parse_expr("\"\\t\\n\\r\\\\\"").unwrap(); + match expr.kind { + ExprKind::StringLit(s) => assert_eq!(s, "\t\n\r\\"), + _ => panic!("Expected StringLit"), + } + } + + #[test] + fn test_string_mixed_content() { + let expr = parse_expr("\"Name:\\tJohn\\nAge:\\t30\"").unwrap(); + match expr.kind { + ExprKind::StringLit(s) => assert_eq!(s, "Name:\tJohn\nAge:\t30"), + _ => panic!("Expected StringLit"), + } + } + + #[test] + fn test_string_empty() { + let expr = parse_expr("\"\"").unwrap(); + match expr.kind { + ExprKind::StringLit(s) => assert_eq!(s, ""), + _ => panic!("Expected StringLit"), + } + } + + #[test] + fn test_integer_literal_suffixes() { + // Plain int + let expr = parse_expr("42").unwrap(); + assert_eq!(expr.typ.as_ref().unwrap().kind, TypeKind::Int); + assert!(!expr.typ.as_ref().unwrap().is_unsigned()); + + // Unsigned int + let expr = parse_expr("42U").unwrap(); + assert_eq!(expr.typ.as_ref().unwrap().kind, TypeKind::Int); + assert!(expr.typ.as_ref().unwrap().is_unsigned()); + + // Long + let expr = parse_expr("42L").unwrap(); + assert_eq!(expr.typ.as_ref().unwrap().kind, TypeKind::Long); + assert!(!expr.typ.as_ref().unwrap().is_unsigned()); + + // Unsigned long (UL) + let expr = parse_expr("42UL").unwrap(); + assert_eq!(expr.typ.as_ref().unwrap().kind, TypeKind::Long); + assert!(expr.typ.as_ref().unwrap().is_unsigned()); + + // Unsigned long (LU) + let expr = parse_expr("42LU").unwrap(); + assert_eq!(expr.typ.as_ref().unwrap().kind, TypeKind::Long); + assert!(expr.typ.as_ref().unwrap().is_unsigned()); + + // Long long + let expr = parse_expr("42LL").unwrap(); + assert_eq!(expr.typ.as_ref().unwrap().kind, TypeKind::LongLong); + assert!(!expr.typ.as_ref().unwrap().is_unsigned()); + + // Unsigned long long (ULL) + let expr = parse_expr("42ULL").unwrap(); + assert_eq!(expr.typ.as_ref().unwrap().kind, TypeKind::LongLong); + assert!(expr.typ.as_ref().unwrap().is_unsigned()); + + // Unsigned long long (LLU) + let expr = parse_expr("42LLU").unwrap(); + assert_eq!(expr.typ.as_ref().unwrap().kind, TypeKind::LongLong); + assert!(expr.typ.as_ref().unwrap().is_unsigned()); + + // Hex with suffix + let expr = parse_expr("0xFFLL").unwrap(); + assert_eq!(expr.typ.as_ref().unwrap().kind, TypeKind::LongLong); + assert!(!expr.typ.as_ref().unwrap().is_unsigned()); + + let expr = parse_expr("0xFFULL").unwrap(); + assert_eq!(expr.typ.as_ref().unwrap().kind, TypeKind::LongLong); + assert!(expr.typ.as_ref().unwrap().is_unsigned()); + } + + #[test] + fn test_identifier() { + let expr = parse_expr("foo").unwrap(); + match expr.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "foo"), + _ => panic!("Expected Ident"), + } + } + + // ======================================================================== + // Binary operator tests + // ======================================================================== + + #[test] + fn test_addition() { + let expr = parse_expr("1 + 2").unwrap(); + match expr.kind { + ExprKind::Binary { op, left, right } => { + assert_eq!(op, BinaryOp::Add); + assert!(matches!(left.kind, ExprKind::IntLit(1))); + assert!(matches!(right.kind, ExprKind::IntLit(2))); + } + _ => panic!("Expected Binary"), + } + } + + #[test] + fn test_subtraction() { + let expr = parse_expr("5 - 3").unwrap(); + match expr.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Sub), + _ => panic!("Expected Binary"), + } + } + + #[test] + fn test_multiplication() { + let expr = parse_expr("2 * 3").unwrap(); + match expr.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Mul), + _ => panic!("Expected Binary"), + } + } + + #[test] + fn test_division() { + let expr = parse_expr("10 / 2").unwrap(); + match expr.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Div), + _ => panic!("Expected Binary"), + } + } + + #[test] + fn test_modulo() { + let expr = parse_expr("10 % 3").unwrap(); + match expr.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Mod), + _ => panic!("Expected Binary"), + } + } + + #[test] + fn test_precedence_mul_add() { + // 1 + 2 * 3 should be 1 + (2 * 3) + let expr = parse_expr("1 + 2 * 3").unwrap(); + match expr.kind { + ExprKind::Binary { op, left, right } => { + assert_eq!(op, BinaryOp::Add); + assert!(matches!(left.kind, ExprKind::IntLit(1))); + match right.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Mul), + _ => panic!("Expected nested Binary"), + } + } + _ => panic!("Expected Binary"), + } + } + + #[test] + fn test_left_associativity() { + // 1 - 2 - 3 should be (1 - 2) - 3 + let expr = parse_expr("1 - 2 - 3").unwrap(); + match expr.kind { + ExprKind::Binary { op, left, right } => { + assert_eq!(op, BinaryOp::Sub); + assert!(matches!(right.kind, ExprKind::IntLit(3))); + match left.kind { + ExprKind::Binary { op, left, right } => { + assert_eq!(op, BinaryOp::Sub); + assert!(matches!(left.kind, ExprKind::IntLit(1))); + assert!(matches!(right.kind, ExprKind::IntLit(2))); + } + _ => panic!("Expected nested Binary"), + } + } + _ => panic!("Expected Binary"), + } + } + + #[test] + fn test_comparison_ops() { + let expr = parse_expr("a < b").unwrap(); + match expr.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Lt), + _ => panic!("Expected Binary"), + } + + let expr = parse_expr("a > b").unwrap(); + match expr.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Gt), + _ => panic!("Expected Binary"), + } + + let expr = parse_expr("a <= b").unwrap(); + match expr.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Le), + _ => panic!("Expected Binary"), + } + + let expr = parse_expr("a >= b").unwrap(); + match expr.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Ge), + _ => panic!("Expected Binary"), + } + } + + #[test] + fn test_equality_ops() { + let expr = parse_expr("a == b").unwrap(); + match expr.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Eq), + _ => panic!("Expected Binary"), + } + + let expr = parse_expr("a != b").unwrap(); + match expr.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Ne), + _ => panic!("Expected Binary"), + } + } + + #[test] + fn test_logical_ops() { + let expr = parse_expr("a && b").unwrap(); + match expr.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::LogAnd), + _ => panic!("Expected Binary"), + } + + let expr = parse_expr("a || b").unwrap(); + match expr.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::LogOr), + _ => panic!("Expected Binary"), + } + } + + #[test] + fn test_bitwise_ops() { + let expr = parse_expr("a & b").unwrap(); + match expr.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::BitAnd), + _ => panic!("Expected Binary"), + } + + let expr = parse_expr("a | b").unwrap(); + match expr.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::BitOr), + _ => panic!("Expected Binary"), + } + + let expr = parse_expr("a ^ b").unwrap(); + match expr.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::BitXor), + _ => panic!("Expected Binary"), + } + } + + #[test] + fn test_shift_ops() { + let expr = parse_expr("a << b").unwrap(); + match expr.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Shl), + _ => panic!("Expected Binary"), + } + + let expr = parse_expr("a >> b").unwrap(); + match expr.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Shr), + _ => panic!("Expected Binary"), + } + } + + // ======================================================================== + // Unary operator tests + // ======================================================================== + + #[test] + fn test_unary_neg() { + let expr = parse_expr("-x").unwrap(); + match expr.kind { + ExprKind::Unary { op, operand } => { + assert_eq!(op, UnaryOp::Neg); + match operand.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "x"), + _ => panic!("Expected Ident"), + } + } + _ => panic!("Expected Unary"), + } + } + + #[test] + fn test_unary_not() { + let expr = parse_expr("!x").unwrap(); + match expr.kind { + ExprKind::Unary { op, .. } => assert_eq!(op, UnaryOp::Not), + _ => panic!("Expected Unary"), + } + } + + #[test] + fn test_unary_bitnot() { + let expr = parse_expr("~x").unwrap(); + match expr.kind { + ExprKind::Unary { op, .. } => assert_eq!(op, UnaryOp::BitNot), + _ => panic!("Expected Unary"), + } + } + + #[test] + fn test_unary_addr() { + let expr = parse_expr("&x").unwrap(); + match expr.kind { + ExprKind::Unary { op, .. } => assert_eq!(op, UnaryOp::AddrOf), + _ => panic!("Expected Unary"), + } + } + + #[test] + fn test_unary_deref() { + let expr = parse_expr("*p").unwrap(); + match expr.kind { + ExprKind::Unary { op, .. } => assert_eq!(op, UnaryOp::Deref), + _ => panic!("Expected Unary"), + } + } + + #[test] + fn test_pre_increment() { + let expr = parse_expr("++x").unwrap(); + match expr.kind { + ExprKind::Unary { op, .. } => assert_eq!(op, UnaryOp::PreInc), + _ => panic!("Expected Unary"), + } + } + + #[test] + fn test_pre_decrement() { + let expr = parse_expr("--x").unwrap(); + match expr.kind { + ExprKind::Unary { op, .. } => assert_eq!(op, UnaryOp::PreDec), + _ => panic!("Expected Unary"), + } + } + + // ======================================================================== + // Postfix operator tests + // ======================================================================== + + #[test] + fn test_post_increment() { + let expr = parse_expr("x++").unwrap(); + assert!(matches!(expr.kind, ExprKind::PostInc(_))); + } + + #[test] + fn test_post_decrement() { + let expr = parse_expr("x--").unwrap(); + assert!(matches!(expr.kind, ExprKind::PostDec(_))); + } + + #[test] + fn test_array_subscript() { + let expr = parse_expr("arr[5]").unwrap(); + match expr.kind { + ExprKind::Index { array, index } => { + match array.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "arr"), + _ => panic!("Expected Ident"), + } + assert!(matches!(index.kind, ExprKind::IntLit(5))); + } + _ => panic!("Expected Index"), + } + } + + #[test] + fn test_member_access() { + let expr = parse_expr("obj.field").unwrap(); + match expr.kind { + ExprKind::Member { expr, member } => { + match expr.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "obj"), + _ => panic!("Expected Ident"), + } + assert_eq!(member, "field"); + } + _ => panic!("Expected Member"), + } + } + + #[test] + fn test_arrow_access() { + let expr = parse_expr("ptr->field").unwrap(); + match expr.kind { + ExprKind::Arrow { expr, member } => { + match expr.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "ptr"), + _ => panic!("Expected Ident"), + } + assert_eq!(member, "field"); + } + _ => panic!("Expected Arrow"), + } + } + + #[test] + fn test_function_call_no_args() { + let expr = parse_expr("foo()").unwrap(); + match expr.kind { + ExprKind::Call { func, args } => { + match func.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "foo"), + _ => panic!("Expected Ident"), + } + assert!(args.is_empty()); + } + _ => panic!("Expected Call"), + } + } + + #[test] + fn test_function_call_with_args() { + let expr = parse_expr("foo(1, 2, 3)").unwrap(); + match expr.kind { + ExprKind::Call { func, args } => { + match func.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "foo"), + _ => panic!("Expected Ident"), + } + assert_eq!(args.len(), 3); + } + _ => panic!("Expected Call"), + } + } + + #[test] + fn test_chained_postfix() { + // obj.arr[0]->next + let expr = parse_expr("obj.arr[0]").unwrap(); + match expr.kind { + ExprKind::Index { array, index } => { + match array.kind { + ExprKind::Member { expr, member } => { + match expr.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "obj"), + _ => panic!("Expected Ident"), + } + assert_eq!(member, "arr"); + } + _ => panic!("Expected Member"), + } + assert!(matches!(index.kind, ExprKind::IntLit(0))); + } + _ => panic!("Expected Index"), + } + } + + // ======================================================================== + // Assignment tests + // ======================================================================== + + #[test] + fn test_simple_assignment() { + let expr = parse_expr("x = 5").unwrap(); + match expr.kind { + ExprKind::Assign { op, target, value } => { + assert_eq!(op, AssignOp::Assign); + match target.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "x"), + _ => panic!("Expected Ident"), + } + assert!(matches!(value.kind, ExprKind::IntLit(5))); + } + _ => panic!("Expected Assign"), + } + } + + #[test] + fn test_compound_assignments() { + let expr = parse_expr("x += 5").unwrap(); + match expr.kind { + ExprKind::Assign { op, .. } => assert_eq!(op, AssignOp::AddAssign), + _ => panic!("Expected Assign"), + } + + let expr = parse_expr("x -= 5").unwrap(); + match expr.kind { + ExprKind::Assign { op, .. } => assert_eq!(op, AssignOp::SubAssign), + _ => panic!("Expected Assign"), + } + + let expr = parse_expr("x *= 5").unwrap(); + match expr.kind { + ExprKind::Assign { op, .. } => assert_eq!(op, AssignOp::MulAssign), + _ => panic!("Expected Assign"), + } + } + + #[test] + fn test_assignment_right_associativity() { + // a = b = c should be a = (b = c) + let expr = parse_expr("a = b = c").unwrap(); + match expr.kind { + ExprKind::Assign { target, value, .. } => { + match target.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "a"), + _ => panic!("Expected Ident"), + } + match value.kind { + ExprKind::Assign { target, .. } => match target.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "b"), + _ => panic!("Expected Ident"), + }, + _ => panic!("Expected nested Assign"), + } + } + _ => panic!("Expected Assign"), + } + } + + // ======================================================================== + // Ternary expression tests + // ======================================================================== + + #[test] + fn test_ternary() { + let expr = parse_expr("a ? b : c").unwrap(); + match expr.kind { + ExprKind::Conditional { + cond, + then_expr, + else_expr, + } => { + match cond.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "a"), + _ => panic!("Expected Ident"), + } + match then_expr.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "b"), + _ => panic!("Expected Ident"), + } + match else_expr.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "c"), + _ => panic!("Expected Ident"), + } + } + _ => panic!("Expected Conditional"), + } + } + + #[test] + fn test_nested_ternary() { + // a ? b : c ? d : e should be a ? b : (c ? d : e) + let expr = parse_expr("a ? b : c ? d : e").unwrap(); + match expr.kind { + ExprKind::Conditional { else_expr, .. } => { + assert!(matches!(else_expr.kind, ExprKind::Conditional { .. })); + } + _ => panic!("Expected Conditional"), + } + } + + // ======================================================================== + // Comma expression tests + // ======================================================================== + + #[test] + fn test_comma_expr() { + let expr = parse_expr("a, b, c").unwrap(); + match expr.kind { + ExprKind::Comma(exprs) => assert_eq!(exprs.len(), 3), + _ => panic!("Expected Comma"), + } + } + + // ======================================================================== + // sizeof tests + // ======================================================================== + + #[test] + fn test_sizeof_expr() { + let expr = parse_expr("sizeof x").unwrap(); + assert!(matches!(expr.kind, ExprKind::SizeofExpr(_))); + } + + #[test] + fn test_sizeof_type() { + let expr = parse_expr("sizeof(int)").unwrap(); + match expr.kind { + ExprKind::SizeofType(typ) => assert_eq!(typ.kind, TypeKind::Int), + _ => panic!("Expected SizeofType"), + } + } + + #[test] + fn test_sizeof_paren_expr() { + // sizeof(x) where x is not a type + let expr = parse_expr("sizeof(x)").unwrap(); + assert!(matches!(expr.kind, ExprKind::SizeofExpr(_))); + } + + // ======================================================================== + // Cast tests + // ======================================================================== + + #[test] + fn test_cast() { + let expr = parse_expr("(int)x").unwrap(); + match expr.kind { + ExprKind::Cast { cast_type, expr } => { + assert_eq!(cast_type.kind, TypeKind::Int); + match expr.kind { + ExprKind::Ident { name, .. } => assert_eq!(name, "x"), + _ => panic!("Expected Ident"), + } + } + _ => panic!("Expected Cast"), + } + } + + #[test] + fn test_cast_unsigned_char() { + let expr = parse_expr("(unsigned char)x").unwrap(); + match expr.kind { + ExprKind::Cast { cast_type, .. } => { + assert_eq!(cast_type.kind, TypeKind::Char); + assert!(cast_type.modifiers.contains(TypeModifiers::UNSIGNED)); + } + _ => panic!("Expected Cast"), + } + } + + #[test] + fn test_cast_signed_int() { + let expr = parse_expr("(signed int)x").unwrap(); + match expr.kind { + ExprKind::Cast { cast_type, .. } => { + assert_eq!(cast_type.kind, TypeKind::Int); + assert!(cast_type.modifiers.contains(TypeModifiers::SIGNED)); + } + _ => panic!("Expected Cast"), + } + } + + #[test] + fn test_cast_unsigned_long() { + let expr = parse_expr("(unsigned long)x").unwrap(); + match expr.kind { + ExprKind::Cast { cast_type, .. } => { + assert_eq!(cast_type.kind, TypeKind::Long); + assert!(cast_type.modifiers.contains(TypeModifiers::UNSIGNED)); + } + _ => panic!("Expected Cast"), + } + } + + #[test] + fn test_cast_long_long() { + let expr = parse_expr("(long long)x").unwrap(); + match expr.kind { + ExprKind::Cast { cast_type, .. } => { + assert_eq!(cast_type.kind, TypeKind::LongLong); + } + _ => panic!("Expected Cast"), + } + } + + #[test] + fn test_cast_unsigned_long_long() { + let expr = parse_expr("(unsigned long long)x").unwrap(); + match expr.kind { + ExprKind::Cast { cast_type, .. } => { + assert_eq!(cast_type.kind, TypeKind::LongLong); + assert!(cast_type.modifiers.contains(TypeModifiers::UNSIGNED)); + } + _ => panic!("Expected Cast"), + } + } + + #[test] + fn test_cast_pointer() { + let expr = parse_expr("(int*)x").unwrap(); + match expr.kind { + ExprKind::Cast { cast_type, .. } => { + assert_eq!(cast_type.kind, TypeKind::Pointer); + let base = cast_type.get_base().unwrap(); + assert_eq!(base.kind, TypeKind::Int); + } + _ => panic!("Expected Cast"), + } + } + + #[test] + fn test_cast_void_pointer() { + let expr = parse_expr("(void*)x").unwrap(); + match expr.kind { + ExprKind::Cast { cast_type, .. } => { + assert_eq!(cast_type.kind, TypeKind::Pointer); + let base = cast_type.get_base().unwrap(); + assert_eq!(base.kind, TypeKind::Void); + } + _ => panic!("Expected Cast"), + } + } + + #[test] + fn test_cast_unsigned_char_pointer() { + let expr = parse_expr("(unsigned char*)x").unwrap(); + match expr.kind { + ExprKind::Cast { cast_type, .. } => { + assert_eq!(cast_type.kind, TypeKind::Pointer); + let base = cast_type.get_base().unwrap(); + assert_eq!(base.kind, TypeKind::Char); + assert!(base.modifiers.contains(TypeModifiers::UNSIGNED)); + } + _ => panic!("Expected Cast"), + } + } + + #[test] + fn test_cast_const_int() { + let expr = parse_expr("(const int)x").unwrap(); + match expr.kind { + ExprKind::Cast { cast_type, .. } => { + assert_eq!(cast_type.kind, TypeKind::Int); + assert!(cast_type.modifiers.contains(TypeModifiers::CONST)); + } + _ => panic!("Expected Cast"), + } + } + + #[test] + fn test_cast_double_pointer() { + let expr = parse_expr("(int**)x").unwrap(); + match expr.kind { + ExprKind::Cast { cast_type, .. } => { + assert_eq!(cast_type.kind, TypeKind::Pointer); + let base = cast_type.get_base().unwrap(); + assert_eq!(base.kind, TypeKind::Pointer); + let innermost = base.get_base().unwrap(); + assert_eq!(innermost.kind, TypeKind::Int); + } + _ => panic!("Expected Cast"), + } + } + + #[test] + fn test_sizeof_compound_type() { + let expr = parse_expr("sizeof(unsigned long long)").unwrap(); + match expr.kind { + ExprKind::SizeofType(typ) => { + assert_eq!(typ.kind, TypeKind::LongLong); + assert!(typ.modifiers.contains(TypeModifiers::UNSIGNED)); + } + _ => panic!("Expected SizeofType"), + } + } + + #[test] + fn test_sizeof_pointer_type() { + let expr = parse_expr("sizeof(int*)").unwrap(); + match expr.kind { + ExprKind::SizeofType(typ) => { + assert_eq!(typ.kind, TypeKind::Pointer); + } + _ => panic!("Expected SizeofType"), + } + } + + // ======================================================================== + // Parentheses tests + // ======================================================================== + + #[test] + fn test_parentheses() { + let expr = parse_expr("(1 + 2) * 3").unwrap(); + match expr.kind { + ExprKind::Binary { op, left, .. } => { + assert_eq!(op, BinaryOp::Mul); + match left.kind { + ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Add), + _ => panic!("Expected Binary"), + } + } + _ => panic!("Expected Binary"), + } + } + + // ======================================================================== + // Complex expression tests + // ======================================================================== + + #[test] + fn test_complex_expr() { + // x = a + b * c - d / e + let expr = parse_expr("x = a + b * c - d / e").unwrap(); + assert!(matches!(expr.kind, ExprKind::Assign { .. })); + } + + #[test] + fn test_function_call_complex() { + // foo(a + b, c * d) + let expr = parse_expr("foo(a + b, c * d)").unwrap(); + match expr.kind { + ExprKind::Call { args, .. } => { + assert_eq!(args.len(), 2); + assert!(matches!( + args[0].kind, + ExprKind::Binary { + op: BinaryOp::Add, + .. + } + )); + assert!(matches!( + args[1].kind, + ExprKind::Binary { + op: BinaryOp::Mul, + .. + } + )); + } + _ => panic!("Expected Call"), + } + } + + #[test] + fn test_pointer_arithmetic() { + // *p++ + let expr = parse_expr("*p++").unwrap(); + match expr.kind { + ExprKind::Unary { + op: UnaryOp::Deref, + operand, + } => { + assert!(matches!(operand.kind, ExprKind::PostInc(_))); + } + _ => panic!("Expected Unary Deref"), + } + } + + // ======================================================================== + // Statement tests + // ======================================================================== + + fn parse_stmt(input: &str) -> ParseResult { + let mut tokenizer = Tokenizer::new(input.as_bytes(), 0); + let tokens = tokenizer.tokenize(); + let idents = tokenizer.ident_table(); + let mut symbols = SymbolTable::new(); + let mut parser = Parser::new(&tokens, idents, &mut symbols); + parser.skip_stream_tokens(); + parser.parse_statement() + } + + #[test] + fn test_empty_stmt() { + let stmt = parse_stmt(";").unwrap(); + assert!(matches!(stmt, Stmt::Empty)); + } + + #[test] + fn test_expr_stmt() { + let stmt = parse_stmt("x = 5;").unwrap(); + assert!(matches!(stmt, Stmt::Expr(_))); + } + + #[test] + fn test_if_stmt() { + let stmt = parse_stmt("if (x) y = 1;").unwrap(); + match stmt { + Stmt::If { + cond, + then_stmt, + else_stmt, + } => { + assert!(matches!(cond.kind, ExprKind::Ident { .. })); + assert!(matches!(*then_stmt, Stmt::Expr(_))); + assert!(else_stmt.is_none()); + } + _ => panic!("Expected If"), + } + } + + #[test] + fn test_if_else_stmt() { + let stmt = parse_stmt("if (x) y = 1; else y = 2;").unwrap(); + match stmt { + Stmt::If { else_stmt, .. } => { + assert!(else_stmt.is_some()); + } + _ => panic!("Expected If"), + } + } + + #[test] + fn test_while_stmt() { + let stmt = parse_stmt("while (x) x--;").unwrap(); + match stmt { + Stmt::While { cond, body } => { + assert!(matches!(cond.kind, ExprKind::Ident { .. })); + assert!(matches!(*body, Stmt::Expr(_))); + } + _ => panic!("Expected While"), + } + } + + #[test] + fn test_do_while_stmt() { + let stmt = parse_stmt("do x++; while (x < 10);").unwrap(); + match stmt { + Stmt::DoWhile { body, cond } => { + assert!(matches!(*body, Stmt::Expr(_))); + assert!(matches!( + cond.kind, + ExprKind::Binary { + op: BinaryOp::Lt, + .. + } + )); + } + _ => panic!("Expected DoWhile"), + } + } + + #[test] + fn test_for_stmt_basic() { + let stmt = parse_stmt("for (i = 0; i < 10; i++) x++;").unwrap(); + match stmt { + Stmt::For { + init, + cond, + post, + body, + } => { + assert!(init.is_some()); + assert!(cond.is_some()); + assert!(post.is_some()); + assert!(matches!(*body, Stmt::Expr(_))); + } + _ => panic!("Expected For"), + } + } + + #[test] + fn test_for_stmt_with_decl() { + let stmt = parse_stmt("for (int i = 0; i < 10; i++) x++;").unwrap(); + match stmt { + Stmt::For { init, .. } => { + assert!(matches!(init, Some(ForInit::Declaration(_)))); + } + _ => panic!("Expected For"), + } + } + + #[test] + fn test_for_stmt_empty() { + let stmt = parse_stmt("for (;;) ;").unwrap(); + match stmt { + Stmt::For { + init, + cond, + post, + body, + } => { + assert!(init.is_none()); + assert!(cond.is_none()); + assert!(post.is_none()); + assert!(matches!(*body, Stmt::Empty)); + } + _ => panic!("Expected For"), + } + } + + #[test] + fn test_return_void() { + let stmt = parse_stmt("return;").unwrap(); + match stmt { + Stmt::Return(None) => {} + _ => panic!("Expected Return(None)"), + } + } + + #[test] + fn test_return_value() { + let stmt = parse_stmt("return 42;").unwrap(); + match stmt { + Stmt::Return(Some(ref e)) => { + assert!(matches!(e.kind, ExprKind::IntLit(42))); + } + _ => panic!("Expected Return(Some(42))"), + } + } + + #[test] + fn test_break_stmt() { + let stmt = parse_stmt("break;").unwrap(); + assert!(matches!(stmt, Stmt::Break)); + } + + #[test] + fn test_continue_stmt() { + let stmt = parse_stmt("continue;").unwrap(); + assert!(matches!(stmt, Stmt::Continue)); + } + + #[test] + fn test_goto_stmt() { + let stmt = parse_stmt("goto label;").unwrap(); + match stmt { + Stmt::Goto(name) => assert_eq!(name, "label"), + _ => panic!("Expected Goto"), + } + } + + #[test] + fn test_labeled_stmt() { + let stmt = parse_stmt("label: x = 1;").unwrap(); + match stmt { + Stmt::Label { name, stmt } => { + assert_eq!(name, "label"); + assert!(matches!(*stmt, Stmt::Expr(_))); + } + _ => panic!("Expected Label"), + } + } + + #[test] + fn test_block_stmt() { + let stmt = parse_stmt("{ x = 1; y = 2; }").unwrap(); + match stmt { + Stmt::Block(items) => { + assert_eq!(items.len(), 2); + assert!(matches!(items[0], BlockItem::Statement(Stmt::Expr(_)))); + assert!(matches!(items[1], BlockItem::Statement(Stmt::Expr(_)))); + } + _ => panic!("Expected Block"), + } + } + + #[test] + fn test_block_with_decl() { + let stmt = parse_stmt("{ int x = 1; x++; }").unwrap(); + match stmt { + Stmt::Block(items) => { + assert_eq!(items.len(), 2); + assert!(matches!(items[0], BlockItem::Declaration(_))); + assert!(matches!(items[1], BlockItem::Statement(_))); + } + _ => panic!("Expected Block"), + } + } + + // ======================================================================== + // Declaration tests + // ======================================================================== + + fn parse_decl(input: &str) -> ParseResult { + let mut tokenizer = Tokenizer::new(input.as_bytes(), 0); + let tokens = tokenizer.tokenize(); + let idents = tokenizer.ident_table(); + let mut symbols = SymbolTable::new(); + let mut parser = Parser::new(&tokens, idents, &mut symbols); + parser.skip_stream_tokens(); + parser.parse_declaration() + } + + #[test] + fn test_simple_decl() { + let decl = parse_decl("int x;").unwrap(); + assert_eq!(decl.declarators.len(), 1); + assert_eq!(decl.declarators[0].name, "x"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Int); + } + + #[test] + fn test_decl_with_init() { + let decl = parse_decl("int x = 5;").unwrap(); + assert_eq!(decl.declarators.len(), 1); + assert!(decl.declarators[0].init.is_some()); + } + + #[test] + fn test_multiple_declarators() { + let decl = parse_decl("int x, y, z;").unwrap(); + assert_eq!(decl.declarators.len(), 3); + assert_eq!(decl.declarators[0].name, "x"); + assert_eq!(decl.declarators[1].name, "y"); + assert_eq!(decl.declarators[2].name, "z"); + } + + #[test] + fn test_pointer_decl() { + let decl = parse_decl("int *p;").unwrap(); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Pointer); + } + + #[test] + fn test_array_decl() { + let decl = parse_decl("int arr[10];").unwrap(); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Array); + assert_eq!(decl.declarators[0].typ.array_size, Some(10)); + } + + #[test] + fn test_const_decl() { + let decl = parse_decl("const int x = 5;").unwrap(); + assert!(decl.declarators[0] + .typ + .modifiers + .contains(TypeModifiers::CONST)); + } + + #[test] + fn test_unsigned_decl() { + let decl = parse_decl("unsigned int x;").unwrap(); + assert!(decl.declarators[0] + .typ + .modifiers + .contains(TypeModifiers::UNSIGNED)); + } + + #[test] + fn test_long_long_decl() { + let decl = parse_decl("long long x;").unwrap(); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::LongLong); + } + + // ======================================================================== + // Function parsing tests + // ======================================================================== + + fn parse_func(input: &str) -> ParseResult { + let mut tokenizer = Tokenizer::new(input.as_bytes(), 0); + let tokens = tokenizer.tokenize(); + let idents = tokenizer.ident_table(); + let mut symbols = SymbolTable::new(); + let mut parser = Parser::new(&tokens, idents, &mut symbols); + parser.skip_stream_tokens(); + parser.parse_function_def() + } + + #[test] + fn test_simple_function() { + let func = parse_func("int main() { return 0; }").unwrap(); + assert_eq!(func.name, "main"); + assert_eq!(func.return_type.kind, TypeKind::Int); + assert!(func.params.is_empty()); + } + + #[test] + fn test_function_with_params() { + let func = parse_func("int add(int a, int b) { return a + b; }").unwrap(); + assert_eq!(func.name, "add"); + assert_eq!(func.params.len(), 2); + } + + #[test] + fn test_void_function() { + let func = parse_func("void foo(void) { }").unwrap(); + assert_eq!(func.return_type.kind, TypeKind::Void); + assert!(func.params.is_empty()); + } + + #[test] + fn test_variadic_function() { + // Variadic functions are parsed but variadic info is not tracked in FunctionDef + let func = parse_func("int printf(char *fmt, ...) { return 0; }").unwrap(); + assert_eq!(func.name, "printf"); + } + + #[test] + fn test_pointer_return() { + let func = parse_func("int *getptr() { return 0; }").unwrap(); + assert_eq!(func.return_type.kind, TypeKind::Pointer); + } + + // ======================================================================== + // Translation unit tests + // ======================================================================== + + fn parse_tu(input: &str) -> ParseResult { + let mut tokenizer = Tokenizer::new(input.as_bytes(), 0); + let tokens = tokenizer.tokenize(); + let idents = tokenizer.ident_table(); + let mut symbols = SymbolTable::new(); + let mut parser = Parser::new(&tokens, idents, &mut symbols); + parser.parse_translation_unit() + } + + #[test] + fn test_simple_program() { + let tu = parse_tu("int main() { return 0; }").unwrap(); + assert_eq!(tu.items.len(), 1); + assert!(matches!(tu.items[0], ExternalDecl::FunctionDef(_))); + } + + #[test] + fn test_global_var() { + let tu = parse_tu("int x = 5;").unwrap(); + assert_eq!(tu.items.len(), 1); + assert!(matches!(tu.items[0], ExternalDecl::Declaration(_))); + } + + #[test] + fn test_multiple_items() { + let tu = parse_tu("int x; int main() { return x; }").unwrap(); + assert_eq!(tu.items.len(), 2); + assert!(matches!(tu.items[0], ExternalDecl::Declaration(_))); + assert!(matches!(tu.items[1], ExternalDecl::FunctionDef(_))); + } + + #[test] + fn test_function_declaration() { + let tu = parse_tu("int foo(int x);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "foo"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Function); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_struct_only_declaration() { + // Struct definition without a variable declarator + let tu = parse_tu("struct point { int x; int y; };").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + // No declarators for struct-only definition + assert!(decl.declarators.is_empty()); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_struct_with_variable_declaration() { + // Struct definition with a variable declarator + let tu = parse_tu("struct point { int x; int y; } p;").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators.len(), 1); + assert_eq!(decl.declarators[0].name, "p"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Struct); + } + _ => panic!("Expected Declaration"), + } + } + + // ======================================================================== + // Typedef tests + // ======================================================================== + + #[test] + fn test_typedef_basic() { + // Basic typedef declaration + let tu = parse_tu("typedef int myint;").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators.len(), 1); + assert_eq!(decl.declarators[0].name, "myint"); + // The type includes the TYPEDEF modifier + assert!(decl.declarators[0] + .typ + .modifiers + .contains(TypeModifiers::TYPEDEF)); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_typedef_usage() { + // Typedef declaration followed by usage + let tu = parse_tu("typedef int myint; myint x;").unwrap(); + assert_eq!(tu.items.len(), 2); + + // First item: typedef declaration + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "myint"); + } + _ => panic!("Expected typedef Declaration"), + } + + // Second item: variable using typedef + match &tu.items[1] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "x"); + // The variable should have int type (resolved from typedef) + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Int); + } + _ => panic!("Expected variable Declaration"), + } + } + + #[test] + fn test_typedef_pointer() { + // Typedef for pointer type + let tu = parse_tu("typedef int *intptr; intptr p;").unwrap(); + assert_eq!(tu.items.len(), 2); + + // Variable should have pointer type + match &tu.items[1] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "p"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Pointer); + } + _ => panic!("Expected variable Declaration"), + } + } + + #[test] + fn test_typedef_struct() { + // Typedef for anonymous struct + let tu = parse_tu("typedef struct { int x; int y; } Point; Point p;").unwrap(); + assert_eq!(tu.items.len(), 2); + + // Variable should have struct type + match &tu.items[1] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "p"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Struct); + } + _ => panic!("Expected variable Declaration"), + } + } + + #[test] + fn test_typedef_chained() { + // Chained typedef: typedef of typedef + let tu = parse_tu("typedef int myint; typedef myint myint2; myint2 x;").unwrap(); + assert_eq!(tu.items.len(), 3); + + // Final variable should resolve to int + match &tu.items[2] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "x"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Int); + } + _ => panic!("Expected variable Declaration"), + } + } + + #[test] + fn test_typedef_multiple() { + // Multiple typedefs in one declaration + let tu = parse_tu("typedef int INT, *INTPTR;").unwrap(); + assert_eq!(tu.items.len(), 1); + + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators.len(), 2); + assert_eq!(decl.declarators[0].name, "INT"); + assert_eq!(decl.declarators[1].name, "INTPTR"); + // INTPTR should be a pointer type + assert_eq!(decl.declarators[1].typ.kind, TypeKind::Pointer); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_typedef_in_function() { + // Typedef used in function parameter and return type + let tu = + parse_tu("typedef int myint; myint add(myint a, myint b) { return a + b; }").unwrap(); + assert_eq!(tu.items.len(), 2); + + match &tu.items[1] { + ExternalDecl::FunctionDef(func) => { + assert_eq!(func.name, "add"); + // Return type should resolve to int + assert_eq!(func.return_type.kind, TypeKind::Int); + // Parameters should also resolve to int + assert_eq!(func.params.len(), 2); + assert_eq!(func.params[0].typ.kind, TypeKind::Int); + assert_eq!(func.params[1].typ.kind, TypeKind::Int); + } + _ => panic!("Expected FunctionDef"), + } + } + + // ======================================================================== + // Restrict qualifier tests + // ======================================================================== + + #[test] + fn test_restrict_pointer_decl() { + // Local variable with restrict qualifier + let tu = parse_tu("int main(void) { int * restrict p; return 0; }").unwrap(); + assert_eq!(tu.items.len(), 1); + // Just verify it parses without error + } + + #[test] + fn test_restrict_function_param() { + // Function with restrict-qualified pointer parameters + let tu = parse_tu("void copy(int * restrict dest, int * restrict src) { *dest = *src; }") + .unwrap(); + assert_eq!(tu.items.len(), 1); + + match &tu.items[0] { + ExternalDecl::FunctionDef(func) => { + assert_eq!(func.name, "copy"); + assert_eq!(func.params.len(), 2); + // Both params should be restrict-qualified pointers + assert_eq!(func.params[0].typ.kind, TypeKind::Pointer); + assert!(func.params[0] + .typ + .modifiers + .contains(TypeModifiers::RESTRICT)); + assert_eq!(func.params[1].typ.kind, TypeKind::Pointer); + assert!(func.params[1] + .typ + .modifiers + .contains(TypeModifiers::RESTRICT)); + } + _ => panic!("Expected FunctionDef"), + } + } + + #[test] + fn test_restrict_with_const() { + // Pointer with both const and restrict qualifiers + let tu = parse_tu("int main(void) { int * const restrict p = 0; return 0; }").unwrap(); + assert_eq!(tu.items.len(), 1); + // Just verify it parses without error - both qualifiers should be accepted + } + + #[test] + fn test_restrict_global_pointer() { + // Global pointer with restrict qualifier + let tu = parse_tu("int * restrict global_ptr;").unwrap(); + assert_eq!(tu.items.len(), 1); + + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "global_ptr"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Pointer); + assert!(decl.declarators[0] + .typ + .modifiers + .contains(TypeModifiers::RESTRICT)); + } + _ => panic!("Expected Declaration"), + } + } + + // ======================================================================== + // Volatile qualifier tests + // ======================================================================== + + #[test] + fn test_volatile_basic() { + // Basic volatile variable + let tu = parse_tu("volatile int x;").unwrap(); + assert_eq!(tu.items.len(), 1); + + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "x"); + assert!(decl.declarators[0] + .typ + .modifiers + .contains(TypeModifiers::VOLATILE)); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_volatile_pointer() { + // Pointer to volatile int + let tu = parse_tu("volatile int *p;").unwrap(); + assert_eq!(tu.items.len(), 1); + + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "p"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Pointer); + // The base type should be volatile + let base = decl.declarators[0].typ.base.as_ref().unwrap(); + assert!(base.modifiers.contains(TypeModifiers::VOLATILE)); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_volatile_pointer_itself() { + // Volatile pointer to int (pointer itself is volatile) + let tu = parse_tu("int * volatile p;").unwrap(); + assert_eq!(tu.items.len(), 1); + + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "p"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Pointer); + // The pointer type itself should be volatile + assert!(decl.declarators[0] + .typ + .modifiers + .contains(TypeModifiers::VOLATILE)); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_volatile_const_combined() { + // Both const and volatile + let tu = parse_tu("const volatile int x;").unwrap(); + assert_eq!(tu.items.len(), 1); + + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "x"); + assert!(decl.declarators[0] + .typ + .modifiers + .contains(TypeModifiers::VOLATILE)); + assert!(decl.declarators[0] + .typ + .modifiers + .contains(TypeModifiers::CONST)); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_volatile_function_param() { + // Function with volatile pointer parameter + let tu = parse_tu("void foo(volatile int *p) { *p = 1; }").unwrap(); + assert_eq!(tu.items.len(), 1); + + match &tu.items[0] { + ExternalDecl::FunctionDef(func) => { + assert_eq!(func.name, "foo"); + assert_eq!(func.params.len(), 1); + // Parameter is pointer to volatile int + assert_eq!(func.params[0].typ.kind, TypeKind::Pointer); + let base = func.params[0].typ.base.as_ref().unwrap(); + assert!(base.modifiers.contains(TypeModifiers::VOLATILE)); + } + _ => panic!("Expected FunctionDef"), + } + } + + // ======================================================================== + // __attribute__ tests + // ======================================================================== + + #[test] + fn test_attribute_on_function_declaration() { + // Attribute on function declaration + let tu = parse_tu("void foo(void) __attribute__((noreturn));").unwrap(); + assert_eq!(tu.items.len(), 1); + + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators.len(), 1); + assert_eq!(decl.declarators[0].name, "foo"); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_attribute_on_struct() { + // Attribute between struct keyword and name (with variable) + let tu = parse_tu("struct __attribute__((packed)) foo { int x; } s;").unwrap(); + assert_eq!(tu.items.len(), 1); + + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators.len(), 1); + assert_eq!(decl.declarators[0].name, "s"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Struct); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_attribute_after_struct() { + // Attribute after struct closing brace (with variable) + let tu = parse_tu("struct foo { int x; } __attribute__((aligned(16))) s;").unwrap(); + assert_eq!(tu.items.len(), 1); + + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators.len(), 1); + assert_eq!(decl.declarators[0].name, "s"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Struct); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_attribute_on_struct_only() { + // Attribute on struct-only definition (no variable) + let tu = parse_tu("struct __attribute__((packed)) foo { int x; };").unwrap(); + assert_eq!(tu.items.len(), 1); + + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + // No variable declared, just the struct definition + assert_eq!(decl.declarators.len(), 0); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_attribute_on_variable() { + // Attribute on variable declaration + let tu = parse_tu("int x __attribute__((aligned(8)));").unwrap(); + assert_eq!(tu.items.len(), 1); + + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators.len(), 1); + assert_eq!(decl.declarators[0].name, "x"); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_attribute_multiple() { + // Multiple attributes in one list + let tu = parse_tu("void foo(void) __attribute__((noreturn, cold));").unwrap(); + assert_eq!(tu.items.len(), 1); + + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "foo"); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_attribute_with_args() { + // Attribute with multiple arguments + let tu = parse_tu( + "void foo(const char *fmt, ...) __attribute__((__format__(__printf__, 1, 2)));", + ) + .unwrap(); + assert_eq!(tu.items.len(), 1); + + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "foo"); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_attribute_before_declaration() { + // Attribute before declaration + let tu = parse_tu("__attribute__((visibility(\"default\"))) int exported_var;").unwrap(); + assert_eq!(tu.items.len(), 1); + + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "exported_var"); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_attribute_underscore_variant() { + // __attribute variant (single underscore pair) + let tu = parse_tu("void foo(void) __attribute((noreturn));").unwrap(); + assert_eq!(tu.items.len(), 1); + + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "foo"); + } + _ => panic!("Expected Declaration"), + } + } + + // ======================================================================== + // Const enforcement tests + // ======================================================================== + // Note: Error detection is tested via integration tests, not unit tests, + // because the error counter is global state shared across parallel tests. + // These tests verify that const parsing works and code with const violations + // still produces a valid AST (parsing continues after reporting errors). + + #[test] + fn test_const_assignment_parses() { + // Assignment to const variable should still parse (errors are reported but parsing continues) + let tu = parse_tu("int main(void) { const int x = 42; x = 10; return 0; }").unwrap(); + assert_eq!(tu.items.len(), 1); + // Verify we got a function definition + assert!(matches!(tu.items[0], ExternalDecl::FunctionDef(_))); + } + + #[test] + fn test_const_pointer_deref_parses() { + // Assignment through pointer to const should still parse + let tu = + parse_tu("int main(void) { int v = 1; const int *p = &v; *p = 2; return 0; }").unwrap(); + assert_eq!(tu.items.len(), 1); + assert!(matches!(tu.items[0], ExternalDecl::FunctionDef(_))); + } + + #[test] + fn test_const_usage_valid() { + // Valid const usage - reading const values + let tu = parse_tu("int main(void) { const int x = 42; int y = x + 1; return y; }").unwrap(); + assert_eq!(tu.items.len(), 1); + assert!(matches!(tu.items[0], ExternalDecl::FunctionDef(_))); + } + + #[test] + fn test_const_pointer_types() { + // Different const pointer combinations + let tu = parse_tu( + "int main(void) { int v = 1; const int *a = &v; int * const b = &v; const int * const c = &v; return 0; }", + ) + .unwrap(); + assert_eq!(tu.items.len(), 1); + assert!(matches!(tu.items[0], ExternalDecl::FunctionDef(_))); + } + + // ======================================================================== + // Function declaration tests (prototypes) + // ======================================================================== + + #[test] + fn test_function_decl_no_params() { + let tu = parse_tu("int foo(void);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators.len(), 1); + assert_eq!(decl.declarators[0].name, "foo"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Function); + assert!(!decl.declarators[0].typ.variadic); + // Check return type + if let Some(ref base) = decl.declarators[0].typ.base { + assert_eq!(base.kind, TypeKind::Int); + } + // Check params (void means empty) + if let Some(ref params) = decl.declarators[0].typ.params { + assert!(params.is_empty()); + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_function_decl_one_param() { + let tu = parse_tu("int square(int x);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "square"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Function); + assert!(!decl.declarators[0].typ.variadic); + if let Some(ref params) = decl.declarators[0].typ.params { + assert_eq!(params.len(), 1); + assert_eq!(params[0].kind, TypeKind::Int); + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_function_decl_multiple_params() { + let tu = parse_tu("int add(int a, int b, int c);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "add"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Function); + assert!(!decl.declarators[0].typ.variadic); + if let Some(ref params) = decl.declarators[0].typ.params { + assert_eq!(params.len(), 3); + for p in params { + assert_eq!(p.kind, TypeKind::Int); + } + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_function_decl_void_return() { + let tu = parse_tu("void do_something(int x);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "do_something"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Function); + if let Some(ref base) = decl.declarators[0].typ.base { + assert_eq!(base.kind, TypeKind::Void); + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_function_decl_pointer_return() { + let tu = parse_tu("char *get_string(void);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "get_string"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Function); + if let Some(ref base) = decl.declarators[0].typ.base { + assert_eq!(base.kind, TypeKind::Pointer); + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_function_decl_pointer_param() { + let tu = parse_tu("void process(int *data, int count);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "process"); + if let Some(ref params) = decl.declarators[0].typ.params { + assert_eq!(params.len(), 2); + assert_eq!(params[0].kind, TypeKind::Pointer); + assert_eq!(params[1].kind, TypeKind::Int); + } + } + _ => panic!("Expected Declaration"), + } + } + + // ======================================================================== + // Variadic function declaration tests + // ======================================================================== + + #[test] + fn test_function_decl_variadic_printf() { + // Classic printf prototype + let tu = parse_tu("int printf(const char *fmt, ...);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "printf"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Function); + // Should be marked as variadic + assert!( + decl.declarators[0].typ.variadic, + "printf should be marked as variadic" + ); + if let Some(ref params) = decl.declarators[0].typ.params { + // Only the fixed parameter (fmt) should be in params + assert_eq!(params.len(), 1); + assert_eq!(params[0].kind, TypeKind::Pointer); + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_function_decl_variadic_sprintf() { + let tu = parse_tu("int sprintf(char *buf, const char *fmt, ...);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "sprintf"); + assert!( + decl.declarators[0].typ.variadic, + "sprintf should be marked as variadic" + ); + if let Some(ref params) = decl.declarators[0].typ.params { + // Two fixed parameters: buf and fmt + assert_eq!(params.len(), 2); + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_function_decl_variadic_custom() { + // Custom variadic function with int first param + let tu = parse_tu("int sum_ints(int count, ...);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "sum_ints"); + assert!( + decl.declarators[0].typ.variadic, + "sum_ints should be marked as variadic" + ); + if let Some(ref params) = decl.declarators[0].typ.params { + assert_eq!(params.len(), 1); + assert_eq!(params[0].kind, TypeKind::Int); + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_function_decl_variadic_multiple_fixed() { + // Variadic function with multiple fixed parameters + let tu = parse_tu("int variadic_func(int a, double b, char *c, ...);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "variadic_func"); + assert!( + decl.declarators[0].typ.variadic, + "variadic_func should be marked as variadic" + ); + if let Some(ref params) = decl.declarators[0].typ.params { + assert_eq!(params.len(), 3); + assert_eq!(params[0].kind, TypeKind::Int); + assert_eq!(params[1].kind, TypeKind::Double); + assert_eq!(params[2].kind, TypeKind::Pointer); + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_function_decl_variadic_void_return() { + // Variadic function with void return type + let tu = parse_tu("void log_message(const char *fmt, ...);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "log_message"); + assert!( + decl.declarators[0].typ.variadic, + "log_message should be marked as variadic" + ); + if let Some(ref base) = decl.declarators[0].typ.base { + assert_eq!(base.kind, TypeKind::Void); + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_function_decl_not_variadic() { + // Make sure non-variadic functions are NOT marked as variadic + let tu = parse_tu("int regular_func(int a, int b);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "regular_func"); + assert!( + !decl.declarators[0].typ.variadic, + "regular_func should NOT be marked as variadic" + ); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_variadic_function_definition() { + // Variadic function definition (not just declaration) + let func = parse_func("int my_printf(char *fmt, ...) { return 0; }").unwrap(); + assert_eq!(func.name, "my_printf"); + // Note: FunctionDef doesn't directly expose variadic, but the function + // body can use va_start etc. This test just ensures parsing succeeds. + assert_eq!(func.params.len(), 1); + } + + #[test] + fn test_multiple_function_decls_mixed() { + // Mix of variadic and non-variadic declarations + let tu = parse_tu( + "int printf(const char *fmt, ...); int puts(const char *s); int sprintf(char *buf, const char *fmt, ...);", + ) + .unwrap(); + assert_eq!(tu.items.len(), 3); + + // printf - variadic + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "printf"); + assert!(decl.declarators[0].typ.variadic); + } + _ => panic!("Expected Declaration"), + } + + // puts - not variadic + match &tu.items[1] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "puts"); + assert!(!decl.declarators[0].typ.variadic); + } + _ => panic!("Expected Declaration"), + } + + // sprintf - variadic + match &tu.items[2] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "sprintf"); + assert!(decl.declarators[0].typ.variadic); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_function_decl_with_struct_param() { + // Function declaration with struct parameter + let tu = + parse_tu("struct point { int x; int y; }; void move_point(struct point p);").unwrap(); + assert_eq!(tu.items.len(), 2); + match &tu.items[1] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "move_point"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Function); + assert!(!decl.declarators[0].typ.variadic); + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_function_decl_array_decay() { + // Array parameters decay to pointers in function declarations + let tu = parse_tu("void process_array(int arr[]);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "process_array"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Function); + // The array parameter should decay to pointer + if let Some(ref params) = decl.declarators[0].typ.params { + assert_eq!(params.len(), 1); + // Array params in function declarations become pointers + assert!( + params[0].kind == TypeKind::Pointer || params[0].kind == TypeKind::Array + ); + } + } + _ => panic!("Expected Declaration"), + } + } + + // ======================================================================== + // Function pointer tests + // ======================================================================== + + #[test] + fn test_function_pointer_declaration() { + // Basic function pointer: void (*fp)(int) + let tu = parse_tu("void (*fp)(int);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators.len(), 1); + assert_eq!(decl.declarators[0].name, "fp"); + // fp should be a pointer to a function + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Pointer); + // The base type of the pointer should be a function + if let Some(ref base) = decl.declarators[0].typ.base { + assert_eq!(base.kind, TypeKind::Function); + // Function returns void + if let Some(ref ret) = base.base { + assert_eq!(ret.kind, TypeKind::Void); + } + // Function takes one int parameter + if let Some(ref params) = base.params { + assert_eq!(params.len(), 1); + assert_eq!(params[0].kind, TypeKind::Int); + } + } else { + panic!("Expected function pointer base type"); + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_function_pointer_no_params() { + // Function pointer with no parameters: int (*fp)(void) + let tu = parse_tu("int (*fp)(void);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "fp"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Pointer); + if let Some(ref base) = decl.declarators[0].typ.base { + assert_eq!(base.kind, TypeKind::Function); + // (void) means no parameters + if let Some(ref params) = base.params { + assert!(params.is_empty()); + } + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_function_pointer_multiple_params() { + // Function pointer with multiple parameters: int (*fp)(int, char, double) + let tu = parse_tu("int (*fp)(int, char, double);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "fp"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Pointer); + if let Some(ref base) = decl.declarators[0].typ.base { + assert_eq!(base.kind, TypeKind::Function); + if let Some(ref params) = base.params { + assert_eq!(params.len(), 3); + assert_eq!(params[0].kind, TypeKind::Int); + assert_eq!(params[1].kind, TypeKind::Char); + assert_eq!(params[2].kind, TypeKind::Double); + } + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_function_pointer_returning_pointer() { + // Function pointer returning a pointer: char *(*fp)(int) + let tu = parse_tu("char *(*fp)(int);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "fp"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Pointer); + if let Some(ref base) = decl.declarators[0].typ.base { + assert_eq!(base.kind, TypeKind::Function); + // Return type is char* + if let Some(ref ret) = base.base { + assert_eq!(ret.kind, TypeKind::Pointer); + if let Some(ref char_type) = ret.base { + assert_eq!(char_type.kind, TypeKind::Char); + } + } + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_function_pointer_variadic() { + // Variadic function pointer: int (*fp)(const char *, ...) + let tu = parse_tu("int (*fp)(const char *, ...);").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators[0].name, "fp"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Pointer); + if let Some(ref base) = decl.declarators[0].typ.base { + assert_eq!(base.kind, TypeKind::Function); + assert!(base.variadic, "Function should be variadic"); + if let Some(ref params) = base.params { + assert_eq!(params.len(), 1); + assert_eq!(params[0].kind, TypeKind::Pointer); + } + } + } + _ => panic!("Expected Declaration"), + } + } + + // ======================================================================== + // Bitfield tests + // ======================================================================== + + #[test] + fn test_bitfield_basic() { + // Basic bitfield parsing - include a variable declarator + let tu = parse_tu("struct flags { unsigned int a : 4; unsigned int b : 4; } f;").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators.len(), 1); + assert_eq!(decl.declarators[0].name, "f"); + assert_eq!(decl.declarators[0].typ.kind, TypeKind::Struct); + if let Some(ref composite) = decl.declarators[0].typ.composite { + assert_eq!(composite.members.len(), 2); + // First bitfield + assert_eq!(composite.members[0].name, "a"); + assert_eq!(composite.members[0].bit_width, Some(4)); + assert_eq!(composite.members[0].bit_offset, Some(0)); + // Second bitfield + assert_eq!(composite.members[1].name, "b"); + assert_eq!(composite.members[1].bit_width, Some(4)); + assert_eq!(composite.members[1].bit_offset, Some(4)); + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_bitfield_unnamed() { + // Unnamed bitfield for padding + let tu = parse_tu( + "struct padded { unsigned int a : 4; unsigned int : 4; unsigned int b : 8; } p;", + ) + .unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators.len(), 1); + if let Some(ref composite) = decl.declarators[0].typ.composite { + assert_eq!(composite.members.len(), 3); + // First named bitfield + assert_eq!(composite.members[0].name, "a"); + assert_eq!(composite.members[0].bit_width, Some(4)); + // Unnamed padding bitfield + assert_eq!(composite.members[1].name, ""); + assert_eq!(composite.members[1].bit_width, Some(4)); + // Second named bitfield + assert_eq!(composite.members[2].name, "b"); + assert_eq!(composite.members[2].bit_width, Some(8)); + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_bitfield_zero_width() { + // Zero-width bitfield forces alignment + let tu = parse_tu( + "struct aligned { unsigned int a : 4; unsigned int : 0; unsigned int b : 4; } x;", + ) + .unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators.len(), 1); + if let Some(ref composite) = decl.declarators[0].typ.composite { + assert_eq!(composite.members.len(), 3); + // After zero-width bitfield, b should start at new storage unit + assert_eq!(composite.members[2].name, "b"); + assert_eq!(composite.members[2].bit_width, Some(4)); + // b should be at offset 0 within its storage unit + assert_eq!(composite.members[2].bit_offset, Some(0)); + // b's byte offset should be different from a's + assert!(composite.members[2].offset > composite.members[0].offset); + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_bitfield_mixed_with_regular() { + // Bitfield mixed with regular member + let tu = parse_tu("struct mixed { int x; unsigned int bits : 8; int y; } m;").unwrap(); + assert_eq!(tu.items.len(), 1); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators.len(), 1); + if let Some(ref composite) = decl.declarators[0].typ.composite { + assert_eq!(composite.members.len(), 3); + // x is regular member + assert_eq!(composite.members[0].name, "x"); + assert!(composite.members[0].bit_width.is_none()); + // bits is a bitfield + assert_eq!(composite.members[1].name, "bits"); + assert_eq!(composite.members[1].bit_width, Some(8)); + // y is regular member + assert_eq!(composite.members[2].name, "y"); + assert!(composite.members[2].bit_width.is_none()); + } + } + _ => panic!("Expected Declaration"), + } + } + + #[test] + fn test_bitfield_struct_size() { + // Verify struct size calculation with bitfields + let tu = parse_tu( + "struct small { unsigned int a : 1; unsigned int b : 1; unsigned int c : 1; } s;", + ) + .unwrap(); + match &tu.items[0] { + ExternalDecl::Declaration(decl) => { + assert_eq!(decl.declarators.len(), 1); + if let Some(ref composite) = decl.declarators[0].typ.composite { + // Three 1-bit fields should fit in one 4-byte int + assert_eq!(composite.size, 4); + } + } + _ => panic!("Expected Declaration"), + } + } +} diff --git a/cc/ssa.rs b/cc/ssa.rs new file mode 100644 index 000000000..6fb8a8cc2 --- /dev/null +++ b/cc/ssa.rs @@ -0,0 +1,800 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// SSA Conversion for pcc C99 compiler +// +// Based on sparse's ssa.c: +// - Converts memory-based local variables to SSA form +// - Inserts phi nodes at dominance frontiers +// - Renames variables to complete SSA construction +// + +use crate::dominate::{compute_dominance_frontiers, domtree_build, idf_compute}; +use crate::ir::{ + BasicBlockId, Function, InsnRef, Instruction, Opcode, Pseudo, PseudoId, PseudoKind, +}; +use crate::types::Type; +use std::collections::{HashMap, HashSet}; + +// ============================================================================ +// Promotability Check +// ============================================================================ + +/// Check if a type is promotable to SSA form. +/// Only scalar types (integers, pointers, floats) can be promoted. +fn is_promotable_type(typ: &Type) -> bool { + typ.is_scalar() +} + +// ============================================================================ +// SSA Conversion State +// ============================================================================ + +/// State for SSA conversion +struct SsaConverter<'a> { + func: &'a mut Function, + + /// Variables marked for renaming (name -> type) + to_rename: HashSet, + + /// All inserted phi nodes for later processing + all_phis: Vec<(BasicBlockId, usize)>, // (block, instruction index) + + /// Stores to remove after conversion + dead_stores: Vec, + + /// Counter for generating new pseudo IDs + next_pseudo_id: u32, + + /// Counter for generating new register numbers + next_reg_nr: u32, +} + +impl<'a> SsaConverter<'a> { + fn new(func: &'a mut Function) -> Self { + // Find the maximum pseudo ID and reg number currently in use + let max_pseudo_id = func.pseudos.iter().map(|p| p.id.0).max().unwrap_or(0); + + let max_reg_nr = func + .pseudos + .iter() + .filter_map(|p| match &p.kind { + PseudoKind::Reg(nr) => Some(*nr), + _ => None, + }) + .max() + .unwrap_or(0); + + Self { + func, + to_rename: HashSet::new(), + all_phis: Vec::new(), + dead_stores: Vec::new(), + next_pseudo_id: max_pseudo_id + 1, + next_reg_nr: max_reg_nr + 1, + } + } + + /// Allocate a new phi pseudo + fn alloc_phi(&mut self) -> PseudoId { + let id = PseudoId(self.next_pseudo_id); + self.next_pseudo_id += 1; + + let nr = self.next_reg_nr; + self.next_reg_nr += 1; + + let pseudo = Pseudo::phi(id, nr); + self.func.add_pseudo(pseudo); + id + } + + /// Create an undef pseudo + fn undef_pseudo(&mut self) -> PseudoId { + let id = PseudoId(self.next_pseudo_id); + self.next_pseudo_id += 1; + + let pseudo = Pseudo::undef(id); + self.func.add_pseudo(pseudo); + id + } +} + +// ============================================================================ +// Phase 1: Variable Analysis and Phi Placement +// ============================================================================ + +/// Information about a local variable during SSA conversion +#[derive(Default)] +struct VarInfo { + /// Type of the variable + typ: Type, + /// Blocks that store to this variable + def_blocks: Vec, + /// Total number of stores + store_count: usize, + /// Total number of uses (loads + stores) + use_count: usize, + /// Is the address of this variable taken? + addr_taken: bool, + /// Is all usage in a single block? + single_block: Option, + /// Block where this variable was declared (for scope-aware phi placement) + decl_block: Option, +} + +/// Analyze a variable to determine if it can be promoted to SSA. +fn analyze_variable(func: &Function, var_name: &str) -> Option { + let local = func.get_local(var_name)?; + let sym_id = local.sym; + let typ = local.typ.clone(); + let decl_block = local.decl_block; + + // Check basic promotability + if local.is_volatile || !is_promotable_type(&typ) { + return None; + } + + let mut info = VarInfo { + typ, + decl_block, + ..Default::default() + }; + + let mut seen_block: Option = None; + let mut same_block = true; + + // Scan all instructions looking for uses of this variable + for bb in &func.blocks { + for insn in &bb.insns { + match insn.op { + Opcode::Store => { + // Store: check if storing to this variable + if !insn.src.is_empty() && insn.src[0] == sym_id { + info.store_count += 1; + info.use_count += 1; + + if !info.def_blocks.contains(&bb.id) { + info.def_blocks.push(bb.id); + } + + if same_block { + if let Some(prev) = seen_block { + if prev != bb.id { + same_block = false; + } + } else { + seen_block = Some(bb.id); + } + } + } + } + Opcode::Load => { + // Load: check if loading from this variable + if !insn.src.is_empty() && insn.src[0] == sym_id { + info.use_count += 1; + + if same_block { + if let Some(prev) = seen_block { + if prev != bb.id { + same_block = false; + } + } else { + seen_block = Some(bb.id); + } + } + } + } + Opcode::SymAddr => { + // Address taken - can't promote + // SymAddr has the symbol in src[0], not target + if !insn.src.is_empty() && insn.src[0] == sym_id { + info.addr_taken = true; + } + } + _ => {} + } + } + } + + if info.addr_taken { + return None; + } + + if same_block { + info.single_block = seen_block; + } + + Some(info) +} + +/// Insert phi nodes for a variable at its iterated dominance frontier. +fn insert_phi_nodes(converter: &mut SsaConverter, var_name: &str, var_info: &VarInfo) { + // Compute IDF of definition blocks + let idf = idf_compute(converter.func, &var_info.def_blocks); + + // Insert phi node at each IDF block, but only if it's dominated by the declaration block. + // This ensures that variables declared inside inner scopes don't get phi nodes + // at outer loop headers where they're not in scope. + for bb_id in idf { + // Filter: only insert phi if the IDF block is dominated by the declaration block + if let Some(decl_bb) = var_info.decl_block { + if !converter.func.dominates(decl_bb, bb_id) { + // This IDF block is not dominated by the declaration block, + // so the variable is not in scope here - skip phi insertion + continue; + } + } + let target = converter.alloc_phi(); + + // Create phi instruction + let phi = Instruction::phi(target, var_info.typ.clone()); + + // Store the variable name for later phi renaming + // We'll use the phi_list field to store this temporarily + // (the actual phi sources will be added during renaming) + + // Insert at beginning of block (after any entry instruction and existing phis) + if let Some(bb) = converter.func.get_block_mut(bb_id) { + // Find insertion point: after Entry (if present) and after existing phis + let mut insert_pos = 0; + for insn in &bb.insns { + if insn.op == Opcode::Entry || insn.op == Opcode::Phi { + insert_pos += 1; + } else { + break; + } + } + + bb.insns.insert(insert_pos, phi); + + // Record for later processing + converter.all_phis.push((bb_id, insert_pos)); + + // Store in phi_map (position is stable because we insert at the end of existing phis) + bb.phi_map.insert(var_name.to_string(), insert_pos); + } + } +} + +// ============================================================================ +// Phase 2: Variable Renaming +// ============================================================================ + +/// Definition stack for variable renaming +struct DefStack { + /// Variable name -> stack of (defining block, defining pseudo) + stacks: HashMap>, +} + +impl DefStack { + fn new() -> Self { + Self { + stacks: HashMap::new(), + } + } + + /// Push a new definition + fn push(&mut self, var: &str, bb: BasicBlockId, val: PseudoId) { + self.stacks + .entry(var.to_string()) + .or_default() + .push((bb, val)); + } + + /// Get current definition (from top of stack) + fn current(&self, var: &str) -> Option { + self.stacks.get(var).and_then(|s| s.last().map(|(_, v)| *v)) + } + + /// Pop definitions made in a specific block + fn pop_block(&mut self, bb: BasicBlockId) { + for stack in self.stacks.values_mut() { + while stack.last().map(|(b, _)| *b == bb).unwrap_or(false) { + stack.pop(); + } + } + } +} + +/// Lookup the current definition of a variable, walking up the dominator tree. +fn lookup_var( + func: &Function, + bb_id: BasicBlockId, + var: &str, + def_stack: &DefStack, +) -> Option { + // First check if there's a definition from processing this block + if let Some(val) = def_stack.current(var) { + return Some(val); + } + + // Walk up dominator tree looking for a phi or store + let mut current = bb_id; + while let Some(bb) = func.get_block(current) { + // Check if there's a phi for this variable in this block + if let Some(&phi_idx) = bb.phi_map.get(var) { + if let Some(phi_insn) = bb.insns.get(phi_idx) { + return phi_insn.target; + } + } + + // Move to immediate dominator + if let Some(idom) = bb.idom { + current = idom; + } else { + break; + } + } + + None +} + +/// Rename variables in a single instruction. +fn rename_insn( + converter: &mut SsaConverter, + bb_id: BasicBlockId, + insn_idx: usize, + def_stack: &mut DefStack, +) { + let insn = if let Some(bb) = converter.func.get_block(bb_id) { + bb.insns.get(insn_idx).cloned() + } else { + return; + }; + + let insn = match insn { + Some(i) => i, + None => return, + }; + + match insn.op { + Opcode::Store => { + // Check if this is a store to a promotable variable + if insn.src.len() >= 2 { + let addr = insn.src[0]; + + // Look up what variable this address corresponds to + let var_name = converter + .func + .pseudos + .iter() + .find(|p| p.id == addr) + .and_then(|p| match &p.kind { + PseudoKind::Sym(name) => Some(name.clone()), + _ => None, + }); + + if let Some(name) = var_name { + if converter.to_rename.contains(&name) { + // Get the value being stored + let val = insn.src[1]; + + // Push as new definition + def_stack.push(&name, bb_id, val); + + // Mark store for removal + converter.dead_stores.push(InsnRef::new(bb_id, insn_idx)); + } + } + } + } + + Opcode::Load => { + // Check if this is a load from a promotable variable + if !insn.src.is_empty() { + let addr = insn.src[0]; + + let var_name = converter + .func + .pseudos + .iter() + .find(|p| p.id == addr) + .and_then(|p| match &p.kind { + PseudoKind::Sym(name) => Some(name.clone()), + _ => None, + }); + + if let Some(name) = var_name { + if converter.to_rename.contains(&name) { + // Get the reaching definition + let val = lookup_var(converter.func, bb_id, &name, def_stack) + .unwrap_or_else(|| converter.undef_pseudo()); + + // Replace load with the value + if insn.target.is_some() { + if let Some(bb) = converter.func.get_block_mut(bb_id) { + if let Some(load_insn) = bb.insns.get_mut(insn_idx) { + // Convert to a copy + load_insn.op = Opcode::Copy; + load_insn.src = vec![val]; + } + } + } + } + } + } + } + + Opcode::Phi => { + // Record phi as a definition + if let Some(target) = insn.target { + // Find which variable this phi is for (from phi_map) + if let Some(bb) = converter.func.get_block(bb_id) { + for (name, &idx) in &bb.phi_map { + if idx == insn_idx { + def_stack.push(name, bb_id, target); + break; + } + } + } + } + } + + _ => {} + } +} + +/// Rename variables in a block and its dominated children. +fn rename_block(converter: &mut SsaConverter, bb_id: BasicBlockId, def_stack: &mut DefStack) { + // Get instruction count first + let insn_count = converter + .func + .get_block(bb_id) + .map(|bb| bb.insns.len()) + .unwrap_or(0); + + // Process all instructions in this block + for i in 0..insn_count { + rename_insn(converter, bb_id, i, def_stack); + } + + // Get dominated children + let dom_children: Vec = converter + .func + .get_block(bb_id) + .map(|bb| bb.dom_children.clone()) + .unwrap_or_default(); + + // Recurse into dominated children + for child in dom_children { + rename_block(converter, child, def_stack); + } + + // Pop definitions made in this block + def_stack.pop_block(bb_id); +} + +/// Fill in phi operands from predecessor blocks. +fn fill_phi_operands(converter: &mut SsaConverter) { + // Collect phi info first + let phi_info: Vec<(BasicBlockId, usize, String)> = converter + .func + .blocks + .iter() + .flat_map(|bb| { + bb.phi_map + .iter() + .map(move |(name, &idx)| (bb.id, idx, name.clone())) + }) + .collect(); + + for (bb_id, phi_idx, var_name) in phi_info { + // Get predecessor blocks + let preds: Vec = converter + .func + .get_block(bb_id) + .map(|bb| bb.parents.clone()) + .unwrap_or_default(); + + // For each predecessor, find the reaching definition + for pred_id in preds { + // Find the definition in the predecessor + let val = lookup_var_in_pred(converter.func, pred_id, &var_name).unwrap_or_else(|| { + // Create undef pseudo + let id = PseudoId(converter.next_pseudo_id); + converter.next_pseudo_id += 1; + let pseudo = Pseudo::undef(id); + converter.func.add_pseudo(pseudo); + id + }); + + // Add to phi_list + if let Some(bb) = converter.func.get_block_mut(bb_id) { + if let Some(phi_insn) = bb.insns.get_mut(phi_idx) { + phi_insn.phi_list.push((pred_id, val)); + } + } + } + } +} + +/// Find the reaching definition of a variable in a predecessor block. +/// +/// For phi operand filling, we need to find the LAST definition of a variable +/// that reaches a predecessor block. This is tricky for back-edges in loops: +/// +/// In a loop like: +/// .L1: phi %x = [.L0: %init], [.L2: ???] +/// ... use %x ... +/// %new = %x + 1 +/// store %x, %new +/// br .L2 +/// .L2: cbr .L1, .L3 +/// +/// When filling the phi operand from .L2, we need %new (the stored value), +/// not the phi target. So we check stores BEFORE checking phis. +fn lookup_var_in_pred(func: &Function, bb_id: BasicBlockId, var: &str) -> Option { + // Walk up the dominator tree from this predecessor looking for a definition + let mut current = bb_id; + + loop { + let bb = func.get_block(current)?; + + // Check for store in this block (walking backwards) FIRST + // This is important for back-edges: the store happens after the phi + for insn in bb.insns.iter().rev() { + if insn.op == Opcode::Store && insn.src.len() >= 2 { + let addr = insn.src[0]; + + // Check if this is a store to our variable + if let Some(pseudo) = func.pseudos.iter().find(|p| p.id == addr) { + if let PseudoKind::Sym(name) = &pseudo.kind { + if name == var { + return Some(insn.src[1]); + } + } + } + } + } + + // Check for phi in this block (only if no store found) + if let Some(&phi_idx) = bb.phi_map.get(var) { + if let Some(phi_insn) = bb.insns.get(phi_idx) { + return phi_insn.target; + } + } + + // Move to immediate dominator + current = bb.idom?; + } +} + +// ============================================================================ +// Phase 3: Cleanup +// ============================================================================ + +/// Remove dead stores that were converted to SSA. +fn remove_dead_stores(func: &mut Function, dead_stores: &[InsnRef]) { + // Sort by block and index (descending) so we can remove from end first + let mut sorted: Vec<_> = dead_stores.to_vec(); + sorted.sort_by(|a, b| { + if a.bb == b.bb { + b.idx.cmp(&a.idx) // Descending by index + } else { + a.bb.0.cmp(&b.bb.0) + } + }); + + for insn_ref in sorted { + if let Some(bb) = func.get_block_mut(insn_ref.bb) { + if insn_ref.idx < bb.insns.len() { + // Convert to Nop instead of removing to preserve indices + bb.insns[insn_ref.idx].op = Opcode::Nop; + } + } + } +} + +// ============================================================================ +// Main Entry Point +// ============================================================================ + +/// Convert a function to SSA form. +/// +/// This promotes eligible local variables from memory to SSA registers, +/// inserting phi nodes at control flow merge points. +/// +/// # Algorithm +/// 1. Build dominator tree +/// 2. For each promotable local variable: +/// - Compute its iterated dominance frontier (IDF) +/// - Insert phi nodes at IDF blocks +/// 3. Rename variables: +/// - Walk blocks in dominator tree order +/// - Replace loads with reaching definitions +/// - Record stores as new definitions +/// - Fill in phi operands from predecessors +/// 4. Remove dead stores +pub fn ssa_convert(func: &mut Function) { + if func.blocks.is_empty() { + return; + } + + // Phase 0: Build dominator tree + domtree_build(func); + compute_dominance_frontiers(func); + + let mut converter = SsaConverter::new(func); + + // Phase 1: Analyze variables and insert phi nodes + let local_names: Vec = converter.func.locals.keys().cloned().collect(); + + for var_name in &local_names { + if let Some(var_info) = analyze_variable(converter.func, var_name) { + // Skip if all usage is in a single block (no phi needed) + if var_info.single_block.is_some() { + // Could do local rewriting here but skip for now + continue; + } + + // Skip if no stores + if var_info.store_count == 0 { + continue; + } + + // Mark for renaming + converter.to_rename.insert(var_name.clone()); + + // Insert phi nodes at IDF + insert_phi_nodes(&mut converter, var_name, &var_info); + } + } + + // Phase 2: Rename variables + let mut def_stack = DefStack::new(); + let entry = converter.func.entry; + rename_block(&mut converter, entry, &mut def_stack); + + // Fill in phi operands + fill_phi_operands(&mut converter); + + // Phase 3: Remove dead stores + let dead_stores = std::mem::take(&mut converter.dead_stores); + remove_dead_stores(converter.func, &dead_stores); +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::ir::BasicBlock; + use crate::types::TypeKind; + + fn make_simple_if_cfg() -> Function { + // Create a CFG with a simple if-then-else: + // + // int x = 1; + // if (cond) x = 2; + // return x; + // + // entry(0) + // / \ + // v v + // then(1) else(2) + // \ / + // v v + // merge(3) + + let mut func = Function::new("test", Type::basic(TypeKind::Int)); + + // Create symbol pseudo for local variable 'x' + let x_sym = PseudoId(0); + func.add_pseudo(Pseudo::sym(x_sym, "x".to_string())); + // Variable x is declared in entry block (BasicBlockId(0)) + func.add_local( + "x", + x_sym, + Type::basic(TypeKind::Int), + false, // not volatile + Some(BasicBlockId(0)), + ); + + // Value pseudos + let val1 = PseudoId(1); + func.add_pseudo(Pseudo::val(val1, 1)); + let val2 = PseudoId(2); + func.add_pseudo(Pseudo::val(val2, 2)); + let cond = PseudoId(3); + func.add_pseudo(Pseudo::val(cond, 1)); // Non-zero condition + + // Entry block: x = 1; if (cond) goto then else goto else + let mut entry = BasicBlock::new(BasicBlockId(0)); + entry.children = vec![BasicBlockId(1), BasicBlockId(2)]; + entry.add_insn(Instruction::new(Opcode::Entry)); + // Store x = 1 + entry.add_insn(Instruction::store( + val1, + x_sym, + 0, + Type::basic(TypeKind::Int), + )); + // Conditional branch + entry.add_insn(Instruction::cbr(cond, BasicBlockId(1), BasicBlockId(2))); + + // Then block: x = 2 + let mut then_bb = BasicBlock::new(BasicBlockId(1)); + then_bb.parents = vec![BasicBlockId(0)]; + then_bb.children = vec![BasicBlockId(3)]; + then_bb.add_insn(Instruction::store( + val2, + x_sym, + 0, + Type::basic(TypeKind::Int), + )); + then_bb.add_insn(Instruction::br(BasicBlockId(3))); + + // Else block: (no assignment) + let mut else_bb = BasicBlock::new(BasicBlockId(2)); + else_bb.parents = vec![BasicBlockId(0)]; + else_bb.children = vec![BasicBlockId(3)]; + else_bb.add_insn(Instruction::br(BasicBlockId(3))); + + // Merge block: return x + let mut merge = BasicBlock::new(BasicBlockId(3)); + merge.parents = vec![BasicBlockId(1), BasicBlockId(2)]; + let result = PseudoId(4); + func.add_pseudo(Pseudo::reg(result, 0)); + // Load x + merge.add_insn(Instruction::load( + result, + x_sym, + 0, + Type::basic(TypeKind::Int), + )); + merge.add_insn(Instruction::ret(Some(result))); + + func.entry = BasicBlockId(0); + func.blocks = vec![entry, then_bb, else_bb, merge]; + func + } + + #[test] + fn test_analyze_variable() { + let func = make_simple_if_cfg(); + + let info = analyze_variable(&func, "x").unwrap(); + assert_eq!(info.store_count, 2); // One in entry, one in then + assert_eq!(info.def_blocks.len(), 2); + assert!(!info.addr_taken); + } + + #[test] + fn test_ssa_convert_creates_phi() { + let mut func = make_simple_if_cfg(); + ssa_convert(&mut func); + + // After SSA conversion, merge block should have a phi node + let merge = func.get_block(BasicBlockId(3)).unwrap(); + + let has_phi = merge.insns.iter().any(|i| i.op == Opcode::Phi); + assert!(has_phi, "Merge block should have a phi node"); + } + + #[test] + fn test_domtree_built() { + let mut func = make_simple_if_cfg(); + domtree_build(&mut func); + + // Entry should be the root (no idom) + let entry = func.get_block(BasicBlockId(0)).unwrap(); + assert!(entry.idom.is_none()); + + // Then, else, and merge should all have entry as idom + let then_bb = func.get_block(BasicBlockId(1)).unwrap(); + assert_eq!(then_bb.idom, Some(BasicBlockId(0))); + + let else_bb = func.get_block(BasicBlockId(2)).unwrap(); + assert_eq!(else_bb.idom, Some(BasicBlockId(0))); + + let merge = func.get_block(BasicBlockId(3)).unwrap(); + assert_eq!(merge.idom, Some(BasicBlockId(0))); + } +} diff --git a/cc/symbol.rs b/cc/symbol.rs new file mode 100644 index 000000000..14e3fa64d --- /dev/null +++ b/cc/symbol.rs @@ -0,0 +1,516 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Symbol table for pcc C99 compiler +// Based on sparse's scope-aware symbol management +// + +use crate::types::Type; +use std::collections::HashMap; + +// ============================================================================ +// Symbol ID +// ============================================================================ + +/// Unique identifier for a symbol in the symbol table +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct SymbolId(pub u32); + +// ============================================================================ +// Namespace +// ============================================================================ + +/// C has multiple namespaces (C99 6.2.3) +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Namespace { + /// Ordinary identifiers: variables, functions, typedef names, enum constants + Ordinary, + /// Tags: struct, union, enum tags + Tag, +} + +// ============================================================================ +// Symbol Kind +// ============================================================================ + +/// What kind of entity this symbol represents +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SymbolKind { + /// A variable (local or global) + Variable, + /// A function + Function, + /// A function parameter + Parameter, + /// An enum constant + EnumConstant, + /// A struct/union/enum tag + Tag, + /// A typedef name + Typedef, +} + +// ============================================================================ +// Storage Class +// ============================================================================ + +/// Storage class for variables/functions +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum StorageClass { + #[default] + None, +} + +// ============================================================================ +// Symbol +// ============================================================================ + +/// A symbol in the symbol table +#[derive(Debug, Clone)] +pub struct Symbol { + /// The symbol's name + pub name: String, + + /// What kind of symbol this is + pub kind: SymbolKind, + + /// Which namespace this symbol belongs to + pub namespace: Namespace, + + /// The type of this symbol + pub typ: Type, + + /// Scope depth where this symbol was declared + pub scope_depth: u32, + + /// Is this a definition (vs just a declaration)? + pub defined: bool, + + /// Value for enum constants + pub enum_value: Option, +} + +impl Symbol { + /// Create a new variable symbol + pub fn variable(name: String, typ: Type, scope_depth: u32) -> Self { + Self { + name, + kind: SymbolKind::Variable, + namespace: Namespace::Ordinary, + typ, + scope_depth, + defined: true, + enum_value: None, + } + } + + /// Create a new function symbol + pub fn function(name: String, typ: Type, scope_depth: u32) -> Self { + Self { + name, + kind: SymbolKind::Function, + namespace: Namespace::Ordinary, + typ, + scope_depth, + defined: false, // Functions are declarations until we see the body + enum_value: None, + } + } + + /// Create a new parameter symbol + pub fn parameter(name: String, typ: Type, scope_depth: u32) -> Self { + Self { + name, + kind: SymbolKind::Parameter, + namespace: Namespace::Ordinary, + typ, + scope_depth, + defined: true, + enum_value: None, + } + } + + /// Create a new enum constant symbol + pub fn enum_constant(name: String, value: i64, scope_depth: u32) -> Self { + Self { + name, + kind: SymbolKind::EnumConstant, + namespace: Namespace::Ordinary, + typ: Type::basic(crate::types::TypeKind::Int), + scope_depth, + defined: true, + enum_value: Some(value), + } + } + + /// Create a new tag symbol (struct/union/enum tag) + pub fn tag(name: String, typ: Type, scope_depth: u32) -> Self { + Self { + name, + kind: SymbolKind::Tag, + namespace: Namespace::Tag, + typ, + scope_depth, + defined: true, + enum_value: None, + } + } + + /// Create a new typedef symbol + pub fn typedef(name: String, typ: Type, scope_depth: u32) -> Self { + Self { + name, + kind: SymbolKind::Typedef, + namespace: Namespace::Ordinary, + typ, + scope_depth, + defined: true, + enum_value: None, + } + } + + /// Check if this is an enum constant + pub fn is_enum_constant(&self) -> bool { + self.kind == SymbolKind::EnumConstant + } + + /// Check if this is a typedef + pub fn is_typedef(&self) -> bool { + self.kind == SymbolKind::Typedef + } +} + +// ============================================================================ +// Scope +// ============================================================================ + +/// A scope in the symbol table +#[derive(Debug)] +struct Scope { + /// Symbol IDs declared in this scope + symbols: Vec, + /// Parent scope (None for global scope) + parent: Option, +} + +impl Scope { + fn new(parent: Option) -> Self { + Self { + symbols: Vec::new(), + parent, + } + } +} + +// ============================================================================ +// Symbol Table +// ============================================================================ + +/// Scope-aware symbol table +/// +/// Implements C's scoping rules: +/// - File scope (global) +/// - Function scope (for labels) +/// - Block scope (compound statements) +/// - Function prototype scope (parameters in declarations) +pub struct SymbolTable { + /// All symbols (indexed by SymbolId) + symbols: Vec, + + /// All scopes (indexed by scope id) + scopes: Vec, + + /// Current scope id + current_scope: u32, + + /// Current scope depth + scope_depth: u32, + + /// Fast lookup: name -> list of symbol IDs with that name + /// (most recent first, for shadowing) + name_map: HashMap<(String, Namespace), Vec>, +} + +impl SymbolTable { + /// Create a new symbol table with global scope + pub fn new() -> Self { + let mut table = Self { + symbols: Vec::new(), + scopes: Vec::new(), + current_scope: 0, + scope_depth: 0, + name_map: HashMap::new(), + }; + // Create the global scope + table.scopes.push(Scope::new(None)); + table + } + + /// Enter a new scope + pub fn enter_scope(&mut self) { + let new_scope_id = self.scopes.len() as u32; + self.scopes.push(Scope::new(Some(self.current_scope))); + self.current_scope = new_scope_id; + self.scope_depth += 1; + } + + /// Leave the current scope, returning to the parent + pub fn leave_scope(&mut self) { + if let Some(parent) = self.scopes[self.current_scope as usize].parent { + // Remove symbols from name_map that were in this scope + let scope = &self.scopes[self.current_scope as usize]; + for &sym_id in &scope.symbols { + let sym = &self.symbols[sym_id.0 as usize]; + let key = (sym.name.clone(), sym.namespace); + if let Some(ids) = self.name_map.get_mut(&key) { + // Remove this symbol from the list + ids.retain(|&id| id != sym_id); + if ids.is_empty() { + self.name_map.remove(&key); + } + } + } + self.current_scope = parent; + self.scope_depth -= 1; + } + } + + /// Get the current scope depth + pub fn depth(&self) -> u32 { + self.scope_depth + } + + /// Declare a symbol in the current scope + /// + /// Returns the SymbolId on success, or an error if redefinition + pub fn declare(&mut self, mut sym: Symbol) -> Result { + // Set scope depth + sym.scope_depth = self.scope_depth; + + // Check for redefinition in the same scope + let key = (sym.name.clone(), sym.namespace); + if let Some(ids) = self.name_map.get(&key) { + for &id in ids { + let existing = &self.symbols[id.0 as usize]; + if existing.scope_depth == self.scope_depth { + // Same scope - check for redefinition + if existing.defined && sym.defined { + return Err(SymbolError::Redefinition(sym.name.clone())); + } + } + } + } + + // Add the symbol + let id = SymbolId(self.symbols.len() as u32); + self.symbols.push(sym.clone()); + + // Add to current scope + self.scopes[self.current_scope as usize].symbols.push(id); + + // Add to name map (at front for shadowing) + self.name_map.entry(key).or_default().insert(0, id); + + Ok(id) + } + + /// Look up a symbol by name in the given namespace + /// + /// Searches from innermost scope outward + pub fn lookup(&self, name: &str, ns: Namespace) -> Option<&Symbol> { + let key = (name.to_string(), ns); + self.name_map + .get(&key) + .and_then(|ids| ids.first().map(|id| &self.symbols[id.0 as usize])) + } + + /// Look up a tag (struct/union/enum) by name + pub fn lookup_tag(&self, name: &str) -> Option<&Symbol> { + self.lookup(name, Namespace::Tag) + } + + /// Look up a typedef by name + /// Returns the aliased type if found + pub fn lookup_typedef(&self, name: &str) -> Option<&Type> { + self.lookup(name, Namespace::Ordinary).and_then(|s| { + if s.is_typedef() { + Some(&s.typ) + } else { + None + } + }) + } + + /// Get the value of an enum constant + pub fn get_enum_value(&self, name: &str) -> Option { + self.lookup(name, Namespace::Ordinary).and_then(|s| { + if s.is_enum_constant() { + s.enum_value + } else { + None + } + }) + } +} + +impl Default for SymbolTable { + fn default() -> Self { + Self::new() + } +} + +// ============================================================================ +// Errors +// ============================================================================ + +/// Symbol table errors +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum SymbolError { + /// Attempted to redefine an existing symbol + Redefinition(String), +} + +impl std::fmt::Display for SymbolError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SymbolError::Redefinition(name) => write!(f, "redefinition of '{}'", name), + } + } +} + +impl std::error::Error for SymbolError {} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::{Type, TypeKind}; + + #[test] + fn test_declare_and_lookup() { + let mut table = SymbolTable::new(); + + // Declare a variable + let sym = Symbol::variable("x".to_string(), Type::basic(TypeKind::Int), 0); + let _id = table.declare(sym).unwrap(); + + // Look it up + let found = table.lookup("x", Namespace::Ordinary).unwrap(); + assert_eq!(found.name, "x"); + assert_eq!(found.kind, SymbolKind::Variable); + } + + #[test] + fn test_scopes() { + let mut table = SymbolTable::new(); + + // Declare x in global scope + let sym1 = Symbol::variable("x".to_string(), Type::basic(TypeKind::Int), 0); + table.declare(sym1).unwrap(); + + // Enter a new scope + table.enter_scope(); + + // Declare y in inner scope + let sym2 = Symbol::variable("y".to_string(), Type::basic(TypeKind::Char), 0); + table.declare(sym2).unwrap(); + + // Both should be visible + assert!(table.lookup("x", Namespace::Ordinary).is_some()); + assert!(table.lookup("y", Namespace::Ordinary).is_some()); + + // Leave scope + table.leave_scope(); + + // x should still be visible, y should not + assert!(table.lookup("x", Namespace::Ordinary).is_some()); + assert!(table.lookup("y", Namespace::Ordinary).is_none()); + } + + #[test] + fn test_shadowing() { + let mut table = SymbolTable::new(); + + // Declare x as int in global scope + let sym1 = Symbol::variable("x".to_string(), Type::basic(TypeKind::Int), 0); + table.declare(sym1).unwrap(); + + // Enter a new scope + table.enter_scope(); + + // Shadow x with char + let sym2 = Symbol::variable("x".to_string(), Type::basic(TypeKind::Char), 0); + table.declare(sym2).unwrap(); + + // Should find the inner x (char) + let found = table.lookup("x", Namespace::Ordinary).unwrap(); + assert_eq!(found.typ.kind, TypeKind::Char); + + // Leave scope + table.leave_scope(); + + // Should find the outer x (int) + let found = table.lookup("x", Namespace::Ordinary).unwrap(); + assert_eq!(found.typ.kind, TypeKind::Int); + } + + #[test] + fn test_redefinition_error() { + let mut table = SymbolTable::new(); + + // Declare x + let sym1 = Symbol::variable("x".to_string(), Type::basic(TypeKind::Int), 0); + table.declare(sym1).unwrap(); + + // Try to redeclare x in the same scope + let sym2 = Symbol::variable("x".to_string(), Type::basic(TypeKind::Char), 0); + let result = table.declare(sym2); + + assert!(matches!(result, Err(SymbolError::Redefinition(_)))); + } + + #[test] + fn test_scope_depth() { + let mut table = SymbolTable::new(); + + assert_eq!(table.depth(), 0); + + table.enter_scope(); + assert_eq!(table.depth(), 1); + + table.enter_scope(); + assert_eq!(table.depth(), 2); + + table.leave_scope(); + assert_eq!(table.depth(), 1); + + table.leave_scope(); + assert_eq!(table.depth(), 0); + } + + #[test] + fn test_function_symbol() { + let mut table = SymbolTable::new(); + + // Declare a function + let func_type = Type::function( + Type::basic(TypeKind::Int), + vec![Type::basic(TypeKind::Int)], + false, + ); + let func = Symbol::function("foo".to_string(), func_type, 0); + table.declare(func).unwrap(); + + let found = table.lookup("foo", Namespace::Ordinary).unwrap(); + assert_eq!(found.kind, SymbolKind::Function); + assert!(!found.defined); // Not yet defined + } +} diff --git a/cc/target.rs b/cc/target.rs new file mode 100644 index 000000000..63730c799 --- /dev/null +++ b/cc/target.rs @@ -0,0 +1,208 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Target configuration for pcc +// +// Handles architecture, OS, and ABI-specific settings needed for +// preprocessing and code generation. +// + +use std::fmt; + +/// Target CPU architecture +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[allow(dead_code)] // Variants used via cfg-based detection and tests +pub enum Arch { + X86_64, + Aarch64, +} + +impl fmt::Display for Arch { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Arch::X86_64 => write!(f, "x86_64"), + Arch::Aarch64 => write!(f, "aarch64"), + } + } +} + +/// Target operating system +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[allow(dead_code)] // Variants used via cfg-based detection and tests +pub enum Os { + Linux, + MacOS, + FreeBSD, +} + +impl fmt::Display for Os { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Os::Linux => write!(f, "linux"), + Os::MacOS => write!(f, "macos"), + Os::FreeBSD => write!(f, "freebsd"), + } + } +} + +/// Target configuration +#[derive(Debug, Clone)] +pub struct Target { + /// CPU architecture + pub arch: Arch, + /// Operating system + pub os: Os, + /// Pointer size in bits + pub pointer_width: u32, + /// Size of long in bits + pub long_width: u32, + /// char is signed by default + pub char_signed: bool, +} + +impl Target { + /// Create target for the host system + pub fn host() -> Self { + let arch = Self::detect_arch(); + let os = Self::detect_os(); + + Self::new(arch, os) + } + + /// Create target for a specific arch/os combination + pub fn new(arch: Arch, os: Os) -> Self { + let pointer_width = 64; + + // LP64 model for Unix-like systems (long and pointer are 64-bit) + let long_width = match os { + Os::Linux | Os::MacOS | Os::FreeBSD => 64, + }; + + // char signedness varies by platform + let char_signed = match (arch, os) { + // ARM defaults to unsigned char + (Arch::Aarch64, _) => false, + // x86_64 defaults to signed char + (Arch::X86_64, _) => true, + }; + + Self { + arch, + os, + pointer_width, + long_width, + char_signed, + } + } + + /// Detect host architecture at runtime + fn detect_arch() -> Arch { + #[cfg(target_arch = "x86_64")] + { + Arch::X86_64 + } + #[cfg(target_arch = "aarch64")] + { + Arch::Aarch64 + } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + // Default to x86_64 for unknown architectures + Arch::X86_64 + } + } + + /// Detect host OS at runtime + fn detect_os() -> Os { + #[cfg(target_os = "linux")] + { + Os::Linux + } + #[cfg(target_os = "macos")] + { + Os::MacOS + } + #[cfg(target_os = "freebsd")] + { + Os::FreeBSD + } + #[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "freebsd")))] + { + // Default to Linux for unknown OS + Os::Linux + } + } +} + +impl Default for Target { + fn default() -> Self { + Self::host() + } +} + +impl Target { + /// Parse a target triple (e.g., "aarch64-apple-darwin", "x86_64-unknown-linux-gnu") + pub fn from_triple(triple: &str) -> Option { + let parts: Vec<&str> = triple.split('-').collect(); + if parts.is_empty() { + return None; + } + + let arch = match parts[0] { + "x86_64" => Arch::X86_64, + "aarch64" | "arm64" => Arch::Aarch64, + _ => return None, + }; + + // Detect OS from triple (second or third part typically) + let os = if triple.contains("linux") { + Os::Linux + } else if triple.contains("darwin") || triple.contains("macos") || triple.contains("apple") + { + Os::MacOS + } else if triple.contains("freebsd") { + Os::FreeBSD + } else { + // Default based on arch + Os::Linux + }; + + Some(Self::new(arch, os)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_host_target() { + let target = Target::host(); + // Basic sanity checks + assert_eq!(target.pointer_width, 64); + } + + #[test] + fn test_x86_64_linux() { + let target = Target::new(Arch::X86_64, Os::Linux); + assert_eq!(target.arch, Arch::X86_64); + assert_eq!(target.os, Os::Linux); + assert_eq!(target.pointer_width, 64); + assert_eq!(target.long_width, 64); // LP64 + assert!(target.char_signed); // x86 default + } + + #[test] + fn test_aarch64_linux() { + let target = Target::new(Arch::Aarch64, Os::Linux); + assert_eq!(target.arch, Arch::Aarch64); + assert_eq!(target.os, Os::Linux); + assert_eq!(target.pointer_width, 64); + assert!(!target.char_signed); // ARM default + } +} diff --git a/cc/tests/common/mod.rs b/cc/tests/common/mod.rs new file mode 100644 index 000000000..94b3ceaaf --- /dev/null +++ b/cc/tests/common/mod.rs @@ -0,0 +1,135 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Common test utilities for pcc integration tests +// + +use plib::testing::run_test_base; +use std::io::Write; +use std::path::PathBuf; +use std::process::Command; +use tempfile::NamedTempFile; + +/// Get path to a test input file in a specific test directory +pub fn get_test_file_path(test_dir: &str, filename: &str) -> PathBuf { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("tests"); + path.push(test_dir); + path.push(filename); + path +} + +/// Create a temporary C file with the given content +/// Returns NamedTempFile which auto-deletes on drop +pub fn create_c_file(name: &str, content: &str) -> NamedTempFile { + let mut file = tempfile::Builder::new() + .prefix(&format!("pcc_test_{}_", name)) + .suffix(".c") + .tempfile() + .expect("failed to create temp file"); + file.write_all(content.as_bytes()) + .expect("failed to write test file"); + file +} + +/// Compile a C file using pcc and return the path to the executable +/// The executable is placed in temp dir and must be cleaned up by caller +pub fn compile(c_file: &PathBuf) -> Option { + let exe_path = std::env::temp_dir().join(format!( + "pcc_exe_{}", + c_file.file_stem().unwrap().to_string_lossy() + )); + + let output = run_test_base( + "pcc", + &vec![ + "-o".to_string(), + exe_path.to_string_lossy().to_string(), + c_file.to_string_lossy().to_string(), + ], + &[], + ); + + if output.status.success() { + Some(exe_path) + } else { + eprintln!("pcc failed: {}", String::from_utf8_lossy(&output.stderr)); + None + } +} + +/// Compile a C file from a specific test directory +pub fn compile_test_file(test_dir: &str, filename: &str) -> Option { + let c_file = get_test_file_path(test_dir, filename); + compile(&c_file) +} + +/// Run an executable and return its exit code +pub fn run(exe: &PathBuf) -> i32 { + let output = Command::new(exe) + .output() + .expect("failed to run executable"); + output.status.code().unwrap_or(-1) +} + +/// Run an executable and return (exit_code, stdout) +pub fn run_with_output(exe: &PathBuf) -> (i32, String) { + let output = Command::new(exe) + .output() + .expect("failed to run executable"); + ( + output.status.code().unwrap_or(-1), + String::from_utf8_lossy(&output.stdout).to_string(), + ) +} + +/// Clean up an executable file +pub fn cleanup_exe(exe_file: &Option) { + if let Some(exe) = exe_file { + let _ = std::fs::remove_file(exe); + } +} + +/// Compile inline C code and run, returning exit code +/// Temp files are cleaned up automatically via tempfile +pub fn compile_and_run(name: &str, content: &str) -> i32 { + let c_file = create_c_file(name, content); + let c_path = c_file.path().to_path_buf(); + + let exe_path = std::env::temp_dir().join(format!("pcc_exe_{}", name)); + + let output = run_test_base( + "pcc", + &vec![ + "-o".to_string(), + exe_path.to_string_lossy().to_string(), + c_path.to_string_lossy().to_string(), + ], + &[], + ); + + if !output.status.success() { + eprintln!( + "pcc compilation failed for {}:\n{}", + name, + String::from_utf8_lossy(&output.stderr) + ); + return -1; + } + + let run_output = Command::new(&exe_path) + .output() + .expect("failed to run executable"); + + let exit_code = run_output.status.code().unwrap_or(-1); + + // Cleanup exe (c_file auto-cleaned by NamedTempFile drop) + let _ = std::fs::remove_file(&exe_path); + + exit_code +} diff --git a/cc/tests/datatypes/array_type.rs b/cc/tests/datatypes/array_type.rs new file mode 100644 index 000000000..4a3fe9292 --- /dev/null +++ b/cc/tests/datatypes/array_type.rs @@ -0,0 +1,797 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for C99 array types +// +// C99 array semantics: +// - Arrays are contiguous sequences of elements of the same type +// - Array names decay to pointers to the first element in most contexts +// - arr[i] is equivalent to *(arr + i) +// - Multi-dimensional arrays are arrays of arrays +// - Arrays cannot be assigned or passed by value +// - sizeof(array) returns total size in bytes +// + +use crate::common::compile_and_run; + +// ============================================================================ +// Basic Array Declaration and Access +// ============================================================================ + +#[test] +fn array_basic_int() { + let code = r#" +int main(void) { + int arr[3]; + arr[0] = 10; + arr[1] = 20; + arr[2] = 30; + + if (arr[0] != 10) return 1; + if (arr[1] != 20) return 2; + if (arr[2] != 30) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_basic_int", code), 0); +} + +#[test] +fn array_basic_char() { + let code = r#" +int main(void) { + char arr[4]; + arr[0] = 'a'; + arr[1] = 'b'; + arr[2] = 'c'; + arr[3] = 0; + + if (arr[0] != 'a') return 1; + if (arr[1] != 'b') return 2; + if (arr[2] != 'c') return 3; + if (arr[3] != 0) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_basic_char", code), 0); +} + +#[test] +fn array_basic_long() { + let code = r#" +int main(void) { + long arr[3]; + arr[0] = 1000000000L; + arr[1] = 2000000000L; + arr[2] = 3000000000L; + + if (arr[0] != 1000000000L) return 1; + if (arr[1] != 2000000000L) return 2; + if (arr[2] != 3000000000L) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_basic_long", code), 0); +} + +// ============================================================================ +// Array Initialization +// ============================================================================ + +#[test] +fn array_init_full() { + let code = r#" +int main(void) { + int arr[5] = {1, 2, 3, 4, 5}; + + if (arr[0] != 1) return 1; + if (arr[1] != 2) return 2; + if (arr[2] != 3) return 3; + if (arr[3] != 4) return 4; + if (arr[4] != 5) return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_init_full", code), 0); +} + +#[test] +fn array_init_designated() { + let code = r#" +int main(void) { + int arr[5] = {[1] = 10, [3] = 30}; + + if (arr[1] != 10) return 1; + if (arr[3] != 30) return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_init_desig", code), 0); +} + +#[test] +fn array_init_char_list() { + let code = r#" +int main(void) { + char arr[5] = {'h', 'e', 'l', 'l', 'o'}; + + if (arr[0] != 'h') return 1; + if (arr[1] != 'e') return 2; + if (arr[2] != 'l') return 3; + if (arr[3] != 'l') return 4; + if (arr[4] != 'o') return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_init_char", code), 0); +} + +// ============================================================================ +// Array sizeof +// ============================================================================ + +#[test] +fn array_sizeof_int() { + let code = r#" +int main(void) { + int arr[10]; + + // sizeof(int) is 4, so 10 * 4 = 40 + if (sizeof(arr) != 40) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_sizeof_int", code), 0); +} + +#[test] +fn array_sizeof_char() { + let code = r#" +int main(void) { + char arr[20]; + + // sizeof(char) is 1, so 20 * 1 = 20 + if (sizeof(arr) != 20) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_sizeof_char", code), 0); +} + +#[test] +fn array_sizeof_long() { + let code = r#" +int main(void) { + long arr[5]; + + // sizeof(long) is 8, so 5 * 8 = 40 + if (sizeof(arr) != 40) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_sizeof_long", code), 0); +} + +// ============================================================================ +// Array Decay to Pointer +// ============================================================================ + +#[test] +fn array_decay_basic() { + let code = r#" +int main(void) { + int arr[3] = {10, 20, 30}; + int *p = arr; // Array decays to pointer + + if (*p != 10) return 1; + if (p[0] != 10) return 2; + if (p[1] != 20) return 3; + if (p[2] != 30) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_decay_basic", code), 0); +} + +#[test] +fn array_decay_arithmetic() { + let code = r#" +int main(void) { + int arr[5] = {1, 2, 3, 4, 5}; + int *p = arr; + + // Pointer arithmetic on decayed array + if (*(p + 0) != 1) return 1; + if (*(p + 1) != 2) return 2; + if (*(p + 2) != 3) return 3; + if (*(p + 3) != 4) return 4; + if (*(p + 4) != 5) return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_decay_arith", code), 0); +} + +#[test] +fn array_decay_modify() { + let code = r#" +int main(void) { + int arr[3] = {1, 2, 3}; + int *p = arr; + + // Modify through pointer + *p = 100; + *(p + 1) = 200; + *(p + 2) = 300; + + if (arr[0] != 100) return 1; + if (arr[1] != 200) return 2; + if (arr[2] != 300) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_decay_modify", code), 0); +} + +// ============================================================================ +// Multi-dimensional Arrays +// ============================================================================ + +#[test] +fn array_2d_basic() { + let code = r#" +int main(void) { + int arr[2][3]; + + arr[0][0] = 1; + arr[0][1] = 2; + arr[0][2] = 3; + arr[1][0] = 4; + arr[1][1] = 5; + arr[1][2] = 6; + + if (arr[0][0] != 1) return 1; + if (arr[0][1] != 2) return 2; + if (arr[0][2] != 3) return 3; + if (arr[1][0] != 4) return 4; + if (arr[1][1] != 5) return 5; + if (arr[1][2] != 6) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_2d_basic", code), 0); +} + +#[test] +fn array_2d_init() { + let code = r#" +int main(void) { + int arr[2][3] = {{1, 2, 3}, {4, 5, 6}}; + + if (arr[0][0] != 1) return 1; + if (arr[0][1] != 2) return 2; + if (arr[0][2] != 3) return 3; + if (arr[1][0] != 4) return 4; + if (arr[1][1] != 5) return 5; + if (arr[1][2] != 6) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_2d_init", code), 0); +} + +#[test] +fn array_2d_sizeof() { + let code = r#" +int main(void) { + int arr[2][3]; + + // Total size: 2 * 3 * 4 = 24 bytes + if (sizeof(arr) != 24) return 1; + + // Row size: 3 * 4 = 12 bytes + if (sizeof(arr[0]) != 12) return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_2d_sizeof", code), 0); +} + +#[test] +fn array_3d_basic() { + let code = r#" +int main(void) { + int arr[2][2][2]; + + arr[0][0][0] = 1; + arr[0][0][1] = 2; + arr[0][1][0] = 3; + arr[0][1][1] = 4; + arr[1][0][0] = 5; + arr[1][0][1] = 6; + arr[1][1][0] = 7; + arr[1][1][1] = 8; + + if (arr[0][0][0] != 1) return 1; + if (arr[0][0][1] != 2) return 2; + if (arr[0][1][0] != 3) return 3; + if (arr[0][1][1] != 4) return 4; + if (arr[1][0][0] != 5) return 5; + if (arr[1][0][1] != 6) return 6; + if (arr[1][1][0] != 7) return 7; + if (arr[1][1][1] != 8) return 8; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_3d_basic", code), 0); +} + +// ============================================================================ +// Arrays of Pointers +// ============================================================================ + +#[test] +fn array_of_int_pointers() { + let code = r#" +int main(void) { + int a = 10, b = 20, c = 30; + int *arr[3]; + + arr[0] = &a; + arr[1] = &b; + arr[2] = &c; + + if (*arr[0] != 10) return 1; + if (*arr[1] != 20) return 2; + if (*arr[2] != 30) return 3; + + // Modify through pointer array + *arr[0] = 100; + if (a != 100) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_int_ptrs", code), 0); +} + +#[test] +fn array_of_char_pointers() { + let code = r#" +int main(void) { + char x = 'X', y = 'Y', z = 'Z'; + char *arr[3]; + + arr[0] = &x; + arr[1] = &y; + arr[2] = &z; + + if (*arr[0] != 'X') return 1; + if (*arr[1] != 'Y') return 2; + if (*arr[2] != 'Z') return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_char_ptrs", code), 0); +} + +// ============================================================================ +// Pointer to Array +// ============================================================================ + +#[test] +fn pointer_to_array_element() { + let code = r#" +int main(void) { + int arr[5] = {1, 2, 3, 4, 5}; + int *p; + + p = &arr[2]; // Pointer to third element + + if (*p != 3) return 1; + if (p[-1] != 2) return 2; + if (p[0] != 3) return 3; + if (p[1] != 4) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("ptr_to_elem", code), 0); +} + +// ============================================================================ +// Array as Function Parameter +// ============================================================================ + +#[test] +fn array_param_basic() { + let code = r#" +int sum_array(int arr[], int size) { + int total = 0; + int i; + for (i = 0; i < size; i = i + 1) { + total = total + arr[i]; + } + return total; +} + +int main(void) { + int arr[5] = {1, 2, 3, 4, 5}; + int result = sum_array(arr, 5); + + // 1 + 2 + 3 + 4 + 5 = 15 + if (result != 15) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_param_basic", code), 0); +} + +#[test] +fn array_param_modify() { + let code = r#" +void double_array(int arr[], int size) { + int i; + for (i = 0; i < size; i = i + 1) { + arr[i] = arr[i] * 2; + } +} + +int main(void) { + int arr[3] = {1, 2, 3}; + double_array(arr, 3); + + if (arr[0] != 2) return 1; + if (arr[1] != 4) return 2; + if (arr[2] != 6) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_param_modify", code), 0); +} + +#[test] +fn array_param_as_pointer() { + let code = r#" +int get_first(int *arr) { + return arr[0]; +} + +int get_second(int *arr) { + return arr[1]; +} + +int main(void) { + int arr[3] = {10, 20, 30}; + + if (get_first(arr) != 10) return 1; + if (get_second(arr) != 20) return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_param_ptr", code), 0); +} + +// ============================================================================ +// Struct with Array Member +// ============================================================================ + +#[test] +fn struct_with_array() { + let code = r#" +struct data { + int id; + int values[3]; +}; + +int main(void) { + struct data d; + d.id = 42; + d.values[0] = 10; + d.values[1] = 20; + d.values[2] = 30; + + if (d.id != 42) return 1; + if (d.values[0] != 10) return 2; + if (d.values[1] != 20) return 3; + if (d.values[2] != 30) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("struct_arr", code), 0); +} + +#[test] +fn struct_with_array_init() { + let code = r#" +struct data { + int id; + int values[3]; +}; + +int main(void) { + struct data d = {42, {10, 20, 30}}; + + if (d.id != 42) return 1; + if (d.values[0] != 10) return 2; + if (d.values[1] != 20) return 3; + if (d.values[2] != 30) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("struct_arr_init", code), 0); +} + +#[test] +fn struct_array_member_pointer() { + let code = r#" +struct data { + int id; + int values[3]; +}; + +int sum_values(int *arr, int size) { + int total = 0; + int i; + for (i = 0; i < size; i = i + 1) { + total = total + arr[i]; + } + return total; +} + +int main(void) { + struct data d = {1, {10, 20, 30}}; + + // Pass struct array member to function + int sum = sum_values(d.values, 3); + if (sum != 60) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("struct_arr_ptr", code), 0); +} + +// ============================================================================ +// Array of Structs +// ============================================================================ + +#[test] +fn array_of_structs() { + let code = r#" +struct point { + int x; + int y; +}; + +int main(void) { + struct point points[3]; + + points[0].x = 1; + points[0].y = 2; + points[1].x = 3; + points[1].y = 4; + points[2].x = 5; + points[2].y = 6; + + if (points[0].x != 1) return 1; + if (points[0].y != 2) return 2; + if (points[1].x != 3) return 3; + if (points[1].y != 4) return 4; + if (points[2].x != 5) return 5; + if (points[2].y != 6) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_of_structs", code), 0); +} + +#[test] +fn array_of_structs_init() { + let code = r#" +struct point { + int x; + int y; +}; + +int main(void) { + struct point points[3] = {{1, 2}, {3, 4}, {5, 6}}; + + if (points[0].x != 1) return 1; + if (points[0].y != 2) return 2; + if (points[1].x != 3) return 3; + if (points[1].y != 4) return 4; + if (points[2].x != 5) return 5; + if (points[2].y != 6) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_structs_init", code), 0); +} + +// ============================================================================ +// Array Index Expressions +// ============================================================================ + +#[test] +fn array_index_variable() { + let code = r#" +int main(void) { + int arr[5] = {10, 20, 30, 40, 50}; + int i; + int sum = 0; + + for (i = 0; i < 5; i = i + 1) { + sum = sum + arr[i]; + } + + // 10 + 20 + 30 + 40 + 50 = 150 + if (sum != 150) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_idx_var", code), 0); +} + +#[test] +fn array_index_expression() { + let code = r#" +int main(void) { + int arr[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + int i = 2; + + if (arr[i * 2] != 4) return 1; // arr[4] + if (arr[i + 3] != 5) return 2; // arr[5] + if (arr[i * 2 + 1] != 5) return 3; // arr[5] + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_idx_expr", code), 0); +} + +// ============================================================================ +// Pointer and Array Equivalence +// ============================================================================ + +#[test] +fn array_pointer_equivalence() { + let code = r#" +int main(void) { + int arr[5] = {10, 20, 30, 40, 50}; + + // arr[i] is equivalent to *(arr + i) + if (arr[0] != *(arr + 0)) return 1; + if (arr[1] != *(arr + 1)) return 2; + if (arr[2] != *(arr + 2)) return 3; + if (arr[3] != *(arr + 3)) return 4; + if (arr[4] != *(arr + 4)) return 5; + + // Also equivalent: i[arr] == arr[i] == *(arr + i) == *(i + arr) + if (0[arr] != 10) return 6; + if (1[arr] != 20) return 7; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_ptr_equiv", code), 0); +} + +// ============================================================================ +// Array Bounds (no checking, but layout) +// ============================================================================ + +#[test] +fn array_adjacent_layout() { + let code = r#" +int main(void) { + int arr[3] = {1, 2, 3}; + int *p = arr; + + // Elements are contiguous in memory + // Second element is at p + 1 + if (*(p + 1) != 2) return 1; + + // Using pointer arithmetic to compute addresses + long addr0 = (long)&arr[0]; + long addr1 = (long)&arr[1]; + long addr2 = (long)&arr[2]; + + // Each int is 4 bytes apart + if (addr1 - addr0 != 4) return 2; + if (addr2 - addr1 != 4) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_layout", code), 0); +} + +// ============================================================================ +// Void Pointer and Array +// ============================================================================ + +#[test] +fn array_void_pointer() { + let code = r#" +int main(void) { + int arr[3] = {100, 200, 300}; + void *vp = arr; + + int *ip = vp; + if (ip[0] != 100) return 1; + if (ip[1] != 200) return 2; + if (ip[2] != 300) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_void_ptr", code), 0); +} + +// ============================================================================ +// Edge Cases +// ============================================================================ + +#[test] +fn array_single_element() { + let code = r#" +int main(void) { + int arr[1] = {42}; + + if (arr[0] != 42) return 1; + if (sizeof(arr) != 4) return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_single", code), 0); +} + +#[test] +fn array_nested_index() { + let code = r#" +int main(void) { + int arr[3] = {0, 1, 2}; + int indices[3] = {2, 0, 1}; + + // arr[indices[i]] - nested array access + if (arr[indices[0]] != 2) return 1; // arr[2] = 2 + if (arr[indices[1]] != 0) return 2; // arr[0] = 0 + if (arr[indices[2]] != 1) return 3; // arr[1] = 1 + + return 0; +} +"#; + assert_eq!(compile_and_run("arr_nested_idx", code), 0); +} diff --git a/cc/tests/datatypes/bitfield_type.rs b/cc/tests/datatypes/bitfield_type.rs new file mode 100644 index 000000000..7cebc8d56 --- /dev/null +++ b/cc/tests/datatypes/bitfield_type.rs @@ -0,0 +1,303 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for C99 bitfield support +// + +use crate::common::compile_and_run; + +// ============================================================================ +// Basic Bitfield: Declaration and access +// ============================================================================ + +#[test] +fn bitfield_basic() { + let code = r#" +int main(void) { + struct flags { + unsigned int a : 1; + unsigned int b : 3; + unsigned int c : 4; + }; + + struct flags f; + f.a = 1; + f.b = 5; + f.c = 15; + + if (f.a != 1) return 1; + if (f.b != 5) return 2; + if (f.c != 15) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("bitfield_basic", code), 0); +} + +// ============================================================================ +// Bitfield Packing: Multiple bitfields packed into single storage unit +// ============================================================================ + +#[test] +fn bitfield_packed() { + let code = r#" +int main(void) { + struct packed { + unsigned int a : 8; + unsigned int b : 8; + unsigned int c : 8; + unsigned int d : 8; + }; + + // Four 8-bit fields should pack into one 32-bit int + if (sizeof(struct packed) != 4) return 1; + + struct packed p; + p.a = 0x12; + p.b = 0x34; + p.c = 0x56; + p.d = 0x78; + + if (p.a != 0x12) return 2; + if (p.b != 0x34) return 3; + if (p.c != 0x56) return 4; + if (p.d != 0x78) return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("bitfield_packed", code), 0); +} + +// ============================================================================ +// Bitfield Sizeof: Verify struct size with bitfields +// ============================================================================ + +#[test] +fn bitfield_sizeof() { + let code = r#" +int main(void) { + struct small { + unsigned int a : 1; + unsigned int b : 1; + unsigned int c : 1; + }; + + // Three 1-bit fields should fit in one int (4 bytes) + if (sizeof(struct small) != 4) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("bitfield_sizeof", code), 0); +} + +// ============================================================================ +// Zero-Width Bitfield: Force alignment to next storage unit +// ============================================================================ + +#[test] +fn bitfield_zero_width() { + let code = r#" +int main(void) { + struct aligned { + unsigned int a : 4; + unsigned int : 0; // Force next field to new unit + unsigned int b : 4; + }; + + // a and b should be in different storage units + if (sizeof(struct aligned) != 8) return 1; + + struct aligned x; + x.a = 15; + x.b = 7; + + if (x.a != 15) return 2; + if (x.b != 7) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("bitfield_zero_width", code), 0); +} + +// ============================================================================ +// Unnamed Bitfield: Padding bits +// ============================================================================ + +#[test] +fn bitfield_unnamed() { + let code = r#" +int main(void) { + struct padded { + unsigned int a : 4; + unsigned int : 4; // 4 bits padding (unnamed) + unsigned int b : 8; + }; + + struct padded p; + p.a = 15; + p.b = 200; + + if (p.a != 15) return 1; + if (p.b != 200) return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("bitfield_unnamed", code), 0); +} + +// ============================================================================ +// Bitfield with _Bool type +// ============================================================================ + +#[test] +fn bitfield_bool() { + let code = r#" +int main(void) { + struct bool_bits { + _Bool flag1 : 1; + _Bool flag2 : 1; + _Bool flag3 : 1; + }; + + struct bool_bits b; + b.flag1 = 1; + b.flag2 = 0; + b.flag3 = 1; + + if (b.flag1 != 1) return 1; + if (b.flag2 != 0) return 2; + if (b.flag3 != 1) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("bitfield_bool", code), 0); +} + +// ============================================================================ +// Bitfield with Regular Members +// ============================================================================ + +#[test] +fn bitfield_mixed() { + let code = r#" +int main(void) { + struct mixed { + int regular; + unsigned int bits : 8; + int another; + }; + + struct mixed m; + m.regular = 100; + m.bits = 255; + m.another = 200; + + if (m.regular != 100) return 1; + if (m.bits != 255) return 2; + if (m.another != 200) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("bitfield_mixed", code), 0); +} + +// ============================================================================ +// Bitfield Overflow: Value truncation +// ============================================================================ + +#[test] +fn bitfield_overflow() { + let code = r#" +int main(void) { + struct overflow { + unsigned int a : 3; // Range 0-7 + }; + + struct overflow o; + o.a = 15; // Should be masked to 7 (15 & 0x7 = 7) + + if (o.a != 7) return 1; + + o.a = 8; // Should be masked to 0 (8 & 0x7 = 0) + if (o.a != 0) return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("bitfield_overflow", code), 0); +} + +// ============================================================================ +// Bitfield with Pointer Access +// ============================================================================ + +#[test] +fn bitfield_pointer() { + let code = r#" +int main(void) { + struct flags { + unsigned int a : 4; + unsigned int b : 4; + }; + + struct flags f; + struct flags *p = &f; + + p->a = 10; + p->b = 5; + + if (p->a != 10) return 1; + if (p->b != 5) return 2; + if (f.a != 10) return 3; + if (f.b != 5) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("bitfield_pointer", code), 0); +} + +// ============================================================================ +// Bitfield Arithmetic +// ============================================================================ + +#[test] +fn bitfield_arithmetic() { + let code = r#" +int main(void) { + struct bits { + unsigned int a : 4; + unsigned int b : 4; + }; + + struct bits s; + s.a = 5; + s.b = 3; + + int sum = s.a + s.b; + if (sum != 8) return 1; + + int product = s.a * s.b; + if (product != 15) return 2; + + // Increment + s.a = s.a + 1; + if (s.a != 6) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("bitfield_arithmetic", code), 0); +} diff --git a/cc/tests/datatypes/bool.rs b/cc/tests/datatypes/bool.rs new file mode 100644 index 000000000..ad46d8181 --- /dev/null +++ b/cc/tests/datatypes/bool.rs @@ -0,0 +1,701 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for C99 `_Bool` data type +// +// C99 _Bool has special semantics: +// - When any scalar value is converted to _Bool: +// - Result is 0 if the value compares equal to 0 +// - Result is 1 otherwise +// - _Bool is an unsigned integer type that can store 0 and 1 +// - sizeof(_Bool) is implementation-defined but typically 1 +// + +use crate::common::compile_and_run; + +// ============================================================================ +// Basic Declaration and Assignment +// ============================================================================ + +#[test] +fn bool_basic() { + let code = r#" +int main(void) { + _Bool a, b; + + // Direct assignment of 0 + a = 0; + if (a != 0) return 1; + + // Direct assignment of 1 + b = 1; + if (b != 1) return 2; + + // Size of _Bool (should be 1 byte on most platforms) + if (sizeof(_Bool) != 1) return 3; + + // _Bool in conditions + a = 0; + if (a) return 4; // Should not execute + + b = 1; + if (!b) return 5; // Should not execute + + return 0; +} +"#; + assert_eq!(compile_and_run("bool_basic", code), 0); +} + +// ============================================================================ +// Conversion Semantics (The key C99 _Bool feature) +// ============================================================================ + +#[test] +fn bool_conversion_from_int() { + let code = r#" +int main(void) { + _Bool b; + int i; + + // Zero converts to 0 + i = 0; + b = i; + if (b != 0) return 1; + + // One converts to 1 + i = 1; + b = i; + if (b != 1) return 2; + + // Any non-zero positive converts to 1 + i = 42; + b = i; + if (b != 1) return 3; + + // Large positive converts to 1 + i = 1000000; + b = i; + if (b != 1) return 4; + + // Negative converts to 1 (non-zero) + i = -1; + b = i; + if (b != 1) return 5; + + // Large negative converts to 1 + i = -1000000; + b = i; + if (b != 1) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("bool_conv_int", code), 0); +} + +#[test] +fn bool_conversion_from_char() { + let code = r#" +int main(void) { + _Bool b; + char c; + unsigned char uc; + + // Zero char converts to 0 + c = 0; + b = c; + if (b != 0) return 1; + + // Non-zero char converts to 1 + c = 'A'; + b = c; + if (b != 1) return 2; + + // Null character + c = '\0'; + b = c; + if (b != 0) return 3; + + // Unsigned char 255 converts to 1 + uc = 255; + b = uc; + if (b != 1) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("bool_conv_char", code), 0); +} + +#[test] +fn bool_conversion_literal() { + let code = r#" +int main(void) { + _Bool b; + + // Literal 0 + b = 0; + if (b != 0) return 1; + + // Literal 1 + b = 1; + if (b != 1) return 2; + + // Literal non-zero becomes 1 + b = 42; + if (b != 1) return 3; + + // Literal 255 becomes 1 + b = 255; + if (b != 1) return 4; + + // Literal negative becomes 1 + b = -1; + if (b != 1) return 5; + + // Explicit cast of non-zero + b = (_Bool)100; + if (b != 1) return 6; + + // Explicit cast of zero + b = (_Bool)0; + if (b != 0) return 7; + + return 0; +} +"#; + assert_eq!(compile_and_run("bool_conv_lit", code), 0); +} + +// ============================================================================ +// Logical Operators +// ============================================================================ + +#[test] +fn bool_logical_operators() { + let code = r#" +int main(void) { + _Bool a, b, result; + + // Logical NOT on false + a = 0; + result = !a; + if (result != 1) return 1; + + // Logical NOT on true + a = 1; + result = !a; + if (result != 0) return 2; + + // Logical AND: true && true + a = 1; b = 1; + result = a && b; + if (result != 1) return 3; + + // Logical AND: true && false + a = 1; b = 0; + result = a && b; + if (result != 0) return 4; + + // Logical AND: false && true + a = 0; b = 1; + result = a && b; + if (result != 0) return 5; + + // Logical AND: false && false + a = 0; b = 0; + result = a && b; + if (result != 0) return 6; + + // Logical OR: true || true + a = 1; b = 1; + result = a || b; + if (result != 1) return 7; + + // Logical OR: true || false + a = 1; b = 0; + result = a || b; + if (result != 1) return 8; + + // Logical OR: false || true + a = 0; b = 1; + result = a || b; + if (result != 1) return 9; + + // Logical OR: false || false + a = 0; b = 0; + result = a || b; + if (result != 0) return 10; + + return 0; +} +"#; + assert_eq!(compile_and_run("bool_logical", code), 0); +} + +// ============================================================================ +// Comparison Operators +// ============================================================================ + +#[test] +fn bool_comparison_operators() { + let code = r#" +int main(void) { + _Bool a, b; + + // Equal - both true + a = 1; b = 1; + if ((a == b) != 1) return 1; + + // Equal - both false + a = 0; b = 0; + if ((a == b) != 1) return 2; + + // Equal - different + a = 1; b = 0; + if ((a == b) != 0) return 3; + + // Not equal - same + a = 1; b = 1; + if ((a != b) != 0) return 4; + + // Not equal - different + a = 1; b = 0; + if ((a != b) != 1) return 5; + + // Less than: 0 < 1 + a = 0; b = 1; + if ((a < b) != 1) return 6; + + // Less than: 1 < 0 (false) + a = 1; b = 0; + if ((a < b) != 0) return 7; + + // Greater than: 1 > 0 + a = 1; b = 0; + if ((a > b) != 1) return 8; + + // Less than or equal: 0 <= 1 + a = 0; b = 1; + if ((a <= b) != 1) return 9; + + // Less than or equal: 1 <= 1 + a = 1; b = 1; + if ((a <= b) != 1) return 10; + + return 0; +} +"#; + assert_eq!(compile_and_run("bool_compare", code), 0); +} + +// ============================================================================ +// Arithmetic Operators (Limited for _Bool) +// ============================================================================ + +#[test] +fn bool_arithmetic_operators() { + let code = r#" +int main(void) { + _Bool a, b; + int result; + + // _Bool values promote to int in arithmetic + a = 1; b = 1; + result = a + b; + if (result != 2) return 1; + + a = 1; b = 0; + result = a + b; + if (result != 1) return 2; + + a = 1; b = 1; + result = a * b; + if (result != 1) return 3; + + // Storing result back to _Bool (should normalize) + a = 1; b = 1; + a = a + b; // 2 -> 1 + if (a != 1) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("bool_arith", code), 0); +} + +// ============================================================================ +// Bitwise Operators +// ============================================================================ + +#[test] +fn bool_bitwise_operators() { + let code = r#" +int main(void) { + _Bool a, b, result; + + // Bitwise AND + a = 1; b = 1; + result = a & b; + if (result != 1) return 1; + + a = 1; b = 0; + result = a & b; + if (result != 0) return 2; + + // Bitwise OR + a = 1; b = 0; + result = a | b; + if (result != 1) return 3; + + a = 0; b = 0; + result = a | b; + if (result != 0) return 4; + + // Bitwise XOR + a = 1; b = 1; + result = a ^ b; + if (result != 0) return 5; + + a = 1; b = 0; + result = a ^ b; + if (result != 1) return 6; + + // Bitwise NOT (result stored to _Bool normalizes) + a = 0; + result = ~a; // ~0 in int is all 1s, but stored to _Bool becomes 1 + // Note: ~0 is -1 (or large value), which converts to 1 when stored to _Bool + if (result != 1) return 7; + + return 0; +} +"#; + assert_eq!(compile_and_run("bool_bitwise", code), 0); +} + +// ============================================================================ +// Assignment Operators +// ============================================================================ + +#[test] +fn bool_assignment_operators() { + let code = r#" +int main(void) { + _Bool a; + + // Simple assignment + a = 1; + if (a != 1) return 1; + + // Assignment from expression + a = (5 > 3); // true -> 1 + if (a != 1) return 2; + + a = (3 > 5); // false -> 0 + if (a != 0) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("bool_assign", code), 0); +} + +// ============================================================================ +// Increment and Decrement (Special behavior for _Bool) +// ============================================================================ + +#[test] +fn bool_increment_decrement() { + let code = r#" +int main(void) { + _Bool a; + + // Pre-increment on 0 -> 1 + a = 0; + ++a; + if (a != 1) return 1; + + // Pre-increment on 1 -> still 1 (2 normalizes to 1) + a = 1; + ++a; + if (a != 1) return 2; + + // Post-increment on 0 + a = 0; + if (a++ != 0) return 3; // Returns old value 0 + if (a != 1) return 4; // New value is 1 + + // Post-increment on 1 + a = 1; + if (a++ != 1) return 5; // Returns old value 1 + if (a != 1) return 6; // New value is still 1 (2 normalizes) + + return 0; +} +"#; + assert_eq!(compile_and_run("bool_incdec", code), 0); +} + +// ============================================================================ +// _Bool in Control Flow +// ============================================================================ + +#[test] +fn bool_control_flow() { + let code = r#" +int main(void) { + _Bool flag; + int count; + + // if with _Bool + flag = 1; + if (flag) { + count = 1; + } else { + return 1; + } + + flag = 0; + if (flag) { + return 2; + } else { + count = 2; + } + + // while with _Bool + flag = 1; + count = 0; + while (flag) { + count = count + 1; + if (count >= 3) flag = 0; + } + if (count != 3) return 3; + + // for with _Bool condition + count = 0; + flag = 1; + for (; flag; ) { + count = count + 1; + if (count >= 5) flag = 0; + } + if (count != 5) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("bool_flow", code), 0); +} + +// ============================================================================ +// _Bool with Pointers +// ============================================================================ + +#[test] +fn bool_pointers() { + let code = r#" +int main(void) { + _Bool a, b; + _Bool *p; + + a = 1; + p = &a; + + // Read through pointer + if (*p != 1) return 1; + + // Write through pointer + *p = 0; + if (a != 0) return 2; + + // Pointer to different _Bool + b = 1; + p = &b; + if (*p != 1) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("bool_ptr", code), 0); +} + +// ============================================================================ +// _Bool in Arrays +// ============================================================================ + +#[test] +fn bool_arrays() { + let code = r#" +int main(void) { + _Bool arr[4]; + int i; + + // Initialize array + arr[0] = 0; + arr[1] = 1; + arr[2] = 42; // Should normalize to 1 + arr[3] = 0; + + // Verify values + if (arr[0] != 0) return 1; + if (arr[1] != 1) return 2; + if (arr[2] != 1) return 3; // 42 normalized to 1 + if (arr[3] != 0) return 4; + + // Count true values + int count = 0; + for (i = 0; i < 4; i = i + 1) { + if (arr[i]) count = count + 1; + } + if (count != 2) return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("bool_array", code), 0); +} + +// ============================================================================ +// _Bool Type Conversions +// ============================================================================ + +#[test] +fn bool_type_conversions() { + let code = r#" +int main(void) { + _Bool b; + int i; + long l; + char c; + + // _Bool to int + b = 1; + i = b; + if (i != 1) return 1; + + b = 0; + i = b; + if (i != 0) return 2; + + // _Bool to long + b = 1; + l = b; + if (l != 1) return 3; + + // _Bool to char + b = 1; + c = b; + if (c != 1) return 4; + + // int to _Bool (explicit) + i = 100; + b = (_Bool)i; + if (b != 1) return 5; + + // Comparison result (always 0 or 1) + i = 10; + b = (i > 5); // true + if (b != 1) return 6; + + b = (i < 5); // false + if (b != 0) return 7; + + return 0; +} +"#; + assert_eq!(compile_and_run("bool_typeconv", code), 0); +} + +// ============================================================================ +// _Bool: Structs, Functions, and Initializers +// ============================================================================ + +#[test] +fn bool_structs_functions_initializers() { + let code = r#" +// Struct with _Bool member +struct bool_container { + _Bool flag; + _Bool active; +}; + +// Function taking _Bool parameter and returning _Bool +_Bool negate_bool(_Bool b) { + return !b; +} + +// Function taking struct parameter +int count_true(struct bool_container c) { + int count = 0; + if (c.flag) count = count + 1; + if (c.active) count = count + 1; + return count; +} + +// Function returning struct +struct bool_container make_container(_Bool a, _Bool b) { + struct bool_container c; + c.flag = a; + c.active = b; + return c; +} + +int main(void) { + // Variable initializers + _Bool t = 1; + if (t != 1) return 1; + + _Bool f = 0; + if (f != 0) return 2; + + // Initializer with non-zero value (should normalize to 1) + _Bool norm = 42; + if (norm != 1) return 3; + + // Struct member access + struct bool_container bc; + bc.flag = 1; + bc.active = 0; + if (bc.flag != 1) return 4; + if (bc.active != 0) return 5; + + // _Bool normalization in struct member + bc.flag = 100; // Should normalize to 1 + if (bc.flag != 1) return 6; + + // Function with _Bool param/return + if (negate_bool(1) != 0) return 7; + if (negate_bool(0) != 1) return 8; + + // Struct as function parameter + bc.flag = 1; + bc.active = 1; + if (count_true(bc) != 2) return 9; + + bc.flag = 1; + bc.active = 0; + if (count_true(bc) != 1) return 10; + + // Struct as function return + struct bool_container bc2; + bc2 = make_container(1, 0); + if (bc2.flag != 1) return 11; + if (bc2.active != 0) return 12; + + // Pointer to _Bool in struct + struct bool_container *p; + p = &bc; + if (p->flag != 1) return 13; + p->flag = 0; + if (bc.flag != 0) return 14; + + return 0; +} +"#; + assert_eq!(compile_and_run("bool_advanced", code), 0); +} diff --git a/cc/tests/datatypes/char.rs b/cc/tests/datatypes/char.rs new file mode 100644 index 000000000..a609989cc --- /dev/null +++ b/cc/tests/datatypes/char.rs @@ -0,0 +1,992 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for `char`, `signed char`, and `unsigned char` data types +// +// C99 Note: These three types are distinct. Plain `char` has implementation- +// defined signedness: +// - x86-64: char is signed (range -128 to 127) +// - ARM/aarch64: char is unsigned (range 0 to 255) +// +// Character literals (e.g., 'A') have type `int` in C, not `char`. +// + +use crate::common::compile_and_run; + +// ############################################################################ +// CHAR TESTS (platform-dependent signedness, signed on x86-64) +// ############################################################################ + +// ============================================================================ +// Char: Basic Operations and Character Literals +// ============================================================================ + +#[test] +fn char_basic_and_literals() { + let code = r#" +int main(void) { + char a, b; + + // Basic assignment + a = 'A'; + if (a != 65) return 1; + + // Character literal comparison + a = 'Z'; + if (a != 'Z') return 2; + + // Lowercase letter + a = 'a'; + if (a != 97) return 3; + + // Digit character + a = '0'; + if (a != 48) return 4; + + // Space character + a = ' '; + if (a != 32) return 5; + + // Arithmetic with chars + a = 'A'; + b = a + 1; + if (b != 'B') return 6; + + // Character difference + a = 'Z'; + b = 'A'; + if (a - b != 25) return 7; + + return 0; +} +"#; + assert_eq!(compile_and_run("char_basic", code), 0); +} + +// ============================================================================ +// Char: Escape Sequences +// ============================================================================ + +#[test] +fn char_escape_sequences() { + let code = r#" +int main(void) { + char c; + + // Null character + c = '\0'; + if (c != 0) return 1; + + // Newline + c = '\n'; + if (c != 10) return 2; + + // Tab + c = '\t'; + if (c != 9) return 3; + + // Carriage return + c = '\r'; + if (c != 13) return 4; + + // Backslash + c = '\\'; + if (c != 92) return 5; + + // Single quote + c = '\''; + if (c != 39) return 6; + + // Double quote + c = '"'; + if (c != 34) return 7; + + // Bell (alert) + c = '\a'; + if (c != 7) return 8; + + // Backspace + c = '\b'; + if (c != 8) return 9; + + // Form feed + c = '\f'; + if (c != 12) return 10; + + // Vertical tab + c = '\v'; + if (c != 11) return 11; + + return 0; +} +"#; + assert_eq!(compile_and_run("char_escape", code), 0); +} + +// ============================================================================ +// Char: Hex and Octal Escapes +// ============================================================================ + +#[test] +fn char_hex_octal_escapes() { + let code = r#" +int main(void) { + char c; + + // Hex escape for 'A' (0x41 = 65) + c = '\x41'; + if (c != 'A') return 1; + + // Hex escape for newline (0x0A = 10) + c = '\x0a'; + if (c != '\n') return 2; + + // Octal escape for 'A' (0101 = 65) + c = '\101'; + if (c != 'A') return 3; + + // Octal escape for null (0 = 0) + c = '\0'; + if (c != 0) return 4; + + // Hex escape for space (0x20 = 32) + c = '\x20'; + if (c != ' ') return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("char_hex_oct", code), 0); +} + +// ============================================================================ +// Char: Arithmetic Operators +// ============================================================================ + +#[test] +fn char_arithmetic_operators() { + let code = r#" +int main(void) { + char a, b, result; + + // Addition + a = 30; b = 12; + result = a + b; + if (result != 42) return 1; + + // Subtraction + a = 100; b = 58; + result = a - b; + if (result != 42) return 2; + + // Multiplication + a = 6; b = 7; + result = a * b; + if (result != 42) return 3; + + // Division + a = 84; b = 2; + result = a / b; + if (result != 42) return 4; + + // Modulo + a = 47; b = 10; + result = a % b; + if (result != 7) return 5; + + // Unary negation (char is signed on x86-64) + a = 42; + result = -a; + if (result != -42) return 6; + + // Unary plus + a = 42; + result = +a; + if (result != 42) return 7; + + return 0; +} +"#; + assert_eq!(compile_and_run("char_arith", code), 0); +} + +// ============================================================================ +// Char: Comparison Operators +// ============================================================================ + +#[test] +fn char_comparison_operators() { + let code = r#" +int main(void) { + char a, b; + + // Equal - true + a = 'X'; b = 'X'; + if ((a == b) != 1) return 1; + + // Equal - false + a = 'X'; b = 'Y'; + if ((a == b) != 0) return 2; + + // Not equal - true + a = 'A'; b = 'B'; + if ((a != b) != 1) return 3; + + // Not equal - false + a = 'Z'; b = 'Z'; + if ((a != b) != 0) return 4; + + // Less than - true + a = 'A'; b = 'Z'; + if ((a < b) != 1) return 5; + + // Less than - false + a = 'Z'; b = 'A'; + if ((a < b) != 0) return 6; + + // Less or equal - true (less) + a = 'A'; b = 'B'; + if ((a <= b) != 1) return 7; + + // Less or equal - true (equal) + a = 'M'; b = 'M'; + if ((a <= b) != 1) return 8; + + // Greater than - true + a = 'Z'; b = 'A'; + if ((a > b) != 1) return 9; + + // Greater than - false + a = 'A'; b = 'Z'; + if ((a > b) != 0) return 10; + + // Greater or equal - true + a = 'Z'; b = 'A'; + if ((a >= b) != 1) return 11; + + // Greater or equal - true (equal) + a = 'K'; b = 'K'; + if ((a >= b) != 1) return 12; + + return 0; +} +"#; + assert_eq!(compile_and_run("char_cmp", code), 0); +} + +// ============================================================================ +// Char: Bitwise Operators +// ============================================================================ + +#[test] +fn char_bitwise_operators() { + let code = r#" +int main(void) { + char a, b; + + // Bitwise AND + a = 0x3F; b = 0x0F; + if ((a & b) != 0x0F) return 1; + + // Bitwise OR + a = 0x30; b = 0x0F; + if ((a | b) != 0x3F) return 2; + + // Bitwise XOR + a = 0x3F; b = 0x30; + if ((a ^ b) != 0x0F) return 3; + + // Left shift + a = 1; + if ((a << 4) != 16) return 4; + + // Right shift + a = 64; + if ((a >> 2) != 16) return 5; + + // Case conversion: uppercase to lowercase (set bit 5) + a = 'A'; + if ((a | 0x20) != 'a') return 6; + + // Case conversion: lowercase to uppercase (clear bit 5) + a = 'a'; + if ((a & ~0x20) != 'A') return 7; + + return 0; +} +"#; + assert_eq!(compile_and_run("char_bitwise", code), 0); +} + +// ============================================================================ +// Char: Assignment Operators +// ============================================================================ + +#[test] +fn char_assignment_operators() { + let code = r#" +int main(void) { + char a; + + // Simple assignment + a = 42; + if (a != 42) return 1; + + // Add assign + a = 40; + a += 2; + if (a != 42) return 2; + + // Sub assign + a = 50; + a -= 8; + if (a != 42) return 3; + + // Mul assign + a = 21; + a *= 2; + if (a != 42) return 4; + + // Div assign + a = 84; + a /= 2; + if (a != 42) return 5; + + // Mod assign + a = 50; + a %= 8; + if (a != 2) return 6; + + // And assign + a = 0x7F; + a &= 0x0F; + if (a != 15) return 7; + + // Or assign + a = 0x30; + a |= 0x0F; + if (a != 0x3F) return 8; + + // Xor assign + a = 0x3F; + a ^= 0x30; + if (a != 0x0F) return 9; + + return 0; +} +"#; + assert_eq!(compile_and_run("char_assign", code), 0); +} + +// ============================================================================ +// Char: Increment/Decrement Operators +// ============================================================================ + +#[test] +fn char_increment_decrement_operators() { + let code = r#" +int main(void) { + char a, b; + + // Pre-increment + a = 'A'; + b = ++a; + if (b != 'B') return 1; + + // Post-increment (returns original) + a = 'M'; + b = a++; + if (b != 'M') return 2; + if (a != 'N') return 3; + + // Pre-decrement + a = 'Z'; + b = --a; + if (b != 'Y') return 4; + + // Post-decrement (returns original) + a = 'D'; + b = a--; + if (b != 'D') return 5; + if (a != 'C') return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("char_incdec", code), 0); +} + +// ============================================================================ +// Char: Signed Behavior (x86-64 specific - char is signed) +// ============================================================================ + +#[test] +fn char_signed_behavior() { + let code = r#" +int main(void) { + char a; + + // Negative value (valid for signed char on x86-64) + a = -1; + if (a != -1) return 1; + + // Min value for signed char + a = -128; + if (a != -128) return 2; + + // Max value for signed char + a = 127; + if (a != 127) return 3; + + // Negative comparison + a = -10; + if (a >= 0) return 4; // Should be false (negative) + + // Signed comparison + a = -1; + char b = 1; + if (a > b) return 5; // -1 > 1 should be false for signed + + return 0; +} +"#; + assert_eq!(compile_and_run("char_signed", code), 0); +} + +// ############################################################################ +// SIGNED CHAR TESTS (explicitly signed, always -128 to 127) +// ############################################################################ + +// ============================================================================ +// Signed Char: Arithmetic Operators +// ============================================================================ + +#[test] +fn schar_arithmetic_operators() { + let code = r#" +int main(void) { + signed char a, b, result; + + // Addition + a = 30; b = 12; + result = a + b; + if (result != 42) return 1; + + // Subtraction + a = 100; b = 58; + result = a - b; + if (result != 42) return 2; + + // Multiplication + a = 6; b = 7; + result = a * b; + if (result != 42) return 3; + + // Division + a = 84; b = 2; + result = a / b; + if (result != 42) return 4; + + // Modulo + a = 47; b = 10; + result = a % b; + if (result != 7) return 5; + + // Negative values + a = -42; + if (-a != 42) return 6; + + // Negative arithmetic + a = -20; b = -22; + if (a + b != -42) return 7; + + return 0; +} +"#; + assert_eq!(compile_and_run("schar_arith", code), 0); +} + +// ============================================================================ +// Signed Char: Comparison Operators +// ============================================================================ + +#[test] +fn schar_comparison_operators() { + let code = r#" +int main(void) { + signed char a, b; + + // Negative less than positive + a = -10; b = 10; + if ((a < b) != 1) return 1; + + // Negative comparison + a = -20; b = -10; + if ((a < b) != 1) return 2; // -20 < -10 + + // Equal negative + a = -42; b = -42; + if ((a == b) != 1) return 3; + + // Range boundaries + a = -128; b = 127; + if ((a < b) != 1) return 4; + + // Min value comparison + a = -128; + if (a >= 0) return 5; // Should be false + + return 0; +} +"#; + assert_eq!(compile_and_run("schar_cmp", code), 0); +} + +// ============================================================================ +// Signed Char: Assignment Operators +// ============================================================================ + +#[test] +fn schar_assignment_operators() { + let code = r#" +int main(void) { + signed char a; + + // Simple assignment + a = -42; + if (a != -42) return 1; + + // Add assign + a = -50; + a += 8; + if (a != -42) return 2; + + // Sub assign + a = -40; + a -= 2; + if (a != -42) return 3; + + // Mul assign with sign + a = -21; + a *= 2; + if (a != -42) return 4; + + // Div assign with sign + a = -84; + a /= 2; + if (a != -42) return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("schar_assign", code), 0); +} + +// ############################################################################ +// UNSIGNED CHAR TESTS (always 0 to 255) +// ############################################################################ + +// ============================================================================ +// Unsigned Char: Arithmetic Operators +// ============================================================================ + +#[test] +fn uchar_arithmetic_operators() { + let code = r#" +int main(void) { + unsigned char a, b, result; + + // Addition + a = 30; b = 12; + result = a + b; + if (result != 42) return 1; + + // Subtraction + a = 100; b = 58; + result = a - b; + if (result != 42) return 2; + + // Multiplication + a = 6; b = 7; + result = a * b; + if (result != 42) return 3; + + // Division + a = 84; b = 2; + result = a / b; + if (result != 42) return 4; + + // Modulo + a = 47; b = 10; + result = a % b; + if (result != 7) return 5; + + // Max value + a = 255; + if (a != 255) return 6; + + // Large values + a = 200; b = 50; + result = a - b; + if (result != 150) return 7; + + return 0; +} +"#; + assert_eq!(compile_and_run("uchar_arith", code), 0); +} + +// ============================================================================ +// Unsigned Char: Comparison Operators +// ============================================================================ + +#[test] +fn uchar_comparison_operators() { + let code = r#" +int main(void) { + unsigned char a, b; + + // Basic comparison + a = 200; b = 100; + if ((a > b) != 1) return 1; + + // Equal + a = 255; b = 255; + if ((a == b) != 1) return 2; + + // Max value comparison + a = 255; b = 0; + if ((a > b) != 1) return 3; + + // Zero is minimum + a = 0; b = 1; + if ((a < b) != 1) return 4; + + // Unsigned comparison (no negative) + a = 255; + if (a < 0) return 5; // Should always be false for unsigned + + return 0; +} +"#; + assert_eq!(compile_and_run("uchar_cmp", code), 0); +} + +// ============================================================================ +// Unsigned Char: Bitwise Operators +// ============================================================================ + +#[test] +fn uchar_bitwise_operators() { + let code = r#" +int main(void) { + unsigned char a, b; + + // Bitwise AND + a = 0xFF; b = 0x0F; + if ((a & b) != 0x0F) return 1; + + // Bitwise OR + a = 0xF0; b = 0x0F; + if ((a | b) != 0xFF) return 2; + + // Bitwise XOR + a = 0xFF; b = 0xF0; + if ((a ^ b) != 0x0F) return 3; + + // Bitwise NOT (result masked to 8 bits when stored) + a = 0; + b = ~a; + if (b != 0xFF) return 4; + + // Left shift + a = 1; + if ((a << 4) != 16) return 5; + + // Right shift (logical for unsigned) + a = 128; + if ((a >> 1) != 64) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("uchar_bitwise", code), 0); +} + +// ============================================================================ +// Unsigned Char: Assignment Operators +// ============================================================================ + +#[test] +fn uchar_assignment_operators() { + let code = r#" +int main(void) { + unsigned char a; + + // Simple assignment + a = 200; + if (a != 200) return 1; + + // Add assign + a = 40; + a += 2; + if (a != 42) return 2; + + // Sub assign + a = 50; + a -= 8; + if (a != 42) return 3; + + // Mul assign + a = 21; + a *= 2; + if (a != 42) return 4; + + // Div assign + a = 84; + a /= 2; + if (a != 42) return 5; + + // And assign + a = 0xFF; + a &= 0x0F; + if (a != 15) return 6; + + // Or assign + a = 0xF0; + a |= 0x0F; + if (a != 255) return 7; + + return 0; +} +"#; + assert_eq!(compile_and_run("uchar_assign", code), 0); +} + +// ============================================================================ +// Unsigned Char: Increment/Decrement Operators +// ============================================================================ + +#[test] +fn uchar_increment_decrement_operators() { + let code = r#" +int main(void) { + unsigned char a, b; + + // Pre-increment + a = 41; + if (++a != 42) return 1; + + // Post-increment + a = 42; + b = a++; + if (b != 42) return 2; + if (a != 43) return 3; + + // Pre-decrement + a = 43; + if (--a != 42) return 4; + + // Post-decrement + a = 42; + b = a--; + if (b != 42) return 5; + if (a != 41) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("uchar_incdec", code), 0); +} + +// ============================================================================ +// Mixed Char Types and Conversions +// ============================================================================ + +#[test] +fn char_type_conversions() { + let code = r#" +int main(void) { + char c; + signed char sc; + unsigned char uc; + int i; + + // Char to int + c = 'A'; + i = c; + if (i != 65) return 1; + + // Int to char + i = 66; + c = i; + if (c != 'B') return 2; + + // Signed char to int (sign extension) + sc = -1; + i = sc; + if (i != -1) return 3; + + // Unsigned char to int (zero extension) + uc = 255; + i = uc; + if (i != 255) return 4; + + // Cross-type assignment + uc = 200; + sc = uc; // Value > 127 becomes negative when viewed as signed + // Just check it compiles and doesn't crash + + return 0; +} +"#; + assert_eq!(compile_and_run("char_conv", code), 0); +} + +// ============================================================================ +// Character Classification (using comparison) +// ============================================================================ + +#[test] +fn char_classification() { + let code = r#" +int main(void) { + char c; + + // Is uppercase? + c = 'G'; + if (!(c >= 'A' && c <= 'Z')) return 1; + + // Is lowercase? + c = 'g'; + if (!(c >= 'a' && c <= 'z')) return 2; + + // Is digit? + c = '5'; + if (!(c >= '0' && c <= '9')) return 3; + + // Is alphabetic? + c = 'M'; + if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) return 4; + + // Is alphanumeric? + c = '7'; + if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'))) return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("char_class", code), 0); +} + +// ============================================================================ +// Char Types: Structs, Functions, and Initializers +// ============================================================================ + +#[test] +fn char_structs_functions_initializers() { + let code = r#" +// Struct with char members +struct char_container { + char c; + signed char sc; + unsigned char uc; +}; + +// Function taking char parameter and returning char +char next_char(char c) { + return c + 1; +} + +// Function taking signed char parameter +signed char negate_schar(signed char sc) { + return -sc; +} + +// Function taking unsigned char parameter +unsigned char double_uchar(unsigned char uc) { + return uc * 2; +} + +// Function taking struct parameter +int sum_chars(struct char_container c) { + return c.c + c.sc + c.uc; +} + +// Function returning struct +struct char_container make_container(char a, signed char b, unsigned char c) { + struct char_container container; + container.c = a; + container.sc = b; + container.uc = c; + return container; +} + +int main(void) { + // Variable initializers + char c = 'A'; + if (c != 'A') return 1; + + signed char sc = -10; + if (sc != -10) return 2; + + unsigned char uc = 200; + if (uc != 200) return 3; + + // Multiple initializers + char x = 'X', y = 'Y', z = 'Z'; + if (x != 'X') return 4; + if (y != 'Y') return 5; + if (z != 'Z') return 6; + + // Struct member access + struct char_container cc; + cc.c = 'M'; + cc.sc = -50; + cc.uc = 255; + if (cc.c != 'M') return 7; + if (cc.sc != -50) return 8; + if (cc.uc != 255) return 9; + + // Function with char param/return + if (next_char('A') != 'B') return 10; + if (next_char('Z') != '[') return 11; + + // Function with signed char param/return + if (negate_schar(10) != -10) return 12; + if (negate_schar(-42) != 42) return 13; + + // Function with unsigned char param/return + if (double_uchar(50) != 100) return 14; + if (double_uchar(100) != 200) return 15; + + // Struct as function parameter + cc.c = 10; + cc.sc = 20; + cc.uc = 30; + if (sum_chars(cc) != 60) return 16; + + // Struct as function return + struct char_container cc2; + cc2 = make_container('Q', -5, 100); + if (cc2.c != 'Q') return 17; + if (cc2.sc != -5) return 18; + if (cc2.uc != 100) return 19; + + // Pointer to char in struct + struct char_container *p; + p = &cc; + if (p->c != 10) return 20; + p->c = 'P'; + if (cc.c != 'P') return 21; + + return 0; +} +"#; + assert_eq!(compile_and_run("char_advanced", code), 0); +} diff --git a/cc/tests/datatypes/float.rs b/cc/tests/datatypes/float.rs new file mode 100644 index 000000000..4ef3de52b --- /dev/null +++ b/cc/tests/datatypes/float.rs @@ -0,0 +1,660 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for `float` and `double` data types with all applicable operators +// Tests are aggregated by category to reduce compile/link overhead +// + +use crate::common::compile_and_run; + +// ============================================================================ +// Float Arithmetic Operators (add, sub, mul, div, unary neg, unary plus) +// ============================================================================ + +#[test] +fn float_arithmetic_operators() { + let code = r#" +int main(void) { + float a, b, result; + + // Addition + a = 30.5f; b = 11.5f; + result = a + b; + if (result < 41.9f || result > 42.1f) return 1; + + // Subtraction + a = 100.0f; b = 58.0f; + result = a - b; + if (result < 41.9f || result > 42.1f) return 2; + + // Multiplication + a = 6.0f; b = 7.0f; + result = a * b; + if (result < 41.9f || result > 42.1f) return 3; + + // Division + a = 84.0f; b = 2.0f; + result = a / b; + if (result < 41.9f || result > 42.1f) return 4; + + // Unary negation + a = -42.0f; + result = -a; + if (result < 41.9f || result > 42.1f) return 5; + + // Unary plus + a = 42.0f; + result = +a; + if (result < 41.9f || result > 42.1f) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("float_arith", code), 0); +} + +// ============================================================================ +// Float Comparison Operators (==, !=, <, <=, >, >=) +// ============================================================================ + +#[test] +fn float_comparison_operators() { + let code = r#" +int main(void) { + float a, b; + + // Less than - true + a = 10.0f; b = 20.0f; + if ((a < b) != 1) return 1; + + // Less than - false + a = 20.0f; b = 10.0f; + if ((a < b) != 0) return 2; + + // Less or equal - true (less) + a = 10.0f; b = 20.0f; + if ((a <= b) != 1) return 3; + + // Less or equal - true (equal) + a = 20.0f; b = 20.0f; + if ((a <= b) != 1) return 4; + + // Less or equal - false + a = 30.0f; b = 20.0f; + if ((a <= b) != 0) return 5; + + // Greater than - true + a = 20.0f; b = 10.0f; + if ((a > b) != 1) return 6; + + // Greater than - false + a = 10.0f; b = 20.0f; + if ((a > b) != 0) return 7; + + // Greater or equal - true (greater) + a = 30.0f; b = 20.0f; + if ((a >= b) != 1) return 8; + + // Greater or equal - true (equal) + a = 20.0f; b = 20.0f; + if ((a >= b) != 1) return 9; + + // Greater or equal - false + a = 10.0f; b = 20.0f; + if ((a >= b) != 0) return 10; + + return 0; +} +"#; + assert_eq!(compile_and_run("float_cmp", code), 0); +} + +// ============================================================================ +// Float Assignment Operators (=, +=, -=, *=, /=) +// ============================================================================ + +#[test] +fn float_assignment_operators() { + let code = r#" +int main(void) { + float a; + + // Simple assignment + a = 42.0f; + if (a < 41.9f || a > 42.1f) return 1; + + // Add assign + a = 40.0f; + a += 2.0f; + if (a < 41.9f || a > 42.1f) return 2; + + // Sub assign + a = 50.0f; + a -= 8.0f; + if (a < 41.9f || a > 42.1f) return 3; + + // Mul assign + a = 21.0f; + a *= 2.0f; + if (a < 41.9f || a > 42.1f) return 4; + + // Div assign + a = 84.0f; + a /= 2.0f; + if (a < 41.9f || a > 42.1f) return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("float_assign", code), 0); +} + +// ============================================================================ +// Float Increment/Decrement Operators (++a, a++, --a, a--) +// ============================================================================ + +#[test] +fn float_increment_decrement_operators() { + let code = r#" +int main(void) { + float a, b; + + // Pre-increment + a = 41.0f; + b = ++a; + if (b < 41.9f || b > 42.1f) return 1; + + // Post-increment (returns original) + a = 42.0f; + b = a++; + if (b < 41.9f || b > 42.1f) return 2; + + // Post-increment (side effect) + a = 41.0f; + a++; + if (a < 41.9f || a > 42.1f) return 3; + + // Pre-decrement + a = 43.0f; + b = --a; + if (b < 41.9f || b > 42.1f) return 4; + + // Post-decrement (returns original) + a = 42.0f; + b = a--; + if (b < 41.9f || b > 42.1f) return 5; + + // Post-decrement (side effect) + a = 43.0f; + a--; + if (a < 41.9f || a > 42.1f) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("float_incdec", code), 0); +} + +// ============================================================================ +// Float-Int Conversions +// ============================================================================ + +#[test] +fn float_int_conversions() { + let code = r#" +int main(void) { + float f; + int i; + + // Int to float conversion + i = 42; + f = i; + if (f < 41.9f || f > 42.1f) return 1; + + // Float to int conversion (truncation) + f = 42.7f; + i = (int)f; + if (i != 42) return 2; + + // Float to int conversion (negative) + f = -42.7f; + i = (int)f; + if (i != -42) return 3; + + // Mixed arithmetic: float + int + f = 40.0f; + i = 2; + f = f + i; + if (f < 41.9f || f > 42.1f) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("float_int_conv", code), 0); +} + +// ############################################################################ +// DOUBLE TESTS +// ############################################################################ + +// ============================================================================ +// Double Arithmetic Operators (add, sub, mul, div, unary neg, unary plus) +// ============================================================================ + +#[test] +fn double_arithmetic_operators() { + let code = r#" +int main(void) { + double a, b, result; + + // Addition + a = 30.5; b = 11.5; + result = a + b; + if (result < 41.9 || result > 42.1) return 1; + + // Subtraction + a = 100.0; b = 58.0; + result = a - b; + if (result < 41.9 || result > 42.1) return 2; + + // Multiplication + a = 6.0; b = 7.0; + result = a * b; + if (result < 41.9 || result > 42.1) return 3; + + // Division + a = 84.0; b = 2.0; + result = a / b; + if (result < 41.9 || result > 42.1) return 4; + + // Unary negation + a = -42.0; + result = -a; + if (result < 41.9 || result > 42.1) return 5; + + // Unary plus + a = 42.0; + result = +a; + if (result < 41.9 || result > 42.1) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("double_arith", code), 0); +} + +// ============================================================================ +// Double Comparison Operators (==, !=, <, <=, >, >=) +// ============================================================================ + +#[test] +fn double_comparison_operators() { + let code = r#" +int main(void) { + double a, b; + + // Less than - true + a = 10.0; b = 20.0; + if ((a < b) != 1) return 1; + + // Less than - false + a = 20.0; b = 10.0; + if ((a < b) != 0) return 2; + + // Less or equal - true (less) + a = 10.0; b = 20.0; + if ((a <= b) != 1) return 3; + + // Less or equal - true (equal) + a = 20.0; b = 20.0; + if ((a <= b) != 1) return 4; + + // Less or equal - false + a = 30.0; b = 20.0; + if ((a <= b) != 0) return 5; + + // Greater than - true + a = 20.0; b = 10.0; + if ((a > b) != 1) return 6; + + // Greater than - false + a = 10.0; b = 20.0; + if ((a > b) != 0) return 7; + + // Greater or equal - true (greater) + a = 30.0; b = 20.0; + if ((a >= b) != 1) return 8; + + // Greater or equal - true (equal) + a = 20.0; b = 20.0; + if ((a >= b) != 1) return 9; + + // Greater or equal - false + a = 10.0; b = 20.0; + if ((a >= b) != 0) return 10; + + return 0; +} +"#; + assert_eq!(compile_and_run("double_cmp", code), 0); +} + +// ============================================================================ +// Double Assignment Operators (=, +=, -=, *=, /=) +// ============================================================================ + +#[test] +fn double_assignment_operators() { + let code = r#" +int main(void) { + double a; + + // Simple assignment + a = 42.0; + if (a < 41.9 || a > 42.1) return 1; + + // Add assign + a = 40.0; + a += 2.0; + if (a < 41.9 || a > 42.1) return 2; + + // Sub assign + a = 50.0; + a -= 8.0; + if (a < 41.9 || a > 42.1) return 3; + + // Mul assign + a = 21.0; + a *= 2.0; + if (a < 41.9 || a > 42.1) return 4; + + // Div assign + a = 84.0; + a /= 2.0; + if (a < 41.9 || a > 42.1) return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("double_assign", code), 0); +} + +// ============================================================================ +// Double-Int Conversions +// ============================================================================ + +#[test] +fn double_int_conversions() { + let code = r#" +int main(void) { + double d; + int i; + + // Int to double conversion + i = 42; + d = i; + if (d < 41.9 || d > 42.1) return 1; + + // Double to int conversion (truncation) + d = 42.7; + i = (int)d; + if (i != 42) return 2; + + // Double to int conversion (negative) + d = -42.7; + i = (int)d; + if (i != -42) return 3; + + // Mixed arithmetic: double + int + d = 40.0; + i = 2; + d = d + i; + if (d < 41.9 || d > 42.1) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("double_int_conv", code), 0); +} + +// ============================================================================ +// Float-Double Conversions +// ============================================================================ + +#[test] +fn float_double_conversions() { + let code = r#" +int main(void) { + float f; + double d; + + // Float to double (implicit promotion) + f = 42.0f; + d = f; + if (d < 41.9 || d > 42.1) return 1; + + // Double to float (explicit conversion) + d = 42.0; + f = (float)d; + if (f < 41.9f || f > 42.1f) return 2; + + // Mixed arithmetic: float + double promotes to double + f = 20.0f; + d = 22.0; + d = f + d; + if (d < 41.9 || d > 42.1) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("float_double_conv", code), 0); +} + +// ============================================================================ +// Float Literals +// ============================================================================ + +#[test] +fn float_literals() { + let code = r#" +int main(void) { + float f; + double d; + + // Float literal with f suffix + f = 42.0f; + if (f < 41.9f || f > 42.1f) return 1; + + // Float literal with F suffix + f = 42.0F; + if (f < 41.9f || f > 42.1f) return 2; + + // Double literal (no suffix) + d = 42.0; + if (d < 41.9 || d > 42.1) return 3; + + // Exponential notation + d = 4.2e1; + if (d < 41.9 || d > 42.1) return 4; + + // Negative exponent + d = 4200.0e-2; + if (d < 41.9 || d > 42.1) return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("float_literals", code), 0); +} + +// ============================================================================ +// Complex Float Expressions +// ============================================================================ + +#[test] +fn float_complex_expressions() { + let code = r#" +int main(void) { + float a, b, c, result; + + // Mixed arithmetic with precedence + a = 10.0f; b = 5.0f; c = 2.0f; + result = a + b * c - 3.0f; // 10 + 10 - 3 = 17 + if (result < 16.9f || result > 17.1f) return 1; + + // Chained comparison + a = 5.0f; b = 10.0f; c = 15.0f; + if (((a < b) && (b < c)) != 1) return 2; + + // Division precision test + a = 1.0f; b = 3.0f; + result = a / b; // Should be approximately 0.333... + if (result < 0.3f || result > 0.4f) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("float_complex", code), 0); +} + +// ============================================================================ +// Float/Double: Structs, Functions, and Initializers +// ============================================================================ + +#[test] +fn float_structs_functions_initializers() { + let code = r#" +struct float_container { + float value; + float second; +}; + +struct double_container { + double value; + double second; +}; + +float double_float(float x) { + return x * 2.0f; +} + +double double_double(double x) { + return x * 2.0; +} + +float sum_float_container(struct float_container c) { + return c.value + c.second; +} + +double sum_double_container(struct double_container c) { + return c.value + c.second; +} + +struct float_container make_float_container(float a, float b) { + struct float_container c; + c.value = a; + c.second = b; + return c; +} + +struct double_container make_double_container(double a, double b) { + struct double_container c; + c.value = a; + c.second = b; + return c; +} + +int main(void) { + // Variable initializers + float f = 42.0f; + if (f < 41.9f || f > 42.1f) return 1; + + double d = 42.0; + if (d < 41.9 || d > 42.1) return 2; + + // Multiple variable declarations with initializers + float a = 10.0f, b = 20.0f, c = 30.0f; + if (a + b + c < 59.9f || a + b + c > 60.1f) return 3; + + // Struct member access - float + struct float_container fc; + fc.value = 100.5f; + fc.second = 50.25f; + if (fc.value < 100.4f || fc.value > 100.6f) return 4; + if (fc.second < 50.2f || fc.second > 50.3f) return 5; + + // Struct member access - double + struct double_container dc; + dc.value = 1000.5; + dc.second = 500.25; + if (dc.value < 1000.4 || dc.value > 1000.6) return 6; + if (dc.second < 500.2 || dc.second > 500.3) return 7; + + // Function with float parameter and return + float fdoubled = double_float(21.0f); + if (fdoubled < 41.9f || fdoubled > 42.1f) return 8; + + // Function with double parameter and return + double ddoubled = double_double(21.0); + if (ddoubled < 41.9 || ddoubled > 42.1) return 9; + + // Negative float + float fneg = -100.0f; + float fneg_doubled = double_float(fneg); + if (fneg_doubled < -200.1f || fneg_doubled > -199.9f) return 10; + + // Negative double + double dneg = -100.0; + double dneg_doubled = double_double(dneg); + if (dneg_doubled < -200.1 || dneg_doubled > -199.9) return 11; + + // Struct as function parameter - float + struct float_container fparam; + fparam.value = 100.0f; + fparam.second = 50.0f; + float fsum = sum_float_container(fparam); + if (fsum < 149.9f || fsum > 150.1f) return 12; + + // Struct as function parameter - double + struct double_container dparam; + dparam.value = 100.0; + dparam.second = 50.0; + double dsum = sum_double_container(dparam); + if (dsum < 149.9 || dsum > 150.1) return 13; + + // Struct as function return value - float + struct float_container freturned = make_float_container(300.0f, 200.0f); + if (freturned.value < 299.9f || freturned.value > 300.1f) return 14; + if (freturned.second < 199.9f || freturned.second > 200.1f) return 15; + + // Struct as function return value - double + struct double_container dreturned = make_double_container(300.0, 200.0); + if (dreturned.value < 299.9 || dreturned.value > 300.1) return 16; + if (dreturned.second < 199.9 || dreturned.second > 200.1) return 17; + + // Pointer to struct - float + struct float_container ftarget; + struct float_container *fptr = &ftarget; + fptr->value = 2000.0f; + fptr->second = 1000.0f; + if (ftarget.value < 1999.9f || ftarget.value > 2000.1f) return 18; + if (ftarget.second < 999.9f || ftarget.second > 1000.1f) return 19; + + // Pointer to struct - double + struct double_container dtarget; + struct double_container *dptr = &dtarget; + dptr->value = 2000.0; + dptr->second = 1000.0; + if (dtarget.value < 1999.9 || dtarget.value > 2000.1) return 20; + if (dtarget.second < 999.9 || dtarget.second > 1000.1) return 21; + + return 0; +} +"#; + assert_eq!(compile_and_run("float_advanced", code), 0); +} diff --git a/cc/tests/datatypes/int.rs b/cc/tests/datatypes/int.rs new file mode 100644 index 000000000..9b3b913f1 --- /dev/null +++ b/cc/tests/datatypes/int.rs @@ -0,0 +1,878 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for `int` and `unsigned int` data types with all applicable operators +// Tests are aggregated by category to reduce compile/link overhead +// + +use crate::common::compile_and_run; + +// ============================================================================ +// Arithmetic Operators (add, sub, mul, div, mod, unary neg, unary plus) +// ============================================================================ + +#[test] +fn int_arithmetic_operators() { + let code = r#" +int main(void) { + int a, b, result; + + // Addition + a = 30; b = 12; + if (a + b != 42) return 1; + + // Subtraction + a = 100; b = 58; + if (a - b != 42) return 2; + + // Multiplication + a = 6; b = 7; + if (a * b != 42) return 3; + + // Division + a = 84; b = 2; + if (a / b != 42) return 4; + + // Modulo + a = 47; b = 10; + if (a % b != 7) return 5; + + // Unary negation + a = -42; + if (-a != 42) return 6; + + // Unary plus + a = 42; + if (+a != 42) return 7; + + // Literal without suffix (default int) + if (100 + 23 != 123) return 8; + + // Hex literal + if (0x10 + 0x20 != 0x30) return 9; + + // Octal literal + if (010 + 010 != 020) return 10; + + return 0; +} +"#; + assert_eq!(compile_and_run("int_arith", code), 0); +} + +// ============================================================================ +// Comparison Operators (==, !=, <, <=, >, >=) +// ============================================================================ + +#[test] +fn int_comparison_operators() { + let code = r#" +int main(void) { + int a, b; + + // Equal - true + a = 42; b = 42; + if ((a == b) != 1) return 1; + + // Equal - false + a = 42; b = 43; + if ((a == b) != 0) return 2; + + // Not equal - true + a = 42; b = 43; + if ((a != b) != 1) return 3; + + // Not equal - false + a = 42; b = 42; + if ((a != b) != 0) return 4; + + // Less than - true + a = 10; b = 20; + if ((a < b) != 1) return 5; + + // Less than - false + a = 20; b = 10; + if ((a < b) != 0) return 6; + + // Less or equal - true (less) + a = 10; b = 20; + if ((a <= b) != 1) return 7; + + // Less or equal - true (equal) + a = 20; b = 20; + if ((a <= b) != 1) return 8; + + // Less or equal - false + a = 30; b = 20; + if ((a <= b) != 0) return 9; + + // Greater than - true + a = 20; b = 10; + if ((a > b) != 1) return 10; + + // Greater than - false + a = 10; b = 20; + if ((a > b) != 0) return 11; + + // Greater or equal - true (greater) + a = 30; b = 20; + if ((a >= b) != 1) return 12; + + // Greater or equal - true (equal) + a = 20; b = 20; + if ((a >= b) != 1) return 13; + + // Greater or equal - false + a = 10; b = 20; + if ((a >= b) != 0) return 14; + + return 0; +} +"#; + assert_eq!(compile_and_run("int_cmp", code), 0); +} + +// ============================================================================ +// Logical Operators (&&, ||, !) +// ============================================================================ + +#[test] +fn int_logical_operators() { + let code = r#" +int main(void) { + int a, b; + + // Logical AND - true + a = 1; b = 1; + if ((a && b) != 1) return 1; + + // Logical AND - false (left) + a = 0; b = 1; + if ((a && b) != 0) return 2; + + // Logical AND - false (right) + a = 1; b = 0; + if ((a && b) != 0) return 3; + + // Logical OR - true (left) + a = 1; b = 0; + if ((a || b) != 1) return 4; + + // Logical OR - true (right) + a = 0; b = 1; + if ((a || b) != 1) return 5; + + // Logical OR - false + a = 0; b = 0; + if ((a || b) != 0) return 6; + + // Logical NOT - true (input 0) + a = 0; + if (!a != 1) return 7; + + // Logical NOT - false (input non-zero) + a = 42; + if (!a != 0) return 8; + + return 0; +} +"#; + assert_eq!(compile_and_run("int_logical", code), 0); +} + +// ============================================================================ +// Bitwise Operators (&, |, ^, ~, <<, >>) +// ============================================================================ + +#[test] +fn int_bitwise_operators() { + let code = r#" +int main(void) { + int a, b; + + // Bitwise AND + a = 0xFF; b = 0x0F; + if ((a & b) != 0x0F) return 1; + + // Bitwise OR + a = 0xF0; b = 0x0F; + if ((a | b) != 0xFF) return 2; + + // Bitwise XOR + a = 0xFF; b = 0xF0; + if ((a ^ b) != 0x0F) return 3; + + // Bitwise NOT + a = 0; + if (~a != -1) return 4; + + // Left shift + a = 1; + if ((a << 4) != 16) return 5; + + // Right shift + a = 64; + if ((a >> 2) != 16) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("int_bitwise", code), 0); +} + +// ============================================================================ +// Assignment Operators (=, +=, -=, *=, /=, %=, &=, |=, ^=, <<=, >>=) +// ============================================================================ + +#[test] +fn int_assignment_operators() { + let code = r#" +int main(void) { + int a; + + // Simple assignment + a = 42; + if (a != 42) return 1; + + // Add assign + a = 40; + a += 2; + if (a != 42) return 2; + + // Sub assign + a = 50; + a -= 8; + if (a != 42) return 3; + + // Mul assign + a = 21; + a *= 2; + if (a != 42) return 4; + + // Div assign + a = 84; + a /= 2; + if (a != 42) return 5; + + // Mod assign + a = 50; + a %= 8; + if (a != 2) return 6; + + // And assign + a = 0xFF; + a &= 0x0F; + if (a != 15) return 7; + + // Or assign + a = 0xF0; + a |= 0x0F; + if (a != 255) return 8; + + // Xor assign + a = 0xFF; + a ^= 0xF0; + if (a != 15) return 9; + + // Shl assign + a = 1; + a <<= 4; + if (a != 16) return 10; + + // Shr assign + a = 64; + a >>= 2; + if (a != 16) return 11; + + return 0; +} +"#; + assert_eq!(compile_and_run("int_assign", code), 0); +} + +// ============================================================================ +// Increment/Decrement Operators (++a, a++, --a, a--) +// ============================================================================ + +#[test] +fn int_increment_decrement_operators() { + let code = r#" +int main(void) { + int a, b; + + // Pre-increment + a = 41; + if (++a != 42) return 1; + + // Post-increment (returns original) + a = 42; + b = a++; + if (b != 42) return 2; + + // Post-increment (side effect) + a = 41; + a++; + if (a != 42) return 3; + + // Pre-decrement + a = 43; + if (--a != 42) return 4; + + // Post-decrement (returns original) + a = 42; + b = a--; + if (b != 42) return 5; + + // Post-decrement (side effect) + a = 43; + a--; + if (a != 42) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("int_incdec", code), 0); +} + +// ============================================================================ +// Ternary and Comma Operators +// ============================================================================ + +#[test] +fn int_ternary_comma_operators() { + let code = r#" +int main(void) { + int a, b; + + // Ternary - true branch + a = 1; + if ((a ? 42 : 0) != 42) return 1; + + // Ternary - false branch + a = 0; + if ((a ? 0 : 42) != 42) return 2; + + // Comma operator + a = 1; + b = (a = 10, a + 32); + if (b != 42) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("int_ternary_comma", code), 0); +} + +// ============================================================================ +// Complex Expressions +// ============================================================================ + +#[test] +fn int_complex_expressions() { + let code = r#" +int main(void) { + int a, b, c, result; + + // Mixed arithmetic with precedence + a = 10; b = 5; c = 2; + result = a + b * c - 3; // 10 + 10 - 3 = 17 + if (result != 17) return 1; + + // Chained comparison + a = 5; b = 10; c = 15; + if (((a < b) && (b < c)) != 1) return 2; + + // Mixed bitwise and arithmetic + a = 8; b = 3; + result = ((a | b) & 0x0F) + (a >> 1); // (11 & 15) + 4 = 15 + if (result != 15) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("int_complex", code), 0); +} + +// ############################################################################ +// UNSIGNED INT TESTS +// ############################################################################ + +// ============================================================================ +// Unsigned Int: Arithmetic Operators (add, sub, mul, div, mod, unary plus) +// ============================================================================ + +#[test] +fn uint_arithmetic_operators() { + let code = r#" +int main(void) { + unsigned int a, b; + + // Addition + a = 30; b = 12; + if (a + b != 42) return 1; + + // Subtraction + a = 100; b = 58; + if (a - b != 42) return 2; + + // Multiplication + a = 6; b = 7; + if (a * b != 42) return 3; + + // Division + a = 84; b = 2; + if (a / b != 42) return 4; + + // Modulo + a = 47; b = 10; + if (a % b != 7) return 5; + + // Unary plus + a = 42; + if (+a != 42) return 6; + + // U suffix for unsigned int literals + if (100U + 23U != 123U) return 7; + + // Hex with U suffix + if (0x10U + 0x20U != 0x30U) return 8; + + // Mixed case suffix (u) + if (50u + 50u != 100u) return 9; + + return 0; +} +"#; + assert_eq!(compile_and_run("uint_arith", code), 0); +} + +// ============================================================================ +// Unsigned Int: Comparison Operators (==, !=, <, <=, >, >=) +// ============================================================================ + +#[test] +fn uint_comparison_operators() { + let code = r#" +int main(void) { + unsigned int a, b; + + // Equal - true + a = 42; b = 42; + if ((a == b) != 1) return 1; + + // Equal - false + a = 42; b = 43; + if ((a == b) != 0) return 2; + + // Not equal - true + a = 42; b = 43; + if ((a != b) != 1) return 3; + + // Not equal - false + a = 42; b = 42; + if ((a != b) != 0) return 4; + + // Less than - true + a = 10; b = 20; + if ((a < b) != 1) return 5; + + // Less than - false + a = 20; b = 10; + if ((a < b) != 0) return 6; + + // Less or equal - true (less) + a = 10; b = 20; + if ((a <= b) != 1) return 7; + + // Less or equal - true (equal) + a = 20; b = 20; + if ((a <= b) != 1) return 8; + + // Less or equal - false + a = 30; b = 20; + if ((a <= b) != 0) return 9; + + // Greater than - true + a = 20; b = 10; + if ((a > b) != 1) return 10; + + // Greater than - false + a = 10; b = 20; + if ((a > b) != 0) return 11; + + // Greater or equal - true (greater) + a = 30; b = 20; + if ((a >= b) != 1) return 12; + + // Greater or equal - true (equal) + a = 20; b = 20; + if ((a >= b) != 1) return 13; + + // Greater or equal - false + a = 10; b = 20; + if ((a >= b) != 0) return 14; + + return 0; +} +"#; + assert_eq!(compile_and_run("uint_cmp", code), 0); +} + +// ============================================================================ +// Unsigned Int: Logical Operators (&&, ||, !) +// ============================================================================ + +#[test] +fn uint_logical_operators() { + let code = r#" +int main(void) { + unsigned int a, b; + + // Logical AND - true + a = 1; b = 1; + if ((a && b) != 1) return 1; + + // Logical AND - false (left) + a = 0; b = 1; + if ((a && b) != 0) return 2; + + // Logical AND - false (right) + a = 1; b = 0; + if ((a && b) != 0) return 3; + + // Logical OR - true (left) + a = 1; b = 0; + if ((a || b) != 1) return 4; + + // Logical OR - true (right) + a = 0; b = 1; + if ((a || b) != 1) return 5; + + // Logical OR - false + a = 0; b = 0; + if ((a || b) != 0) return 6; + + // Logical NOT - true (input 0) + a = 0; + if (!a != 1) return 7; + + // Logical NOT - false (input non-zero) + a = 42; + if (!a != 0) return 8; + + return 0; +} +"#; + assert_eq!(compile_and_run("uint_logical", code), 0); +} + +// ============================================================================ +// Unsigned Int: Bitwise Operators (&, |, ^, ~, <<, >>) +// ============================================================================ + +#[test] +fn uint_bitwise_operators() { + let code = r#" +int main(void) { + unsigned int a, b; + + // Bitwise AND + a = 0xFF; b = 0x0F; + if ((a & b) != 0x0F) return 1; + + // Bitwise OR + a = 0xF0; b = 0x0F; + if ((a | b) != 0xFF) return 2; + + // Bitwise XOR + a = 0xFF; b = 0xF0; + if ((a ^ b) != 0x0F) return 3; + + // Bitwise NOT - check low byte is 0xFF when input is 0xFFFFFF00 + a = 0xFFFFFF00; + if ((~a & 0xFF) != 0xFF) return 4; + + // Left shift + a = 1; + if ((a << 4) != 16) return 5; + + // Right shift (logical for unsigned) + a = 64; + if ((a >> 2) != 16) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("uint_bitwise", code), 0); +} + +// ============================================================================ +// Unsigned Int: Assignment Operators (=, +=, -=, *=, /=, %=, &=, |=, ^=, <<=, >>=) +// ============================================================================ + +#[test] +fn uint_assignment_operators() { + let code = r#" +int main(void) { + unsigned int a; + + // Simple assignment + a = 42; + if (a != 42) return 1; + + // Add assign + a = 40; + a += 2; + if (a != 42) return 2; + + // Sub assign + a = 50; + a -= 8; + if (a != 42) return 3; + + // Mul assign + a = 21; + a *= 2; + if (a != 42) return 4; + + // Div assign + a = 84; + a /= 2; + if (a != 42) return 5; + + // Mod assign + a = 50; + a %= 8; + if (a != 2) return 6; + + // And assign + a = 0xFF; + a &= 0x0F; + if (a != 15) return 7; + + // Or assign + a = 0xF0; + a |= 0x0F; + if (a != 255) return 8; + + // Xor assign + a = 0xFF; + a ^= 0xF0; + if (a != 15) return 9; + + // Shl assign + a = 1; + a <<= 4; + if (a != 16) return 10; + + // Shr assign + a = 64; + a >>= 2; + if (a != 16) return 11; + + return 0; +} +"#; + assert_eq!(compile_and_run("uint_assign", code), 0); +} + +// ============================================================================ +// Unsigned Int: Increment/Decrement Operators (++a, a++, --a, a--) +// ============================================================================ + +#[test] +fn uint_increment_decrement_operators() { + let code = r#" +int main(void) { + unsigned int a, b; + + // Pre-increment + a = 41; + if (++a != 42) return 1; + + // Post-increment (returns original) + a = 42; + b = a++; + if (b != 42) return 2; + + // Post-increment (side effect) + a = 41; + a++; + if (a != 42) return 3; + + // Pre-decrement + a = 43; + if (--a != 42) return 4; + + // Post-decrement (returns original) + a = 42; + b = a--; + if (b != 42) return 5; + + // Post-decrement (side effect) + a = 43; + a--; + if (a != 42) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("uint_incdec", code), 0); +} + +// ============================================================================ +// Unsigned Int: Ternary and Comma Operators +// ============================================================================ + +#[test] +fn uint_ternary_comma_operators() { + let code = r#" +int main(void) { + unsigned int a, b; + + // Ternary - true branch + a = 1; + if ((a ? 42 : 0) != 42) return 1; + + // Ternary - false branch + a = 0; + if ((a ? 0 : 42) != 42) return 2; + + // Comma operator + a = 1; + b = (a = 10, a + 32); + if (b != 42) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("uint_ternary_comma", code), 0); +} + +// ============================================================================ +// Unsigned Int: Complex Expressions +// ============================================================================ + +#[test] +fn uint_complex_expressions() { + let code = r#" +int main(void) { + unsigned int a, b, c, result; + + // Mixed arithmetic with precedence + a = 10; b = 5; c = 2; + result = a + b * c - 3; // 10 + 10 - 3 = 17 + if (result != 17) return 1; + + // Chained comparison + a = 5; b = 10; c = 15; + if (((a < b) && (b < c)) != 1) return 2; + + // Mixed bitwise and arithmetic + a = 8; b = 3; + result = ((a | b) & 0x0F) + (a >> 1); // (11 & 15) + 4 = 15 + if (result != 15) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("uint_complex", code), 0); +} + +// ============================================================================ +// Int: Structs, Functions, and Initializers +// ============================================================================ + +#[test] +fn int_structs_functions_initializers() { + let code = r#" +// Struct with int members +struct int_container { + int value; + int second; +}; + +// Struct with unsigned int members +struct uint_container { + unsigned int value; +}; + +// Function taking int parameter and returning int +int double_int(int x) { + return x * 2; +} + +// Function taking unsigned int parameter and returning unsigned int +unsigned int double_uint(unsigned int x) { + return x * 2; +} + +// Function taking struct parameter +int sum_container(struct int_container c) { + return c.value + c.second; +} + +// Function returning struct +struct int_container make_container(int a, int b) { + struct int_container c; + c.value = a; + c.second = b; + return c; +} + +int main(void) { + // Variable initializers + int x = 42; + if (x != 42) return 1; + + unsigned int ux = 100U; + if (ux != 100) return 2; + + int a = 10, b = 20, c = 30; + if (a + b + c != 60) return 3; + + // Struct member access + struct int_container ic; + ic.value = 42; + ic.second = 58; + if (ic.value != 42) return 4; + if (ic.second != 58) return 5; + + // Unsigned in struct + struct uint_container uc; + uc.value = 0xFFFFFFFF; + if (uc.value != 0xFFFFFFFF) return 6; + + // Function with int param/return + if (double_int(21) != 42) return 7; + if (double_int(-21) != -42) return 8; + + // Function with unsigned int param/return + if (double_uint(21) != 42) return 9; + + // Struct as function parameter + if (sum_container(ic) != 100) return 10; + + // Struct as function return + struct int_container ic2; + ic2 = make_container(10, 20); + if (ic2.value != 10) return 11; + if (ic2.second != 20) return 12; + + // Pointer to int in struct + struct int_container *p; + p = ⁣ + if (p->value != 42) return 13; + p->value = 100; + if (ic.value != 100) return 14; + + return 0; +} +"#; + assert_eq!(compile_and_run("int_advanced", code), 0); +} diff --git a/cc/tests/datatypes/long.rs b/cc/tests/datatypes/long.rs new file mode 100644 index 000000000..272aed965 --- /dev/null +++ b/cc/tests/datatypes/long.rs @@ -0,0 +1,896 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for `long` and `unsigned long` data types with all applicable operators +// Tests are aggregated by category to reduce compile/link overhead +// + +use crate::common::compile_and_run; + +// ############################################################################ +// SIGNED LONG TESTS +// ############################################################################ + +// ============================================================================ +// Long: Arithmetic Operators (add, sub, mul, div, mod, unary neg, unary plus) +// ============================================================================ + +#[test] +fn long_arithmetic_operators() { + let code = r#" +int main(void) { + long a, b; + + // Addition + a = 30; b = 12; + if (a + b != 42) return 1; + + // Subtraction + a = 100; b = 58; + if (a - b != 42) return 2; + + // Multiplication + a = 6; b = 7; + if (a * b != 42) return 3; + + // Division + a = 84; b = 2; + if (a / b != 42) return 4; + + // Modulo + a = 47; b = 10; + if (a % b != 7) return 5; + + // Unary negation + a = -42; + if (-a != 42) return 6; + + // Unary plus + a = 42; + if (+a != 42) return 7; + + // L suffix for long literals + if (100L + 23L != 123L) return 8; + + // Hex with L suffix + if (0x10L + 0x20L != 0x30L) return 9; + + // Mixed case suffix (l) + if (50l + 50l != 100l) return 10; + + return 0; +} +"#; + assert_eq!(compile_and_run("long_arith", code), 0); +} + +// ============================================================================ +// Long: Comparison Operators (==, !=, <, <=, >, >=) +// ============================================================================ + +#[test] +fn long_comparison_operators() { + let code = r#" +int main(void) { + long a, b; + + // Equal - true + a = 42; b = 42; + if ((a == b) != 1) return 1; + + // Equal - false + a = 42; b = 43; + if ((a == b) != 0) return 2; + + // Not equal - true + a = 42; b = 43; + if ((a != b) != 1) return 3; + + // Not equal - false + a = 42; b = 42; + if ((a != b) != 0) return 4; + + // Less than - true + a = 10; b = 20; + if ((a < b) != 1) return 5; + + // Less than - false + a = 20; b = 10; + if ((a < b) != 0) return 6; + + // Less or equal - true (less) + a = 10; b = 20; + if ((a <= b) != 1) return 7; + + // Less or equal - true (equal) + a = 20; b = 20; + if ((a <= b) != 1) return 8; + + // Less or equal - false + a = 30; b = 20; + if ((a <= b) != 0) return 9; + + // Greater than - true + a = 20; b = 10; + if ((a > b) != 1) return 10; + + // Greater than - false + a = 10; b = 20; + if ((a > b) != 0) return 11; + + // Greater or equal - true (greater) + a = 30; b = 20; + if ((a >= b) != 1) return 12; + + // Greater or equal - true (equal) + a = 20; b = 20; + if ((a >= b) != 1) return 13; + + // Greater or equal - false + a = 10; b = 20; + if ((a >= b) != 0) return 14; + + return 0; +} +"#; + assert_eq!(compile_and_run("long_cmp", code), 0); +} + +// ============================================================================ +// Long: Logical Operators (&&, ||, !) +// ============================================================================ + +#[test] +fn long_logical_operators() { + let code = r#" +int main(void) { + long a, b; + + // Logical AND - true + a = 1; b = 1; + if ((a && b) != 1) return 1; + + // Logical AND - false (left) + a = 0; b = 1; + if ((a && b) != 0) return 2; + + // Logical AND - false (right) + a = 1; b = 0; + if ((a && b) != 0) return 3; + + // Logical OR - true (left) + a = 1; b = 0; + if ((a || b) != 1) return 4; + + // Logical OR - true (right) + a = 0; b = 1; + if ((a || b) != 1) return 5; + + // Logical OR - false + a = 0; b = 0; + if ((a || b) != 0) return 6; + + // Logical NOT - true (input 0) + a = 0; + if (!a != 1) return 7; + + // Logical NOT - false (input non-zero) + a = 42; + if (!a != 0) return 8; + + return 0; +} +"#; + assert_eq!(compile_and_run("long_logical", code), 0); +} + +// ============================================================================ +// Long: Bitwise Operators (&, |, ^, ~, <<, >>) +// ============================================================================ + +#[test] +fn long_bitwise_operators() { + let code = r#" +int main(void) { + long a, b; + + // Bitwise AND + a = 0xFF; b = 0x0F; + if ((a & b) != 0x0F) return 1; + + // Bitwise OR + a = 0xF0; b = 0x0F; + if ((a | b) != 0xFF) return 2; + + // Bitwise XOR + a = 0xFF; b = 0xF0; + if ((a ^ b) != 0x0F) return 3; + + // Bitwise NOT + a = 0; + if (~a != -1) return 4; + + // Left shift + a = 1; + if ((a << 4) != 16) return 5; + + // Right shift + a = 64; + if ((a >> 2) != 16) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("long_bitwise", code), 0); +} + +// ============================================================================ +// Long: Assignment Operators (=, +=, -=, *=, /=, %=, &=, |=, ^=, <<=, >>=) +// ============================================================================ + +#[test] +fn long_assignment_operators() { + let code = r#" +int main(void) { + long a; + + // Simple assignment + a = 42; + if (a != 42) return 1; + + // Add assign + a = 40; + a += 2; + if (a != 42) return 2; + + // Sub assign + a = 50; + a -= 8; + if (a != 42) return 3; + + // Mul assign + a = 21; + a *= 2; + if (a != 42) return 4; + + // Div assign + a = 84; + a /= 2; + if (a != 42) return 5; + + // Mod assign + a = 50; + a %= 8; + if (a != 2) return 6; + + // And assign + a = 0xFF; + a &= 0x0F; + if (a != 15) return 7; + + // Or assign + a = 0xF0; + a |= 0x0F; + if (a != 255) return 8; + + // Xor assign + a = 0xFF; + a ^= 0xF0; + if (a != 15) return 9; + + // Shl assign + a = 1; + a <<= 4; + if (a != 16) return 10; + + // Shr assign + a = 64; + a >>= 2; + if (a != 16) return 11; + + return 0; +} +"#; + assert_eq!(compile_and_run("long_assign", code), 0); +} + +// ============================================================================ +// Long: Increment/Decrement Operators (++a, a++, --a, a--) +// ============================================================================ + +#[test] +fn long_increment_decrement_operators() { + let code = r#" +int main(void) { + long a, b; + + // Pre-increment + a = 41; + if (++a != 42) return 1; + + // Post-increment (returns original) + a = 42; + b = a++; + if (b != 42) return 2; + + // Post-increment (side effect) + a = 41; + a++; + if (a != 42) return 3; + + // Pre-decrement + a = 43; + if (--a != 42) return 4; + + // Post-decrement (returns original) + a = 42; + b = a--; + if (b != 42) return 5; + + // Post-decrement (side effect) + a = 43; + a--; + if (a != 42) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("long_incdec", code), 0); +} + +// ============================================================================ +// Long: Ternary and Comma Operators +// ============================================================================ + +#[test] +fn long_ternary_comma_operators() { + let code = r#" +int main(void) { + long a, b; + + // Ternary - true branch + a = 1; + if ((a ? 42 : 0) != 42) return 1; + + // Ternary - false branch + a = 0; + if ((a ? 0 : 42) != 42) return 2; + + // Comma operator + a = 1; + b = (a = 10, a + 32); + if (b != 42) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("long_ternary_comma", code), 0); +} + +// ============================================================================ +// Long: Complex Expressions +// ============================================================================ + +#[test] +fn long_complex_expressions() { + let code = r#" +int main(void) { + long a, b, c, result; + + // Mixed arithmetic with precedence + a = 10; b = 5; c = 2; + result = a + b * c - 3; // 10 + 10 - 3 = 17 + if (result != 17) return 1; + + // Chained comparison + a = 5; b = 10; c = 15; + if (((a < b) && (b < c)) != 1) return 2; + + // Mixed bitwise and arithmetic + a = 8; b = 3; + result = ((a | b) & 0x0F) + (a >> 1); // (11 & 15) + 4 = 15 + if (result != 15) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("long_complex", code), 0); +} + +// ############################################################################ +// UNSIGNED LONG TESTS +// ############################################################################ + +// ============================================================================ +// Unsigned Long: Arithmetic Operators (add, sub, mul, div, mod, unary plus) +// ============================================================================ + +#[test] +fn ulong_arithmetic_operators() { + let code = r#" +int main(void) { + unsigned long a, b; + + // Addition + a = 30; b = 12; + if (a + b != 42) return 1; + + // Subtraction + a = 100; b = 58; + if (a - b != 42) return 2; + + // Multiplication + a = 6; b = 7; + if (a * b != 42) return 3; + + // Division + a = 84; b = 2; + if (a / b != 42) return 4; + + // Modulo + a = 47; b = 10; + if (a % b != 7) return 5; + + // Unary plus + a = 42; + if (+a != 42) return 6; + + // UL suffix for unsigned long literals + if (100UL + 23UL != 123UL) return 7; + + // Hex with UL suffix + if (0x10UL + 0x20UL != 0x30UL) return 8; + + // Mixed case/order suffixes (ul, Lu, lU) + if (50ul + 50ul != 100ul) return 9; + if (25LU + 25LU != 50LU) return 10; + + // Large unsigned long value (tests u64 parsing) + a = 0xFFFFFFFFFFFFFF00UL; + if ((a & 0xFF) != 0) return 11; + + return 0; +} +"#; + assert_eq!(compile_and_run("ulong_arith", code), 0); +} + +// ============================================================================ +// Unsigned Long: Comparison Operators (==, !=, <, <=, >, >=) +// ============================================================================ + +#[test] +fn ulong_comparison_operators() { + let code = r#" +int main(void) { + unsigned long a, b; + + // Equal - true + a = 42; b = 42; + if ((a == b) != 1) return 1; + + // Equal - false + a = 42; b = 43; + if ((a == b) != 0) return 2; + + // Not equal - true + a = 42; b = 43; + if ((a != b) != 1) return 3; + + // Not equal - false + a = 42; b = 42; + if ((a != b) != 0) return 4; + + // Less than - true + a = 10; b = 20; + if ((a < b) != 1) return 5; + + // Less than - false + a = 20; b = 10; + if ((a < b) != 0) return 6; + + // Less or equal - true (less) + a = 10; b = 20; + if ((a <= b) != 1) return 7; + + // Less or equal - true (equal) + a = 20; b = 20; + if ((a <= b) != 1) return 8; + + // Less or equal - false + a = 30; b = 20; + if ((a <= b) != 0) return 9; + + // Greater than - true + a = 20; b = 10; + if ((a > b) != 1) return 10; + + // Greater than - false + a = 10; b = 20; + if ((a > b) != 0) return 11; + + // Greater or equal - true (greater) + a = 30; b = 20; + if ((a >= b) != 1) return 12; + + // Greater or equal - true (equal) + a = 20; b = 20; + if ((a >= b) != 1) return 13; + + // Greater or equal - false + a = 10; b = 20; + if ((a >= b) != 0) return 14; + + return 0; +} +"#; + assert_eq!(compile_and_run("ulong_cmp", code), 0); +} + +// ============================================================================ +// Unsigned Long: Logical Operators (&&, ||, !) +// ============================================================================ + +#[test] +fn ulong_logical_operators() { + let code = r#" +int main(void) { + unsigned long a, b; + + // Logical AND - true + a = 1; b = 1; + if ((a && b) != 1) return 1; + + // Logical AND - false (left) + a = 0; b = 1; + if ((a && b) != 0) return 2; + + // Logical AND - false (right) + a = 1; b = 0; + if ((a && b) != 0) return 3; + + // Logical OR - true (left) + a = 1; b = 0; + if ((a || b) != 1) return 4; + + // Logical OR - true (right) + a = 0; b = 1; + if ((a || b) != 1) return 5; + + // Logical OR - false + a = 0; b = 0; + if ((a || b) != 0) return 6; + + // Logical NOT - true (input 0) + a = 0; + if (!a != 1) return 7; + + // Logical NOT - false (input non-zero) + a = 42; + if (!a != 0) return 8; + + return 0; +} +"#; + assert_eq!(compile_and_run("ulong_logical", code), 0); +} + +// ============================================================================ +// Unsigned Long: Bitwise Operators (&, |, ^, ~, <<, >>) +// ============================================================================ + +#[test] +fn ulong_bitwise_operators() { + let code = r#" +int main(void) { + unsigned long a, b; + + // Bitwise AND + a = 0xFF; b = 0x0F; + if ((a & b) != 0x0F) return 1; + + // Bitwise OR + a = 0xF0; b = 0x0F; + if ((a | b) != 0xFF) return 2; + + // Bitwise XOR + a = 0xFF; b = 0xF0; + if ((a ^ b) != 0x0F) return 3; + + // Bitwise NOT - check low byte is 0xFF when input is 0xFFFFFFFFFFFFFF00 + a = 0xFFFFFFFFFFFFFF00UL; + if ((~a & 0xFF) != 0xFF) return 4; + + // Left shift + a = 1; + if ((a << 4) != 16) return 5; + + // Right shift (logical for unsigned) + a = 64; + if ((a >> 2) != 16) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("ulong_bitwise", code), 0); +} + +// ============================================================================ +// Unsigned Long: Assignment Operators (=, +=, -=, *=, /=, %=, &=, |=, ^=, <<=, >>=) +// ============================================================================ + +#[test] +fn ulong_assignment_operators() { + let code = r#" +int main(void) { + unsigned long a; + + // Simple assignment + a = 42; + if (a != 42) return 1; + + // Add assign + a = 40; + a += 2; + if (a != 42) return 2; + + // Sub assign + a = 50; + a -= 8; + if (a != 42) return 3; + + // Mul assign + a = 21; + a *= 2; + if (a != 42) return 4; + + // Div assign + a = 84; + a /= 2; + if (a != 42) return 5; + + // Mod assign + a = 50; + a %= 8; + if (a != 2) return 6; + + // And assign + a = 0xFF; + a &= 0x0F; + if (a != 15) return 7; + + // Or assign + a = 0xF0; + a |= 0x0F; + if (a != 255) return 8; + + // Xor assign + a = 0xFF; + a ^= 0xF0; + if (a != 15) return 9; + + // Shl assign + a = 1; + a <<= 4; + if (a != 16) return 10; + + // Shr assign + a = 64; + a >>= 2; + if (a != 16) return 11; + + return 0; +} +"#; + assert_eq!(compile_and_run("ulong_assign", code), 0); +} + +// ============================================================================ +// Unsigned Long: Increment/Decrement Operators (++a, a++, --a, a--) +// ============================================================================ + +#[test] +fn ulong_increment_decrement_operators() { + let code = r#" +int main(void) { + unsigned long a, b; + + // Pre-increment + a = 41; + if (++a != 42) return 1; + + // Post-increment (returns original) + a = 42; + b = a++; + if (b != 42) return 2; + + // Post-increment (side effect) + a = 41; + a++; + if (a != 42) return 3; + + // Pre-decrement + a = 43; + if (--a != 42) return 4; + + // Post-decrement (returns original) + a = 42; + b = a--; + if (b != 42) return 5; + + // Post-decrement (side effect) + a = 43; + a--; + if (a != 42) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("ulong_incdec", code), 0); +} + +// ============================================================================ +// Unsigned Long: Ternary and Comma Operators +// ============================================================================ + +#[test] +fn ulong_ternary_comma_operators() { + let code = r#" +int main(void) { + unsigned long a, b; + + // Ternary - true branch + a = 1; + if ((a ? 42 : 0) != 42) return 1; + + // Ternary - false branch + a = 0; + if ((a ? 0 : 42) != 42) return 2; + + // Comma operator + a = 1; + b = (a = 10, a + 32); + if (b != 42) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("ulong_ternary_comma", code), 0); +} + +// ============================================================================ +// Unsigned Long: Complex Expressions +// ============================================================================ + +#[test] +fn ulong_complex_expressions() { + let code = r#" +int main(void) { + unsigned long a, b, c, result; + + // Mixed arithmetic with precedence + a = 10; b = 5; c = 2; + result = a + b * c - 3; // 10 + 10 - 3 = 17 + if (result != 17) return 1; + + // Chained comparison + a = 5; b = 10; c = 15; + if (((a < b) && (b < c)) != 1) return 2; + + // Mixed bitwise and arithmetic + a = 8; b = 3; + result = ((a | b) & 0x0F) + (a >> 1); // (11 & 15) + 4 = 15 + if (result != 15) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("ulong_complex", code), 0); +} + +// ============================================================================ +// Long: Structs, Functions, and Initializers +// ============================================================================ + +#[test] +fn long_structs_functions_initializers() { + let code = r#" +struct long_container { + long value; + long second; +}; + +struct ulong_container { + unsigned long value; +}; + +long double_long(long x) { + return x * 2; +} + +unsigned long double_ulong(unsigned long x) { + return x * 2; +} + +long sum_container(struct long_container c) { + return c.value + c.second; +} + +struct long_container make_container(long a, long b) { + struct long_container c; + c.value = a; + c.second = b; + return c; +} + +int main(void) { + // Variable initializers + long x = 42L; + if (x != 42) return 1; + + unsigned long ux = 100UL; + if (ux != 100) return 2; + + // Multiple variable declarations with initializers + long a = 10L, b = 20L, c = 30L; + if (a + b + c != 60) return 3; + + // Struct member access + struct long_container lc; + lc.value = 1000000L; + lc.second = 500000L; + if (lc.value != 1000000L) return 4; + if (lc.second != 500000L) return 5; + + struct ulong_container ulc; + ulc.value = 4000000000UL; + if (ulc.value != 4000000000UL) return 6; + + // Function with long parameter and return + long doubled = double_long(21L); + if (doubled != 42) return 7; + + // Function with unsigned long parameter and return + unsigned long udoubled = double_ulong(2000000000UL); + if (udoubled != 4000000000UL) return 8; + + // Negative long + long neg = -100L; + if (double_long(neg) != -200) return 9; + + // Struct as function parameter + struct long_container param; + param.value = 100000L; + param.second = 50000L; + if (sum_container(param) != 150000L) return 10; + + // Struct as function return value + struct long_container returned = make_container(300000L, 200000L); + if (returned.value != 300000L) return 11; + if (returned.second != 200000L) return 12; + + // Pointer to struct + struct long_container target; + struct long_container *ptr = ⌖ + ptr->value = 2000000L; + ptr->second = 1000000L; + if (target.value != 2000000L) return 13; + if (target.second != 1000000L) return 14; + + // Large long values (beyond 32-bit int range) + long big = 3000000000L; + if (big != 3000000000L) return 15; + + unsigned long ubig = 10000000000UL; + if (ubig != 10000000000UL) return 16; + + return 0; +} +"#; + assert_eq!(compile_and_run("long_advanced", code), 0); +} diff --git a/cc/tests/datatypes/longdouble.rs b/cc/tests/datatypes/longdouble.rs new file mode 100644 index 000000000..f1932ca8c --- /dev/null +++ b/cc/tests/datatypes/longdouble.rs @@ -0,0 +1,437 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for `long double` data type with all applicable operators +// Tests are aggregated by category to reduce compile/link overhead +// +// Note: On many platforms (including macOS ARM64), long double is equivalent +// to double. These tests verify the type is correctly recognized and behaves +// as expected for floating-point operations. +// + +use crate::common::compile_and_run; + +// ============================================================================ +// Long Double Arithmetic Operators (add, sub, mul, div, unary neg, unary plus) +// ============================================================================ + +#[test] +fn longdouble_arithmetic_operators() { + let code = r#" +int main(void) { + long double a, b, result; + + // Addition + a = 30.5L; b = 11.5L; + result = a + b; + if (result < 41.9L || result > 42.1L) return 1; + + // Subtraction + a = 100.0L; b = 58.0L; + result = a - b; + if (result < 41.9L || result > 42.1L) return 2; + + // Multiplication + a = 6.0L; b = 7.0L; + result = a * b; + if (result < 41.9L || result > 42.1L) return 3; + + // Division + a = 84.0L; b = 2.0L; + result = a / b; + if (result < 41.9L || result > 42.1L) return 4; + + // Unary negation + a = -42.0L; + result = -a; + if (result < 41.9L || result > 42.1L) return 5; + + // Unary plus + a = 42.0L; + result = +a; + if (result < 41.9L || result > 42.1L) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("longdouble_arith", code), 0); +} + +// ============================================================================ +// Long Double Comparison Operators (==, !=, <, <=, >, >=) +// ============================================================================ + +#[test] +fn longdouble_comparison_operators() { + let code = r#" +int main(void) { + long double a, b; + + // Less than - true + a = 10.0L; b = 20.0L; + if ((a < b) != 1) return 1; + + // Less than - false + a = 20.0L; b = 10.0L; + if ((a < b) != 0) return 2; + + // Less or equal - true (less) + a = 10.0L; b = 20.0L; + if ((a <= b) != 1) return 3; + + // Less or equal - true (equal) + a = 20.0L; b = 20.0L; + if ((a <= b) != 1) return 4; + + // Less or equal - false + a = 30.0L; b = 20.0L; + if ((a <= b) != 0) return 5; + + // Greater than - true + a = 20.0L; b = 10.0L; + if ((a > b) != 1) return 6; + + // Greater than - false + a = 10.0L; b = 20.0L; + if ((a > b) != 0) return 7; + + // Greater or equal - true (greater) + a = 30.0L; b = 20.0L; + if ((a >= b) != 1) return 8; + + // Greater or equal - true (equal) + a = 20.0L; b = 20.0L; + if ((a >= b) != 1) return 9; + + // Greater or equal - false + a = 10.0L; b = 20.0L; + if ((a >= b) != 0) return 10; + + return 0; +} +"#; + assert_eq!(compile_and_run("longdouble_cmp", code), 0); +} + +// ============================================================================ +// Long Double Assignment Operators (=, +=, -=, *=, /=) +// ============================================================================ + +#[test] +fn longdouble_assignment_operators() { + let code = r#" +int main(void) { + long double a; + + // Simple assignment + a = 42.0L; + if (a < 41.9L || a > 42.1L) return 1; + + // Add assign + a = 40.0L; + a += 2.0L; + if (a < 41.9L || a > 42.1L) return 2; + + // Sub assign + a = 50.0L; + a -= 8.0L; + if (a < 41.9L || a > 42.1L) return 3; + + // Mul assign + a = 21.0L; + a *= 2.0L; + if (a < 41.9L || a > 42.1L) return 4; + + // Div assign + a = 84.0L; + a /= 2.0L; + if (a < 41.9L || a > 42.1L) return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("longdouble_assign", code), 0); +} + +// ============================================================================ +// Long Double Increment/Decrement Operators (++a, a++, --a, a--) +// ============================================================================ + +#[test] +fn longdouble_increment_decrement_operators() { + let code = r#" +int main(void) { + long double a, b; + + // Pre-increment + a = 41.0L; + b = ++a; + if (b < 41.9L || b > 42.1L) return 1; + + // Post-increment (returns original) + a = 42.0L; + b = a++; + if (b < 41.9L || b > 42.1L) return 2; + + // Post-increment (side effect) + a = 41.0L; + a++; + if (a < 41.9L || a > 42.1L) return 3; + + // Pre-decrement + a = 43.0L; + b = --a; + if (b < 41.9L || b > 42.1L) return 4; + + // Post-decrement (returns original) + a = 42.0L; + b = a--; + if (b < 41.9L || b > 42.1L) return 5; + + // Post-decrement (side effect) + a = 43.0L; + a--; + if (a < 41.9L || a > 42.1L) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("longdouble_incdec", code), 0); +} + +// ============================================================================ +// Long Double-Int Conversions +// ============================================================================ + +#[test] +fn longdouble_int_conversions() { + let code = r#" +int main(void) { + long double ld; + int i; + + // Int to long double conversion + i = 42; + ld = i; + if (ld < 41.9L || ld > 42.1L) return 1; + + // Long double to int conversion (truncation) + ld = 42.7L; + i = (int)ld; + if (i != 42) return 2; + + // Long double to int conversion (negative) + ld = -42.7L; + i = (int)ld; + if (i != -42) return 3; + + // Mixed arithmetic: long double + int + ld = 40.0L; + i = 2; + ld = ld + i; + if (ld < 41.9L || ld > 42.1L) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("longdouble_int_conv", code), 0); +} + +// ============================================================================ +// Long Double-Float-Double Conversions +// ============================================================================ + +#[test] +fn longdouble_float_double_conversions() { + let code = r#" +int main(void) { + float f; + double d; + long double ld; + + // Float to long double (implicit promotion) + f = 42.0f; + ld = f; + if (ld < 41.9L || ld > 42.1L) return 1; + + // Long double to float (explicit conversion) + ld = 42.0L; + f = (float)ld; + if (f < 41.9f || f > 42.1f) return 2; + + // Double to long double (implicit promotion) + d = 42.0; + ld = d; + if (ld < 41.9L || ld > 42.1L) return 3; + + // Long double to double (explicit conversion) + ld = 42.0L; + d = (double)ld; + if (d < 41.9 || d > 42.1) return 4; + + // Mixed arithmetic: float + long double promotes to long double + f = 20.0f; + ld = 22.0L; + ld = f + ld; + if (ld < 41.9L || ld > 42.1L) return 5; + + // Mixed arithmetic: double + long double promotes to long double + d = 20.0; + ld = 22.0L; + ld = d + ld; + if (ld < 41.9L || ld > 42.1L) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("longdouble_float_conv", code), 0); +} + +// ============================================================================ +// Long Double Literals +// ============================================================================ + +#[test] +fn longdouble_literals() { + let code = r#" +int main(void) { + long double ld; + + // Long double literal with L suffix + ld = 42.0L; + if (ld < 41.9L || ld > 42.1L) return 1; + + // Long double literal with l suffix (lowercase) + ld = 42.0l; + if (ld < 41.9L || ld > 42.1L) return 2; + + // Exponential notation + ld = 4.2e1L; + if (ld < 41.9L || ld > 42.1L) return 3; + + // Negative exponent + ld = 4200.0e-2L; + if (ld < 41.9L || ld > 42.1L) return 4; + + // No suffix defaults to double, assigned to long double + ld = 42.0; + if (ld < 41.9L || ld > 42.1L) return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("longdouble_literals", code), 0); +} + +// ============================================================================ +// Complex Long Double Expressions +// ============================================================================ + +#[test] +fn longdouble_complex_expressions() { + let code = r#" +int main(void) { + long double a, b, c, result; + + // Mixed arithmetic with precedence + a = 10.0L; b = 5.0L; c = 2.0L; + result = a + b * c - 3.0L; // 10 + 10 - 3 = 17 + if (result < 16.9L || result > 17.1L) return 1; + + // Chained comparison + a = 5.0L; b = 10.0L; c = 15.0L; + if (((a < b) && (b < c)) != 1) return 2; + + // Division precision test + a = 1.0L; b = 3.0L; + result = a / b; // Should be approximately 0.333... + if (result < 0.3L || result > 0.4L) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("longdouble_complex", code), 0); +} + +// ============================================================================ +// Long Double: Structs, Functions, and Initializers +// ============================================================================ + +#[test] +fn longdouble_structs_functions_initializers() { + let code = r#" +struct longdouble_container { + long double value; + long double second; +}; + +long double double_longdouble(long double x) { + return x * 2.0L; +} + +long double sum_longdouble_container(struct longdouble_container c) { + return c.value + c.second; +} + +struct longdouble_container make_longdouble_container(long double a, long double b) { + struct longdouble_container c; + c.value = a; + c.second = b; + return c; +} + +int main(void) { + // Variable initializers + long double ld = 42.0L; + if (ld < 41.9L || ld > 42.1L) return 1; + + // Multiple variable declarations with initializers + long double a = 10.0L, b = 20.0L, c = 30.0L; + if (a + b + c < 59.9L || a + b + c > 60.1L) return 2; + + // Struct member access - long double + struct longdouble_container ldc; + ldc.value = 100.5L; + ldc.second = 50.25L; + if (ldc.value < 100.4L || ldc.value > 100.6L) return 3; + if (ldc.second < 50.2L || ldc.second > 50.3L) return 4; + + // Function with long double parameter and return + long double lddoubled = double_longdouble(21.0L); + if (lddoubled < 41.9L || lddoubled > 42.1L) return 5; + + // Negative long double + long double ldneg = -100.0L; + long double ldneg_doubled = double_longdouble(ldneg); + if (ldneg_doubled < -200.1L || ldneg_doubled > -199.9L) return 6; + + // Struct as function parameter - long double + struct longdouble_container ldparam; + ldparam.value = 100.0L; + ldparam.second = 50.0L; + long double ldsum = sum_longdouble_container(ldparam); + if (ldsum < 149.9L || ldsum > 150.1L) return 7; + + // Struct as function return value - long double + struct longdouble_container ldreturned = make_longdouble_container(300.0L, 200.0L); + if (ldreturned.value < 299.9L || ldreturned.value > 300.1L) return 8; + if (ldreturned.second < 199.9L || ldreturned.second > 200.1L) return 9; + + // Pointer to struct - long double + struct longdouble_container ldtarget; + struct longdouble_container *ldptr = &ldtarget; + ldptr->value = 2000.0L; + ldptr->second = 1000.0L; + if (ldtarget.value < 1999.9L || ldtarget.value > 2000.1L) return 10; + if (ldtarget.second < 999.9L || ldtarget.second > 1000.1L) return 11; + + return 0; +} +"#; + assert_eq!(compile_and_run("longdouble_advanced", code), 0); +} diff --git a/cc/tests/datatypes/longlong.rs b/cc/tests/datatypes/longlong.rs new file mode 100644 index 000000000..932ad39b6 --- /dev/null +++ b/cc/tests/datatypes/longlong.rs @@ -0,0 +1,1008 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for `long long` and `unsigned long long` data types with all applicable operators +// Tests are aggregated by category to reduce compile/link overhead +// + +use crate::common::compile_and_run; + +// ############################################################################ +// SIGNED LONG LONG TESTS +// ############################################################################ + +// ============================================================================ +// Long Long: Arithmetic Operators (add, sub, mul, div, mod, unary neg, unary plus) +// ============================================================================ + +#[test] +fn llong_arithmetic_operators() { + let code = r#" +int main(void) { + long long a, b; + + // Addition + a = 30; b = 12; + if (a + b != 42) return 1; + + // Subtraction + a = 100; b = 58; + if (a - b != 42) return 2; + + // Multiplication + a = 6; b = 7; + if (a * b != 42) return 3; + + // Division + a = 84; b = 2; + if (a / b != 42) return 4; + + // Modulo + a = 47; b = 10; + if (a % b != 7) return 5; + + // Unary negation + a = -42; + if (-a != 42) return 6; + + // Unary plus + a = 42; + if (+a != 42) return 7; + + // LL suffix for long long literals + if (100LL + 23LL != 123LL) return 8; + + // Hex with LL suffix + if (0x10LL + 0x20LL != 0x30LL) return 9; + + // Mixed case suffix (ll) + if (50ll + 50ll != 100ll) return 10; + + // Large long long value (beyond 32-bit range) + a = 0x100000000LL; // 2^32 + b = 0x100000000LL; + if (a + b != 0x200000000LL) return 11; + + // Multiplication producing large result + a = 1000000LL; + b = 1000000LL; + if (a * b != 1000000000000LL) return 12; + + return 0; +} +"#; + assert_eq!(compile_and_run("llong_arith", code), 0); +} + +// ============================================================================ +// Long Long: Comparison Operators (==, !=, <, <=, >, >=) +// ============================================================================ + +#[test] +fn llong_comparison_operators() { + let code = r#" +int main(void) { + long long a, b; + + // Equal - true + a = 42; b = 42; + if ((a == b) != 1) return 1; + + // Equal - false + a = 42; b = 43; + if ((a == b) != 0) return 2; + + // Not equal - true + a = 42; b = 43; + if ((a != b) != 1) return 3; + + // Not equal - false + a = 42; b = 42; + if ((a != b) != 0) return 4; + + // Less than - true + a = 10; b = 20; + if ((a < b) != 1) return 5; + + // Less than - false + a = 20; b = 10; + if ((a < b) != 0) return 6; + + // Less or equal - true (less) + a = 10; b = 20; + if ((a <= b) != 1) return 7; + + // Less or equal - true (equal) + a = 20; b = 20; + if ((a <= b) != 1) return 8; + + // Less or equal - false + a = 30; b = 20; + if ((a <= b) != 0) return 9; + + // Greater than - true + a = 20; b = 10; + if ((a > b) != 1) return 10; + + // Greater than - false + a = 10; b = 20; + if ((a > b) != 0) return 11; + + // Greater or equal - true (greater) + a = 30; b = 20; + if ((a >= b) != 1) return 12; + + // Greater or equal - true (equal) + a = 20; b = 20; + if ((a >= b) != 1) return 13; + + // Greater or equal - false + a = 10; b = 20; + if ((a >= b) != 0) return 14; + + // Large value comparisons (beyond 32-bit range) + a = 0x100000000LL; + b = 0x100000001LL; + if ((a < b) != 1) return 15; + if ((a > b) != 0) return 16; + + return 0; +} +"#; + assert_eq!(compile_and_run("llong_cmp", code), 0); +} + +// ============================================================================ +// Long Long: Logical Operators (&&, ||, !) +// ============================================================================ + +#[test] +fn llong_logical_operators() { + let code = r#" +int main(void) { + long long a, b; + + // Logical AND - true + a = 1; b = 1; + if ((a && b) != 1) return 1; + + // Logical AND - false (left) + a = 0; b = 1; + if ((a && b) != 0) return 2; + + // Logical AND - false (right) + a = 1; b = 0; + if ((a && b) != 0) return 3; + + // Logical OR - true (left) + a = 1; b = 0; + if ((a || b) != 1) return 4; + + // Logical OR - true (right) + a = 0; b = 1; + if ((a || b) != 1) return 5; + + // Logical OR - false + a = 0; b = 0; + if ((a || b) != 0) return 6; + + // Logical NOT - true (input 0) + a = 0; + if (!a != 1) return 7; + + // Logical NOT - false (input non-zero) + a = 42; + if (!a != 0) return 8; + + // Large non-zero value is truthy + a = 0x100000000LL; + if (!a != 0) return 9; + + return 0; +} +"#; + assert_eq!(compile_and_run("llong_logical", code), 0); +} + +// ============================================================================ +// Long Long: Bitwise Operators (&, |, ^, ~, <<, >>) +// ============================================================================ + +#[test] +fn llong_bitwise_operators() { + let code = r#" +int main(void) { + long long a, b; + + // Bitwise AND + a = 0xFF; b = 0x0F; + if ((a & b) != 0x0F) return 1; + + // Bitwise OR + a = 0xF0; b = 0x0F; + if ((a | b) != 0xFF) return 2; + + // Bitwise XOR + a = 0xFF; b = 0xF0; + if ((a ^ b) != 0x0F) return 3; + + // Bitwise NOT + a = 0; + if (~a != -1) return 4; + + // Left shift + a = 1; + if ((a << 4) != 16) return 5; + + // Right shift (arithmetic for signed) + a = 64; + if ((a >> 2) != 16) return 6; + + // Large shifts (beyond 32 bits) + a = 1LL; + if ((a << 40) != 0x10000000000LL) return 7; + + // Bitwise ops on large values + a = 0xFF00000000LL; + b = 0x00FF000000LL; + if ((a | b) != 0xFFFF000000LL) return 8; + + return 0; +} +"#; + assert_eq!(compile_and_run("llong_bitwise", code), 0); +} + +// ============================================================================ +// Long Long: Assignment Operators (=, +=, -=, *=, /=, %=, &=, |=, ^=, <<=, >>=) +// ============================================================================ + +#[test] +fn llong_assignment_operators() { + let code = r#" +int main(void) { + long long a; + + // Simple assignment + a = 42; + if (a != 42) return 1; + + // Add assign + a = 40; + a += 2; + if (a != 42) return 2; + + // Sub assign + a = 50; + a -= 8; + if (a != 42) return 3; + + // Mul assign + a = 21; + a *= 2; + if (a != 42) return 4; + + // Div assign + a = 84; + a /= 2; + if (a != 42) return 5; + + // Mod assign + a = 50; + a %= 8; + if (a != 2) return 6; + + // And assign + a = 0xFF; + a &= 0x0F; + if (a != 15) return 7; + + // Or assign + a = 0xF0; + a |= 0x0F; + if (a != 255) return 8; + + // Xor assign + a = 0xFF; + a ^= 0xF0; + if (a != 15) return 9; + + // Shl assign + a = 1; + a <<= 4; + if (a != 16) return 10; + + // Shr assign + a = 64; + a >>= 2; + if (a != 16) return 11; + + // Large value assignment + a = 0x123456789ABCLL; + if (a != 0x123456789ABCLL) return 12; + + return 0; +} +"#; + assert_eq!(compile_and_run("llong_assign", code), 0); +} + +// ============================================================================ +// Long Long: Increment/Decrement Operators (++a, a++, --a, a--) +// ============================================================================ + +#[test] +fn llong_increment_decrement_operators() { + let code = r#" +int main(void) { + long long a, b; + + // Pre-increment + a = 41; + if (++a != 42) return 1; + + // Post-increment (returns original) + a = 42; + b = a++; + if (b != 42) return 2; + + // Post-increment (side effect) + a = 41; + a++; + if (a != 42) return 3; + + // Pre-decrement + a = 43; + if (--a != 42) return 4; + + // Post-decrement (returns original) + a = 42; + b = a--; + if (b != 42) return 5; + + // Post-decrement (side effect) + a = 43; + a--; + if (a != 42) return 6; + + // Increment on large value + a = 0xFFFFFFFFLL; + a++; + if (a != 0x100000000LL) return 7; + + return 0; +} +"#; + assert_eq!(compile_and_run("llong_incdec", code), 0); +} + +// ============================================================================ +// Long Long: Ternary and Comma Operators +// ============================================================================ + +#[test] +fn llong_ternary_comma_operators() { + let code = r#" +int main(void) { + long long a, b; + + // Ternary - true branch + a = 1; + if ((a ? 42 : 0) != 42) return 1; + + // Ternary - false branch + a = 0; + if ((a ? 0 : 42) != 42) return 2; + + // Comma operator + a = 1; + b = (a = 10, a + 32); + if (b != 42) return 3; + + // Ternary with large values + a = 0x100000000LL; + b = (a ? 0x200000000LL : 0); + if (b != 0x200000000LL) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("llong_ternary_comma", code), 0); +} + +// ============================================================================ +// Long Long: Complex Expressions +// ============================================================================ + +#[test] +fn llong_complex_expressions() { + let code = r#" +int main(void) { + long long a, b, c, result; + + // Mixed arithmetic with precedence + a = 10; b = 5; c = 2; + result = a + b * c - 3; // 10 + 10 - 3 = 17 + if (result != 17) return 1; + + // Chained comparison + a = 5; b = 10; c = 15; + if (((a < b) && (b < c)) != 1) return 2; + + // Mixed bitwise and arithmetic + a = 8; b = 3; + result = ((a | b) & 0x0F) + (a >> 1); // (11 & 15) + 4 = 15 + if (result != 15) return 3; + + // Large value expressions + a = 0x100000000LL; + b = 0x200000000LL; + result = (a + b) >> 1; + if (result != 0x180000000LL) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("llong_complex", code), 0); +} + +// ############################################################################ +// UNSIGNED LONG LONG TESTS +// ############################################################################ + +// ============================================================================ +// Unsigned Long Long: Arithmetic Operators (add, sub, mul, div, mod, unary plus) +// ============================================================================ + +#[test] +fn ullong_arithmetic_operators() { + let code = r#" +int main(void) { + unsigned long long a, b; + + // Addition + a = 30; b = 12; + if (a + b != 42) return 1; + + // Subtraction + a = 100; b = 58; + if (a - b != 42) return 2; + + // Multiplication + a = 6; b = 7; + if (a * b != 42) return 3; + + // Division + a = 84; b = 2; + if (a / b != 42) return 4; + + // Modulo + a = 47; b = 10; + if (a % b != 7) return 5; + + // Unary plus + a = 42; + if (+a != 42) return 6; + + // ULL suffix for unsigned long long literals + if (100ULL + 23ULL != 123ULL) return 7; + + // Hex with ULL suffix + if (0x10ULL + 0x20ULL != 0x30ULL) return 8; + + // Mixed case/order suffixes (ull, LLU, llu) + if (50ull + 50ull != 100ull) return 9; + if (25LLU + 25LLU != 50LLU) return 10; + + // Large unsigned long long value (beyond 32-bit range) + a = 0x100000000ULL; + b = 0x100000000ULL; + if (a + b != 0x200000000ULL) return 11; + + // Very large value (tests full 64-bit range) + a = 0xFFFFFFFFFFFFFF00ULL; + if ((a & 0xFF) != 0) return 12; + + // Multiplication producing large result + a = 0x100000000ULL; + b = 16ULL; + if (a * b != 0x1000000000ULL) return 13; + + return 0; +} +"#; + assert_eq!(compile_and_run("ullong_arith", code), 0); +} + +// ============================================================================ +// Unsigned Long Long: Comparison Operators (==, !=, <, <=, >, >=) +// ============================================================================ + +#[test] +fn ullong_comparison_operators() { + let code = r#" +int main(void) { + unsigned long long a, b; + + // Equal - true + a = 42; b = 42; + if ((a == b) != 1) return 1; + + // Equal - false + a = 42; b = 43; + if ((a == b) != 0) return 2; + + // Not equal - true + a = 42; b = 43; + if ((a != b) != 1) return 3; + + // Not equal - false + a = 42; b = 42; + if ((a != b) != 0) return 4; + + // Less than - true + a = 10; b = 20; + if ((a < b) != 1) return 5; + + // Less than - false + a = 20; b = 10; + if ((a < b) != 0) return 6; + + // Less or equal - true (less) + a = 10; b = 20; + if ((a <= b) != 1) return 7; + + // Less or equal - true (equal) + a = 20; b = 20; + if ((a <= b) != 1) return 8; + + // Less or equal - false + a = 30; b = 20; + if ((a <= b) != 0) return 9; + + // Greater than - true + a = 20; b = 10; + if ((a > b) != 1) return 10; + + // Greater than - false + a = 10; b = 20; + if ((a > b) != 0) return 11; + + // Greater or equal - true (greater) + a = 30; b = 20; + if ((a >= b) != 1) return 12; + + // Greater or equal - true (equal) + a = 20; b = 20; + if ((a >= b) != 1) return 13; + + // Greater or equal - false + a = 10; b = 20; + if ((a >= b) != 0) return 14; + + // Large value comparisons (beyond 32-bit range) + a = 0x100000000ULL; + b = 0x100000001ULL; + if ((a < b) != 1) return 15; + if ((a > b) != 0) return 16; + + // Very large values + a = 0xFFFFFFFFFFFFFFFEULL; + b = 0xFFFFFFFFFFFFFFFFULL; + if ((a < b) != 1) return 17; + + return 0; +} +"#; + assert_eq!(compile_and_run("ullong_cmp", code), 0); +} + +// ============================================================================ +// Unsigned Long Long: Logical Operators (&&, ||, !) +// ============================================================================ + +#[test] +fn ullong_logical_operators() { + let code = r#" +int main(void) { + unsigned long long a, b; + + // Logical AND - true + a = 1; b = 1; + if ((a && b) != 1) return 1; + + // Logical AND - false (left) + a = 0; b = 1; + if ((a && b) != 0) return 2; + + // Logical AND - false (right) + a = 1; b = 0; + if ((a && b) != 0) return 3; + + // Logical OR - true (left) + a = 1; b = 0; + if ((a || b) != 1) return 4; + + // Logical OR - true (right) + a = 0; b = 1; + if ((a || b) != 1) return 5; + + // Logical OR - false + a = 0; b = 0; + if ((a || b) != 0) return 6; + + // Logical NOT - true (input 0) + a = 0; + if (!a != 1) return 7; + + // Logical NOT - false (input non-zero) + a = 42; + if (!a != 0) return 8; + + // Large non-zero value is truthy + a = 0xFFFFFFFFFFFFFFFFULL; + if (!a != 0) return 9; + + return 0; +} +"#; + assert_eq!(compile_and_run("ullong_logical", code), 0); +} + +// ============================================================================ +// Unsigned Long Long: Bitwise Operators (&, |, ^, ~, <<, >>) +// ============================================================================ + +#[test] +fn ullong_bitwise_operators() { + let code = r#" +int main(void) { + unsigned long long a, b; + + // Bitwise AND + a = 0xFF; b = 0x0F; + if ((a & b) != 0x0F) return 1; + + // Bitwise OR + a = 0xF0; b = 0x0F; + if ((a | b) != 0xFF) return 2; + + // Bitwise XOR + a = 0xFF; b = 0xF0; + if ((a ^ b) != 0x0F) return 3; + + // Bitwise NOT - check low byte is 0xFF when input is 0xFFFFFFFFFFFFFF00 + a = 0xFFFFFFFFFFFFFF00ULL; + if ((~a & 0xFF) != 0xFF) return 4; + + // Left shift + a = 1; + if ((a << 4) != 16) return 5; + + // Right shift (logical for unsigned) + a = 64; + if ((a >> 2) != 16) return 6; + + // Large shifts (beyond 32 bits) + a = 1ULL; + if ((a << 40) != 0x10000000000ULL) return 7; + + // Shift high bit (logical shift, not arithmetic) + a = 0x8000000000000000ULL; + if ((a >> 63) != 1) return 8; + + // Bitwise ops on large values + a = 0xFF00000000000000ULL; + b = 0x00FF000000000000ULL; + if ((a | b) != 0xFFFF000000000000ULL) return 9; + + return 0; +} +"#; + assert_eq!(compile_and_run("ullong_bitwise", code), 0); +} + +// ============================================================================ +// Unsigned Long Long: Assignment Operators (=, +=, -=, *=, /=, %=, &=, |=, ^=, <<=, >>=) +// ============================================================================ + +#[test] +fn ullong_assignment_operators() { + let code = r#" +int main(void) { + unsigned long long a; + + // Simple assignment + a = 42; + if (a != 42) return 1; + + // Add assign + a = 40; + a += 2; + if (a != 42) return 2; + + // Sub assign + a = 50; + a -= 8; + if (a != 42) return 3; + + // Mul assign + a = 21; + a *= 2; + if (a != 42) return 4; + + // Div assign + a = 84; + a /= 2; + if (a != 42) return 5; + + // Mod assign + a = 50; + a %= 8; + if (a != 2) return 6; + + // And assign + a = 0xFF; + a &= 0x0F; + if (a != 15) return 7; + + // Or assign + a = 0xF0; + a |= 0x0F; + if (a != 255) return 8; + + // Xor assign + a = 0xFF; + a ^= 0xF0; + if (a != 15) return 9; + + // Shl assign + a = 1; + a <<= 4; + if (a != 16) return 10; + + // Shr assign + a = 64; + a >>= 2; + if (a != 16) return 11; + + // Large value assignment + a = 0xFEDCBA9876543210ULL; + if (a != 0xFEDCBA9876543210ULL) return 12; + + return 0; +} +"#; + assert_eq!(compile_and_run("ullong_assign", code), 0); +} + +// ============================================================================ +// Unsigned Long Long: Increment/Decrement Operators (++a, a++, --a, a--) +// ============================================================================ + +#[test] +fn ullong_increment_decrement_operators() { + let code = r#" +int main(void) { + unsigned long long a, b; + + // Pre-increment + a = 41; + if (++a != 42) return 1; + + // Post-increment (returns original) + a = 42; + b = a++; + if (b != 42) return 2; + + // Post-increment (side effect) + a = 41; + a++; + if (a != 42) return 3; + + // Pre-decrement + a = 43; + if (--a != 42) return 4; + + // Post-decrement (returns original) + a = 42; + b = a--; + if (b != 42) return 5; + + // Post-decrement (side effect) + a = 43; + a--; + if (a != 42) return 6; + + // Increment crossing 32-bit boundary + a = 0xFFFFFFFFULL; + a++; + if (a != 0x100000000ULL) return 7; + + return 0; +} +"#; + assert_eq!(compile_and_run("ullong_incdec", code), 0); +} + +// ============================================================================ +// Unsigned Long Long: Ternary and Comma Operators +// ============================================================================ + +#[test] +fn ullong_ternary_comma_operators() { + let code = r#" +int main(void) { + unsigned long long a, b; + + // Ternary - true branch + a = 1; + if ((a ? 42 : 0) != 42) return 1; + + // Ternary - false branch + a = 0; + if ((a ? 0 : 42) != 42) return 2; + + // Comma operator + a = 1; + b = (a = 10, a + 32); + if (b != 42) return 3; + + // Ternary with large values + a = 0x100000000ULL; + b = (a ? 0x200000000ULL : 0); + if (b != 0x200000000ULL) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("ullong_ternary_comma", code), 0); +} + +// ============================================================================ +// Unsigned Long Long: Complex Expressions +// ============================================================================ + +#[test] +fn ullong_complex_expressions() { + let code = r#" +int main(void) { + unsigned long long a, b, c, result; + + // Mixed arithmetic with precedence + a = 10; b = 5; c = 2; + result = a + b * c - 3; // 10 + 10 - 3 = 17 + if (result != 17) return 1; + + // Chained comparison + a = 5; b = 10; c = 15; + if (((a < b) && (b < c)) != 1) return 2; + + // Mixed bitwise and arithmetic + a = 8; b = 3; + result = ((a | b) & 0x0F) + (a >> 1); // (11 & 15) + 4 = 15 + if (result != 15) return 3; + + // Large value expressions + a = 0x100000000ULL; + b = 0x200000000ULL; + result = (a + b) >> 1; + if (result != 0x180000000ULL) return 4; + + // Division of large values + a = 0x1000000000000ULL; + b = 0x1000000ULL; + if (a / b != 0x1000000ULL) return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("ullong_complex", code), 0); +} + +// ============================================================================ +// Long Long: Structs, Functions, and Initializers +// ============================================================================ + +#[test] +fn llong_structs_functions_initializers() { + let code = r#" +struct llong_container { + long long value; + long long second; +}; + +struct ullong_container { + unsigned long long value; +}; + +long long double_llong(long long x) { + return x * 2; +} + +unsigned long long double_ullong(unsigned long long x) { + return x * 2; +} + +long long sum_container(struct llong_container c) { + return c.value + c.second; +} + +struct llong_container make_container(long long a, long long b) { + struct llong_container c; + c.value = a; + c.second = b; + return c; +} + +int main(void) { + // Variable initializers + long long x = 42LL; + if (x != 42) return 1; + + unsigned long long ux = 100ULL; + if (ux != 100) return 2; + + // Multiple variable declarations with initializers + long long a = 10LL, b = 20LL, c = 30LL; + if (a + b + c != 60) return 3; + + // Struct member access + struct llong_container llc; + llc.value = 1000000000000LL; + llc.second = 500000000000LL; + if (llc.value != 1000000000000LL) return 4; + if (llc.second != 500000000000LL) return 5; + + struct ullong_container ullc; + ullc.value = 10000000000000000000ULL; + if (ullc.value != 10000000000000000000ULL) return 6; + + // Function with long long parameter and return + long long doubled = double_llong(21LL); + if (doubled != 42) return 7; + + // Function with unsigned long long parameter and return + unsigned long long udoubled = double_ullong(5000000000ULL); + if (udoubled != 10000000000ULL) return 8; + + // Negative long long + long long neg = -100LL; + if (double_llong(neg) != -200) return 9; + + // Struct as function parameter + struct llong_container param; + param.value = 100000000000LL; + param.second = 50000000000LL; + if (sum_container(param) != 150000000000LL) return 10; + + // Struct as function return value + struct llong_container returned = make_container(300000000000LL, 200000000000LL); + if (returned.value != 300000000000LL) return 11; + if (returned.second != 200000000000LL) return 12; + + // Pointer to struct + struct llong_container target; + struct llong_container *ptr = ⌖ + ptr->value = 2000000000000LL; + ptr->second = 1000000000000LL; + if (target.value != 2000000000000LL) return 13; + if (target.second != 1000000000000LL) return 14; + + // Very large long long values + long long big = 9000000000000000000LL; + if (big != 9000000000000000000LL) return 15; + + unsigned long long ubig = 18000000000000000000ULL; + if (ubig != 18000000000000000000ULL) return 16; + + return 0; +} +"#; + assert_eq!(compile_and_run("llong_advanced", code), 0); +} diff --git a/cc/tests/datatypes/mixed_cmp.rs b/cc/tests/datatypes/mixed_cmp.rs new file mode 100644 index 000000000..97c1f82f1 --- /dev/null +++ b/cc/tests/datatypes/mixed_cmp.rs @@ -0,0 +1,346 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for C99 mixed-type comparisons (usual arithmetic conversions) +// Tests are aggregated by category to reduce compile/link overhead +// + +use crate::common::compile_and_run; + +// ============================================================================ +// C99 Integer Promotions (6.3.1.1) - char/short promoted to int before comparison +// ============================================================================ + +#[test] +fn mixed_cmp_integer_promotions() { + let code = r#" +int main(void) { + // Test 1: signed char vs unsigned char + // Both promoted to int, then signed comparison + // -1 < 1 is true (1) + { + signed char sc = -1; + unsigned char uc = 1; + if ((sc < uc) != 1) return 1; + if ((sc > uc) != 0) return 2; + if ((sc == uc) != 0) return 3; + if ((sc != uc) != 1) return 4; + } + + // Test 2: short vs unsigned short + // Both promoted to int, then signed comparison + // -1 < 1 is true (1) + { + short ss = -1; + unsigned short us = 1; + if ((ss < us) != 1) return 11; + if ((ss > us) != 0) return 12; + if ((ss == us) != 0) return 13; + if ((ss != us) != 1) return 14; + } + + // Test 3: signed char vs short + // Both promoted to int + { + signed char sc = -10; + short ss = 5; + if ((sc < ss) != 1) return 21; + if ((sc > ss) != 0) return 22; + } + + // Test 4: unsigned char vs signed short + // Both promoted to int + { + unsigned char uc = 200; + short ss = 100; + if ((uc > ss) != 1) return 31; + if ((uc < ss) != 0) return 32; + } + + // Test 5: _Bool comparisons (promoted to int) + { + _Bool b = 1; + char c = 2; + if ((b < c) != 1) return 41; + if ((b == c) != 0) return 42; + } + + return 0; +} +"#; + assert_eq!(compile_and_run("mixed_cmp_int_promo", code), 0); +} + +// ============================================================================ +// C99 Usual Arithmetic Conversions (6.3.1.8) - same rank, different signedness +// ============================================================================ + +#[test] +fn mixed_cmp_same_rank_different_sign() { + let code = r#" +int main(void) { + // Test 1: int vs unsigned int + // C99: convert to unsigned int + // -1 becomes 0xFFFFFFFF, so -1 > 1 as unsigned + { + int si = -1; + unsigned int ui = 1; + if ((si < ui) != 0) return 1; // -1 as unsigned is large + if ((si > ui) != 1) return 2; + if ((si == ui) != 0) return 3; + } + + // Test 2: long vs unsigned long + // C99: convert to unsigned long + { + long sl = -1; + unsigned long ul = 1; + if ((sl < ul) != 0) return 11; // -1 as unsigned is large + if ((sl > ul) != 1) return 12; + } + + // Test 3: long long vs unsigned long long + { + long long sll = -1; + unsigned long long ull = 1; + if ((sll < ull) != 0) return 21; + if ((sll > ull) != 1) return 22; + } + + // Test 4: Positive values should work normally + { + int si = 5; + unsigned int ui = 10; + if ((si < ui) != 1) return 31; + if ((si > ui) != 0) return 32; + } + + return 0; +} +"#; + assert_eq!(compile_and_run("mixed_cmp_same_rank", code), 0); +} + +// ============================================================================ +// C99 Usual Arithmetic Conversions - different ranks +// ============================================================================ + +#[test] +fn mixed_cmp_different_ranks() { + let code = r#" +int main(void) { + // Test 1: long vs unsigned int + // long (64-bit) can represent all unsigned int values + // So convert both to long (signed comparison) + { + long sl = -1; + unsigned int ui = 1; + if ((sl < ui) != 1) return 1; // -1 < 1 as signed + if ((sl > ui) != 0) return 2; + } + + // Test 2: long long vs unsigned int + // long long can represent all unsigned int values + { + long long sll = -1; + unsigned int ui = 1; + if ((sll < ui) != 1) return 11; + if ((sll > ui) != 0) return 12; + } + + // Test 3: int vs unsigned short + // int can represent all unsigned short values + { + int si = -1; + unsigned short us = 1; + if ((si < us) != 1) return 21; // signed comparison after promotion + if ((si > us) != 0) return 22; + } + + // Test 4: long vs short + // Both signed, long is wider + { + long sl = -1000000; + short ss = 1; + if ((sl < ss) != 1) return 31; + if ((sl > ss) != 0) return 32; + } + + return 0; +} +"#; + assert_eq!(compile_and_run("mixed_cmp_diff_ranks", code), 0); +} + +// ============================================================================ +// Float vs Integer comparisons +// ============================================================================ + +#[test] +fn mixed_cmp_float_int() { + let code = r#" +int main(void) { + // Test 1: int vs float + { + int i = -5; + float f = 2.5f; + if ((i < f) != 1) return 1; + if ((i > f) != 0) return 2; + if ((i == f) != 0) return 3; + } + + // Test 2: unsigned int vs float + { + unsigned int ui = 10; + float f = 5.5f; + if ((ui > f) != 1) return 11; + if ((ui < f) != 0) return 12; + } + + // Test 3: int vs double + { + int i = 42; + double d = 42.0; + if ((i == d) != 1) return 21; + if ((i != d) != 0) return 22; + } + + // Test 4: long vs double + { + long l = -100; + double d = -50.5; + if ((l < d) != 1) return 31; + } + + // Test 5: float vs double + { + float f = 1.5f; + double d = 2.5; + if ((f < d) != 1) return 41; + if ((f > d) != 0) return 42; + } + + return 0; +} +"#; + assert_eq!(compile_and_run("mixed_cmp_float_int", code), 0); +} + +// ============================================================================ +// Edge cases and boundary values +// ============================================================================ + +#[test] +fn mixed_cmp_edge_cases() { + let code = r#" +int main(void) { + // Test 1: Maximum unsigned char vs signed comparison + { + unsigned char uc = 255; + signed char sc = -1; + // Both promoted to int: 255 vs -1 + if ((uc > sc) != 1) return 1; + if ((uc == sc) != 0) return 2; + } + + // Test 2: Zero comparisons across types + { + int si = 0; + unsigned int ui = 0; + float f = 0.0f; + if ((si == ui) != 1) return 11; + if ((si == f) != 1) return 12; + if ((ui == f) != 1) return 13; + } + + // Test 3: Large unsigned vs negative signed (same rank) + { + unsigned int ui = 0xFFFFFFFF; + int si = -1; + // Both become unsigned, -1 becomes 0xFFFFFFFF + if ((ui == si) != 1) return 21; + } + + // Test 4: Comparison result is always int (0 or 1) + { + char a = 10; + char b = 5; + int result = (a > b); + if (result != 1) return 31; + result = (a < b); + if (result != 0) return 32; + } + + // Test 5: Chained comparisons with mixed types + { + char c = 5; + int i = 10; + long l = 15; + // (c < i) is 1, (i < l) is 1 + if ((c < i) != 1) return 41; + if ((i < l) != 1) return 42; + } + + return 0; +} +"#; + assert_eq!(compile_and_run("mixed_cmp_edge", code), 0); +} + +// ============================================================================ +// Relational operators (<, <=, >, >=) with all combinations +// ============================================================================ + +#[test] +fn mixed_cmp_relational_all() { + let code = r#" +int main(void) { + // Test all relational operators with char vs int + { + char c = -10; + int i = 5; + if ((c < i) != 1) return 1; + if ((c <= i) != 1) return 2; + if ((c > i) != 0) return 3; + if ((c >= i) != 0) return 4; + } + + // Test all relational operators with int vs unsigned int + { + int si = -1; + unsigned int ui = 1; + // -1 as unsigned is MAX_UINT, so -1 > 1 + if ((si < ui) != 0) return 11; + if ((si <= ui) != 0) return 12; + if ((si > ui) != 1) return 13; + if ((si >= ui) != 1) return 14; + } + + // Test all relational operators with int vs float + { + int i = 3; + float f = 3.5f; + if ((i < f) != 1) return 21; + if ((i <= f) != 1) return 22; + if ((i > f) != 0) return 23; + if ((i >= f) != 0) return 24; + } + + // Test all equality operators + { + short s = 100; + long l = 100; + if ((s == l) != 1) return 31; + if ((s != l) != 0) return 32; + } + + return 0; +} +"#; + assert_eq!(compile_and_run("mixed_cmp_relational", code), 0); +} diff --git a/cc/tests/datatypes/mod.rs b/cc/tests/datatypes/mod.rs new file mode 100644 index 000000000..9e363d8db --- /dev/null +++ b/cc/tests/datatypes/mod.rs @@ -0,0 +1,29 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Data type and operator tests for pcc +// +// Each submodule tests all operators for a specific data type. +// + +mod array_type; +mod bitfield_type; +mod bool; +mod char; +mod float; +mod int; +mod long; +mod longdouble; +mod longlong; +mod mixed_cmp; +mod pointer_type; +mod short; +mod struct_type; +mod typedef_type; +mod union_type; +mod void_type; diff --git a/cc/tests/datatypes/pointer_type.rs b/cc/tests/datatypes/pointer_type.rs new file mode 100644 index 000000000..f011bfc68 --- /dev/null +++ b/cc/tests/datatypes/pointer_type.rs @@ -0,0 +1,691 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for C99 pointer types +// +// C99 pointer semantics: +// - Pointers hold memory addresses of objects +// - Pointer arithmetic scales by the size of the pointed-to type +// - void* is a generic pointer that can point to any object +// - Null pointers compare equal to 0 +// - Arrays decay to pointers in most contexts +// - Pointers can be compared with relational operators +// +// Tests are aggregated to reduce compile/link cycles while maintaining coverage. +// + +use crate::common::compile_and_run; + +// ============================================================================ +// Basic Pointer Operations: address-of, dereference, modify, reassign +// ============================================================================ + +#[test] +fn pointer_basic_comprehensive() { + let code = r#" +int main(void) { + // Test 1: Basic int pointer + int x = 42; + int *p = &x; + if (*p != 42) return 1; + + // Test 2: Basic char pointer + char c = 'A'; + char *cp = &c; + if (*cp != 'A') return 2; + + // Test 3: Basic long pointer + long lx = 123456789L; + long *lp = &lx; + if (*lp != 123456789L) return 3; + + // Test 4: Modify through pointer + int y = 10; + int *py = &y; + *py = 20; + if (y != 20) return 4; + + // Test 5: Pointer reassignment + int a = 10; + int b = 20; + int *pa = &a; + if (*pa != 10) return 5; + pa = &b; + if (*pa != 20) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("ptr_basic_comprehensive", code), 0); +} + +// ============================================================================ +// Pointer Arithmetic: add, sub, increment, decrement, compound, difference +// ============================================================================ + +#[test] +fn pointer_arithmetic_comprehensive() { + let code = r#" +int main(void) { + // Test 1: Addition + int arr[5] = {10, 20, 30, 40, 50}; + int *p = arr; + if (*(p + 0) != 10) return 1; + if (*(p + 1) != 20) return 2; + if (*(p + 2) != 30) return 3; + if (*(p + 3) != 40) return 4; + if (*(p + 4) != 50) return 5; + + // Test 2: Subtraction + int *p2 = &arr[4]; + if (*(p2 - 0) != 50) return 6; + if (*(p2 - 1) != 40) return 7; + if (*(p2 - 2) != 30) return 8; + if (*(p2 - 3) != 20) return 9; + if (*(p2 - 4) != 10) return 10; + + // Test 3: Increment + int arr2[3] = {1, 2, 3}; + int *p3 = arr2; + if (*p3 != 1) return 11; + p3++; + if (*p3 != 2) return 12; + p3++; + if (*p3 != 3) return 13; + + // Test 4: Decrement + int *p4 = &arr2[2]; + if (*p4 != 3) return 14; + p4--; + if (*p4 != 2) return 15; + p4--; + if (*p4 != 1) return 16; + + // Test 5: Compound add + int *p5 = arr; + p5 += 2; + if (*p5 != 30) return 17; + p5 += 2; + if (*p5 != 50) return 18; + + // Test 6: Compound sub + int *p6 = &arr[4]; + p6 -= 2; + if (*p6 != 30) return 19; + p6 -= 2; + if (*p6 != 10) return 20; + + // Test 7: Pointer difference + int *p7 = &arr[0]; + int *p8 = &arr[4]; + long diff = p8 - p7; + if (diff != 4) return 21; + diff = p7 - p8; + if (diff != -4) return 22; + + // Test 8: Char arithmetic + char carr[4] = {'a', 'b', 'c', 'd'}; + char *cp = carr; + if (*(cp + 0) != 'a') return 23; + if (*(cp + 1) != 'b') return 24; + if (*(cp + 2) != 'c') return 25; + if (*(cp + 3) != 'd') return 26; + + return 0; +} +"#; + assert_eq!(compile_and_run("ptr_arith_comprehensive", code), 0); +} + +// ============================================================================ +// Pointer Comparisons and NULL Pointers +// ============================================================================ + +#[test] +fn pointer_comparison_comprehensive() { + let code = r#" +int main(void) { + // Test 1: Equal pointers + int x = 42; + int *p1 = &x; + int *p2 = &x; + if (p1 != p2) return 1; + if (!(p1 == p2)) return 2; + + // Test 2: Not equal pointers + int y = 2; + int *p3 = &x; + int *p4 = &y; + if (p3 == p4) return 3; + if (!(p3 != p4)) return 4; + + // Test 3: Relational comparisons + int arr[3] = {1, 2, 3}; + int *pa = &arr[0]; + int *pb = &arr[2]; + if (!(pa < pb)) return 5; + if (!(pa <= pb)) return 6; + if (!(pb > pa)) return 7; + if (!(pb >= pa)) return 8; + + // Equal pointers with <= and >= + int *pc = &arr[1]; + int *pd = &arr[1]; + if (!(pc <= pd)) return 9; + if (!(pc >= pd)) return 10; + + // Test 4: NULL pointer init + int *pnull = 0; + if (pnull != 0) return 11; + + // Test 5: NULL pointer comparison + int *pn = 0; + if (pn) return 12; // Should be false + if (pn != 0) return 13; + if (!(pn == 0)) return 14; + + int z = 42; + pn = &z; + if (!pn) return 15; // Should be true now + if (pn == 0) return 16; + + return 0; +} +"#; + assert_eq!(compile_and_run("ptr_cmp_comprehensive", code), 0); +} + +// ============================================================================ +// Void Pointers +// ============================================================================ + +#[test] +fn void_pointer_comprehensive() { + let code = r#" +int main(void) { + // Test 1: Basic void pointer + int x = 42; + void *vp = &x; + int *ip = vp; + if (*ip != 42) return 1; + + // Test 2: Multiple types + int i = 10; + char c = 'X'; + long l = 12345L; + + vp = &i; + int *ip2 = vp; + if (*ip2 != 10) return 2; + + vp = &c; + char *cp = vp; + if (*cp != 'X') return 3; + + vp = &l; + long *lp = vp; + if (*lp != 12345L) return 4; + + // Test 3: NULL void pointer + void *vnull = 0; + if (vnull != 0) return 5; + + int y = 1; + vnull = &y; + if (vnull == 0) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("void_ptr_comprehensive", code), 0); +} + +// ============================================================================ +// Double/Nested Pointers +// ============================================================================ + +#[test] +fn pointer_double_comprehensive() { + let code = r#" +int main(void) { + // Test 1: Basic double pointer + int x = 42; + int *p = &x; + int **pp = &p; + if (**pp != 42) return 1; + + // Test 2: Modify through double pointer + int y = 10; + int *py = &y; + int **ppy = &py; + **ppy = 20; + if (y != 20) return 2; + if (*py != 20) return 3; + + // Test 3: Reassign through double pointer + int a = 10; + int b = 20; + int *p1 = &a; + int *p2 = &b; + int **pp2 = &p1; + if (**pp2 != 10) return 4; + *pp2 = p2; // Now pp2 points to p2 + if (**pp2 != 20) return 5; + + // Test 4: Triple pointer + int z = 42; + int *pz = &z; + int **ppz = &pz; + int ***pppz = &ppz; + if (***pppz != 42) return 6; + ***pppz = 100; + if (z != 100) return 7; + + return 0; +} +"#; + assert_eq!(compile_and_run("ptr_double_comprehensive", code), 0); +} + +// ============================================================================ +// Char Pointers and String Literals +// ============================================================================ + +#[test] +fn char_pointer_comprehensive() { + let code = r#" +int main(void) { + // Test 1: Char pointer to array + char arr[5] = {'H', 'e', 'l', 'l', 'o'}; + char *p = arr; + if (p[0] != 'H') return 1; + if (p[1] != 'e') return 2; + if (p[2] != 'l') return 3; + if (p[3] != 'l') return 4; + if (p[4] != 'o') return 5; + + // Test 2: String literal + char *s = "Hello"; + if (s[0] != 'H') return 6; + if (s[1] != 'e') return 7; + if (s[2] != 'l') return 8; + if (s[3] != 'l') return 9; + if (s[4] != 'o') return 10; + if (s[5] != 0) return 11; // Null terminator + + // Test 3: Walk string + char *s2 = "abc"; + int count = 0; + while (*s2 != 0) { + count++; + s2++; + } + if (count != 3) return 12; + + // Test 4: Multiple strings + char *s3 = "ABC"; + char *s4 = "XYZ"; + if (s3[0] != 'A') return 13; + if (s4[0] != 'X') return 14; + if (s3[2] != 'C') return 15; + if (s4[2] != 'Z') return 16; + + return 0; +} +"#; + assert_eq!(compile_and_run("char_ptr_comprehensive", code), 0); +} + +// ============================================================================ +// Arrays of Pointers and Pointer to Array +// ============================================================================ + +#[test] +fn pointer_array_comprehensive() { + let code = r#" +int main(void) { + // Test 1: Array of int pointers + int a = 1, b = 2, c = 3; + int *arr[3]; + arr[0] = &a; + arr[1] = &b; + arr[2] = &c; + if (*arr[0] != 1) return 1; + if (*arr[1] != 2) return 2; + if (*arr[2] != 3) return 3; + + // Test 2: Array of char pointers + char *carr[3]; + carr[0] = "one"; + carr[1] = "two"; + carr[2] = "three"; + if (carr[0][0] != 'o') return 4; + if (carr[1][0] != 't') return 5; + if (carr[2][0] != 't') return 6; + if (carr[2][2] != 'r') return 7; + + // Test 3: Array of pointers with initializer + int d = 10, e = 20, f = 30; + int *iarr[3] = {&d, &e, &f}; + if (*iarr[0] != 10) return 8; + if (*iarr[1] != 20) return 9; + if (*iarr[2] != 30) return 10; + + // Test 4: Array of string pointers + char *strs[3] = {"apple", "banana", "cherry"}; + if (strs[0][0] != 'a') return 11; + if (strs[1][0] != 'b') return 12; + if (strs[2][0] != 'c') return 13; + if (strs[0][1] != 'p') return 14; + if (strs[1][1] != 'a') return 15; + if (strs[2][1] != 'h') return 16; + + // Test 5: Pointer to array + int parr[3] = {1, 2, 3}; + int (*p)[3] = &parr; + if ((*p)[0] != 1) return 17; + if ((*p)[1] != 2) return 18; + if ((*p)[2] != 3) return 19; + + // Test 6: Pointer to 2D array + int arr2d[2][3] = {{1, 2, 3}, {4, 5, 6}}; + int (*p2d)[3] = arr2d; + if ((*p2d)[0] != 1) return 20; + if ((*p2d)[1] != 2) return 21; + if ((*p2d)[2] != 3) return 22; + p2d++; + if ((*p2d)[0] != 4) return 23; + if ((*p2d)[1] != 5) return 24; + if ((*p2d)[2] != 6) return 25; + + return 0; +} +"#; + assert_eq!(compile_and_run("ptr_array_comprehensive", code), 0); +} + +// ============================================================================ +// Pointer Parameters and Return Values +// ============================================================================ + +#[test] +fn pointer_param_return_comprehensive() { + let code = r#" +int read_ptr(int *p) { + return *p; +} + +void write_ptr(int *p, int val) { + *p = val; +} + +void swap(int *a, int *b) { + int tmp = *a; + *a = *b; + *b = tmp; +} + +void set_ptr(int **pp, int *new_val) { + *pp = new_val; +} + +int global_val = 42; + +int* get_global_ptr(void) { + return &global_val; +} + +int* identity(int *p) { + return p; +} + +int* select_ptr(int *a, int *b, int use_first) { + if (use_first) { + return a; + } + return b; +} + +int main(void) { + // Test 1: Read through pointer param + int x = 42; + if (read_ptr(&x) != 42) return 1; + + // Test 2: Write through pointer param + int y = 0; + write_ptr(&y, 42); + if (y != 42) return 2; + + // Test 3: Swap + int a = 10; + int b = 20; + swap(&a, &b); + if (a != 20) return 3; + if (b != 10) return 4; + + // Test 4: Double pointer param + int c = 10; + int d = 20; + int *p = &c; + if (*p != 10) return 5; + set_ptr(&p, &d); + if (*p != 20) return 6; + + // Test 5: Return global pointer + int *gp = get_global_ptr(); + if (*gp != 42) return 7; + *gp = 100; + if (global_val != 100) return 8; + + // Test 6: Return param pointer + int e = 42; + int *pe = identity(&e); + if (*pe != 42) return 9; + + // Test 7: Conditional return + int f = 10; + int g = 20; + int *ps = select_ptr(&f, &g, 1); + if (*ps != 10) return 10; + ps = select_ptr(&f, &g, 0); + if (*ps != 20) return 11; + + return 0; +} +"#; + assert_eq!(compile_and_run("ptr_param_ret_comprehensive", code), 0); +} + +// ============================================================================ +// Pointer Casts and Structs +// ============================================================================ + +#[test] +fn pointer_cast_struct_comprehensive() { + let code = r#" +struct Point { + int x; + int y; +}; + +struct Container { + int *ptr; + int val; +}; + +struct Item { + int value; +}; + +int main(void) { + // Test 1: Cast int* to char* + int x = 0x44434241; // "ABCD" in little-endian + char *cp = (char*)&x; + if (cp[0] != 'A') return 1; + if (cp[1] != 'B') return 2; + if (cp[2] != 'C') return 3; + if (cp[3] != 'D') return 4; + + // Test 2: Cast to void* and back + int y = 42; + void *vp = (void*)&y; + int *ip = (int*)vp; + if (*ip != 42) return 5; + + // Test 3: Pointer to struct + struct Point p = {10, 20}; + struct Point *ptr = &p; + if (ptr->x != 10) return 6; + if (ptr->y != 20) return 7; + ptr->x = 30; + ptr->y = 40; + if (p.x != 30) return 8; + if (p.y != 40) return 9; + + // Test 4: Pointer in struct + int z = 42; + struct Container c; + c.ptr = &z; + c.val = 10; + if (*c.ptr != 42) return 10; + if (c.val != 10) return 11; + *c.ptr = 100; + if (z != 100) return 12; + + // Test 5: Pointer to struct array + struct Item items[3] = {{10}, {20}, {30}}; + struct Item *pi = items; + if (pi->value != 10) return 13; + pi++; + if (pi->value != 20) return 14; + pi++; + if (pi->value != 30) return 15; + + return 0; +} +"#; + assert_eq!(compile_and_run("ptr_cast_struct_comprehensive", code), 0); +} + +// ============================================================================ +// Sizeof, Expressions, and Edge Cases +// ============================================================================ + +#[test] +fn pointer_expr_comprehensive() { + let code = r#" +int main(void) { + // Test 1: Sizeof pointers (64-bit) + if (sizeof(int*) != 8) return 1; + if (sizeof(char*) != 8) return 2; + if (sizeof(long*) != 8) return 3; + if (sizeof(void*) != 8) return 4; + int *p; + if (sizeof(p) != 8) return 5; + + // Test 2: Address of dereference + int x = 42; + int *px = &x; + if (&*px != px) return 6; + if (*&x != 42) return 7; + + // Test 3: Complex expressions + int arr[3] = {10, 20, 30}; + int *pa = arr; + if (*(pa + 1) != pa[1]) return 8; + if (*(1 + pa) != pa[1]) return 9; // commutative + if (*pa + 1 != 11) return 10; + + // Test 4: Negative index + int arr2[5] = {1, 2, 3, 4, 5}; + int *pm = &arr2[2]; // Points to arr2[2] = 3 + if (pm[-2] != 1) return 11; + if (pm[-1] != 2) return 12; + if (pm[0] != 3) return 13; + if (pm[1] != 4) return 14; + if (pm[2] != 5) return 15; + + // Test 5: Self reference + int y = 42; + int *py = &y; + int **ppy = &py; + if (*ppy != py) return 16; + if (**ppy != y) return 17; + + // Test 6: Post-increment dereference (*p++) + int arr3[3] = {10, 20, 30}; + int *pi = arr3; + int val = *pi++; + if (val != 10) return 18; + if (*pi != 20) return 19; + val = *pi++; + if (val != 20) return 20; + if (*pi != 30) return 21; + + // Test 7: Pre-increment dereference (*++p) + int arr4[3] = {10, 20, 30}; + int *pj = arr4; + val = *++pj; + if (val != 20) return 22; + val = *++pj; + if (val != 30) return 23; + + // Test 8: Dereference then increment ((*p)++) + int arr5[3] = {10, 20, 30}; + int *pk = arr5; + val = (*pk)++; + if (val != 10) return 24; + if (arr5[0] != 11) return 25; + if (*pk != 11) return 26; // pk still points to arr5[0] + + return 0; +} +"#; + assert_eq!(compile_and_run("ptr_expr_comprehensive", code), 0); +} + +// ============================================================================ +// Pointer Aliasing and Const Pointers +// ============================================================================ + +#[test] +fn pointer_alias_const_comprehensive() { + let code = r#" +int main(void) { + // Test 1: Aliasing + int x = 10; + int *p1 = &x; + int *p2 = &x; + + *p1 = 20; + if (*p2 != 20) return 1; + + *p2 = 30; + if (*p1 != 30) return 2; + if (x != 30) return 3; + + // Test 2: Const pointer (int * const p) + int y = 10; + int * const cp = &y; // Constant pointer to int + *cp = 15; + if (y != 15) return 4; + // Cannot reassign cp (would be compile error) + + // Test 3: Pointer to const (const int *p) + int a = 10; + int b = 20; + const int *pc = &a; // Pointer to constant int + pc = &b; // Can reassign + if (*pc != 20) return 5; + // Cannot modify through pc (would be compile error) + + return 0; +} +"#; + assert_eq!(compile_and_run("ptr_alias_const_comprehensive", code), 0); +} diff --git a/cc/tests/datatypes/short.rs b/cc/tests/datatypes/short.rs new file mode 100644 index 000000000..e0aed100c --- /dev/null +++ b/cc/tests/datatypes/short.rs @@ -0,0 +1,887 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for `short` and `unsigned short` data types with all applicable operators +// Tests are aggregated by category to reduce compile/link overhead +// + +use crate::common::compile_and_run; + +// ============================================================================ +// Arithmetic Operators (add, sub, mul, div, mod, unary neg, unary plus) +// ============================================================================ + +#[test] +fn short_arithmetic_operators() { + let code = r#" +int main(void) { + short a, b, result; + + // Addition + a = 30; b = 12; + if (a + b != 42) return 1; + + // Subtraction + a = 100; b = 58; + if (a - b != 42) return 2; + + // Multiplication + a = 6; b = 7; + if (a * b != 42) return 3; + + // Division + a = 84; b = 2; + if (a / b != 42) return 4; + + // Modulo + a = 47; b = 10; + if (a % b != 7) return 5; + + // Unary negation + a = -42; + if (-a != 42) return 6; + + // Unary plus + a = 42; + if (+a != 42) return 7; + + // Literal operations (result fits in short range) + if (100 + 23 != 123) return 8; + + // Hex literal + if (0x10 + 0x20 != 0x30) return 9; + + // Octal literal + if (010 + 010 != 020) return 10; + + return 0; +} +"#; + assert_eq!(compile_and_run("short_arith", code), 0); +} + +// ============================================================================ +// Comparison Operators (==, !=, <, <=, >, >=) +// ============================================================================ + +#[test] +fn short_comparison_operators() { + let code = r#" +int main(void) { + short a, b; + + // Equal - true + a = 42; b = 42; + if ((a == b) != 1) return 1; + + // Equal - false + a = 42; b = 43; + if ((a == b) != 0) return 2; + + // Not equal - true + a = 42; b = 43; + if ((a != b) != 1) return 3; + + // Not equal - false + a = 42; b = 42; + if ((a != b) != 0) return 4; + + // Less than - true + a = 10; b = 20; + if ((a < b) != 1) return 5; + + // Less than - false + a = 20; b = 10; + if ((a < b) != 0) return 6; + + // Less or equal - true (less) + a = 10; b = 20; + if ((a <= b) != 1) return 7; + + // Less or equal - true (equal) + a = 20; b = 20; + if ((a <= b) != 1) return 8; + + // Less or equal - false + a = 30; b = 20; + if ((a <= b) != 0) return 9; + + // Greater than - true + a = 20; b = 10; + if ((a > b) != 1) return 10; + + // Greater than - false + a = 10; b = 20; + if ((a > b) != 0) return 11; + + // Greater or equal - true (greater) + a = 30; b = 20; + if ((a >= b) != 1) return 12; + + // Greater or equal - true (equal) + a = 20; b = 20; + if ((a >= b) != 1) return 13; + + // Greater or equal - false + a = 10; b = 20; + if ((a >= b) != 0) return 14; + + return 0; +} +"#; + assert_eq!(compile_and_run("short_cmp", code), 0); +} + +// ============================================================================ +// Logical Operators (&&, ||, !) +// ============================================================================ + +#[test] +fn short_logical_operators() { + let code = r#" +int main(void) { + short a, b; + + // Logical AND - true + a = 1; b = 1; + if ((a && b) != 1) return 1; + + // Logical AND - false (left) + a = 0; b = 1; + if ((a && b) != 0) return 2; + + // Logical AND - false (right) + a = 1; b = 0; + if ((a && b) != 0) return 3; + + // Logical OR - true (left) + a = 1; b = 0; + if ((a || b) != 1) return 4; + + // Logical OR - true (right) + a = 0; b = 1; + if ((a || b) != 1) return 5; + + // Logical OR - false + a = 0; b = 0; + if ((a || b) != 0) return 6; + + // Logical NOT - true (input 0) + a = 0; + if (!a != 1) return 7; + + // Logical NOT - false (input non-zero) + a = 42; + if (!a != 0) return 8; + + return 0; +} +"#; + assert_eq!(compile_and_run("short_logical", code), 0); +} + +// ============================================================================ +// Bitwise Operators (&, |, ^, ~, <<, >>) +// ============================================================================ + +#[test] +fn short_bitwise_operators() { + let code = r#" +int main(void) { + short a, b; + + // Bitwise AND + a = 0xFF; b = 0x0F; + if ((a & b) != 0x0F) return 1; + + // Bitwise OR + a = 0xF0; b = 0x0F; + if ((a | b) != 0xFF) return 2; + + // Bitwise XOR + a = 0xFF; b = 0xF0; + if ((a ^ b) != 0x0F) return 3; + + // Bitwise NOT + a = 0; + if (~a != -1) return 4; + + // Left shift + a = 1; + if ((a << 4) != 16) return 5; + + // Right shift + a = 64; + if ((a >> 2) != 16) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("short_bitwise", code), 0); +} + +// ============================================================================ +// Assignment Operators (=, +=, -=, *=, /=, %=, &=, |=, ^=, <<=, >>=) +// ============================================================================ + +#[test] +fn short_assignment_operators() { + let code = r#" +int main(void) { + short a; + + // Simple assignment + a = 42; + if (a != 42) return 1; + + // Add assign + a = 40; + a += 2; + if (a != 42) return 2; + + // Sub assign + a = 50; + a -= 8; + if (a != 42) return 3; + + // Mul assign + a = 21; + a *= 2; + if (a != 42) return 4; + + // Div assign + a = 84; + a /= 2; + if (a != 42) return 5; + + // Mod assign + a = 50; + a %= 8; + if (a != 2) return 6; + + // And assign + a = 0xFF; + a &= 0x0F; + if (a != 15) return 7; + + // Or assign + a = 0xF0; + a |= 0x0F; + if (a != 255) return 8; + + // Xor assign + a = 0xFF; + a ^= 0xF0; + if (a != 15) return 9; + + // Shl assign + a = 1; + a <<= 4; + if (a != 16) return 10; + + // Shr assign + a = 64; + a >>= 2; + if (a != 16) return 11; + + return 0; +} +"#; + assert_eq!(compile_and_run("short_assign", code), 0); +} + +// ============================================================================ +// Increment/Decrement Operators (++a, a++, --a, a--) +// ============================================================================ + +#[test] +fn short_increment_decrement_operators() { + let code = r#" +int main(void) { + short a, b; + + // Pre-increment + a = 41; + if (++a != 42) return 1; + + // Post-increment (returns original) + a = 42; + b = a++; + if (b != 42) return 2; + + // Post-increment (side effect) + a = 41; + a++; + if (a != 42) return 3; + + // Pre-decrement + a = 43; + if (--a != 42) return 4; + + // Post-decrement (returns original) + a = 42; + b = a--; + if (b != 42) return 5; + + // Post-decrement (side effect) + a = 43; + a--; + if (a != 42) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("short_incdec", code), 0); +} + +// ============================================================================ +// Ternary and Comma Operators +// ============================================================================ + +#[test] +fn short_ternary_comma_operators() { + let code = r#" +int main(void) { + short a, b; + + // Ternary - true branch + a = 1; + if ((a ? 42 : 0) != 42) return 1; + + // Ternary - false branch + a = 0; + if ((a ? 0 : 42) != 42) return 2; + + // Comma operator + a = 1; + b = (a = 10, a + 32); + if (b != 42) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("short_ternary_comma", code), 0); +} + +// ============================================================================ +// Complex Expressions +// ============================================================================ + +#[test] +fn short_complex_expressions() { + let code = r#" +int main(void) { + short a, b, c, result; + + // Mixed arithmetic with precedence + a = 10; b = 5; c = 2; + result = a + b * c - 3; // 10 + 10 - 3 = 17 + if (result != 17) return 1; + + // Chained comparison + a = 5; b = 10; c = 15; + if (((a < b) && (b < c)) != 1) return 2; + + // Mixed bitwise and arithmetic + a = 8; b = 3; + result = ((a | b) & 0x0F) + (a >> 1); // (11 & 15) + 4 = 15 + if (result != 15) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("short_complex", code), 0); +} + +// ############################################################################ +// UNSIGNED SHORT TESTS +// ############################################################################ + +// ============================================================================ +// Unsigned Short: Arithmetic Operators (add, sub, mul, div, mod, unary plus) +// ============================================================================ + +#[test] +fn ushort_arithmetic_operators() { + let code = r#" +int main(void) { + unsigned short a, b; + + // Addition + a = 30; b = 12; + if (a + b != 42) return 1; + + // Subtraction + a = 100; b = 58; + if (a - b != 42) return 2; + + // Multiplication + a = 6; b = 7; + if (a * b != 42) return 3; + + // Division + a = 84; b = 2; + if (a / b != 42) return 4; + + // Modulo + a = 47; b = 10; + if (a % b != 7) return 5; + + // Unary plus + a = 42; + if (+a != 42) return 6; + + // Literal operations + if (100 + 23 != 123) return 7; + + // Hex literal + if (0x10 + 0x20 != 0x30) return 8; + + return 0; +} +"#; + assert_eq!(compile_and_run("ushort_arith", code), 0); +} + +// ============================================================================ +// Unsigned Short: Comparison Operators (==, !=, <, <=, >, >=) +// ============================================================================ + +#[test] +fn ushort_comparison_operators() { + let code = r#" +int main(void) { + unsigned short a, b; + + // Equal - true + a = 42; b = 42; + if ((a == b) != 1) return 1; + + // Equal - false + a = 42; b = 43; + if ((a == b) != 0) return 2; + + // Not equal - true + a = 42; b = 43; + if ((a != b) != 1) return 3; + + // Not equal - false + a = 42; b = 42; + if ((a != b) != 0) return 4; + + // Less than - true + a = 10; b = 20; + if ((a < b) != 1) return 5; + + // Less than - false + a = 20; b = 10; + if ((a < b) != 0) return 6; + + // Less or equal - true (less) + a = 10; b = 20; + if ((a <= b) != 1) return 7; + + // Less or equal - true (equal) + a = 20; b = 20; + if ((a <= b) != 1) return 8; + + // Less or equal - false + a = 30; b = 20; + if ((a <= b) != 0) return 9; + + // Greater than - true + a = 20; b = 10; + if ((a > b) != 1) return 10; + + // Greater than - false + a = 10; b = 20; + if ((a > b) != 0) return 11; + + // Greater or equal - true (greater) + a = 30; b = 20; + if ((a >= b) != 1) return 12; + + // Greater or equal - true (equal) + a = 20; b = 20; + if ((a >= b) != 1) return 13; + + // Greater or equal - false + a = 10; b = 20; + if ((a >= b) != 0) return 14; + + return 0; +} +"#; + assert_eq!(compile_and_run("ushort_cmp", code), 0); +} + +// ============================================================================ +// Unsigned Short: Logical Operators (&&, ||, !) +// ============================================================================ + +#[test] +fn ushort_logical_operators() { + let code = r#" +int main(void) { + unsigned short a, b; + + // Logical AND - true + a = 1; b = 1; + if ((a && b) != 1) return 1; + + // Logical AND - false (left) + a = 0; b = 1; + if ((a && b) != 0) return 2; + + // Logical AND - false (right) + a = 1; b = 0; + if ((a && b) != 0) return 3; + + // Logical OR - true (left) + a = 1; b = 0; + if ((a || b) != 1) return 4; + + // Logical OR - true (right) + a = 0; b = 1; + if ((a || b) != 1) return 5; + + // Logical OR - false + a = 0; b = 0; + if ((a || b) != 0) return 6; + + // Logical NOT - true (input 0) + a = 0; + if (!a != 1) return 7; + + // Logical NOT - false (input non-zero) + a = 42; + if (!a != 0) return 8; + + return 0; +} +"#; + assert_eq!(compile_and_run("ushort_logical", code), 0); +} + +// ============================================================================ +// Unsigned Short: Bitwise Operators (&, |, ^, ~, <<, >>) +// ============================================================================ + +#[test] +fn ushort_bitwise_operators() { + let code = r#" +int main(void) { + unsigned short a, b; + + // Bitwise AND + a = 0xFF; b = 0x0F; + if ((a & b) != 0x0F) return 1; + + // Bitwise OR + a = 0xF0; b = 0x0F; + if ((a | b) != 0xFF) return 2; + + // Bitwise XOR + a = 0xFF; b = 0xF0; + if ((a ^ b) != 0x0F) return 3; + + // Bitwise NOT - check low byte is 0xFF when input is 0xFF00 + a = 0xFF00; + if ((~a & 0xFF) != 0xFF) return 4; + + // Left shift + a = 1; + if ((a << 4) != 16) return 5; + + // Right shift (logical for unsigned) + a = 64; + if ((a >> 2) != 16) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("ushort_bitwise", code), 0); +} + +// ============================================================================ +// Unsigned Short: Assignment Operators (=, +=, -=, *=, /=, %=, &=, |=, ^=, <<=, >>=) +// ============================================================================ + +#[test] +fn ushort_assignment_operators() { + let code = r#" +int main(void) { + unsigned short a; + + // Simple assignment + a = 42; + if (a != 42) return 1; + + // Add assign + a = 40; + a += 2; + if (a != 42) return 2; + + // Sub assign + a = 50; + a -= 8; + if (a != 42) return 3; + + // Mul assign + a = 21; + a *= 2; + if (a != 42) return 4; + + // Div assign + a = 84; + a /= 2; + if (a != 42) return 5; + + // Mod assign + a = 50; + a %= 8; + if (a != 2) return 6; + + // And assign + a = 0xFF; + a &= 0x0F; + if (a != 15) return 7; + + // Or assign + a = 0xF0; + a |= 0x0F; + if (a != 255) return 8; + + // Xor assign + a = 0xFF; + a ^= 0xF0; + if (a != 15) return 9; + + // Shl assign + a = 1; + a <<= 4; + if (a != 16) return 10; + + // Shr assign + a = 64; + a >>= 2; + if (a != 16) return 11; + + return 0; +} +"#; + assert_eq!(compile_and_run("ushort_assign", code), 0); +} + +// ============================================================================ +// Unsigned Short: Increment/Decrement Operators (++a, a++, --a, a--) +// ============================================================================ + +#[test] +fn ushort_increment_decrement_operators() { + let code = r#" +int main(void) { + unsigned short a, b; + + // Pre-increment + a = 41; + if (++a != 42) return 1; + + // Post-increment (returns original) + a = 42; + b = a++; + if (b != 42) return 2; + + // Post-increment (side effect) + a = 41; + a++; + if (a != 42) return 3; + + // Pre-decrement + a = 43; + if (--a != 42) return 4; + + // Post-decrement (returns original) + a = 42; + b = a--; + if (b != 42) return 5; + + // Post-decrement (side effect) + a = 43; + a--; + if (a != 42) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("ushort_incdec", code), 0); +} + +// ============================================================================ +// Unsigned Short: Ternary and Comma Operators +// ============================================================================ + +#[test] +fn ushort_ternary_comma_operators() { + let code = r#" +int main(void) { + unsigned short a, b; + + // Ternary - true branch + a = 1; + if ((a ? 42 : 0) != 42) return 1; + + // Ternary - false branch + a = 0; + if ((a ? 0 : 42) != 42) return 2; + + // Comma operator + a = 1; + b = (a = 10, a + 32); + if (b != 42) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("ushort_ternary_comma", code), 0); +} + +// ============================================================================ +// Unsigned Short: Complex Expressions +// ============================================================================ + +#[test] +fn ushort_complex_expressions() { + let code = r#" +int main(void) { + unsigned short a, b, c, result; + + // Mixed arithmetic with precedence + a = 10; b = 5; c = 2; + result = a + b * c - 3; // 10 + 10 - 3 = 17 + if (result != 17) return 1; + + // Chained comparison + a = 5; b = 10; c = 15; + if (((a < b) && (b < c)) != 1) return 2; + + // Mixed bitwise and arithmetic + a = 8; b = 3; + result = ((a | b) & 0x0F) + (a >> 1); // (11 & 15) + 4 = 15 + if (result != 15) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("ushort_complex", code), 0); +} + +// ============================================================================ +// Short: Structs, Functions, and Initializers +// ============================================================================ + +#[test] +fn short_structs_functions_initializers() { + let code = r#" +struct short_container { + short value; + short second; +}; + +struct ushort_container { + unsigned short value; +}; + +short double_short(short x) { + return x * 2; +} + +unsigned short double_ushort(unsigned short x) { + return x * 2; +} + +int sum_container(struct short_container c) { + return c.value + c.second; +} + +struct short_container make_container(short a, short b) { + struct short_container c; + c.value = a; + c.second = b; + return c; +} + +int main(void) { + // Variable initializers + short x = 42; + if (x != 42) return 1; + + unsigned short ux = 100; + if (ux != 100) return 2; + + // Multiple variable declarations with initializers + short a = 10, b = 20, c = 30; + if (a + b + c != 60) return 3; + + // Struct member access + struct short_container sc; + sc.value = 1000; + sc.second = 500; + if (sc.value != 1000) return 4; + if (sc.second != 500) return 5; + + struct ushort_container usc; + usc.value = 60000; + if (usc.value != 60000) return 6; + + // Function with short parameter and return + short doubled = double_short(21); + if (doubled != 42) return 7; + + // Function with unsigned short parameter and return + unsigned short udoubled = double_ushort(50); + if (udoubled != 100) return 8; + + // Negative short + short neg = -100; + if (double_short(neg) != -200) return 9; + + // Struct as function parameter + struct short_container param; + param.value = 100; + param.second = 50; + if (sum_container(param) != 150) return 10; + + // Struct as function return value + struct short_container returned = make_container(300, 200); + if (returned.value != 300) return 11; + if (returned.second != 200) return 12; + + // Pointer to struct + struct short_container target; + struct short_container *ptr = ⌖ + ptr->value = 2000; + ptr->second = 1000; + if (target.value != 2000) return 13; + if (target.second != 1000) return 14; + + // Short range edge cases + short max_short = 32767; + if (max_short != 32767) return 15; + + short min_short = -32768; + if (min_short != -32768) return 16; + + unsigned short max_ushort = 65535; + if (max_ushort != 65535) return 17; + + return 0; +} +"#; + assert_eq!(compile_and_run("short_advanced", code), 0); +} diff --git a/cc/tests/datatypes/struct_type.rs b/cc/tests/datatypes/struct_type.rs new file mode 100644 index 000000000..d197f7f6f --- /dev/null +++ b/cc/tests/datatypes/struct_type.rs @@ -0,0 +1,271 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for `struct` composite type +// + +use crate::common::compile_and_run; + +// ============================================================================ +// Basic Struct: Declaration, member access, assignment +// ============================================================================ + +#[test] +fn struct_basic() { + let code = r#" +int main(void) { + struct point { + int x; + int y; + }; + + struct point p; + + // Member assignment + p.x = 10; + p.y = 20; + + // Member read + if (p.x != 10) return 1; + if (p.y != 20) return 2; + + // Member arithmetic + if (p.x + p.y != 30) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("struct_basic", code), 0); +} + +// ============================================================================ +// Nested Structs: Struct containing another struct +// ============================================================================ + +#[test] +fn struct_nested() { + let code = r#" +int main(void) { + struct point { + int x; + int y; + }; + + struct rect { + struct point top_left; + struct point bottom_right; + }; + + struct rect r; + + // Assign nested struct members + r.top_left.x = 0; + r.top_left.y = 0; + r.bottom_right.x = 100; + r.bottom_right.y = 50; + + // Read nested struct members + if (r.top_left.x != 0) return 1; + if (r.top_left.y != 0) return 2; + if (r.bottom_right.x != 100) return 3; + if (r.bottom_right.y != 50) return 4; + + // Compute width and height + int width; + int height; + width = r.bottom_right.x - r.top_left.x; + height = r.bottom_right.y - r.top_left.y; + if (width != 100) return 5; + if (height != 50) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("struct_nested", code), 0); +} + +// ============================================================================ +// Struct Assignment: Copy whole struct to another variable +// ============================================================================ + +#[test] +fn struct_assignment() { + let code = r#" +int main(void) { + struct point { + int x; + int y; + }; + + struct point p1; + struct point p2; + + // Initialize p1 + p1.x = 10; + p1.y = 20; + + // Copy p1 to p2 + p2 = p1; + + // Verify p2 has the copied values + if (p2.x != 10) return 1; + if (p2.y != 20) return 2; + + // Modify p1 and verify p2 is independent + p1.x = 100; + if (p2.x != 10) return 3; // p2 should still be 10 + + return 0; +} +"#; + assert_eq!(compile_and_run("struct_assignment", code), 0); +} + +// ============================================================================ +// Struct as Function Parameter: Pass struct by value +// ============================================================================ + +#[test] +fn struct_param() { + let code = r#" +struct point { + int x; + int y; +}; + +int sum_point(struct point p) { + return p.x + p.y; +} + +int main(void) { + struct point p; + p.x = 10; + p.y = 20; + + int result; + result = sum_point(p); + if (result != 30) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("struct_param", code), 0); +} + +// ============================================================================ +// Struct as Function Return: Return struct by value +// ============================================================================ + +#[test] +fn struct_return() { + let code = r#" +struct point { + int x; + int y; +}; + +struct point make_point(int x, int y) { + struct point p; + p.x = x; + p.y = y; + return p; +} + +int main(void) { + struct point p; + p = make_point(10, 20); + + if (p.x != 10) return 1; + if (p.y != 20) return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("struct_return", code), 0); +} + +// ============================================================================ +// Array of Structs +// ============================================================================ + +#[test] +fn struct_array() { + let code = r#" +struct point { + int x; + int y; +}; + +int main(void) { + struct point points[3]; + + // Initialize array elements + points[0].x = 0; + points[0].y = 0; + points[1].x = 10; + points[1].y = 20; + points[2].x = 30; + points[2].y = 40; + + // Verify values + if (points[0].x != 0) return 1; + if (points[0].y != 0) return 2; + if (points[1].x != 10) return 3; + if (points[1].y != 20) return 4; + if (points[2].x != 30) return 5; + if (points[2].y != 40) return 6; + + // Sum all x values + int sum; + sum = points[0].x + points[1].x + points[2].x; + if (sum != 40) return 7; + + return 0; +} +"#; + assert_eq!(compile_and_run("struct_array", code), 0); +} + +// ============================================================================ +// Pointer to Struct: Arrow operator (->) +// ============================================================================ + +#[test] +fn struct_pointer() { + let code = r#" +struct point { + int x; + int y; +}; + +int main(void) { + struct point p; + struct point *ptr; + + ptr = &p; + + // Write through pointer using -> + ptr->x = 10; + ptr->y = 20; + + // Read through pointer using -> + if (ptr->x != 10) return 1; + if (ptr->y != 20) return 2; + + // Verify p was modified + if (p.x != 10) return 3; + if (p.y != 20) return 4; + + // Arithmetic with -> + if (ptr->x + ptr->y != 30) return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("struct_pointer", code), 0); +} diff --git a/cc/tests/datatypes/typedef_type.rs b/cc/tests/datatypes/typedef_type.rs new file mode 100644 index 000000000..4e8b0bbe4 --- /dev/null +++ b/cc/tests/datatypes/typedef_type.rs @@ -0,0 +1,377 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for `typedef` type aliasing +// + +use crate::common::compile_and_run; + +// ============================================================================ +// Basic Typedef: Simple type alias for int +// ============================================================================ + +#[test] +fn typedef_basic_int() { + let code = r#" +typedef int myint; + +int main(void) { + myint x; + x = 42; + if (x != 42) return 1; + return 0; +} +"#; + assert_eq!(compile_and_run("typedef_basic_int", code), 0); +} + +// ============================================================================ +// Typedef for unsigned types +// ============================================================================ + +#[test] +fn typedef_unsigned() { + let code = r#" +typedef unsigned int uint; + +int main(void) { + uint x; + x = 100; + if (x != 100) return 1; + return 0; +} +"#; + assert_eq!(compile_and_run("typedef_unsigned", code), 0); +} + +// ============================================================================ +// Typedef for pointer types +// ============================================================================ + +#[test] +fn typedef_pointer() { + let code = r#" +typedef int *intptr; + +int main(void) { + int x; + intptr p; + + x = 42; + p = &x; + + if (*p != 42) return 1; + + *p = 100; + if (x != 100) return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("typedef_pointer", code), 0); +} + +// ============================================================================ +// Typedef for struct types +// ============================================================================ + +#[test] +fn typedef_struct() { + let code = r#" +typedef struct { + int x; + int y; +} Point; + +int main(void) { + Point p; + + p.x = 10; + p.y = 20; + + if (p.x != 10) return 1; + if (p.y != 20) return 2; + if (p.x + p.y != 30) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("typedef_struct", code), 0); +} + +// ============================================================================ +// Typedef for named struct +// ============================================================================ + +#[test] +fn typedef_named_struct() { + let code = r#" +struct point { + int x; + int y; +}; + +typedef struct point Point; + +int main(void) { + Point p; + + p.x = 10; + p.y = 20; + + if (p.x != 10) return 1; + if (p.y != 20) return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("typedef_named_struct", code), 0); +} + +// ============================================================================ +// Chained typedef: typedef of typedef +// ============================================================================ + +#[test] +fn typedef_chained() { + let code = r#" +typedef int myint; +typedef myint myint2; + +int main(void) { + myint2 x; + x = 42; + if (x != 42) return 1; + return 0; +} +"#; + assert_eq!(compile_and_run("typedef_chained", code), 0); +} + +// ============================================================================ +// Typedef for arrays +// ============================================================================ + +#[test] +fn typedef_array() { + let code = r#" +typedef int IntArray[10]; + +int main(void) { + IntArray arr; + + arr[0] = 1; + arr[1] = 2; + arr[9] = 10; + + if (arr[0] != 1) return 1; + if (arr[1] != 2) return 2; + if (arr[9] != 10) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("typedef_array", code), 0); +} + +// ============================================================================ +// Typedef used in function parameter +// ============================================================================ + +#[test] +fn typedef_function_param() { + let code = r#" +typedef int myint; + +int double_it(myint x) { + return x * 2; +} + +int main(void) { + myint n; + n = 21; + if (double_it(n) != 42) return 1; + return 0; +} +"#; + assert_eq!(compile_and_run("typedef_function_param", code), 0); +} + +// ============================================================================ +// Typedef used as function return type +// ============================================================================ + +#[test] +fn typedef_function_return() { + let code = r#" +typedef int myint; + +myint get_value(void) { + return 42; +} + +int main(void) { + myint x; + x = get_value(); + if (x != 42) return 1; + return 0; +} +"#; + assert_eq!(compile_and_run("typedef_function_return", code), 0); +} + +// ============================================================================ +// Local typedef (inside function) +// ============================================================================ + +#[test] +fn typedef_local() { + let code = r#" +int main(void) { + typedef int localint; + localint x; + x = 42; + if (x != 42) return 1; + return 0; +} +"#; + assert_eq!(compile_and_run("typedef_local", code), 0); +} + +// ============================================================================ +// Multiple typedefs in one declaration +// ============================================================================ + +#[test] +fn typedef_multiple() { + let code = r#" +typedef int INT, *INTPTR; + +int main(void) { + INT x; + INTPTR p; + + x = 42; + p = &x; + + if (x != 42) return 1; + if (*p != 42) return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("typedef_multiple", code), 0); +} + +// ============================================================================ +// Typedef for char +// ============================================================================ + +#[test] +fn typedef_char() { + let code = r#" +typedef char byte; + +int main(void) { + byte b; + b = 65; + if (b != 65) return 1; + return 0; +} +"#; + assert_eq!(compile_and_run("typedef_char", code), 0); +} + +// ============================================================================ +// Typedef with const qualifier +// ============================================================================ + +#[test] +fn typedef_const() { + let code = r#" +typedef int myint; + +int main(void) { + const myint x = 42; + if (x != 42) return 1; + return 0; +} +"#; + assert_eq!(compile_and_run("typedef_const", code), 0); +} + +// ============================================================================ +// Typedef for struct pointer (common pattern) +// ============================================================================ + +#[test] +fn typedef_struct_pointer() { + let code = r#" +typedef struct node { + int value; +} Node, *NodePtr; + +int main(void) { + Node n; + NodePtr p; + + n.value = 42; + p = &n; + + if (p->value != 42) return 1; + + p->value = 100; + if (n.value != 100) return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("typedef_struct_pointer", code), 0); +} + +// ============================================================================ +// Typedef in sizeof +// ============================================================================ + +#[test] +fn typedef_sizeof() { + let code = r#" +typedef int myint; +typedef long long mylong; + +int main(void) { + if (sizeof(myint) != sizeof(int)) return 1; + if (sizeof(mylong) != sizeof(long long)) return 2; + return 0; +} +"#; + assert_eq!(compile_and_run("typedef_sizeof", code), 0); +} + +// ============================================================================ +// Typedef in cast +// ============================================================================ + +#[test] +fn typedef_cast() { + let code = r#" +typedef long mylong; + +int main(void) { + int x; + mylong y; + + x = 42; + y = (mylong)x; + + if (y != 42) return 1; + return 0; +} +"#; + assert_eq!(compile_and_run("typedef_cast", code), 0); +} diff --git a/cc/tests/datatypes/union_type.rs b/cc/tests/datatypes/union_type.rs new file mode 100644 index 000000000..a58cd03fe --- /dev/null +++ b/cc/tests/datatypes/union_type.rs @@ -0,0 +1,414 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for `union` composite type +// + +use crate::common::compile_and_run; + +// ============================================================================ +// Basic Union: Declaration, member access, assignment +// ============================================================================ + +#[test] +fn union_basic() { + let code = r#" +int main(void) { + union data { + int i; + float f; + char c; + }; + + union data d; + + // Member assignment + d.i = 42; + + // Member read + if (d.i != 42) return 1; + + // Assign different member + d.c = 'A'; + if (d.c != 'A') return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("union_basic", code), 0); +} + +// ============================================================================ +// Union Size: Size should be max of all members +// ============================================================================ + +#[test] +fn union_size() { + let code = r#" +int main(void) { + union data { + char c; // 1 byte + int i; // 4 bytes + double d; // 8 bytes + }; + + // Union size should be size of largest member (double = 8) + if (sizeof(union data) != 8) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("union_size", code), 0); +} + +// ============================================================================ +// Union Type Punning: All members share the same memory +// ============================================================================ + +#[test] +fn union_type_punning() { + let code = r#" +int main(void) { + union data { + int i; + char bytes[4]; + }; + + union data d; + d.i = 0x01020304; + + // On little-endian, bytes[0] should be 0x04 (LSB) + if (d.bytes[0] != 0x04) return 1; + if (d.bytes[3] != 0x01) return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("union_type_punning", code), 0); +} + +// ============================================================================ +// Union Assignment: Copy whole union to another variable +// ============================================================================ + +#[test] +fn union_assignment() { + let code = r#" +int main(void) { + union data { + int i; + float f; + }; + + union data d1; + union data d2; + + // Initialize d1 + d1.i = 12345; + + // Copy d1 to d2 + d2 = d1; + + // Verify d2 has the copied value + if (d2.i != 12345) return 1; + + // Modify d1 and verify d2 is independent + d1.i = 99999; + if (d2.i != 12345) return 2; // d2 should still be 12345 + + return 0; +} +"#; + assert_eq!(compile_and_run("union_assignment", code), 0); +} + +// ============================================================================ +// Union as Function Parameter: Pass union by value +// ============================================================================ + +#[test] +fn union_param() { + let code = r#" +union data { + int i; + float f; +}; + +int get_int(union data d) { + return d.i; +} + +int main(void) { + union data d; + d.i = 789; + + int result; + result = get_int(d); + if (result != 789) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("union_param", code), 0); +} + +// ============================================================================ +// Union as Function Return: Return union by value +// ============================================================================ + +#[test] +fn union_return() { + let code = r#" +union data { + int i; + float f; +}; + +union data make_data(int val) { + union data d; + d.i = val; + return d; +} + +int main(void) { + union data d; + d = make_data(456); + + if (d.i != 456) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("union_return", code), 0); +} + +// ============================================================================ +// Array of Unions +// ============================================================================ + +#[test] +fn union_array() { + let code = r#" +union data { + int i; + float f; +}; + +int main(void) { + union data arr[3]; + + // Initialize array elements + arr[0].i = 10; + arr[1].i = 20; + arr[2].i = 30; + + // Verify values + if (arr[0].i != 10) return 1; + if (arr[1].i != 20) return 2; + if (arr[2].i != 30) return 3; + + // Sum all values + int sum; + sum = arr[0].i + arr[1].i + arr[2].i; + if (sum != 60) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("union_array", code), 0); +} + +// ============================================================================ +// Pointer to Union: Arrow operator (->) +// ============================================================================ + +#[test] +fn union_pointer() { + let code = r#" +union data { + int i; + float f; +}; + +int main(void) { + union data d; + union data *ptr; + + ptr = &d; + + // Write through pointer using -> + ptr->i = 555; + + // Read through pointer using -> + if (ptr->i != 555) return 1; + + // Verify d was modified + if (d.i != 555) return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("union_pointer", code), 0); +} + +// ============================================================================ +// Nested Union in Struct +// ============================================================================ + +#[test] +fn union_nested_in_struct() { + let code = r#" +int main(void) { + struct tagged_value { + int type; + union { + int i; + float f; + char c; + } data; + }; + + struct tagged_value tv; + tv.type = 1; + tv.data.i = 100; + + if (tv.type != 1) return 1; + if (tv.data.i != 100) return 2; + + // Change to char type + tv.type = 3; + tv.data.c = 'X'; + if (tv.data.c != 'X') return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("union_nested_in_struct", code), 0); +} + +// ============================================================================ +// Union Containing Struct +// ============================================================================ + +#[test] +fn union_containing_struct() { + let code = r#" +int main(void) { + union variant { + int i; + struct { + int x; + int y; + } point; + }; + + union variant v; + v.point.x = 10; + v.point.y = 20; + + if (v.point.x != 10) return 1; + if (v.point.y != 20) return 2; + + // Writing to i should overwrite part of point + v.i = 999; + if (v.i != 999) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("union_containing_struct", code), 0); +} + +// ============================================================================ +// Union with Different Integer Sizes +// ============================================================================ + +#[test] +fn union_integer_sizes() { + let code = r#" +int main(void) { + union sizes { + char c; + short s; + int i; + long l; + }; + + union sizes u; + + // Write long, read back + u.l = 0x0123456789ABCDEFLL; + if (u.l != 0x0123456789ABCDEFLL) return 1; + + // Check smaller members see lower bytes (little-endian) + u.l = 0x0000000000000042LL; + if (u.c != 0x42) return 2; + if (u.s != 0x42) return 3; + if (u.i != 0x42) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("union_integer_sizes", code), 0); +} + +// ============================================================================ +// Forward Declared Union +// ============================================================================ + +#[test] +fn union_forward_decl() { + let code = r#" +union data; // Forward declaration + +union data { + int i; + float f; +}; + +int main(void) { + union data d; + d.i = 42; + if (d.i != 42) return 1; + return 0; +} +"#; + assert_eq!(compile_and_run("union_forward_decl", code), 0); +} + +// ============================================================================ +// Union Pointer Arithmetic +// ============================================================================ + +#[test] +fn union_pointer_arithmetic() { + let code = r#" +union data { + int i; + double d; // 8 bytes +}; + +int main(void) { + union data arr[3]; + union data *p = arr; + + arr[0].i = 100; + arr[1].i = 200; + arr[2].i = 300; + + if (p->i != 100) return 1; + p++; // Should advance by sizeof(union data) = 8 + if (p->i != 200) return 2; + p++; + if (p->i != 300) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("union_pointer_arithmetic", code), 0); +} diff --git a/cc/tests/datatypes/void_type.rs b/cc/tests/datatypes/void_type.rs new file mode 100644 index 000000000..c4ce77803 --- /dev/null +++ b/cc/tests/datatypes/void_type.rs @@ -0,0 +1,631 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for C99 `void` type +// +// C99 void semantics: +// - void is an incomplete type that cannot be completed +// - Used for functions that don't return a value +// - void* is a generic pointer that can point to any object type +// - (void) parameter list indicates a function takes no arguments +// - Cast to (void) discards a value +// - sizeof(void) is undefined behavior (but some compilers allow it as 1) +// + +use crate::common::compile_and_run; + +// ============================================================================ +// Void Functions - Basic +// ============================================================================ + +#[test] +fn void_function_basic() { + let code = r#" +void do_nothing(void) { + // Empty function body +} + +int main(void) { + do_nothing(); + return 0; +} +"#; + assert_eq!(compile_and_run("void_func_basic", code), 0); +} + +#[test] +fn void_function_with_statements() { + let code = r#" +void set_value(int *dest, int val) { + int local = val; + *dest = local; +} + +void modify_ptr(int *p, int val) { + *p = val; +} + +int main(void) { + int x = 0; + modify_ptr(&x, 42); + if (x != 42) return 1; + + modify_ptr(&x, 100); + if (x != 100) return 2; + + set_value(&x, 200); + if (x != 200) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("void_func_stmts", code), 0); +} + +#[test] +fn void_function_explicit_return() { + let code = r#" +void with_return(void) { + return; +} + +void with_early_return(int condition) { + if (condition) { + return; + } + // Fall through +} + +int main(void) { + with_return(); + with_early_return(1); + with_early_return(0); + return 0; +} +"#; + assert_eq!(compile_and_run("void_func_return", code), 0); +} + +#[test] +fn void_function_implicit_return() { + let code = r#" +void no_return_stmt(void) { + int x = 1; + int y = 2; + // No explicit return - should work for void functions +} + +int main(void) { + no_return_stmt(); + return 0; +} +"#; + assert_eq!(compile_and_run("void_func_implicit", code), 0); +} + +// ============================================================================ +// Void Functions - Parameters +// ============================================================================ + +#[test] +fn void_function_with_params() { + let code = r#" +void take_int(int x) { + int local = x; +} + +void take_multiple(int a, int b, int c) { + int sum = a + b + c; +} + +void take_pointer(int *p) { + *p = *p + 1; +} + +int main(void) { + take_int(42); + take_multiple(1, 2, 3); + + int x = 10; + take_pointer(&x); + if (x != 11) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("void_func_params", code), 0); +} + +#[test] +fn void_function_void_parameter() { + let code = r#" +// (void) explicitly means no parameters +void no_params(void) { +} + +int main(void) { + no_params(); + return 0; +} +"#; + assert_eq!(compile_and_run("void_func_void_param", code), 0); +} + +// ============================================================================ +// Void Pointer (void*) +// ============================================================================ + +#[test] +fn void_pointer_basic() { + let code = r#" +int main(void) { + int x = 42; + void *vp; + + // Assign address to void pointer + vp = &x; + + // Convert back to int pointer + int *ip = vp; + + if (*ip != 42) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("void_ptr_basic", code), 0); +} + +#[test] +fn void_pointer_multiple_types() { + let code = r#" +int main(void) { + void *vp; + + // Point to int + int i = 42; + vp = &i; + int *ip = vp; + if (*ip != 42) return 1; + + // Point to char + char c = 'A'; + vp = &c; + char *cp = vp; + if (*cp != 'A') return 2; + + // Point to long + long l = 1234567890L; + vp = &l; + long *lp = vp; + if (*lp != 1234567890L) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("void_ptr_types", code), 0); +} + +#[test] +fn void_pointer_function_param() { + let code = r#" +void set_int(void *ptr, int val) { + int *ip = ptr; + *ip = val; +} + +void set_char(void *ptr, char val) { + char *cp = ptr; + *cp = val; +} + +int main(void) { + int x = 0; + set_int(&x, 42); + if (x != 42) return 1; + + char c = 0; + set_char(&c, 'Z'); + if (c != 'Z') return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("void_ptr_param", code), 0); +} + +#[test] +fn void_pointer_function_return() { + let code = r#" +void *get_ptr(int *p) { + return p; +} + +int main(void) { + int x = 100; + void *vp = get_ptr(&x); + int *ip = vp; + + if (*ip != 100) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("void_ptr_return", code), 0); +} + +#[test] +fn void_pointer_array() { + let code = r#" +int main(void) { + int arr[3]; + arr[0] = 10; + arr[1] = 20; + arr[2] = 30; + + void *vp = arr; + int *ip = vp; + + if (ip[0] != 10) return 1; + if (ip[1] != 20) return 2; + if (ip[2] != 30) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("void_ptr_array", code), 0); +} + +#[test] +fn void_pointer_comparison() { + let code = r#" +int main(void) { + int x = 1; + int y = 2; + + void *vp1 = &x; + void *vp2 = &x; + void *vp3 = &y; + + // Same address + if (vp1 != vp2) return 1; + + // Different address + if (vp1 == vp3) return 2; + + // Null comparison + void *null_ptr = 0; + if (null_ptr != 0) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("void_ptr_compare", code), 0); +} + +// ============================================================================ +// Cast to Void +// ============================================================================ + +#[test] +fn void_cast_discard_result() { + let code = r#" +int get_value(void) { + return 42; +} + +int main(void) { + // Cast to void discards result + (void)get_value(); + + // Multiple discards + (void)get_value(); + (void)get_value(); + + // Discard variable + int x = 10; + (void)x; + + // Discard expression + (void)(1 + 2 + 3); + + return 0; +} +"#; + assert_eq!(compile_and_run("void_cast_discard", code), 0); +} + +#[test] +fn void_cast_suppress_warning() { + let code = r#" +// Common pattern: cast to void to suppress unused parameter warning +void unused_param(int x) { + (void)x; // Explicitly ignore parameter +} + +int main(void) { + unused_param(42); + return 0; +} +"#; + assert_eq!(compile_and_run("void_cast_suppress", code), 0); +} + +// ============================================================================ +// Void Function Return Void Call +// ============================================================================ + +#[test] +fn void_return_void_call() { + let code = r#" +void inner(void) { + // Do nothing +} + +void outer(void) { + return inner(); // void function returning void call +} + +int main(void) { + outer(); + return 0; +} +"#; + assert_eq!(compile_and_run("void_return_void", code), 0); +} + +#[test] +fn void_chained_calls() { + let code = r#" +void step1(int *p) { + *p = *p + 1; +} + +void step2(int *p) { + *p = *p * 2; +} + +void step3(int *p) { + *p = *p - 3; +} + +int main(void) { + int x = 5; + + step1(&x); // 5 + 1 = 6 + if (x != 6) return 1; + + step2(&x); // 6 * 2 = 12 + if (x != 12) return 2; + + step3(&x); // 12 - 3 = 9 + if (x != 9) return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("void_chained", code), 0); +} + +// ============================================================================ +// Void in Structs +// ============================================================================ + +#[test] +fn void_pointer_in_struct() { + let code = r#" +struct container { + void *data; + int size; +}; + +int main(void) { + int value = 42; + struct container c; + c.data = &value; + c.size = 4; + + int *ip = c.data; + if (*ip != 42) return 1; + if (c.size != 4) return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("void_ptr_struct", code), 0); +} + +#[test] +fn void_pointer_struct_param() { + let code = r#" +struct wrapper { + void *ptr; +}; + +void set_data(struct wrapper *w, void *data) { + w->ptr = data; +} + +void *get_data(struct wrapper *w) { + return w->ptr; +} + +int main(void) { + struct wrapper w; + int x = 100; + + set_data(&w, &x); + + int *result = get_data(&w); + if (*result != 100) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("void_ptr_struct_param", code), 0); +} + +// ============================================================================ +// Void Function Pointers +// ============================================================================ + +// Note: Function pointer declarations like `void (*fp)(int *)` are not yet +// supported by the parser. This test is commented out until that feature +// is implemented. +// +// #[test] +// fn void_function_pointer() { +// let code = r#" +// void action1(int *p) { +// *p = 10; +// } +// +// void action2(int *p) { +// *p = 20; +// } +// +// int main(void) { +// void (*fp)(int *); +// int x = 0; +// +// fp = action1; +// fp(&x); +// if (x != 10) return 1; +// +// fp = action2; +// fp(&x); +// if (x != 20) return 2; +// +// return 0; +// } +// "#; +// assert_eq!(compile_and_run("void_func_ptr", code), 0); +// } + +// ============================================================================ +// Void with Control Flow +// ============================================================================ + +#[test] +fn void_with_conditionals() { + let code = r#" +void conditional_action(int *p, int condition) { + if (condition) { + *p = 1; + return; + } else { + *p = 0; + return; + } +} + +int main(void) { + int x; + + conditional_action(&x, 1); + if (x != 1) return 1; + + conditional_action(&x, 0); + if (x != 0) return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("void_conditional", code), 0); +} + +#[test] +fn void_with_loops() { + let code = r#" +void fill_array(int *arr, int size, int value) { + int i; + for (i = 0; i < size; i = i + 1) { + arr[i] = value; + } +} + +void sum_array(int *arr, int size, int *result) { + int i; + *result = 0; + for (i = 0; i < size; i = i + 1) { + *result = *result + arr[i]; + } +} + +int main(void) { + int arr[5]; + int sum; + + fill_array(arr, 5, 10); + sum_array(arr, 5, &sum); + + // 5 * 10 = 50 + if (sum != 50) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("void_loops", code), 0); +} + +// ============================================================================ +// Edge Cases +// ============================================================================ + +#[test] +fn void_nested_calls() { + let code = r#" +void level3(int *p) { + *p = *p + 1; +} + +void level2(int *p) { + level3(p); + level3(p); +} + +void level1(int *p) { + level2(p); + level2(p); +} + +int main(void) { + int x = 0; + level1(&x); + // level1 calls level2 twice + // each level2 calls level3 twice + // total: 4 increments + if (x != 4) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("void_nested", code), 0); +} + +#[test] +fn void_recursive() { + let code = r#" +void countdown(int *p, int n) { + if (n <= 0) { + return; + } + *p = *p + 1; + countdown(p, n - 1); +} + +int main(void) { + int count = 0; + countdown(&count, 5); + if (count != 5) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("void_recursive", code), 0); +} diff --git a/cc/tests/features/alloca.rs b/cc/tests/features/alloca.rs new file mode 100644 index 000000000..0dd445d1f --- /dev/null +++ b/cc/tests/features/alloca.rs @@ -0,0 +1,153 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for __builtin_alloca (dynamic stack allocation) +// +// Tests are aggregated to reduce compile/link cycles while maintaining coverage. +// + +use crate::common::compile_and_run; + +// ============================================================================ +// __builtin_alloca: Basic tests +// ============================================================================ + +#[test] +fn alloca_basic() { + let code = r#" +int main(void) { + // Test 1: Basic allocation and write/read + char *buf = __builtin_alloca(64); + buf[0] = 'A'; + buf[1] = 'B'; + buf[2] = 'C'; + buf[3] = '\0'; + if (buf[0] != 'A') return 1; + if (buf[1] != 'B') return 2; + if (buf[2] != 'C') return 3; + + // Test 2: Multiple allocations + int *arr1 = __builtin_alloca(sizeof(int) * 4); + int *arr2 = __builtin_alloca(sizeof(int) * 4); + + // Write to first array + arr1[0] = 100; + arr1[1] = 200; + arr1[2] = 300; + arr1[3] = 400; + + // Write to second array + arr2[0] = 1000; + arr2[1] = 2000; + arr2[2] = 3000; + arr2[3] = 4000; + + // Verify first array wasn't corrupted + if (arr1[0] != 100) return 4; + if (arr1[1] != 200) return 5; + if (arr1[2] != 300) return 6; + if (arr1[3] != 400) return 7; + + // Verify second array + if (arr2[0] != 1000) return 8; + if (arr2[1] != 2000) return 9; + if (arr2[2] != 3000) return 10; + if (arr2[3] != 4000) return 11; + + return 0; +} +"#; + assert_eq!(compile_and_run("alloca_basic", code), 0); +} + +#[test] +fn alloca_computed_size() { + let code = r#" +int test_alloca(int n) { + // Allocate n integers on the stack + int *arr = __builtin_alloca(n * sizeof(int)); + + // Initialize + for (int i = 0; i < n; i++) { + arr[i] = i * 10; + } + + // Sum and verify + int sum = 0; + for (int i = 0; i < n; i++) { + sum += arr[i]; + } + + return sum; +} + +int main(void) { + // Test with size 5: sum = 0 + 10 + 20 + 30 + 40 = 100 + int result = test_alloca(5); + if (result != 100) return 1; + + // Test with size 10: sum = 0 + 10 + 20 + ... + 90 = 450 + result = test_alloca(10); + if (result != 450) return 2; + + return 0; +} +"#; + assert_eq!(compile_and_run("alloca_computed_size", code), 0); +} + +#[test] +fn alloca_alignment() { + let code = r#" +int main(void) { + // Allocate various sizes and verify alignment + // All pointers should be 16-byte aligned + + void *p1 = __builtin_alloca(1); + void *p2 = __builtin_alloca(7); + void *p3 = __builtin_alloca(16); + void *p4 = __builtin_alloca(17); + void *p5 = __builtin_alloca(100); + + // Check 16-byte alignment (pointer & 0xF should be 0) + if (((long)p1 & 0xF) != 0) return 1; + if (((long)p2 & 0xF) != 0) return 2; + if (((long)p3 & 0xF) != 0) return 3; + if (((long)p4 & 0xF) != 0) return 4; + if (((long)p5 & 0xF) != 0) return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("alloca_alignment", code), 0); +} + +#[test] +fn alloca_in_loop() { + let code = r#" +int main(void) { + int total = 0; + + // Allocate in a loop - memory should be reclaimed on function return + for (int i = 0; i < 5; i++) { + int *p = __builtin_alloca(sizeof(int) * 4); + p[0] = i; + p[1] = i * 2; + p[2] = i * 3; + p[3] = i * 4; + total += p[0] + p[1] + p[2] + p[3]; + } + + // Sum: for i=0: 0, i=1: 10, i=2: 20, i=3: 30, i=4: 40 = 100 + if (total != 100) return 1; + + return 0; +} +"#; + assert_eq!(compile_and_run("alloca_in_loop", code), 0); +} diff --git a/cc/tests/features/bswap.rs b/cc/tests/features/bswap.rs new file mode 100644 index 000000000..6fefb96c2 --- /dev/null +++ b/cc/tests/features/bswap.rs @@ -0,0 +1,115 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for byte-swapping builtins (__builtin_bswap16/32/64) +// +// Tests are aggregated to reduce compile/link cycles while maintaining coverage. +// + +use crate::common::compile_and_run; + +// ============================================================================ +// __builtin_bswap16: All 16-bit byte swap tests in one binary +// ============================================================================ + +#[test] +fn bswap16_comprehensive() { + let code = r#" +int main(void) { + // Test 1: Basic swap (0xABCD -> 0xCDAB) + unsigned short val = 0xABCD; + unsigned short result = __builtin_bswap16(val); + if (result != 0xCDAB) return 1; + + // Test 2: Identity - swapping twice gives original value + val = 0x1234; + result = __builtin_bswap16(__builtin_bswap16(val)); + if (result != val) return 2; + + // Test 3: Constant argument + result = __builtin_bswap16(0xFF00); + if (result != 0x00FF) return 3; + + // Test 4: Zero and max values + if (__builtin_bswap16(0x0000) != 0x0000) return 4; + if (__builtin_bswap16(0xFFFF) != 0xFFFF) return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("bswap16_comprehensive", code), 0); +} + +// ============================================================================ +// __builtin_bswap32: All 32-bit byte swap tests in one binary +// ============================================================================ + +#[test] +fn bswap32_comprehensive() { + let code = r#" +int main(void) { + // Test 1: Basic swap (0x12345678 -> 0x78563412) + unsigned int val = 0x12345678; + unsigned int result = __builtin_bswap32(val); + if (result != 0x78563412) return 1; + + // Test 2: Identity - swapping twice gives original value + val = 0xDEADBEEF; + result = __builtin_bswap32(__builtin_bswap32(val)); + if (result != val) return 2; + + // Test 3: Constant argument + result = __builtin_bswap32(0xFF000000); + if (result != 0x000000FF) return 3; + + // Test 4: Zero and max values + if (__builtin_bswap32(0x00000000) != 0x00000000) return 4; + if (__builtin_bswap32(0xFFFFFFFF) != 0xFFFFFFFF) return 5; + + // Test 5: Network byte order conversion + unsigned int network_val = 0x01020304; // big-endian: 1.2.3.4 + unsigned int host_val = __builtin_bswap32(network_val); + if (host_val != 0x04030201) return 6; + + return 0; +} +"#; + assert_eq!(compile_and_run("bswap32_comprehensive", code), 0); +} + +// ============================================================================ +// __builtin_bswap64: All 64-bit byte swap tests in one binary +// ============================================================================ + +#[test] +fn bswap64_comprehensive() { + let code = r#" +int main(void) { + // Test 1: Basic swap (0x0102030405060708 -> 0x0807060504030201) + unsigned long long val = 0x0102030405060708ULL; + unsigned long long result = __builtin_bswap64(val); + if (result != 0x0807060504030201ULL) return 1; + + // Test 2: Identity - swapping twice gives original value + val = 0xDEADBEEFCAFEBABEULL; + result = __builtin_bswap64(__builtin_bswap64(val)); + if (result != val) return 2; + + // Test 3: Constant argument + result = __builtin_bswap64(0xFF00000000000000ULL); + if (result != 0x00000000000000FFULL) return 3; + + // Test 4: Zero and max values + if (__builtin_bswap64(0x0000000000000000ULL) != 0x0000000000000000ULL) return 4; + if (__builtin_bswap64(0xFFFFFFFFFFFFFFFFULL) != 0xFFFFFFFFFFFFFFFFULL) return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("bswap64_comprehensive", code), 0); +} diff --git a/cc/tests/features/constant_p.rs b/cc/tests/features/constant_p.rs new file mode 100644 index 000000000..865ab32c6 --- /dev/null +++ b/cc/tests/features/constant_p.rs @@ -0,0 +1,68 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for __builtin_constant_p builtin +// + +use crate::common::compile_and_run; + +#[test] +fn constant_p_comprehensive() { + let code = r#" +int main(void) { + // Test 1: Integer literal - should be constant + if (__builtin_constant_p(42) != 1) return 1; + + // Test 2: Character literal - should be constant + if (__builtin_constant_p('x') != 1) return 2; + + // Test 3: Constant expression (arithmetic) - should be constant + if (__builtin_constant_p(10 + 20) != 1) return 3; + + // Test 4: Constant expression (comparison) - should be constant + if (__builtin_constant_p(5 < 10) != 1) return 4; + + // Test 5: Constant expression (ternary) - should be constant + if (__builtin_constant_p(1 ? 100 : 200) != 1) return 5; + + // Test 6: Variable - should NOT be constant + int x = 10; + if (__builtin_constant_p(x) != 0) return 6; + + // Test 7: Pointer to variable - should NOT be constant + if (__builtin_constant_p(&x) != 0) return 7; + + // Test 8: Complex constant expression - should be constant + if (__builtin_constant_p((5 * 3) + (8 / 2) - 1) != 1) return 8; + + // Test 9: Negative constant - should be constant + if (__builtin_constant_p(-42) != 1) return 9; + + // Test 10: Bitwise operations on constants - should be constant + if (__builtin_constant_p(0xFF & 0x0F) != 1) return 10; + + // Test 11: Shift operations on constants - should be constant + if (__builtin_constant_p(1 << 4) != 1) return 11; + + // Test 12: Use in conditional - common pattern + int result; + if (__builtin_constant_p(42)) { + result = 1; + } else { + result = 0; + } + if (result != 1) return 12; + + // Test 13: Zero literal - should be constant + if (__builtin_constant_p(0) != 1) return 13; + + return 0; +} +"#; + assert_eq!(compile_and_run("constant_p_comprehensive", code), 0); +} diff --git a/cc/tests/features/debug.rs b/cc/tests/features/debug.rs new file mode 100644 index 000000000..77fe92a67 --- /dev/null +++ b/cc/tests/features/debug.rs @@ -0,0 +1,273 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Integration tests for debug info generation (-g flag) +// + +use crate::common::create_c_file; +use plib::testing::run_test_base; +use std::process::Command; + +/// Test that -g flag generates debug sections in object file +#[test] +fn test_debug_sections_generated() { + let c_file = create_c_file( + "debug_test", + r#" +int foo(int x) { + return x + 1; +} + +int main() { + return foo(41); +} +"#, + ); + let c_path = c_file.path().to_path_buf(); + let obj_path = std::env::temp_dir().join("pcc_debug_test.o"); + + // Compile with -g -c to produce object file + let output = run_test_base( + "pcc", + &vec![ + "-g".to_string(), + "-c".to_string(), + "-o".to_string(), + obj_path.to_string_lossy().to_string(), + c_path.to_string_lossy().to_string(), + ], + &[], + ); + + assert!( + output.status.success(), + "pcc -g -c failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + // Check for debug sections using platform-specific tools + #[cfg(target_os = "macos")] + { + // On macOS, use otool to check for __DWARF segment + let otool_output = Command::new("otool") + .args(["-l", obj_path.to_str().unwrap()]) + .output() + .expect("failed to run otool"); + + let otool_stdout = String::from_utf8_lossy(&otool_output.stdout); + assert!( + otool_stdout.contains("__debug_line") || otool_stdout.contains("__DWARF"), + "No debug sections found in object file. otool output:\n{}", + otool_stdout + ); + } + + #[cfg(target_os = "linux")] + { + // On Linux, use objdump or readelf to check for .debug sections + let objdump_output = Command::new("objdump") + .args(["-h", obj_path.to_str().unwrap()]) + .output() + .expect("failed to run objdump"); + + let objdump_stdout = String::from_utf8_lossy(&objdump_output.stdout); + assert!( + objdump_stdout.contains(".debug_line") || objdump_stdout.contains(".debug_info"), + "No debug sections found in object file. objdump output:\n{}", + objdump_stdout + ); + } + + // Cleanup + let _ = std::fs::remove_file(&obj_path); +} + +/// Test that basic CFI directives (startproc/endproc) are emitted by default +/// Detailed CFI (cfi_def_cfa, cfi_offset) are only emitted with -g +#[test] +fn test_cfi_directives_default() { + let c_file = create_c_file( + "cfi_test", + r#" +int main() { + return 42; +} +"#, + ); + let c_path = c_file.path().to_path_buf(); + + // Compile with --dump-asm to see generated assembly + let output = run_test_base( + "pcc", + &vec![ + "--dump-asm".to_string(), + c_path.to_string_lossy().to_string(), + ], + &[], + ); + + assert!( + output.status.success(), + "pcc --dump-asm failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let asm = String::from_utf8_lossy(&output.stdout); + + // Check for basic CFI directives (always emitted by default) + assert!( + asm.contains(".cfi_startproc"), + "Missing .cfi_startproc in assembly output" + ); + assert!( + asm.contains(".cfi_endproc"), + "Missing .cfi_endproc in assembly output" + ); + // Detailed CFI (cfi_def_cfa, cfi_offset) are only emitted with -g + assert!( + !asm.contains(".cfi_def_cfa"), + "Unexpected .cfi_def_cfa in assembly output without -g" + ); +} + +/// Test that --fno-unwind-tables disables CFI directives +#[test] +fn test_cfi_directives_disabled() { + let c_file = create_c_file( + "no_cfi_test", + r#" +int main() { + return 42; +} +"#, + ); + let c_path = c_file.path().to_path_buf(); + + // Compile with --dump-asm --fno-unwind-tables + let output = run_test_base( + "pcc", + &vec![ + "--dump-asm".to_string(), + "--fno-unwind-tables".to_string(), + c_path.to_string_lossy().to_string(), + ], + &[], + ); + + assert!( + output.status.success(), + "pcc --dump-asm --fno-unwind-tables failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let asm = String::from_utf8_lossy(&output.stdout); + + // Check that CFI directives are NOT present + assert!( + !asm.contains(".cfi_startproc"), + "Unexpected .cfi_startproc in assembly output with --fno-unwind-tables" + ); + assert!( + !asm.contains(".cfi_endproc"), + "Unexpected .cfi_endproc in assembly output with --fno-unwind-tables" + ); +} + +/// Test that -g emits .file, .loc, and detailed CFI directives +#[test] +fn test_debug_file_loc_directives() { + let c_file = create_c_file( + "file_loc_test", + r#" +int main() { + return 42; +} +"#, + ); + let c_path = c_file.path().to_path_buf(); + + // Compile with -g --dump-asm to see generated assembly + let output = run_test_base( + "pcc", + &vec![ + "-g".to_string(), + "--dump-asm".to_string(), + c_path.to_string_lossy().to_string(), + ], + &[], + ); + + assert!( + output.status.success(), + "pcc -g --dump-asm failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let asm = String::from_utf8_lossy(&output.stdout); + + // Check for .file directive + assert!( + asm.contains(".file 1"), + "Missing .file directive in assembly output with -g" + ); + + // Check for .loc directive + assert!( + asm.contains(".loc 1"), + "Missing .loc directive in assembly output with -g" + ); + + // Check for detailed CFI directives (only with -g) + assert!( + asm.contains(".cfi_def_cfa"), + "Missing .cfi_def_cfa in assembly output with -g" + ); +} + +/// Test that without -g, .file IS emitted (unconditionally) but .loc is NOT +#[test] +fn test_no_debug_no_loc() { + let c_file = create_c_file( + "no_debug_test", + r#" +int main() { + return 42; +} +"#, + ); + let c_path = c_file.path().to_path_buf(); + + // Compile without -g + let output = run_test_base( + "pcc", + &vec![ + "--dump-asm".to_string(), + c_path.to_string_lossy().to_string(), + ], + &[], + ); + + assert!( + output.status.success(), + "pcc --dump-asm failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let asm = String::from_utf8_lossy(&output.stdout); + + // .file is now emitted unconditionally + assert!( + asm.contains(".file 1"), + "Missing .file directive in assembly output (should be unconditional)" + ); + // .loc should NOT be present without -g + assert!( + !asm.contains(".loc 1"), + "Unexpected .loc directive in assembly output without -g" + ); +} diff --git a/cc/tests/features/mod.rs b/cc/tests/features/mod.rs new file mode 100644 index 000000000..bb6604e4e --- /dev/null +++ b/cc/tests/features/mod.rs @@ -0,0 +1,18 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for compiler builtin features +// + +mod alloca; +mod bswap; +mod constant_p; +mod debug; +mod storage; +mod types_compatible; +mod varargs; diff --git a/cc/tests/features/storage.rs b/cc/tests/features/storage.rs new file mode 100644 index 000000000..74bf37875 --- /dev/null +++ b/cc/tests/features/storage.rs @@ -0,0 +1,232 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for storage class specifiers (static, extern) +// +// C99 defines storage class specifiers that affect: +// - Storage duration (static vs automatic) +// - Linkage (external vs internal vs none) +// + +use crate::common::compile_and_run; + +// ============================================================================ +// Static Local Variables: Block scope with static duration +// ============================================================================ + +#[test] +fn static_local_basic() { + let code = r#" +// Static local variable persists across function calls +int counter(void) { + static int count = 0; + count = count + 1; + return count; +} + +int main(void) { + int a = counter(); // Should be 1 + int b = counter(); // Should be 2 + int c = counter(); // Should be 3 + + // Return the sum: 1 + 2 + 3 = 6 + return a + b + c; +} +"#; + assert_eq!(compile_and_run("static_local_basic", code), 6); +} + +#[test] +fn static_local_initialized() { + let code = r#" +// Static local with non-zero initializer +int get_base(void) { + static int base = 100; + base = base + 1; + return base; +} + +int main(void) { + int a = get_base(); // 101 + int b = get_base(); // 102 + if (a != 101) return 1; + if (b != 102) return 2; + return 0; +} +"#; + assert_eq!(compile_and_run("static_local_initialized", code), 0); +} + +#[test] +fn static_local_multiple_functions() { + let code = r#" +// Each function has its own static local +int func_a(void) { + static int x = 10; + x = x + 1; + return x; +} + +int func_b(void) { + static int x = 20; // Different static, same local name + x = x + 2; + return x; +} + +int main(void) { + int a1 = func_a(); // 11 + int b1 = func_b(); // 22 + int a2 = func_a(); // 12 + int b2 = func_b(); // 24 + + if (a1 != 11) return 1; + if (b1 != 22) return 2; + if (a2 != 12) return 3; + if (b2 != 24) return 4; + return 0; +} +"#; + assert_eq!(compile_and_run("static_local_multiple_functions", code), 0); +} + +#[test] +fn static_local_uninitialized() { + let code = r#" +// Uninitialized static local is zero-initialized +int get_count(void) { + static int count; // Should be 0 + count = count + 5; + return count; +} + +int main(void) { + int a = get_count(); // 5 + int b = get_count(); // 10 + if (a != 5) return 1; + if (b != 10) return 2; + return 0; +} +"#; + assert_eq!(compile_and_run("static_local_uninitialized", code), 0); +} + +// ============================================================================ +// Static Functions: Internal linkage +// ============================================================================ + +#[test] +fn static_function_basic() { + let code = r#" +// Static function has internal linkage (not visible to linker) +static int helper(int x) { + return x * 2; +} + +int main(void) { + return helper(21); // Should return 42 +} +"#; + assert_eq!(compile_and_run("static_function_basic", code), 42); +} + +#[test] +fn static_function_multiple() { + let code = r#" +// Multiple static functions +static int double_it(int x) { + return x * 2; +} + +static int triple_it(int x) { + return x * 3; +} + +int main(void) { + int a = double_it(10); // 20 + int b = triple_it(10); // 30 + return a + b; // 50 +} +"#; + assert_eq!(compile_and_run("static_function_multiple", code), 50); +} + +#[test] +fn static_function_calls_static() { + let code = r#" +// Static function calling another static function +static int add_one(int x) { + return x + 1; +} + +static int add_two(int x) { + return add_one(add_one(x)); +} + +int main(void) { + return add_two(40); // 42 +} +"#; + assert_eq!(compile_and_run("static_function_calls_static", code), 42); +} + +// ============================================================================ +// Static Global Variables (already tested in globals/mod.rs but verify here) +// ============================================================================ + +#[test] +fn static_global_basic() { + let code = r#" +// Static global has internal linkage +static int global_val = 42; + +int main(void) { + return global_val; +} +"#; + assert_eq!(compile_and_run("static_global_basic", code), 42); +} + +#[test] +fn static_global_modified() { + let code = r#" +// Static global can be modified +static int counter = 0; + +void increment(void) { + counter = counter + 1; +} + +int main(void) { + increment(); + increment(); + increment(); + return counter; // Should be 3 +} +"#; + assert_eq!(compile_and_run("static_global_modified", code), 3); +} + +// ============================================================================ +// Extern declarations +// ============================================================================ + +#[test] +fn extern_declaration_basic() { + let code = r#" +// Global variable (external linkage by default) +int shared_value = 42; + +// Extern declaration references the same variable +extern int shared_value; + +int main(void) { + return shared_value; +} +"#; + assert_eq!(compile_and_run("extern_declaration_basic", code), 42); +} diff --git a/cc/tests/features/types_compatible.rs b/cc/tests/features/types_compatible.rs new file mode 100644 index 000000000..f06fc0be5 --- /dev/null +++ b/cc/tests/features/types_compatible.rs @@ -0,0 +1,139 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for __builtin_types_compatible_p builtin +// + +use crate::common::compile_and_run; + +#[test] +fn types_compatible_comprehensive() { + let code = r#" +int main(void) { + // Test 1: Same basic types are compatible + if (__builtin_types_compatible_p(int, int) != 1) return 1; + + // Test 2: Different basic types are not compatible + if (__builtin_types_compatible_p(int, long) != 0) return 2; + + // Test 3: char and int are not compatible + if (__builtin_types_compatible_p(char, int) != 0) return 3; + + // Test 4: const int and int ARE compatible (qualifiers ignored) + if (__builtin_types_compatible_p(const int, int) != 1) return 4; + + // Test 5: volatile int and int ARE compatible (qualifiers ignored) + if (__builtin_types_compatible_p(volatile int, int) != 1) return 5; + + // Test 6: const volatile int and int ARE compatible + if (__builtin_types_compatible_p(const volatile int, int) != 1) return 6; + + // Test 7: unsigned int and int are NOT compatible + if (__builtin_types_compatible_p(unsigned int, int) != 0) return 7; + + // Test 8: Pointer types - same base type + if (__builtin_types_compatible_p(int*, int*) != 1) return 8; + + // Test 9: Pointer types - different base types + if (__builtin_types_compatible_p(int*, char*) != 0) return 9; + + // Test 10: long long and long long + if (__builtin_types_compatible_p(long long, long long) != 1) return 10; + + // Test 11: unsigned long and unsigned long + if (__builtin_types_compatible_p(unsigned long, unsigned long) != 1) return 11; + + // Test 12: float and double + if (__builtin_types_compatible_p(float, double) != 0) return 12; + + // Test 13: void* and void* + if (__builtin_types_compatible_p(void*, void*) != 1) return 13; + + // Test 14: void* and int* are NOT compatible + if (__builtin_types_compatible_p(void*, int*) != 0) return 14; + + // Test 15: short and short + if (__builtin_types_compatible_p(short, short) != 1) return 15; + + // Test 16: Use in conditional expression (compile-time constant) + int result; + if (__builtin_types_compatible_p(int, int)) { + result = 1; + } else { + result = 0; + } + if (result != 1) return 16; + + // Test 17: double pointer types + if (__builtin_types_compatible_p(int**, int**) != 1) return 17; + + // Test 18: mixed signedness pointers + if (__builtin_types_compatible_p(unsigned int*, int*) != 0) return 18; + + return 0; +} +"#; + assert_eq!(compile_and_run("types_compatible_comprehensive", code), 0); +} + +#[test] +fn types_compatible_structs() { + let code = r#" +struct Point { + int x; + int y; +}; + +struct Size { + int width; + int height; +}; + +int main(void) { + // Test 1: Same struct type is compatible + if (__builtin_types_compatible_p(struct Point, struct Point) != 1) return 1; + + // Test 2: Different struct types are NOT compatible (even with same layout) + if (__builtin_types_compatible_p(struct Point, struct Size) != 0) return 2; + + // Test 3: Pointer to same struct + if (__builtin_types_compatible_p(struct Point*, struct Point*) != 1) return 3; + + // Test 4: const struct and struct (qualifiers ignored) + if (__builtin_types_compatible_p(const struct Point, struct Point) != 1) return 4; + + return 0; +} +"#; + assert_eq!(compile_and_run("types_compatible_structs", code), 0); +} + +#[test] +fn types_compatible_arrays() { + let code = r#" +int main(void) { + // Test 1: Same size arrays are compatible + if (__builtin_types_compatible_p(int[10], int[10]) != 1) return 1; + + // Test 2: Different size arrays are NOT compatible + if (__builtin_types_compatible_p(int[10], int[20]) != 0) return 2; + + // Test 3: Different element types are NOT compatible + if (__builtin_types_compatible_p(int[10], char[10]) != 0) return 3; + + // Test 4: Array and non-array are NOT compatible + if (__builtin_types_compatible_p(int[10], int) != 0) return 4; + + // Test 5: Pointer to array types + if (__builtin_types_compatible_p(int*[5], int*[5]) != 1) return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("types_compatible_arrays", code), 0); +} diff --git a/cc/tests/features/varargs.rs b/cc/tests/features/varargs.rs new file mode 100644 index 000000000..90faa95af --- /dev/null +++ b/cc/tests/features/varargs.rs @@ -0,0 +1,190 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for variadic function support (va_list, va_start, va_arg, va_end, va_copy) +// +// Tests are aggregated to reduce compile/link cycles while maintaining coverage. +// + +use crate::common::compile_and_run; + +// ============================================================================ +// Variadic function implementation tests (callee-side) +// Tests: va_list declaration, sum_ints, mixed_types, pointers, va_copy +// ============================================================================ + +#[test] +fn varargs_callee_comprehensive() { + let code = r#" +// Test 1: Simple va_list declaration (must compile and run) +void test_decl(void) { + __builtin_va_list ap; + (void)ap; // suppress unused warning +} + +// Test 2: Sum integers using va_arg +int sum_ints(int count, ...) { + __builtin_va_list ap; + __builtin_va_start(ap, count); + + int total = 0; + for (int i = 0; i < count; i++) { + total += __builtin_va_arg(ap, int); + } + + __builtin_va_end(ap); + return total; +} + +// Test 3: Mixed types (int + long + int) +long sum_mixed(int count, ...) { + __builtin_va_list ap; + __builtin_va_start(ap, count); + + long total = 0; + + if (count > 0) { + total += __builtin_va_arg(ap, int); + } + if (count > 1) { + total += __builtin_va_arg(ap, long); + } + if (count > 2) { + total += __builtin_va_arg(ap, int); + } + + __builtin_va_end(ap); + return total; +} + +// Test 4: Pointer arguments +int first_chars_sum(int count, ...) { + __builtin_va_list ap; + __builtin_va_start(ap, count); + + int total = 0; + for (int i = 0; i < count; i++) { + char *str = __builtin_va_arg(ap, char*); + if (str) { + total += str[0]; + } + } + + __builtin_va_end(ap); + return total; +} + +// Test 5: va_copy +int test_copy(int count, ...) { + __builtin_va_list ap, ap2; + __builtin_va_start(ap, count); + __builtin_va_copy(ap2, ap); + + int val1 = __builtin_va_arg(ap, int); + int val2 = __builtin_va_arg(ap2, int); + + __builtin_va_end(ap); + __builtin_va_end(ap2); + + if (val1 != val2) return -1; + return val1; +} + +int main(void) { + // Run test_decl (just needs to not crash) + test_decl(); + + // Test sum_ints + if (sum_ints(3, 10, 20, 12) != 42) return 1; + if (sum_ints(5, 1, 2, 3, 4, 5) != 15) return 2; + if (sum_ints(1, 100) != 100) return 3; + if (sum_ints(0) != 0) return 4; + + // Test sum_mixed + if (sum_mixed(3, 10, 20L, 12) != 42) return 5; + + // Test first_chars_sum: 'A' = 65, 'B' = 66, 'C' = 67 -> 198 + if (first_chars_sum(3, "ABC", "BCD", "CDE") != 198) return 6; + + // Test va_copy + if (test_copy(1, 42) != 42) return 7; + + return 0; +} +"#; + assert_eq!(compile_and_run("varargs_callee_comprehensive", code), 0); +} + +// ============================================================================ +// Variadic function caller tests (calling libc variadic functions) +// ============================================================================ + +#[test] +fn varargs_caller_comprehensive() { + let code = r#" +int sprintf(char *buf, const char *fmt, ...); + +int main(void) { + char buf[64]; + + // Test sprintf with single integer + sprintf(buf, "%d", 42); + if (buf[0] != '4' || buf[1] != '2') return 1; + + // Test sprintf with multiple args: "2+3=5" + sprintf(buf, "%d+%d=%d", 2, 3, 5); + if (buf[0] != '2') return 2; + if (buf[2] != '3') return 3; + if (buf[4] != '5') return 4; + + // Test sprintf with string + sprintf(buf, "%s", "hello"); + if (buf[0] != 'h' || buf[4] != 'o') return 5; + + return 0; +} +"#; + assert_eq!(compile_and_run("varargs_caller_comprehensive", code), 0); +} + +// ============================================================================ +// Variadic function calls with floating-point arguments +// Tests the AL register being set correctly (SysV x86-64 ABI requirement) +// ============================================================================ + +#[test] +fn varargs_float_args() { + let code = r#" +int sprintf(char *buf, const char *fmt, ...); + +int main(void) { + char buf[128]; + + // Test sprintf with double argument + // This tests that AL is correctly set to indicate XMM register usage + sprintf(buf, "%d", (int)3.14159); + if (buf[0] != '3') return 1; + + // Test with actual float formatting (requires libc printf float support) + // Just test that we can pass floats to variadic functions without crashing + double d = 42.5; + sprintf(buf, "%.0f", d); + // Check first char is '4' (from 42.5 -> "42" or "43") + if (buf[0] != '4') return 2; + + // Multiple float args + double a = 10.0; + double b = 20.0; + sprintf(buf, "%.0f,%.0f", a, b); + if (buf[0] != '1' || buf[3] != '2') return 3; + + return 0; +} +"#; + assert_eq!(compile_and_run("varargs_float_args", code), 0); +} diff --git a/cc/tests/globals/mod.rs b/cc/tests/globals/mod.rs new file mode 100644 index 000000000..3e7bea2cb --- /dev/null +++ b/cc/tests/globals/mod.rs @@ -0,0 +1,296 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Integration tests for global variables +// + +use crate::common::{cleanup_exe, compile, create_c_file, run}; + +// ============================================================================ +// Basic Global Variable Tests +// ============================================================================ + +#[test] +fn test_global_int_initialized() { + let c_file = create_c_file( + "global_int_init", + r#" +int global_counter = 42; +int main(void) { + return global_counter; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 42, "expected exit code 42"); + + cleanup_exe(&exe); +} + +#[test] +fn test_global_int_uninitialized() { + let c_file = create_c_file( + "global_int_uninit", + r#" +int global_uninit; +int main(void) { + return global_uninit; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + // Uninitialized global should be zero + assert_eq!(exit_code, 0, "expected exit code 0"); + + cleanup_exe(&exe); +} + +#[test] +fn test_global_static() { + let c_file = create_c_file( + "global_static", + r#" +static int static_var = 10; +int main(void) { + return static_var; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 10, "expected exit code 10"); + + cleanup_exe(&exe); +} + +#[test] +fn test_global_static_uninitialized() { + let c_file = create_c_file( + "global_static_uninit", + r#" +static int static_uninit; +int main(void) { + return static_uninit; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + // Static uninitialized should be zero + assert_eq!(exit_code, 0, "expected exit code 0"); + + cleanup_exe(&exe); +} + +// ============================================================================ +// Multiple Global Variables +// ============================================================================ + +#[test] +fn test_multiple_globals() { + let c_file = create_c_file( + "multiple_globals", + r#" +int a = 10; +int b = 20; +int c = 12; +int main(void) { + return a + b + c; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 42, "expected exit code 42"); + + cleanup_exe(&exe); +} + +#[test] +fn test_globals_different_types() { + let c_file = create_c_file( + "globals_diff_types", + r#" +char c = 1; +short s = 2; +int i = 3; +long l = 4; +int main(void) { + return c + s + i + l; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 10, "expected exit code 10"); + + cleanup_exe(&exe); +} + +// ============================================================================ +// Global Variable Modification +// ============================================================================ + +#[test] +fn test_global_modification() { + let c_file = create_c_file( + "global_modify", + r#" +int counter = 0; +int main(void) { + counter = 42; + return counter; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 42, "expected exit code 42"); + + cleanup_exe(&exe); +} + +#[test] +fn test_global_increment() { + let c_file = create_c_file( + "global_increment", + r#" +int counter = 40; +int main(void) { + counter = counter + 1; + counter = counter + 1; + return counter; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 42, "expected exit code 42"); + + cleanup_exe(&exe); +} + +// ============================================================================ +// Float/Double Global Variables +// ============================================================================ + +#[test] +fn test_global_double() { + let c_file = create_c_file( + "global_double", + r#" +double d = 42.5; +int main(void) { + return (int)d; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 42, "expected exit code 42"); + + cleanup_exe(&exe); +} + +#[test] +fn test_global_float() { + let c_file = create_c_file( + "global_float", + r#" +float f = 42.9f; +int main(void) { + return (int)f; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 42, "expected exit code 42"); + + cleanup_exe(&exe); +} + +// ============================================================================ +// Pointer Globals +// ============================================================================ + +#[test] +fn test_global_pointer_null() { + let c_file = create_c_file( + "global_ptr_null", + r#" +int *ptr = 0; +int main(void) { + return ptr ? 1 : 0; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 0, "expected exit code 0"); + + cleanup_exe(&exe); +} + +#[test] +fn test_global_pointer_to_global() { + let c_file = create_c_file( + "global_ptr_to_global", + r#" +int value = 42; +int *ptr; +int main(void) { + ptr = &value; + return *ptr; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 42, "expected exit code 42"); + + cleanup_exe(&exe); +} diff --git a/cc/tests/integration.rs b/cc/tests/integration.rs new file mode 100644 index 000000000..8900a117b --- /dev/null +++ b/cc/tests/integration.rs @@ -0,0 +1,16 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Integration test harness for pcc +// + +mod common; +mod datatypes; +mod features; +mod globals; +mod pcc; diff --git a/cc/tests/pcc/func_combined.c b/cc/tests/pcc/func_combined.c new file mode 100644 index 000000000..28a23bc72 --- /dev/null +++ b/cc/tests/pcc/func_combined.c @@ -0,0 +1,20 @@ +// Test: Combined internal and external function calls +// Expected exit code: 14 +// Expected stdout: Function calls work! + +int puts(char *s); + +int sum(int a, int b) { + return a + b; +} + +int double_it(int x) { + return x + x; +} + +int main(void) { + int x = sum(3, 4); // 7 + int y = double_it(x); // 14 + puts("Function calls work!"); + return y; +} diff --git a/cc/tests/pcc/func_internal.c b/cc/tests/pcc/func_internal.c new file mode 100644 index 000000000..d08a140de --- /dev/null +++ b/cc/tests/pcc/func_internal.c @@ -0,0 +1,16 @@ +// Test: Internal function calls +// Expected exit code: 14 + +int sum(int a, int b) { + return a + b; +} + +int double_it(int x) { + return x + x; +} + +int main(void) { + int x = sum(3, 4); // 7 + int y = double_it(x); // 14 + return y; +} diff --git a/cc/tests/pcc/func_libc.c b/cc/tests/pcc/func_libc.c new file mode 100644 index 000000000..0b7adac5d --- /dev/null +++ b/cc/tests/pcc/func_libc.c @@ -0,0 +1,10 @@ +// Test: External libc function call (puts) +// Expected exit code: 0 +// Expected stdout: Hello from libc! + +int puts(char *s); + +int main(void) { + puts("Hello from libc!"); + return 0; +} diff --git a/cc/tests/pcc/mod.rs b/cc/tests/pcc/mod.rs new file mode 100644 index 000000000..1bd4b3b5f --- /dev/null +++ b/cc/tests/pcc/mod.rs @@ -0,0 +1,852 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Integration tests for pcc - end-to-end compilation tests +// + +use crate::common::{cleanup_exe, compile, compile_test_file, create_c_file, run, run_with_output}; + +// ============================================================================ +// Basic Tests +// ============================================================================ + +#[test] +fn test_return_constant() { + let c_file = create_c_file( + "return_constant", + r#" +int main(void) { + return 42; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 42, "expected exit code 42"); + + cleanup_exe(&exe); +} + +#[test] +fn test_arithmetic() { + let c_file = create_c_file( + "arithmetic", + r#" +int main(void) { + int a = 10; + int b = 5; + int c = a + b * 2; + return c; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + // 10 + 5 * 2 = 10 + 10 = 20 + assert_eq!(exit_code, 20, "expected exit code 20"); + + cleanup_exe(&exe); +} + +#[test] +fn test_if_else() { + let c_file = create_c_file( + "if_else", + r#" +int main(void) { + int x = 10; + if (x > 5) { + return 1; + } else { + return 0; + } +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + // 10 > 5 is true, so should return 1 + assert_eq!(exit_code, 1, "expected exit code 1"); + + cleanup_exe(&exe); +} + +#[test] +fn test_subtraction() { + let c_file = create_c_file( + "subtraction", + r#" +int main(void) { + int a = 100; + int b = 58; + return a - b; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + // 100 - 58 = 42 + assert_eq!(exit_code, 42, "expected exit code 42"); + + cleanup_exe(&exe); +} + +#[test] +fn test_division() { + let c_file = create_c_file( + "division", + r#" +int main(void) { + int a = 84; + int b = 2; + return a / b; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + // 84 / 2 = 42 + assert_eq!(exit_code, 42, "expected exit code 42"); + + cleanup_exe(&exe); +} + +#[test] +fn test_modulo() { + let c_file = create_c_file( + "modulo", + r#" +int main(void) { + int a = 47; + int b = 10; + return a % b; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + // 47 % 10 = 7 + assert_eq!(exit_code, 7, "expected exit code 7"); + + cleanup_exe(&exe); +} + +#[test] +fn test_nested_if() { + let c_file = create_c_file( + "nested_if", + r#" +int main(void) { + int a = 10; + int b = 20; + if (a < b) { + if (b > 15) { + return 3; + } else { + return 2; + } + } else { + return 1; + } +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + // a < b is true (10 < 20), b > 15 is true (20 > 15), so return 3 + assert_eq!(exit_code, 3, "expected exit code 3"); + + cleanup_exe(&exe); +} + +#[test] +fn test_bitwise() { + let c_file = create_c_file( + "bitwise", + r#" +int main(void) { + int a = 0x0F; + int b = 0xF0; + int c = a | b; + return c & 0xFF; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + // 0x0F | 0xF0 = 0xFF, 0xFF & 0xFF = 0xFF = 255 + assert_eq!(exit_code, 255, "expected exit code 255"); + + cleanup_exe(&exe); +} + +// ============================================================================ +// Function Call Tests +// ============================================================================ + +/// Test: Internal function calls +#[test] +fn test_function_call_internal() { + let exe = compile_test_file("pcc", "func_internal.c"); + assert!(exe.is_some(), "compilation should succeed"); + + let exit_code = run(exe.as_ref().unwrap()); + // sum(3, 4) = 7, double_it(7) = 14 + assert_eq!(exit_code, 14, "expected exit code 14"); + + cleanup_exe(&exe); +} + +/// Test: External libc function call (puts) +#[test] +fn test_function_call_libc() { + let exe = compile_test_file("pcc", "func_libc.c"); + assert!(exe.is_some(), "compilation should succeed"); + + let (exit_code, stdout) = run_with_output(exe.as_ref().unwrap()); + assert_eq!(exit_code, 0, "expected exit code 0"); + assert!( + stdout.contains("Hello from libc!"), + "expected output containing 'Hello from libc!'" + ); + + cleanup_exe(&exe); +} + +/// Test: Combined internal and external function calls +#[test] +fn test_function_call_combined() { + let exe = compile_test_file("pcc", "func_combined.c"); + assert!(exe.is_some(), "compilation should succeed"); + + let (exit_code, stdout) = run_with_output(exe.as_ref().unwrap()); + // sum(3, 4) = 7, double_it(7) = 14 + assert_eq!(exit_code, 14, "expected exit code 14"); + assert!( + stdout.contains("Function calls work!"), + "expected output containing 'Function calls work!'" + ); + + cleanup_exe(&exe); +} + +// ============================================================================ +// Loop Tests +// ============================================================================ + +/// Test: While loop with break +#[test] +fn test_while_loop() { + let c_file = create_c_file( + "while_loop", + r#" +int main(void) { + int sum = 0; + int i = 1; + + // Basic while loop: sum 1..5 = 15 + while (i <= 5) { + sum = sum + i; + i = i + 1; + } + if (sum != 15) return 1; + + // While with break: count to 5 using infinite loop + break + i = 0; + while (1) { + i = i + 1; + if (i == 5) break; + } + if (i != 5) return 2; + + // While with continue: sum odd numbers 1-9 = 25 + sum = 0; + i = 0; + while (i < 10) { + i = i + 1; + if ((i % 2) == 0) continue; // Skip even + sum = sum + i; + } + if (sum != 25) return 3; + + return 0; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!( + exit_code, 0, + "while loop with break/continue: all tests should pass" + ); + cleanup_exe(&exe); +} + +/// Test: Do-while loop with break/continue +#[test] +fn test_do_while_loop() { + let c_file = create_c_file( + "do_while_loop", + r#" +int main(void) { + int sum = 0; + int i = 1; + + // Basic do-while: sum 1..5 = 15 + do { + sum = sum + i; + i = i + 1; + } while (i <= 5); + if (sum != 15) return 1; + + // Do-while with break: count to 5 + i = 0; + do { + i = i + 1; + if (i == 5) break; + } while (1); + if (i != 5) return 2; + + // Do-while with continue: sum odd 1-9 = 25 + sum = 0; + i = 0; + do { + i = i + 1; + if ((i % 2) == 0) continue; + sum = sum + i; + } while (i < 10); + if (sum != 25) return 3; + + return 0; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!( + exit_code, 0, + "do-while with break/continue: all tests should pass" + ); + cleanup_exe(&exe); +} + +/// Test: For loop with break/continue +#[test] +fn test_for_loop() { + let c_file = create_c_file( + "for_loop", + r#" +int main(void) { + int sum = 0; + int i; + + // Basic for loop: sum 1..5 = 15 + for (i = 1; i <= 5; i++) { + sum = sum + i; + } + if (sum != 15) return 1; + + // For with break: find first multiple of 7 + for (i = 1; i <= 100; i++) { + if ((i % 7) == 0) break; + } + if (i != 7) return 2; + + // For with continue: sum odd numbers 1-9 = 25 + sum = 0; + for (i = 1; i <= 9; i++) { + if ((i % 2) == 0) continue; + sum = sum + i; + } + if (sum != 25) return 3; + + // For with early break: sum until >= 10 (1+2+3+4 = 10) + sum = 0; + for (i = 1; i <= 100; i++) { + sum = sum + i; + if (sum >= 10) break; + } + if (sum != 10) return 4; + + return 0; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!( + exit_code, 0, + "for loop with break/continue: all tests should pass" + ); + cleanup_exe(&exe); +} + +/// Test: 2-level nested for loops +#[test] +fn test_nested_for_2_levels() { + let c_file = create_c_file( + "nested_for_2", + r#" +int main(void) { + int sum = 0; + int i, j; + for (i = 0; i < 3; i++) { + for (j = 0; j < 3; j++) { + sum++; + } + } + return sum; // 3*3 = 9 +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 9, "nested for (2 levels): expected 9"); + cleanup_exe(&exe); +} + +/// Test: 3-level nested for loops +#[test] +fn test_nested_for_3_levels() { + let c_file = create_c_file( + "nested_for_3", + r#" +int main(void) { + int sum = 0; + int i, j, k; + for (i = 0; i < 2; i++) { + for (j = 0; j < 3; j++) { + for (k = 0; k < 4; k++) { + sum++; + } + } + } + return sum; // 2*3*4 = 24 +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 24, "nested for (3 levels): expected 24"); + cleanup_exe(&exe); +} + +/// Test: Nested while loops +#[test] +fn test_nested_while() { + let c_file = create_c_file( + "nested_while", + r#" +int main(void) { + int sum = 0; + int i = 0; + int j; + while (i < 3) { + j = 0; + while (j < 3) { + sum++; + j++; + } + i++; + } + return sum; // 3*3 = 9 +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 9, "nested while: expected 9"); + cleanup_exe(&exe); +} + +/// Test: Nested do-while loops +#[test] +fn test_nested_do_while() { + let c_file = create_c_file( + "nested_do_while", + r#" +int main(void) { + int sum = 0; + int i = 0; + int j; + do { + j = 0; + do { + sum++; + j++; + } while (j < 2); + i++; + } while (i < 3); + return sum; // 3*2 = 6 +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 6, "nested do-while: expected 6"); + cleanup_exe(&exe); +} + +// ============================================================================ +// Switch Statement Tests +// ============================================================================ + +/// Test: Basic switch statement with cases and default +#[test] +fn test_switch_basic() { + let c_file = create_c_file( + "switch_basic", + r#" +int main(void) { + int x = 2; + int result = 0; + switch (x) { + case 1: + result = 10; + break; + case 2: + result = 20; + break; + case 3: + result = 30; + break; + default: + result = 99; + break; + } + if (result != 20) return 1; + return 0; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "switch_basic: compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 0, "switch_basic: case 2 should set result to 20"); + cleanup_exe(&exe); +} + +/// Test: Switch with fallthrough between cases +#[test] +fn test_switch_fallthrough() { + let c_file = create_c_file( + "switch_fallthrough", + r#" +int main(void) { + int x = 1; + int result = 0; + switch (x) { + case 1: + result = result + 1; + case 2: + result = result + 2; + break; + case 3: + result = 30; + break; + } + if (result != 3) return 1; // 1 + 2 from fallthrough + return 0; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!( + exe.is_some(), + "switch_fallthrough: compilation should succeed" + ); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!( + exit_code, 0, + "switch_fallthrough: case 1 should fall through to case 2" + ); + cleanup_exe(&exe); +} + +/// Test: Switch that uses default case +#[test] +fn test_switch_default() { + let c_file = create_c_file( + "switch_default", + r#" +int main(void) { + int x = 99; + int result = 0; + switch (x) { + case 1: + result = 10; + break; + case 2: + result = 20; + break; + default: + result = 42; + break; + } + if (result != 42) return 1; + return 0; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "switch_default: compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!( + exit_code, 0, + "switch_default: unmatched value should use default" + ); + cleanup_exe(&exe); +} + +/// Test: Switch with no default and no matching case +#[test] +fn test_switch_no_default() { + let c_file = create_c_file( + "switch_no_default", + r#" +int main(void) { + int x = 99; + int result = 5; + switch (x) { + case 1: + result = 10; + break; + case 2: + result = 20; + break; + } + if (result != 5) return 1; // No case matched, result unchanged + return 0; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!( + exe.is_some(), + "switch_no_default: compilation should succeed" + ); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!( + exit_code, 0, + "switch_no_default: unmatched value should skip switch body" + ); + cleanup_exe(&exe); +} + +/// Test: Switch with negative case values +#[test] +fn test_switch_negative_cases() { + let c_file = create_c_file( + "switch_negative", + r#" +int main(void) { + int x = -1; + int result = 0; + switch (x) { + case -2: + result = 1; + break; + case -1: + result = 2; + break; + case 0: + result = 3; + break; + case 1: + result = 4; + break; + } + if (result != 2) return 1; + return 0; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "switch_negative: compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 0, "switch_negative: case -1 should match"); + cleanup_exe(&exe); +} + +/// Test: Switch with zero case value +#[test] +fn test_switch_zero_case() { + let c_file = create_c_file( + "switch_zero", + r#" +int main(void) { + int x = 0; + int result = 99; + switch (x) { + case 0: + result = 0; + break; + case 1: + result = 1; + break; + } + if (result != 0) return 1; + return 0; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "switch_zero: compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 0, "switch_zero: case 0 should match"); + cleanup_exe(&exe); +} + +// ============================================================================ +// Enum Tests +// ============================================================================ + +/// Test: Basic enum with implicit values +#[test] +fn test_enum_basic() { + let c_file = create_c_file( + "enum_basic", + r#" +enum Color { RED, GREEN, BLUE }; +int main(void) { + enum Color c = GREEN; + if (c != 1) return 1; + if (RED != 0) return 2; + if (BLUE != 2) return 3; + return 0; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "enum_basic: compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!( + exit_code, 0, + "enum_basic: enum values should match expected" + ); + cleanup_exe(&exe); +} + +/// Test: Enum with explicit values +#[test] +fn test_enum_explicit_values() { + let c_file = create_c_file( + "enum_explicit", + r#" +enum Status { OK = 0, ERROR = 10, PENDING = 100 }; +int main(void) { + if (OK != 0) return 1; + if (ERROR != 10) return 2; + if (PENDING != 100) return 3; + return 0; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "enum_explicit: compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 0, "enum_explicit: explicit values should work"); + cleanup_exe(&exe); +} + +/// Test: Enum with negative values +#[test] +fn test_enum_negative_values() { + let c_file = create_c_file( + "enum_negative", + r#" +enum Level { LOW = -1, MEDIUM = 0, HIGH = 1 }; +int main(void) { + if (LOW != -1) return 1; + if (MEDIUM != 0) return 2; + if (HIGH != 1) return 3; + return 0; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "enum_negative: compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 0, "enum_negative: negative values should work"); + cleanup_exe(&exe); +} + +/// Test: Enum in switch statement +#[test] +fn test_enum_switch() { + let c_file = create_c_file( + "enum_switch", + r#" +enum Day { MON, TUE, WED }; +int main(void) { + enum Day d = TUE; + int result = 0; + switch (d) { + case MON: result = 1; break; + case TUE: result = 2; break; + case WED: result = 3; break; + } + if (result != 2) return 1; + return 0; +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "enum_switch: compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 0, "enum_switch: switch on enum should work"); + cleanup_exe(&exe); +} diff --git a/cc/token/lexer.rs b/cc/token/lexer.rs new file mode 100644 index 000000000..e0cbd2106 --- /dev/null +++ b/cc/token/lexer.rs @@ -0,0 +1,1385 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Lexer module for pcc - C99 Tokenizer +// Based on the sparse compiler design by Linus Torvalds +// + +use crate::diag; +use std::collections::HashMap; + +// Re-export Position for use by other modules +pub use crate::diag::Position; + +// ============================================================================ +// Token Types +// ============================================================================ + +/// Token types following sparse's model +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[allow(dead_code)] // Enum completeness for lexer model +pub enum TokenType { + Eof, + Error, + Ident, + Number, + Char, + WideChar, + String, + WideString, + Special, + StreamBegin, + StreamEnd, +} + +/// Special tokens (operators and punctuators) +/// Values >= SPECIAL_BASE are multi-character operators +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u32)] +pub enum SpecialToken { + // Single character - stored as their ASCII value + // Multi-character operators start at 256 + AddAssign = 256, // += + Increment, // ++ + SubAssign, // -= + Decrement, // -- + Arrow, // -> + MulAssign, // *= + DivAssign, // /= + ModAssign, // %= + Lte, // <= + Gte, // >= + Equal, // == + NotEqual, // != + LogicalAnd, // && + AndAssign, // &= + LogicalOr, // || + OrAssign, // |= + XorAssign, // ^= + HashHash, // ## + LeftShift, // << + RightShift, // >> + DotDot, // .. + ShlAssign, // <<= + ShrAssign, // >>= + Ellipsis, // ... +} + +impl SpecialToken { + pub const BASE: u32 = 256; +} + +// Position is imported from crate::diag + +// ============================================================================ +// Identifier Interning +// ============================================================================ + +/// Identifier intern table (string interning for identifiers) +pub struct IdentTable { + map: HashMap, + idents: Vec, +} + +impl IdentTable { + pub fn new() -> Self { + Self { + map: HashMap::new(), + idents: Vec::new(), + } + } + + /// Intern an identifier, returning its unique ID + pub fn intern(&mut self, name: &str) -> u32 { + if let Some(&id) = self.map.get(name) { + return id; + } + let id = self.idents.len() as u32; + self.idents.push(name.to_string()); + self.map.insert(name.to_string(), id); + id + } + + /// Get identifier name by ID + pub fn get(&self, id: u32) -> Option<&str> { + self.idents.get(id as usize).map(|s| s.as_str()) + } +} + +impl Default for IdentTable { + fn default() -> Self { + Self::new() + } +} + +// ============================================================================ +// Token Value +// ============================================================================ + +/// Token value - union-like enum following sparse's model +#[derive(Debug, Clone)] +pub enum TokenValue { + None, + Number(String), // Numeric literal as string (pp-number) + Ident(u32), // Identifier (interned ID) + Special(u32), // Operator/punctuator + String(String), // String literal content + Char(String), // Character literal content + WideString(String), // Wide string literal + WideChar(String), // Wide character literal +} + +// ============================================================================ +// Token +// ============================================================================ + +/// A C token +#[derive(Debug, Clone)] +pub struct Token { + pub typ: TokenType, + pub pos: Position, + pub value: TokenValue, +} + +impl Token { + pub fn new(typ: TokenType, pos: Position) -> Self { + Self { + typ, + pos, + value: TokenValue::None, + } + } + + pub fn with_value(typ: TokenType, pos: Position, value: TokenValue) -> Self { + Self { typ, pos, value } + } +} + +// ============================================================================ +// Character Classification +// ============================================================================ + +/// Character class flags (following sparse's cclass) +const LETTER: u8 = 1; +const DIGIT: u8 = 2; +const HEX: u8 = 4; +const EXP: u8 = 8; +const DOT: u8 = 16; +const VALID_SECOND: u8 = 32; // Can be second char of 2-char operator + +/// Character classification table +fn char_class(c: u8) -> u8 { + match c { + b'0'..=b'9' => DIGIT | HEX, + b'A'..=b'D' | b'F' => LETTER | HEX, + b'E' => LETTER | HEX | EXP, // E for exponent + b'G'..=b'O' => LETTER, + b'P' => LETTER | EXP, // P for hex float exponent + b'Q'..=b'Z' => LETTER, + b'a'..=b'd' | b'f' => LETTER | HEX, + b'e' => LETTER | HEX | EXP, + b'g'..=b'o' => LETTER, + b'p' => LETTER | EXP, + b'q'..=b'z' => LETTER, + b'_' => LETTER, + b'.' => DOT | VALID_SECOND, + b'=' | b'+' | b'-' | b'>' | b'<' | b'&' | b'|' | b'#' => VALID_SECOND, + _ => 0, + } +} + +#[inline] +fn is_digit(c: u8) -> bool { + char_class(c) & DIGIT != 0 +} + +#[inline] +fn is_letter_or_digit(c: u8) -> bool { + char_class(c) & (LETTER | DIGIT) != 0 +} + +// ============================================================================ +// Stream (Input Source) +// ============================================================================ + +// Stream management is now in crate::diag + +/// Stream table wrapper for backward compatibility +/// Uses the global thread-local StreamRegistry from diag +pub struct StreamTable; + +impl StreamTable { + pub fn new() -> Self { + diag::clear_streams(); + Self + } + + pub fn add(&mut self, name: String) -> u16 { + diag::init_stream(&name) + } +} + +impl Default for StreamTable { + fn default() -> Self { + Self::new() + } +} + +// ============================================================================ +// Tokenizer +// ============================================================================ + +const EOF: i32 = -1; + +/// C Tokenizer following sparse's design +pub struct Tokenizer<'a> { + // Input + buffer: &'a [u8], + offset: usize, + + // Position tracking + stream_id: u16, + line: u32, + col: u16, + newline: bool, + whitespace: bool, + + // Interning + idents: IdentTable, +} + +impl<'a> Tokenizer<'a> { + pub fn new(buffer: &'a [u8], stream_id: u16) -> Self { + Self { + buffer, + offset: 0, + stream_id, + line: 1, + col: 0, + newline: true, + whitespace: false, + idents: IdentTable::new(), + } + } + + /// Get current position + fn pos(&self) -> Position { + let mut pos = Position::new(self.stream_id, self.line, self.col); + pos.newline = self.newline; + pos.whitespace = self.whitespace; + pos + } + + /// Get next character, handling line splicing (backslash-newline) + fn nextchar(&mut self) -> i32 { + loop { + if self.offset >= self.buffer.len() { + return EOF; + } + + let c = self.buffer[self.offset] as i32; + self.offset += 1; + + // Handle carriage return + if c == b'\r' as i32 { + // Check for \r\n + if self.offset < self.buffer.len() && self.buffer[self.offset] == b'\n' { + self.offset += 1; + } + self.line += 1; + self.col = 0; + self.newline = true; + return b'\n' as i32; + } + + // Handle newline + if c == b'\n' as i32 { + self.line += 1; + self.col = 0; + self.newline = true; + return c; + } + + // Handle backslash (potential line splice) + if c == b'\\' as i32 && self.offset < self.buffer.len() { + let next = self.buffer[self.offset]; + if next == b'\n' { + // Line splice: skip backslash-newline + self.offset += 1; + self.line += 1; + self.col = 0; + continue; + } else if next == b'\r' { + // Line splice with \r or \r\n + self.offset += 1; + if self.offset < self.buffer.len() && self.buffer[self.offset] == b'\n' { + self.offset += 1; + } + self.line += 1; + self.col = 0; + continue; + } + } + + // Handle tab + if c == b'\t' as i32 { + self.col = (self.col + 8) & !7; // Round to next multiple of 8 + } else { + self.col += 1; + } + + return c; + } + } + + /// Peek at next character without consuming (handles line splicing) + fn peekchar(&self) -> i32 { + let mut offset = self.offset; + loop { + if offset >= self.buffer.len() { + return EOF; + } + let c = self.buffer[offset]; + + // Handle backslash (potential line splice) + if c == b'\\' && offset + 1 < self.buffer.len() { + let next = self.buffer[offset + 1]; + if next == b'\n' { + // Skip backslash-newline + offset += 2; + continue; + } else if next == b'\r' { + // Skip backslash-CR or backslash-CRLF + offset += 2; + if offset < self.buffer.len() && self.buffer[offset] == b'\n' { + offset += 1; + } + continue; + } + } + + // Handle \r as \n + if c == b'\r' { + return b'\n' as i32; + } + + return c as i32; + } + } + + /// Skip whitespace, tracking whitespace/newline flags + fn skip_whitespace(&mut self) -> i32 { + loop { + let c = self.nextchar(); + if c == EOF { + return EOF; + } + match c as u8 { + b' ' | b'\t' | b'\x0C' | b'\x0B' => { + self.whitespace = true; + } + b'\n' => { + self.newline = true; + self.whitespace = true; + } + _ => return c, + } + } + } + + /// Get a pp-number token + /// pp-number: digit | . digit | pp-number (digit|letter|.|e[+-]|E[+-]|p[+-]|P[+-]) + fn get_number(&mut self, first: u8) -> Token { + let pos = self.pos(); + let mut num = String::new(); + num.push(first as char); + + loop { + // Use peek to avoid consuming characters that would affect line/col tracking + let c = self.peekchar(); + if c == EOF { + break; + } + let cu = c as u8; + let class = char_class(cu); + + // Continue if digit, letter, or dot + if class & (DIGIT | LETTER | DOT) != 0 { + self.nextchar(); // Now consume it + num.push(cu as char); + + // Handle exponent sign (e+, e-, E+, E-, p+, p-, P+, P-) + if class & EXP != 0 { + let next = self.peekchar(); + if next == b'+' as i32 || next == b'-' as i32 { + num.push(self.nextchar() as u8 as char); + } + } + } else { + break; + } + } + + Token::with_value(TokenType::Number, pos, TokenValue::Number(num)) + } + + /// Get an identifier token + fn get_identifier(&mut self, first: u8) -> Token { + let pos = self.pos(); + let mut name = String::new(); + name.push(first as char); + + loop { + // Use peek to avoid consuming characters that would affect line/col tracking + let c = self.peekchar(); + if c == EOF { + break; + } + let cu = c as u8; + if is_letter_or_digit(cu) { + self.nextchar(); // Now consume it + name.push(cu as char); + } else { + // Check for L"..." or L'...' (wide string/char) + if name == "L" && (cu == b'"' || cu == b'\'') { + self.nextchar(); // Consume the quote + return self.get_string_or_char(cu, true); + } + break; + } + } + + let id = self.idents.intern(&name); + Token::with_value(TokenType::Ident, pos, TokenValue::Ident(id)) + } + + /// Get a string or character literal + fn get_string_or_char(&mut self, delim: u8, wide: bool) -> Token { + let pos = self.pos(); + let mut content = String::new(); + let mut escape = false; + + loop { + let c = self.nextchar(); + if c == EOF { + // Unterminated string/char + break; + } + let cu = c as u8; + + if escape { + content.push(cu as char); + escape = false; + continue; + } + + if cu == b'\\' { + content.push(cu as char); + escape = true; + continue; + } + + if cu == delim { + // End of literal + break; + } + + if cu == b'\n' { + // Error: newline in string/char literal + // For now, just end the literal + break; + } + + content.push(cu as char); + } + + let (typ, value) = if delim == b'"' { + if wide { + (TokenType::WideString, TokenValue::WideString(content)) + } else { + (TokenType::String, TokenValue::String(content)) + } + } else if wide { + (TokenType::WideChar, TokenValue::WideChar(content)) + } else { + (TokenType::Char, TokenValue::Char(content)) + }; + + Token::with_value(typ, pos, value) + } + + /// Skip a single-line comment (// ...) + fn skip_line_comment(&mut self) { + loop { + let c = self.nextchar(); + if c == EOF || c == b'\n' as i32 { + break; + } + } + } + + /// Skip a block comment (/* ... */) + fn skip_block_comment(&mut self) { + loop { + let c = self.nextchar(); + if c == EOF { + // Unterminated comment + break; + } + if c == b'*' as i32 { + let next = self.nextchar(); + if next == b'/' as i32 { + break; + } + // Don't unget - just continue looking for */ + } + } + } + + /// Get a special token (operator/punctuator) + fn get_special(&mut self, first: u8) -> Option { + let pos = self.pos(); + + // Check for string/char literals + if first == b'"' { + return Some(self.get_string_or_char(b'"', false)); + } + if first == b'\'' { + return Some(self.get_string_or_char(b'\'', false)); + } + + // Check for .digit (floating point number) + if first == b'.' { + let next = self.peekchar(); + if next != EOF && is_digit(next as u8) { + return Some(self.get_number(first)); + } + } + + // Check for comments + if first == b'/' { + let next = self.peekchar(); + if next == b'/' as i32 { + self.nextchar(); + self.skip_line_comment(); + return None; // No token, continue tokenizing + } + if next == b'*' as i32 { + self.nextchar(); + self.skip_block_comment(); + return None; // No token, continue tokenizing + } + } + + // Two-character operator lookup table + // Format: (first, second, code) + static TWO_CHAR_OPS: &[(u8, u8, u32)] = &[ + (b'+', b'=', SpecialToken::AddAssign as u32), + (b'+', b'+', SpecialToken::Increment as u32), + (b'-', b'=', SpecialToken::SubAssign as u32), + (b'-', b'-', SpecialToken::Decrement as u32), + (b'-', b'>', SpecialToken::Arrow as u32), + (b'*', b'=', SpecialToken::MulAssign as u32), + (b'/', b'=', SpecialToken::DivAssign as u32), + (b'%', b'=', SpecialToken::ModAssign as u32), + (b'<', b'=', SpecialToken::Lte as u32), + (b'>', b'=', SpecialToken::Gte as u32), + (b'=', b'=', SpecialToken::Equal as u32), + (b'!', b'=', SpecialToken::NotEqual as u32), + (b'&', b'&', SpecialToken::LogicalAnd as u32), + (b'&', b'=', SpecialToken::AndAssign as u32), + (b'|', b'|', SpecialToken::LogicalOr as u32), + (b'|', b'=', SpecialToken::OrAssign as u32), + (b'^', b'=', SpecialToken::XorAssign as u32), + (b'#', b'#', SpecialToken::HashHash as u32), + (b'<', b'<', SpecialToken::LeftShift as u32), + (b'>', b'>', SpecialToken::RightShift as u32), + (b'.', b'.', SpecialToken::DotDot as u32), + ]; + + // Check for two-character operators + let next = self.peekchar(); + if next != EOF { + let next_u8 = next as u8; + let class = char_class(next_u8); + + if class & VALID_SECOND != 0 { + for &(c0, c1, code) in TWO_CHAR_OPS { + if first == c0 && next_u8 == c1 { + self.nextchar(); // Consume second char + + // Check for three-character operators + let third = self.peekchar(); + if third != EOF { + let third_u8 = third as u8; + // <<= or >>= + if code == SpecialToken::LeftShift as u32 && third_u8 == b'=' { + self.nextchar(); + return Some(Token::with_value( + TokenType::Special, + pos, + TokenValue::Special(SpecialToken::ShlAssign as u32), + )); + } + if code == SpecialToken::RightShift as u32 && third_u8 == b'=' { + self.nextchar(); + return Some(Token::with_value( + TokenType::Special, + pos, + TokenValue::Special(SpecialToken::ShrAssign as u32), + )); + } + // ... + if code == SpecialToken::DotDot as u32 && third_u8 == b'.' { + self.nextchar(); + return Some(Token::with_value( + TokenType::Special, + pos, + TokenValue::Special(SpecialToken::Ellipsis as u32), + )); + } + } + + return Some(Token::with_value( + TokenType::Special, + pos, + TokenValue::Special(code), + )); + } + } + } + } + + // Single character operator + Some(Token::with_value( + TokenType::Special, + pos, + TokenValue::Special(first as u32), + )) + } + + /// Get one token + fn get_one_token(&mut self, c: u8) -> Option { + let class = char_class(c); + + if class & DIGIT != 0 { + return Some(self.get_number(c)); + } + + if class & LETTER != 0 { + return Some(self.get_identifier(c)); + } + + self.get_special(c) + } + + /// Tokenize the entire input, returning all tokens + pub fn tokenize(&mut self) -> Vec { + let mut tokens = Vec::new(); + + // Add stream begin token + tokens.push(Token::new(TokenType::StreamBegin, self.pos())); + + loop { + // Skip whitespace - this updates newline/whitespace flags + let c = self.skip_whitespace(); + if c == EOF { + break; + } + + if let Some(token) = self.get_one_token(c as u8) { + tokens.push(token); + // Reset flags for next token (only after we've captured position) + // Don't reset if comment was skipped - the comment may have consumed + // newlines that affect the next token's position flags + self.newline = false; + self.whitespace = false; + } + // If get_one_token returns None (comment), continue without resetting flags + } + + // Add stream end token + tokens.push(Token::new(TokenType::StreamEnd, self.pos())); + + tokens + } + + /// Get the identifier table (for looking up identifier names) + pub fn ident_table(&self) -> &IdentTable { + &self.idents + } + + /// Get mutable access to the identifier table (for preprocessing) + pub fn ident_table_mut(&mut self) -> &mut IdentTable { + &mut self.idents + } + + /// Take ownership of the identifier table (used by preprocessor tests) + #[cfg(test)] + pub fn into_ident_table(self) -> IdentTable { + self.idents + } +} + +// ============================================================================ +// Token Display +// ============================================================================ + +/// Display a special token +pub fn show_special(value: u32) -> String { + if value < SpecialToken::BASE { + // Single character + return (value as u8 as char).to_string(); + } + + match value { + x if x == SpecialToken::AddAssign as u32 => "+=".to_string(), + x if x == SpecialToken::Increment as u32 => "++".to_string(), + x if x == SpecialToken::SubAssign as u32 => "-=".to_string(), + x if x == SpecialToken::Decrement as u32 => "--".to_string(), + x if x == SpecialToken::Arrow as u32 => "->".to_string(), + x if x == SpecialToken::MulAssign as u32 => "*=".to_string(), + x if x == SpecialToken::DivAssign as u32 => "/=".to_string(), + x if x == SpecialToken::ModAssign as u32 => "%=".to_string(), + x if x == SpecialToken::Lte as u32 => "<=".to_string(), + x if x == SpecialToken::Gte as u32 => ">=".to_string(), + x if x == SpecialToken::Equal as u32 => "==".to_string(), + x if x == SpecialToken::NotEqual as u32 => "!=".to_string(), + x if x == SpecialToken::LogicalAnd as u32 => "&&".to_string(), + x if x == SpecialToken::AndAssign as u32 => "&=".to_string(), + x if x == SpecialToken::LogicalOr as u32 => "||".to_string(), + x if x == SpecialToken::OrAssign as u32 => "|=".to_string(), + x if x == SpecialToken::XorAssign as u32 => "^=".to_string(), + x if x == SpecialToken::HashHash as u32 => "##".to_string(), + x if x == SpecialToken::LeftShift as u32 => "<<".to_string(), + x if x == SpecialToken::RightShift as u32 => ">>".to_string(), + x if x == SpecialToken::DotDot as u32 => "..".to_string(), + x if x == SpecialToken::ShlAssign as u32 => "<<=".to_string(), + x if x == SpecialToken::ShrAssign as u32 => ">>=".to_string(), + x if x == SpecialToken::Ellipsis as u32 => "...".to_string(), + _ => format!("", value), + } +} + +/// Format a token for display +pub fn show_token(token: &Token, idents: &IdentTable) -> String { + match token.typ { + TokenType::Eof => "".to_string(), + TokenType::Error => "".to_string(), + TokenType::StreamBegin => "".to_string(), + TokenType::StreamEnd => "".to_string(), + TokenType::Ident => { + if let TokenValue::Ident(id) = &token.value { + idents.get(*id).unwrap_or("").to_string() + } else { + "".to_string() + } + } + TokenType::Number => { + if let TokenValue::Number(n) = &token.value { + n.clone() + } else { + "".to_string() + } + } + TokenType::String => { + if let TokenValue::String(s) = &token.value { + format!("\"{}\"", s) + } else { + "".to_string() + } + } + TokenType::WideString => { + if let TokenValue::WideString(s) = &token.value { + format!("L\"{}\"", s) + } else { + "".to_string() + } + } + TokenType::Char => { + if let TokenValue::Char(s) = &token.value { + format!("'{}'", s) + } else { + "".to_string() + } + } + TokenType::WideChar => { + if let TokenValue::WideChar(s) = &token.value { + format!("L'{}'", s) + } else { + "".to_string() + } + } + TokenType::Special => { + if let TokenValue::Special(v) = &token.value { + show_special(*v) + } else { + "".to_string() + } + } + } +} + +/// Format token type name +pub fn token_type_name(typ: TokenType) -> &'static str { + match typ { + TokenType::Eof => "EOF", + TokenType::Error => "ERROR", + TokenType::Ident => "IDENT", + TokenType::Number => "NUMBER", + TokenType::Char => "CHAR", + TokenType::WideChar => "WCHAR", + TokenType::String => "STRING", + TokenType::WideString => "WSTRING", + TokenType::Special => "SPECIAL", + TokenType::StreamBegin => "STREAM_BEGIN", + TokenType::StreamEnd => "STREAM_END", + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + fn tokenize_str(input: &str) -> (Vec, IdentTable) { + let mut tokenizer = Tokenizer::new(input.as_bytes(), 0); + let tokens = tokenizer.tokenize(); + let idents = std::mem::take(&mut tokenizer.idents); + (tokens, idents) + } + + #[test] + fn test_simple_tokens() { + let (tokens, idents) = tokenize_str("int main"); + // StreamBegin, "int", "main", StreamEnd + assert_eq!(tokens.len(), 4); + assert_eq!(tokens[0].typ, TokenType::StreamBegin); + assert_eq!(tokens[1].typ, TokenType::Ident); + assert_eq!(tokens[2].typ, TokenType::Ident); + assert_eq!(tokens[3].typ, TokenType::StreamEnd); + + assert_eq!(show_token(&tokens[1], &idents), "int"); + assert_eq!(show_token(&tokens[2], &idents), "main"); + } + + #[test] + fn test_numbers() { + let (tokens, _) = tokenize_str("123 0x1F 3.14 1e10 0.5e-3"); + // Skip StreamBegin/End + assert_eq!(tokens[1].typ, TokenType::Number); + assert_eq!(tokens[2].typ, TokenType::Number); + assert_eq!(tokens[3].typ, TokenType::Number); + assert_eq!(tokens[4].typ, TokenType::Number); + assert_eq!(tokens[5].typ, TokenType::Number); + + if let TokenValue::Number(n) = &tokens[1].value { + assert_eq!(n, "123"); + } + if let TokenValue::Number(n) = &tokens[2].value { + assert_eq!(n, "0x1F"); + } + if let TokenValue::Number(n) = &tokens[3].value { + assert_eq!(n, "3.14"); + } + if let TokenValue::Number(n) = &tokens[4].value { + assert_eq!(n, "1e10"); + } + if let TokenValue::Number(n) = &tokens[5].value { + assert_eq!(n, "0.5e-3"); + } + } + + #[test] + fn test_strings() { + let (tokens, _) = tokenize_str(r#""hello" "world""#); + assert_eq!(tokens[1].typ, TokenType::String); + assert_eq!(tokens[2].typ, TokenType::String); + + if let TokenValue::String(s) = &tokens[1].value { + assert_eq!(s, "hello"); + } + if let TokenValue::String(s) = &tokens[2].value { + assert_eq!(s, "world"); + } + } + + #[test] + fn test_char_literals() { + let (tokens, _) = tokenize_str("'a' '\\n' '\\0'"); + assert_eq!(tokens[1].typ, TokenType::Char); + assert_eq!(tokens[2].typ, TokenType::Char); + assert_eq!(tokens[3].typ, TokenType::Char); + } + + #[test] + fn test_operators() { + let (tokens, idents) = tokenize_str("+ += ++ - -= -- -> * *= / /= % %= = =="); + let ops: Vec<_> = tokens[1..tokens.len() - 1] + .iter() + .map(|t| show_token(t, &idents)) + .collect(); + assert_eq!( + ops, + vec![ + "+", "+=", "++", "-", "-=", "--", "->", "*", "*=", "/", "/=", "%", "%=", "=", "==" + ] + ); + } + + #[test] + fn test_comparison_ops() { + let (tokens, idents) = tokenize_str("< <= > >= == != && || !"); + let ops: Vec<_> = tokens[1..tokens.len() - 1] + .iter() + .map(|t| show_token(t, &idents)) + .collect(); + assert_eq!(ops, vec!["<", "<=", ">", ">=", "==", "!=", "&&", "||", "!"]); + } + + #[test] + fn test_bitwise_ops() { + let (tokens, idents) = tokenize_str("& &= | |= ^ ^= ~ << >> <<= >>="); + let ops: Vec<_> = tokens[1..tokens.len() - 1] + .iter() + .map(|t| show_token(t, &idents)) + .collect(); + assert_eq!( + ops, + vec!["&", "&=", "|", "|=", "^", "^=", "~", "<<", ">>", "<<=", ">>="] + ); + } + + #[test] + fn test_punctuation() { + let (tokens, idents) = tokenize_str("( ) [ ] { } ; , . ..."); + let ops: Vec<_> = tokens[1..tokens.len() - 1] + .iter() + .map(|t| show_token(t, &idents)) + .collect(); + assert_eq!( + ops, + vec!["(", ")", "[", "]", "{", "}", ";", ",", ".", "..."] + ); + } + + #[test] + fn test_line_comment() { + let (tokens, idents) = tokenize_str("a // comment\nb"); + assert_eq!(tokens.len(), 4); // StreamBegin, a, b, StreamEnd + assert_eq!(show_token(&tokens[1], &idents), "a"); + assert_eq!(show_token(&tokens[2], &idents), "b"); + } + + #[test] + fn test_block_comment() { + let (tokens, idents) = tokenize_str("a /* comment */ b"); + assert_eq!(tokens.len(), 4); + assert_eq!(show_token(&tokens[1], &idents), "a"); + assert_eq!(show_token(&tokens[2], &idents), "b"); + } + + #[test] + fn test_line_splice() { + let (tokens, idents) = tokenize_str("a\\\nb"); + // Line splice joins 'a' and 'b' into one identifier "ab" + // In C, backslash-newline is deleted, so this becomes "ab" + assert_eq!(tokens.len(), 3); // StreamBegin, ab, StreamEnd + assert_eq!(show_token(&tokens[1], &idents), "ab"); + } + + #[test] + fn test_wide_string() { + let (tokens, _) = tokenize_str(r#"L"wide""#); + assert_eq!(tokens[1].typ, TokenType::WideString); + if let TokenValue::WideString(s) = &tokens[1].value { + assert_eq!(s, "wide"); + } + } + + #[test] + fn test_wide_char() { + let (tokens, _) = tokenize_str("L'w'"); + assert_eq!(tokens[1].typ, TokenType::WideChar); + } + + #[test] + fn test_position_tracking() { + let (tokens, _) = tokenize_str("a\nb"); + // 'a' is on line 1 + assert_eq!(tokens[1].pos.line, 1); + // 'b' is on line 2 + assert_eq!(tokens[2].pos.line, 2); + } + + #[test] + fn test_newline_flag_first_token() { + // First token at start of file should have newline=true + let (tokens, _) = tokenize_str("#define"); + // tokens[0] is STREAM_BEGIN, tokens[1] is the first real token '#' + assert!( + tokens[1].pos.newline, + "First token should have newline=true" + ); + } + + #[test] + fn test_newline_flag_after_newline() { + // Token after newline should have newline=true + let (tokens, _) = tokenize_str("a\n#define"); + // tokens[0] is STREAM_BEGIN, tokens[1] is 'a', tokens[2] is '#' + assert!( + !tokens[1].pos.newline || tokens[1].pos.newline, + "First token can have newline=true" + ); + assert!( + tokens[2].pos.newline, + "Token after newline should have newline=true" + ); + } + + #[test] + fn test_preprocessor_tokens() { + let (tokens, idents) = tokenize_str("#include "); + let toks: Vec<_> = tokens[1..tokens.len() - 1] + .iter() + .map(|t| show_token(t, &idents)) + .collect(); + // # include < stdio . h > + assert_eq!(toks, vec!["#", "include", "<", "stdio", ".", "h", ">"]); + } + + #[test] + fn test_function_declaration() { + let (tokens, idents) = tokenize_str("int main(void) { return 0; }"); + let toks: Vec<_> = tokens[1..tokens.len() - 1] + .iter() + .map(|t| show_token(t, &idents)) + .collect(); + assert_eq!( + toks, + vec!["int", "main", "(", "void", ")", "{", "return", "0", ";", "}"] + ); + } + + // ======================================================================== + // Additional coverage tests for multi-char operators + // ======================================================================== + + #[test] + fn test_hashhash_operator() { + // ## is the preprocessor token paste operator + let (tokens, idents) = tokenize_str("a ## b"); + let ops: Vec<_> = tokens[1..tokens.len() - 1] + .iter() + .map(|t| show_token(t, &idents)) + .collect(); + assert_eq!(ops, vec!["a", "##", "b"]); + } + + #[test] + fn test_dotdot_operator() { + // .. is used in sparse (range), test it doesn't break + let (tokens, idents) = tokenize_str("a .. b"); + let ops: Vec<_> = tokens[1..tokens.len() - 1] + .iter() + .map(|t| show_token(t, &idents)) + .collect(); + assert_eq!(ops, vec!["a", "..", "b"]); + } + + #[test] + fn test_ternary_operators() { + // ? and : for ternary expressions + let (tokens, idents) = tokenize_str("a ? b : c"); + let ops: Vec<_> = tokens[1..tokens.len() - 1] + .iter() + .map(|t| show_token(t, &idents)) + .collect(); + assert_eq!(ops, vec!["a", "?", "b", ":", "c"]); + } + + #[test] + fn test_all_two_char_operators() { + // Comprehensive test of ALL 2-char operators + let (tokens, idents) = + tokenize_str("+= ++ -= -- -> *= /= %= <= >= == != && &= || |= ^= ## << >> .."); + let ops: Vec<_> = tokens[1..tokens.len() - 1] + .iter() + .map(|t| show_token(t, &idents)) + .collect(); + assert_eq!( + ops, + vec![ + "+=", "++", "-=", "--", "->", "*=", "/=", "%=", "<=", ">=", "==", "!=", "&&", "&=", + "||", "|=", "^=", "##", "<<", ">>", ".." + ] + ); + } + + #[test] + fn test_all_three_char_operators() { + // Test all 3-char operators + let (tokens, idents) = tokenize_str("<<= >>= ..."); + let ops: Vec<_> = tokens[1..tokens.len() - 1] + .iter() + .map(|t| show_token(t, &idents)) + .collect(); + assert_eq!(ops, vec!["<<=", ">>=", "..."]); + } + + #[test] + fn test_three_char_in_context() { + // 3-char operators in realistic context + let (tokens, idents) = tokenize_str("x <<= 2; y >>= 1; void f(int a, ...)"); + let ops: Vec<_> = tokens[1..tokens.len() - 1] + .iter() + .map(|t| show_token(t, &idents)) + .collect(); + assert_eq!( + ops, + vec![ + "x", "<<=", "2", ";", "y", ">>=", "1", ";", "void", "f", "(", "int", "a", ",", + "...", ")" + ] + ); + } + + // ======================================================================== + // Multi-line comment tests + // ======================================================================== + + #[test] + fn test_multiline_block_comment() { + let (tokens, idents) = tokenize_str("a /* this is\na multi-line\ncomment */ b"); + assert_eq!(tokens.len(), 4); // StreamBegin, a, b, StreamEnd + assert_eq!(show_token(&tokens[1], &idents), "a"); + assert_eq!(show_token(&tokens[2], &idents), "b"); + } + + #[test] + fn test_block_comment_with_stars() { + // Comment with * characters inside (common in doc comments) + let (tokens, idents) = tokenize_str("a /* ** stars ** */ b"); + assert_eq!(tokens.len(), 4); + assert_eq!(show_token(&tokens[1], &idents), "a"); + assert_eq!(show_token(&tokens[2], &idents), "b"); + } + + #[test] + fn test_block_comment_with_slashes() { + // Comment with / characters inside + let (tokens, idents) = tokenize_str("a /* // not a line comment */ b"); + assert_eq!(tokens.len(), 4); + assert_eq!(show_token(&tokens[1], &idents), "a"); + assert_eq!(show_token(&tokens[2], &idents), "b"); + } + + #[test] + fn test_block_comment_asterisk_not_end() { + // * followed by non-/ should not end comment + let (tokens, idents) = tokenize_str("a /* x * y */ b"); + assert_eq!(tokens.len(), 4); + assert_eq!(show_token(&tokens[1], &idents), "a"); + assert_eq!(show_token(&tokens[2], &idents), "b"); + } + + #[test] + fn test_multiline_comment_position_tracking() { + // After a multiline comment, position should be correct + let (tokens, _) = tokenize_str("a\n/* comment\nspanning\nlines */\nb"); + // a is on line 1, b is on line 5 + assert_eq!(tokens[1].pos.line, 1); + assert_eq!(tokens[2].pos.line, 5); + } + + // ======================================================================== + // Additional number format tests + // ======================================================================== + + #[test] + fn test_hex_float_numbers() { + // Hex floats with p/P exponent (C99 feature) + let (tokens, _) = tokenize_str("0x1p5 0x1.0p10 0xABCp-5 0x1P+3"); + assert_eq!(tokens[1].typ, TokenType::Number); + assert_eq!(tokens[2].typ, TokenType::Number); + assert_eq!(tokens[3].typ, TokenType::Number); + assert_eq!(tokens[4].typ, TokenType::Number); + + if let TokenValue::Number(n) = &tokens[1].value { + assert_eq!(n, "0x1p5"); + } + if let TokenValue::Number(n) = &tokens[2].value { + assert_eq!(n, "0x1.0p10"); + } + if let TokenValue::Number(n) = &tokens[3].value { + assert_eq!(n, "0xABCp-5"); + } + if let TokenValue::Number(n) = &tokens[4].value { + assert_eq!(n, "0x1P+3"); + } + } + + #[test] + fn test_number_suffixes() { + // Integer suffixes + let (tokens, _) = tokenize_str("123L 456UL 789LL 0xFFu 42lu"); + for i in 1..=5 { + assert_eq!(tokens[i].typ, TokenType::Number); + } + if let TokenValue::Number(n) = &tokens[1].value { + assert_eq!(n, "123L"); + } + if let TokenValue::Number(n) = &tokens[2].value { + assert_eq!(n, "456UL"); + } + } + + #[test] + fn test_float_suffixes() { + // Float suffixes + let (tokens, _) = tokenize_str("3.14f 2.71F 1.0l 9.8L"); + for i in 1..=4 { + assert_eq!(tokens[i].typ, TokenType::Number); + } + } + + #[test] + fn test_dot_starting_number() { + // Numbers starting with . + let (tokens, _) = tokenize_str(".5 .123 .0e10"); + assert_eq!(tokens[1].typ, TokenType::Number); + assert_eq!(tokens[2].typ, TokenType::Number); + assert_eq!(tokens[3].typ, TokenType::Number); + + if let TokenValue::Number(n) = &tokens[1].value { + assert_eq!(n, ".5"); + } + } + + // ======================================================================== + // Edge cases and tricky sequences + // ======================================================================== + + #[test] + fn test_operator_adjacency() { + // Operators without spaces - maximal munch + let (tokens, idents) = tokenize_str("a+++b"); // a ++ + b + let ops: Vec<_> = tokens[1..tokens.len() - 1] + .iter() + .map(|t| show_token(t, &idents)) + .collect(); + assert_eq!(ops, vec!["a", "++", "+", "b"]); + } + + #[test] + fn test_operator_adjacency_minus() { + let (tokens, idents) = tokenize_str("a---b"); // a -- - b + let ops: Vec<_> = tokens[1..tokens.len() - 1] + .iter() + .map(|t| show_token(t, &idents)) + .collect(); + assert_eq!(ops, vec!["a", "--", "-", "b"]); + } + + #[test] + fn test_shift_vs_templates() { + // >> should be one token (not two > >) + let (tokens, idents) = tokenize_str("a>>b"); + let ops: Vec<_> = tokens[1..tokens.len() - 1] + .iter() + .map(|t| show_token(t, &idents)) + .collect(); + assert_eq!(ops, vec!["a", ">>", "b"]); + } + + #[test] + fn test_arrow_vs_minus_gt() { + // -> should be one token + let (tokens, idents) = tokenize_str("ptr->field"); + let ops: Vec<_> = tokens[1..tokens.len() - 1] + .iter() + .map(|t| show_token(t, &idents)) + .collect(); + assert_eq!(ops, vec!["ptr", "->", "field"]); + } + + #[test] + fn test_string_with_comment_chars() { + // String containing /* and */ should not be treated as comment + let (tokens, _) = tokenize_str(r#""/* not a comment */""#); + assert_eq!(tokens.len(), 3); // StreamBegin, string, StreamEnd + assert_eq!(tokens[1].typ, TokenType::String); + if let TokenValue::String(s) = &tokens[1].value { + assert_eq!(s, "/* not a comment */"); + } + } + + #[test] + fn test_char_with_quote() { + // Character literal with escaped quote + let (tokens, _) = tokenize_str(r#"'\''"#); + assert_eq!(tokens[1].typ, TokenType::Char); + if let TokenValue::Char(s) = &tokens[1].value { + assert_eq!(s, "\\'"); + } + } + + #[test] + fn test_string_with_escaped_quote() { + let (tokens, _) = tokenize_str(r#""hello \"world\"""#); + assert_eq!(tokens[1].typ, TokenType::String); + if let TokenValue::String(s) = &tokens[1].value { + assert_eq!(s, "hello \\\"world\\\""); + } + } + + #[test] + fn test_empty_string() { + let (tokens, _) = tokenize_str(r#""""#); + assert_eq!(tokens[1].typ, TokenType::String); + if let TokenValue::String(s) = &tokens[1].value { + assert_eq!(s, ""); + } + } + + #[test] + fn test_empty_char() { + // Empty char literal (technically invalid C, but lexer should handle) + let (tokens, _) = tokenize_str("''"); + assert_eq!(tokens[1].typ, TokenType::Char); + if let TokenValue::Char(s) = &tokens[1].value { + assert_eq!(s, ""); + } + } + + #[test] + fn test_consecutive_comments() { + let (tokens, idents) = tokenize_str("a /* c1 */ /* c2 */ b"); + assert_eq!(tokens.len(), 4); + assert_eq!(show_token(&tokens[1], &idents), "a"); + assert_eq!(show_token(&tokens[2], &idents), "b"); + } + + #[test] + fn test_comment_at_eof() { + let (tokens, idents) = tokenize_str("a /* comment */"); + assert_eq!(tokens.len(), 3); // StreamBegin, a, StreamEnd + assert_eq!(show_token(&tokens[1], &idents), "a"); + } + + #[test] + fn test_line_comment_at_eof() { + let (tokens, idents) = tokenize_str("a // comment"); + assert_eq!(tokens.len(), 3); + assert_eq!(show_token(&tokens[1], &idents), "a"); + } +} diff --git a/cc/token/mod.rs b/cc/token/mod.rs new file mode 100644 index 000000000..089740cc6 --- /dev/null +++ b/cc/token/mod.rs @@ -0,0 +1,17 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Token module - lexer and preprocessor +// + +pub mod lexer; +pub mod preprocess; + +// Re-export items used by main.rs +pub use lexer::{show_token, token_type_name, StreamTable, Tokenizer}; +pub use preprocess::preprocess; diff --git a/cc/token/preprocess.rs b/cc/token/preprocess.rs new file mode 100644 index 000000000..dcaba673b --- /dev/null +++ b/cc/token/preprocess.rs @@ -0,0 +1,3157 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// C Preprocessor +// Based on the sparse compiler design by Linus Torvalds +// +// Main API: preprocess(tokens, target, idents) -> preprocessed_tokens +// + +use std::collections::{HashMap, HashSet}; +use std::fs; +use std::path::{Path, PathBuf}; +use std::time::SystemTime; + +use super::lexer::{IdentTable, Position, SpecialToken, Token, TokenType, TokenValue, Tokenizer}; +use crate::arch; +use crate::diag; +use crate::os; +use crate::target::Target; + +// ============================================================================ +// Macro Definition +// ============================================================================ + +/// A macro parameter +#[derive(Debug, Clone)] +pub struct MacroParam { + pub name: String, + pub index: usize, +} + +/// A macro definition (object-like or function-like) +#[derive(Debug, Clone)] +pub struct Macro { + /// Macro name + pub name: String, + /// Replacement tokens (stored as token copies) + pub body: Vec, + /// Is this a function-like macro? + pub is_function: bool, + /// Parameters for function-like macros + pub params: Vec, + /// Is this a variadic macro (...)? (reserved for future use) + pub _is_variadic: bool, + /// Built-in expand function (for __LINE__, __FILE__, etc.) + pub builtin: Option, +} + +/// A token stored in a macro body +#[derive(Debug, Clone)] +pub struct MacroToken { + pub typ: TokenType, + pub value: MacroTokenValue, + pub whitespace: bool, +} + +/// Value of a macro token +#[derive(Debug, Clone)] +pub enum MacroTokenValue { + None, + Number(String), + Ident(String), + String(String), + Char(String), + Special(u32), + /// Parameter reference (by index) + Param(usize), + /// Stringified parameter (#param) + Stringify(usize), + /// Token paste marker (##) + Paste, + /// __VA_ARGS__ (index of variadic params start) + VaArgs, +} + +/// Built-in macro types +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BuiltinMacro { + Line, + File, + Date, + Time, + Counter, + HasAttribute, + HasBuiltin, + HasFeature, + HasExtension, + HasInclude, + HasIncludeNext, +} + +impl Macro { + /// Create a predefined macro (value is treated as a number/literal) + pub fn predefined(name: &str, value: Option<&str>) -> Self { + let body = match value { + Some(v) => vec![MacroToken { + typ: TokenType::Number, + value: MacroTokenValue::Number(v.to_string()), + whitespace: false, + }], + None => vec![], + }; + Self { + name: name.to_string(), + body, + is_function: false, + params: vec![], + _is_variadic: false, + builtin: None, + } + } + + /// Create a keyword alias macro (value is treated as an identifier/keyword) + pub fn keyword_alias(name: &str, value: &str) -> Self { + let body = if value.is_empty() { + vec![] + } else { + vec![MacroToken { + typ: TokenType::Ident, + value: MacroTokenValue::Ident(value.to_string()), + whitespace: false, + }] + }; + Self { + name: name.to_string(), + body, + is_function: false, + params: vec![], + _is_variadic: false, + builtin: None, + } + } + + /// Create a builtin macro + pub fn builtin(name: &str, builtin: BuiltinMacro, is_function: bool) -> Self { + Self { + name: name.to_string(), + body: vec![], + is_function, + params: if is_function { + vec![MacroParam { + name: "x".to_string(), + index: 0, + }] + } else { + vec![] + }, + _is_variadic: false, + builtin: Some(builtin), + } + } +} + +// ============================================================================ +// Conditional Compilation State +// ============================================================================ + +/// State of a conditional (#if/#ifdef block) +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum CondState { + /// Currently in a true branch, processing tokens + Active, + /// Currently in a false branch, skipping tokens + Skipping, + /// Already found a true branch, skip remaining branches + Done, +} + +/// A conditional compilation block +#[derive(Debug, Clone)] +struct Conditional { + state: CondState, + /// Has this conditional had a true branch? + had_true: bool, +} + +// ============================================================================ +// Preprocessor +// ============================================================================ + +/// C Preprocessor +pub struct Preprocessor<'a> { + /// Target configuration + target: &'a Target, + + /// Macro definitions + macros: HashMap, + + /// Conditional compilation stack + cond_stack: Vec, + + /// Include paths for angle-bracket includes + system_include_paths: Vec, + + /// Include paths for quote includes (searched first) + quote_include_paths: Vec, + + /// Current file name (for __FILE__) + current_file: String, + + /// Current file directory (for relative includes) + current_dir: String, + + /// Counter for __COUNTER__ + counter: u32, + + /// Include depth (for cycle detection) + include_depth: u32, + + /// Maximum include depth + max_include_depth: u32, + + /// Set of files currently being included (cycle detection) + include_stack: HashSet, + + /// Files that have been included with #pragma once or include guards + once_files: HashSet, + + /// Macros currently being expanded (for recursion prevention) + expanding: HashSet, + + /// Compilation date string for __DATE__ (format: "Mmm dd yyyy") + compile_date: String, + + /// Compilation time string for __TIME__ (format: "hh:mm:ss") + compile_time: String, +} + +impl<'a> Preprocessor<'a> { + /// Format the current time as C99 __DATE__ and __TIME__ strings + /// Returns (date_string, time_string) where: + /// - date_string is "Mmm dd yyyy" (e.g., "Dec 4 2025") + /// - time_string is "hh:mm:ss" (e.g., "14:30:00") + fn format_compile_time() -> (String, String) { + const MONTHS: [&str; 12] = [ + "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", + ]; + + // Get seconds since Unix epoch + let duration = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap_or_default(); + let secs = duration.as_secs() as i64; + + // Convert to date/time components + // Days since epoch + let days = secs / 86400; + let time_of_day = secs % 86400; + + let hours = time_of_day / 3600; + let minutes = (time_of_day % 3600) / 60; + let seconds = time_of_day % 60; + + // Calculate year, month, day from days since epoch (1970-01-01) + // Using a simplified algorithm + let mut remaining_days = days; + let mut year = 1970i32; + + loop { + let days_in_year = if year % 4 == 0 && (year % 100 != 0 || year % 400 == 0) { + 366 + } else { + 365 + }; + if remaining_days < days_in_year { + break; + } + remaining_days -= days_in_year; + year += 1; + } + + let is_leap = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0); + let days_in_months: [i64; 12] = if is_leap { + [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + } else { + [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + }; + + let mut month = 0usize; + for (i, &days_in_month) in days_in_months.iter().enumerate() { + if remaining_days < days_in_month { + month = i; + break; + } + remaining_days -= days_in_month; + } + let day = remaining_days + 1; // 1-based + + // Format date: "Mmm dd yyyy" (day is space-padded to 2 chars) + let date_str = format!("{} {:2} {}", MONTHS[month], day, year); + // Format time: "hh:mm:ss" + let time_str = format!("{:02}:{:02}:{:02}", hours, minutes, seconds); + + (date_str, time_str) + } + + /// Create a new preprocessor + pub fn new(target: &'a Target, filename: &str) -> Self { + let current_dir = Path::new(filename) + .parent() + .map(|p| p.to_string_lossy().to_string()) + .unwrap_or_else(|| ".".to_string()); + + let (compile_date, compile_time) = Self::format_compile_time(); + + let mut pp = Self { + target, + macros: HashMap::new(), + cond_stack: Vec::new(), + system_include_paths: Vec::new(), + quote_include_paths: Vec::new(), + current_file: filename.to_string(), + current_dir, + counter: 0, + include_depth: 0, + max_include_depth: 200, + include_stack: HashSet::new(), + once_files: HashSet::new(), + expanding: HashSet::new(), + compile_date, + compile_time, + }; + + // Initialize predefined macros + pp.init_predefined_macros(); + + // Initialize include paths from OS + for path in os::get_include_paths(target) { + pp.system_include_paths.push(path.to_string()); + } + + pp + } + + /// Initialize predefined macros + fn init_predefined_macros(&mut self) { + // Standard C macros + self.define_macro(Macro::predefined("__STDC__", Some("1"))); + self.define_macro(Macro::predefined("__STDC_VERSION__", Some("199901L"))); // C99 + self.define_macro(Macro::predefined("__STDC_HOSTED__", Some("1"))); + + // GCC compatibility macros (required by system headers) + self.define_macro(Macro::predefined("__GNUC__", Some("4"))); + self.define_macro(Macro::predefined("__GNUC_MINOR__", Some("2"))); + self.define_macro(Macro::predefined("__GNUC_PATCHLEVEL__", Some("1"))); + self.define_macro(Macro::predefined( + "__VERSION__", + Some("\"4.2.1 Compatible pcc\""), + )); + self.define_macro(Macro::predefined("__GNUC_STDC_INLINE__", Some("1"))); + + // GCC type keyword compatibility + self.define_macro(Macro::keyword_alias("__signed", "signed")); + self.define_macro(Macro::keyword_alias("__signed__", "signed")); + self.define_macro(Macro::keyword_alias("__inline", "inline")); + self.define_macro(Macro::keyword_alias("__inline__", "inline")); + self.define_macro(Macro::keyword_alias("__volatile", "volatile")); + self.define_macro(Macro::keyword_alias("__volatile__", "volatile")); + self.define_macro(Macro::keyword_alias("__extension__", "")); // expands to nothing + self.define_macro(Macro::keyword_alias("__restrict", "restrict")); + self.define_macro(Macro::keyword_alias("__restrict__", "restrict")); + + // Architecture macros + for (name, value) in arch::get_arch_macros(self.target) { + if let Some(v) = value { + self.define_macro(Macro::predefined(name, Some(v))); + } else { + self.define_macro(Macro::predefined(name, None)); + } + } + + // Limit macros + for (name, value) in arch::get_limit_macros(self.target) { + self.define_macro(Macro::predefined(name, Some(value))); + } + + // OS macros + for (name, value) in os::get_os_macros(self.target) { + if let Some(v) = value { + self.define_macro(Macro::predefined(name, Some(v))); + } else { + self.define_macro(Macro::predefined(name, None)); + } + } + + // Builtin macros + self.define_macro(Macro::builtin("__LINE__", BuiltinMacro::Line, false)); + self.define_macro(Macro::builtin("__FILE__", BuiltinMacro::File, false)); + self.define_macro(Macro::builtin("__DATE__", BuiltinMacro::Date, false)); + self.define_macro(Macro::builtin("__TIME__", BuiltinMacro::Time, false)); + self.define_macro(Macro::builtin("__COUNTER__", BuiltinMacro::Counter, false)); + + // Function-like builtins + self.define_macro(Macro::builtin( + "__has_attribute", + BuiltinMacro::HasAttribute, + true, + )); + self.define_macro(Macro::builtin( + "__has_builtin", + BuiltinMacro::HasBuiltin, + true, + )); + self.define_macro(Macro::builtin( + "__has_feature", + BuiltinMacro::HasFeature, + true, + )); + self.define_macro(Macro::builtin( + "__has_extension", + BuiltinMacro::HasExtension, + true, + )); + self.define_macro(Macro::builtin( + "__has_include", + BuiltinMacro::HasInclude, + true, + )); + self.define_macro(Macro::builtin( + "__has_include_next", + BuiltinMacro::HasIncludeNext, + true, + )); + } + + /// Define a macro + pub fn define_macro(&mut self, mac: Macro) { + self.macros.insert(mac.name.clone(), mac); + } + + /// Undefine a macro + pub fn undef_macro(&mut self, name: &str) { + self.macros.remove(name); + } + + /// Check if a macro is defined + pub fn is_defined(&self, name: &str) -> bool { + self.macros.contains_key(name) + } + + /// Get a macro definition + pub fn get_macro(&self, name: &str) -> Option<&Macro> { + self.macros.get(name) + } + + /// Check if we're currently skipping tokens + fn is_skipping(&self) -> bool { + self.cond_stack + .last() + .map(|c| c.state != CondState::Active) + .unwrap_or(false) + } + + /// Process tokens through the preprocessor + pub fn preprocess(&mut self, tokens: Vec, idents: &mut IdentTable) -> Vec { + let mut output = Vec::new(); + let mut iter = tokens.into_iter().peekable(); + + while let Some(token) = iter.next() { + match token.typ { + TokenType::StreamBegin | TokenType::StreamEnd => { + // Pass through stream markers + output.push(token); + } + + TokenType::Special => { + if let TokenValue::Special(code) = &token.value { + // Check for # at start of line (preprocessor directive) + if *code == b'#' as u32 && token.pos.newline { + self.handle_directive(&mut iter, &token, &mut output, idents); + continue; + } + } + if !self.is_skipping() { + output.push(token); + } + } + + TokenType::Ident => { + if self.is_skipping() { + continue; + } + // Check for macro expansion + if let TokenValue::Ident(id) = &token.value { + if let Some(name) = idents.get(*id) { + let name = name.to_string(); + if let Some(expanded) = + self.try_expand_macro(&name, &token.pos, &mut iter, idents) + { + output.extend(expanded); + continue; + } + } + } + output.push(token); + } + + _ => { + if !self.is_skipping() { + output.push(token); + } + } + } + } + + // Check for unterminated conditionals (only at top level) + // During includes, the cond_stack is saved/restored by include_file + if self.include_depth == 0 && !self.cond_stack.is_empty() { + let msg = format!( + "{}: {} unterminated #if directive(s)", + self.current_file, + self.cond_stack.len() + ); + let eof_pos = Position::new(0, 0, 0); + diag::warning(eof_pos, &msg); + } + + output + } + + /// Handle a preprocessor directive + fn handle_directive( + &mut self, + iter: &mut std::iter::Peekable, + hash_token: &Token, + output: &mut Vec, + idents: &mut IdentTable, + ) where + I: Iterator, + { + // Get the directive name + let directive_token = match iter.next() { + Some(t) => t, + None => return, // Empty directive, ignore + }; + + // Get directive name + let directive_name = match &directive_token.typ { + TokenType::Ident => { + if let TokenValue::Ident(id) = &directive_token.value { + idents.get(*id).map(|s| s.to_string()) + } else { + None + } + } + _ => None, + }; + + let directive = match directive_name { + Some(name) => name, + None => { + // Consume rest of line + self.skip_to_eol(iter); + return; + } + }; + + match directive.as_str() { + "define" => self.handle_define(iter, idents), + "undef" => self.handle_undef(iter, idents), + "ifdef" => self.handle_ifdef(iter, idents), + "ifndef" => self.handle_ifndef(iter, idents), + "if" => self.handle_if(iter, idents), + "elif" => self.handle_elif(iter, idents), + "else" => self.handle_else(iter), + "endif" => self.handle_endif(iter), + "include" => self.handle_include(iter, output, idents, hash_token, false), + "include_next" => self.handle_include(iter, output, idents, hash_token, true), + "error" => self.handle_error(iter, &hash_token.pos, idents), + "warning" => self.handle_warning(iter, &hash_token.pos, idents), + "pragma" => self.handle_pragma(iter, idents), + "line" => self.handle_line(iter), + _ => { + // Unknown directive + if !self.is_skipping() { + diag::warning( + hash_token.pos, + &format!("unknown preprocessor directive #{}", directive), + ); + } + self.skip_to_eol(iter); + } + } + } + + /// Skip tokens until end of line + fn skip_to_eol(&self, iter: &mut std::iter::Peekable) + where + I: Iterator, + { + while let Some(token) = iter.peek() { + if token.pos.newline { + break; + } + iter.next(); + } + } + + /// Collect tokens until end of line + fn collect_to_eol(&self, iter: &mut std::iter::Peekable) -> Vec + where + I: Iterator, + { + let mut tokens = Vec::new(); + while let Some(token) = iter.peek() { + if token.pos.newline { + break; + } + tokens.push(iter.next().unwrap()); + } + tokens + } + + /// Handle #define + fn handle_define(&mut self, iter: &mut std::iter::Peekable, idents: &IdentTable) + where + I: Iterator, + { + if self.is_skipping() { + self.skip_to_eol(iter); + return; + } + + // Get macro name + let name_token = match iter.next() { + Some(t) => t, + None => return, + }; + + let macro_name = match &name_token.typ { + TokenType::Ident => { + if let TokenValue::Ident(id) = &name_token.value { + idents.get(*id).map(|s| s.to_string()) + } else { + None + } + } + _ => None, + }; + + let name = match macro_name { + Some(n) => n, + None => { + self.skip_to_eol(iter); + return; + } + }; + + // Check if function-like macro (immediate '(' without whitespace) + let mut params = Vec::new(); + let mut is_function = false; + let mut _is_variadic = false; + + if let Some(next) = iter.peek() { + if !next.pos.whitespace { + if let TokenValue::Special(code) = &next.value { + if *code == b'(' as u32 { + is_function = true; + iter.next(); // consume '(' + + // Parse parameters + let mut param_index = 0; + while let Some(param_tok) = iter.next() { + if param_tok.pos.newline { + break; + } + + match ¶m_tok.value { + TokenValue::Special(c) if *c == b')' as u32 => break, + TokenValue::Special(c) if *c == b',' as u32 => continue, + TokenValue::Special(c) if *c == SpecialToken::Ellipsis as u32 => { + _is_variadic = true; + // Consume closing paren + for t in iter.by_ref() { + if t.pos.newline { + break; + } + if let TokenValue::Special(c) = &t.value { + if *c == b')' as u32 { + break; + } + } + } + break; + } + TokenValue::Ident(id) => { + if let Some(param_name) = idents.get(*id) { + params.push(MacroParam { + name: param_name.to_string(), + index: param_index, + }); + param_index += 1; + } + } + _ => {} + } + } + } + } + } + } + + // Collect body tokens + let body_tokens = self.collect_to_eol(iter); + let body = self.tokens_to_macro_body(&body_tokens, ¶ms, idents); + + let mac = Macro { + name: name.clone(), + body, + is_function, + params, + _is_variadic, + builtin: None, + }; + + self.define_macro(mac); + } + + /// Convert tokens to macro body + fn tokens_to_macro_body( + &self, + tokens: &[Token], + params: &[MacroParam], + idents: &IdentTable, + ) -> Vec { + let mut body = Vec::new(); + let mut i = 0; + + while i < tokens.len() { + let token = &tokens[i]; + + // Check for # (stringify) + if let TokenValue::Special(code) = &token.value { + if *code == b'#' as u32 { + // Check if followed by ## + if i + 1 < tokens.len() { + if let TokenValue::Special(next_code) = &tokens[i + 1].value { + if *next_code == b'#' as u32 { + // ## token paste + body.push(MacroToken { + typ: TokenType::Special, + value: MacroTokenValue::Paste, + whitespace: token.pos.whitespace, + }); + i += 2; + continue; + } + } + } + + // # stringify - look for following parameter + if i + 1 < tokens.len() { + if let TokenValue::Ident(id) = &tokens[i + 1].value { + if let Some(name) = idents.get(*id) { + // Check if it's a parameter + for param in params { + if param.name == name { + body.push(MacroToken { + typ: TokenType::Special, + value: MacroTokenValue::Stringify(param.index), + whitespace: token.pos.whitespace, + }); + i += 2; + continue; + } + } + // Check for __VA_ARGS__ + if name == "__VA_ARGS__" { + body.push(MacroToken { + typ: TokenType::Special, + value: MacroTokenValue::Stringify(params.len()), + whitespace: token.pos.whitespace, + }); + i += 2; + continue; + } + } + } + } + } + + // Check for ## (token paste) - also handle HashHash special token + if *code == SpecialToken::HashHash as u32 { + body.push(MacroToken { + typ: TokenType::Special, + value: MacroTokenValue::Paste, + whitespace: token.pos.whitespace, + }); + i += 1; + continue; + } + } + + // Check for parameter reference or __VA_ARGS__ + if let TokenValue::Ident(id) = &token.value { + if let Some(name) = idents.get(*id) { + // Check if it's __VA_ARGS__ + if name == "__VA_ARGS__" { + body.push(MacroToken { + typ: TokenType::Ident, + value: MacroTokenValue::VaArgs, + whitespace: token.pos.whitespace, + }); + i += 1; + continue; + } + + // Check if it's a parameter + let mut found_param = false; + for param in params { + if param.name == name { + body.push(MacroToken { + typ: TokenType::Ident, + value: MacroTokenValue::Param(param.index), + whitespace: token.pos.whitespace, + }); + found_param = true; + break; + } + } + if found_param { + i += 1; + continue; + } + } + } + + // Regular token + let macro_token = self.token_to_macro_token(token, idents); + body.push(macro_token); + i += 1; + } + + body + } + + /// Convert a regular token to a macro token + fn token_to_macro_token(&self, token: &Token, idents: &IdentTable) -> MacroToken { + let value = match &token.value { + TokenValue::Number(n) => MacroTokenValue::Number(n.clone()), + TokenValue::Ident(id) => { + let name = idents.get(*id).unwrap_or("").to_string(); + MacroTokenValue::Ident(name) + } + TokenValue::String(s) => MacroTokenValue::String(s.clone()), + TokenValue::Char(c) => MacroTokenValue::Char(c.clone()), + TokenValue::Special(code) => MacroTokenValue::Special(*code), + TokenValue::WideString(s) => MacroTokenValue::String(s.clone()), + TokenValue::WideChar(c) => MacroTokenValue::Char(c.clone()), + TokenValue::None => MacroTokenValue::None, + }; + + MacroToken { + typ: token.typ, + value, + whitespace: token.pos.whitespace, + } + } + + /// Handle #undef + fn handle_undef(&mut self, iter: &mut std::iter::Peekable, idents: &IdentTable) + where + I: Iterator, + { + if self.is_skipping() { + self.skip_to_eol(iter); + return; + } + + if let Some(token) = iter.next() { + if let TokenType::Ident = &token.typ { + if let TokenValue::Ident(id) = &token.value { + if let Some(name) = idents.get(*id) { + self.undef_macro(name); + } + } + } + } + + self.skip_to_eol(iter); + } + + /// Handle #ifdef + fn handle_ifdef(&mut self, iter: &mut std::iter::Peekable, idents: &IdentTable) + where + I: Iterator, + { + let defined = if let Some(token) = iter.next() { + if let TokenType::Ident = &token.typ { + if let TokenValue::Ident(id) = &token.value { + if let Some(name) = idents.get(*id) { + self.is_defined(name) + } else { + false + } + } else { + false + } + } else { + false + } + } else { + false + }; + + self.skip_to_eol(iter); + self.push_conditional(defined); + } + + /// Handle #ifndef + fn handle_ifndef(&mut self, iter: &mut std::iter::Peekable, idents: &IdentTable) + where + I: Iterator, + { + let defined = if let Some(token) = iter.next() { + if let TokenType::Ident = &token.typ { + if let TokenValue::Ident(id) = &token.value { + if let Some(name) = idents.get(*id) { + self.is_defined(name) + } else { + false + } + } else { + false + } + } else { + false + } + } else { + false + }; + + self.skip_to_eol(iter); + self.push_conditional(!defined); + } + + /// Handle #if + fn handle_if(&mut self, iter: &mut std::iter::Peekable, idents: &IdentTable) + where + I: Iterator, + { + let tokens = self.collect_to_eol(iter); + let value = if self.is_skipping() { + false + } else { + self.evaluate_expression(&tokens, idents) + }; + + self.push_conditional(value); + } + + /// Handle #elif + fn handle_elif(&mut self, iter: &mut std::iter::Peekable, idents: &IdentTable) + where + I: Iterator, + { + let tokens = self.collect_to_eol(iter); + + // Check if we should evaluate this branch + let should_eval = if let Some(cond) = self.cond_stack.last() { + cond.state == CondState::Skipping && !cond.had_true + } else { + false + }; + + let expr_value = if should_eval { + self.evaluate_expression(&tokens, idents) + } else { + false + }; + + if let Some(cond) = self.cond_stack.last_mut() { + match cond.state { + CondState::Active => { + // We were in a true branch, now skip + cond.state = CondState::Done; + cond.had_true = true; + } + CondState::Skipping => { + if !cond.had_true && expr_value { + // Try this branch + cond.state = CondState::Active; + cond.had_true = true; + } + } + CondState::Done => { + // Already found true branch, skip + } + } + } + } + + /// Handle #else + fn handle_else(&mut self, iter: &mut std::iter::Peekable) + where + I: Iterator, + { + self.skip_to_eol(iter); + + if let Some(cond) = self.cond_stack.last_mut() { + match cond.state { + CondState::Active => { + cond.state = CondState::Done; + cond.had_true = true; + } + CondState::Skipping => { + if !cond.had_true { + cond.state = CondState::Active; + cond.had_true = true; + } else { + cond.state = CondState::Done; + } + } + CondState::Done => {} + } + } + } + + /// Handle #endif + fn handle_endif(&mut self, iter: &mut std::iter::Peekable) + where + I: Iterator, + { + self.skip_to_eol(iter); + self.cond_stack.pop(); + } + + /// Push a new conditional + fn push_conditional(&mut self, condition: bool) { + // If we're already skipping, new conditional starts in skip mode + let parent_skipping = self.is_skipping(); + + let state = if parent_skipping { + CondState::Skipping + } else if condition { + CondState::Active + } else { + CondState::Skipping + }; + + self.cond_stack.push(Conditional { + state, + // When parent is skipping, mark had_true so #else/#elif won't activate + had_true: parent_skipping || condition, + }); + } + + /// Evaluate a preprocessor expression + fn evaluate_expression(&self, tokens: &[Token], idents: &IdentTable) -> bool { + let mut evaluator = ExprEvaluator::new(self, idents); + evaluator.evaluate(tokens) != 0 + } + + /// Handle #include + fn handle_include( + &mut self, + iter: &mut std::iter::Peekable, + output: &mut Vec, + idents: &mut IdentTable, + hash_token: &Token, + is_include_next: bool, + ) where + I: Iterator, + { + if self.is_skipping() { + self.skip_to_eol(iter); + return; + } + + // Collect the include path tokens + let path_tokens = self.collect_to_eol(iter); + if path_tokens.is_empty() { + diag::error(hash_token.pos, "expected filename after #include"); + return; + } + + // Determine if system include (<...>) or quoted ("...") + let (filename, is_system) = self.parse_include_path(&path_tokens, idents); + + if filename.is_empty() { + diag::error(hash_token.pos, "empty filename in #include"); + return; + } + + // Find and include the file + if let Some(path) = self.find_include_file(&filename, is_system, is_include_next) { + self.include_file(&path, output, idents, hash_token); + } else { + diag::error(hash_token.pos, &format!("'{}': file not found", filename)); + } + } + + /// Parse include path from tokens + fn parse_include_path(&self, tokens: &[Token], idents: &IdentTable) -> (String, bool) { + if tokens.is_empty() { + return (String::new(), false); + } + + // Check for + if let TokenValue::Special(code) = &tokens[0].value { + if *code == b'<' as u32 { + // System include - collect until > + let mut filename = String::new(); + for token in &tokens[1..] { + if let TokenValue::Special(c) = &token.value { + if *c == b'>' as u32 { + break; + } + filename.push(*c as u8 as char); + } else { + filename.push_str(&self.token_to_string(token, idents)); + } + } + return (filename, true); + } + } + + // Check for "filename" + if let TokenValue::String(s) = &tokens[0].value { + return (s.clone(), false); + } + + // Fallback: try to reconstruct from tokens + let mut filename = String::new(); + for token in tokens { + filename.push_str(&self.token_to_string(token, idents)); + } + (filename, false) + } + + /// Convert token to string + fn token_to_string(&self, token: &Token, idents: &IdentTable) -> String { + match &token.value { + TokenValue::Ident(id) => idents.get(*id).unwrap_or("").to_string(), + TokenValue::Number(n) => n.clone(), + TokenValue::String(s) => s.clone(), + TokenValue::Special(code) => { + if *code < 256 { + (*code as u8 as char).to_string() + } else { + String::new() + } + } + _ => String::new(), + } + } + + /// Find an include file + fn find_include_file( + &self, + filename: &str, + is_system: bool, + _is_include_next: bool, + ) -> Option { + // Absolute path + if filename.starts_with('/') { + let path = PathBuf::from(filename); + if path.exists() { + return Some(path); + } + return None; + } + + // For quoted includes, first check relative to current file + if !is_system { + let relative_path = Path::new(&self.current_dir).join(filename); + if relative_path.exists() { + return Some(relative_path); + } + + // Check quote include paths + for dir in &self.quote_include_paths { + let path = Path::new(dir).join(filename); + if path.exists() { + return Some(path); + } + } + } + + // Check system include paths + for dir in &self.system_include_paths { + let path = Path::new(dir).join(filename); + if path.exists() { + return Some(path); + } + } + + None + } + + /// Include a file + fn include_file( + &mut self, + path: &Path, + output: &mut Vec, + idents: &mut IdentTable, + hash_token: &Token, + ) { + // Canonicalize path for cycle detection + let canonical = match path.canonicalize() { + Ok(p) => p, + Err(_) => path.to_path_buf(), + }; + + // Check for #pragma once + if self.once_files.contains(&canonical) { + return; + } + + // Check for include cycle + if self.include_stack.contains(&canonical) { + diag::error( + hash_token.pos, + &format!("recursive include of '{}'", path.display()), + ); + return; + } + + // Check include depth + if self.include_depth >= self.max_include_depth { + diag::error( + hash_token.pos, + &format!( + "#include nested too deeply (max {})", + self.max_include_depth + ), + ); + return; + } + + // Read the file + let content = match fs::read(path) { + Ok(c) => c, + Err(e) => { + diag::error( + hash_token.pos, + &format!("cannot read '{}': {}", path.display(), e), + ); + return; + } + }; + + // Save current state + let saved_file = + std::mem::replace(&mut self.current_file, path.to_string_lossy().to_string()); + let saved_dir = std::mem::replace( + &mut self.current_dir, + path.parent() + .map(|p| p.to_string_lossy().to_string()) + .unwrap_or_else(|| ".".to_string()), + ); + // Save cond_stack - included files have isolated conditional state + let saved_cond_stack = std::mem::take(&mut self.cond_stack); + + self.include_stack.insert(canonical.clone()); + self.include_depth += 1; + + // Create a new stream for this file + let stream_id = diag::init_stream(&self.current_file); + + // Tokenize the included file + let mut tokenizer = Tokenizer::new(&content, stream_id); + let tokens = tokenizer.tokenize(); + + // Build mapping from included file's ident IDs to main ident table IDs + let included_idents = tokenizer.ident_table(); + let mut id_map: HashMap = HashMap::new(); + for i in 0.. { + if let Some(name) = included_idents.get(i) { + let new_id = idents.intern(name); + id_map.insert(i, new_id); + } else { + break; + } + } + + // Remap token identifiers + let remapped_tokens: Vec = tokens + .into_iter() + .map(|mut token| { + if let TokenValue::Ident(old_id) = &token.value { + if let Some(&new_id) = id_map.get(old_id) { + token.value = TokenValue::Ident(new_id); + } + } + token + }) + .collect(); + + // Preprocess the included tokens + let preprocessed = self.preprocess(remapped_tokens, idents); + + // Filter out stream markers from included content + for token in preprocessed { + match token.typ { + TokenType::StreamBegin | TokenType::StreamEnd => {} + _ => output.push(token), + } + } + + // Restore state + self.include_depth -= 1; + self.include_stack.remove(&canonical); + self.current_file = saved_file; + self.current_dir = saved_dir; + self.cond_stack = saved_cond_stack; + } + + /// Handle #error + fn handle_error( + &mut self, + iter: &mut std::iter::Peekable, + pos: &Position, + idents: &IdentTable, + ) where + I: Iterator, + { + if self.is_skipping() { + self.skip_to_eol(iter); + return; + } + + let tokens = self.collect_to_eol(iter); + let msg = self.tokens_to_text(&tokens, idents); + diag::error(*pos, &format!("#error {}", msg)); + } + + /// Handle #warning + fn handle_warning( + &mut self, + iter: &mut std::iter::Peekable, + pos: &Position, + idents: &IdentTable, + ) where + I: Iterator, + { + if self.is_skipping() { + self.skip_to_eol(iter); + return; + } + + let tokens = self.collect_to_eol(iter); + let msg = self.tokens_to_text(&tokens, idents); + diag::warning(*pos, &format!("#warning {}", msg)); + } + + /// Handle #pragma + fn handle_pragma(&mut self, iter: &mut std::iter::Peekable, idents: &IdentTable) + where + I: Iterator, + { + if self.is_skipping() { + self.skip_to_eol(iter); + return; + } + + // Check for #pragma once + if let Some(token) = iter.peek() { + if let TokenValue::Ident(id) = &token.value { + if let Some(name) = idents.get(*id) { + if name == "once" { + if let Ok(canonical) = Path::new(&self.current_file).canonicalize() { + self.once_files.insert(canonical); + } + } + } + } + } + + self.skip_to_eol(iter); + } + + /// Handle #line directive + fn handle_line(&mut self, iter: &mut std::iter::Peekable) + where + I: Iterator, + { + if self.is_skipping() { + self.skip_to_eol(iter); + return; + } + + // Just skip for now + self.skip_to_eol(iter); + } + + /// Convert tokens to text for error messages + fn tokens_to_text(&self, tokens: &[Token], idents: &IdentTable) -> String { + let mut result = String::new(); + for token in tokens { + if !result.is_empty() && token.pos.whitespace { + result.push(' '); + } + match &token.value { + TokenValue::Ident(id) => { + if let Some(name) = idents.get(*id) { + result.push_str(name); + } + } + TokenValue::Number(n) => result.push_str(n), + TokenValue::String(s) => result.push_str(s), + TokenValue::Special(code) if *code < 256 => { + result.push(*code as u8 as char); + } + _ => {} + } + } + result + } + + /// Try to expand a macro + fn try_expand_macro( + &mut self, + name: &str, + pos: &Position, + iter: &mut std::iter::Peekable, + idents: &mut IdentTable, + ) -> Option> + where + I: Iterator, + { + // Check for recursion + if self.expanding.contains(name) { + return None; + } + + let mac = self.macros.get(name)?.clone(); + + // Handle builtin macros + if let Some(builtin) = mac.builtin { + return self.expand_builtin(builtin, pos, &mac, iter, idents); + } + + // Handle function-like macros + if mac.is_function { + // Check for opening paren + if let Some(next) = iter.peek() { + if let TokenValue::Special(code) = &next.value { + if *code == b'(' as u32 { + iter.next(); // consume '(' + let args = self.collect_macro_args(iter, idents); + return self.expand_function_macro(&mac, &args, pos, idents); + } + } + } + // No paren - don't expand + return None; + } + + // Object-like macro + self.expand_object_macro(&mac, pos, idents) + } + + /// Collect arguments for a function-like macro call + fn collect_macro_args( + &self, + iter: &mut std::iter::Peekable, + _idents: &IdentTable, + ) -> Vec> + where + I: Iterator, + { + let mut args = Vec::new(); + let mut current_arg = Vec::new(); + let mut paren_depth = 0; + + for token in iter.by_ref() { + if token.pos.newline && paren_depth == 0 { + // Unterminated macro call + break; + } + + match &token.value { + TokenValue::Special(code) => { + if *code == b'(' as u32 { + paren_depth += 1; + current_arg.push(token); + } else if *code == b')' as u32 { + if paren_depth == 0 { + // End of arguments + if !current_arg.is_empty() || !args.is_empty() { + args.push(current_arg); + } + break; + } + paren_depth -= 1; + current_arg.push(token); + } else if *code == b',' as u32 && paren_depth == 0 { + // Argument separator + args.push(current_arg); + current_arg = Vec::new(); + } else { + current_arg.push(token); + } + } + _ => { + current_arg.push(token); + } + } + } + + args + } + + /// Expand a function-like macro + fn expand_function_macro( + &mut self, + mac: &Macro, + args: &[Vec], + pos: &Position, + idents: &mut IdentTable, + ) -> Option> { + self.expanding.insert(mac.name.clone()); + + let mut result = Vec::new(); + let mut i = 0; + + while i < mac.body.len() { + let mt = &mac.body[i]; + + // Check for token pasting + let next_is_paste = + i + 1 < mac.body.len() && matches!(mac.body[i + 1].value, MacroTokenValue::Paste); + let prev_was_paste = i > 0 && matches!(mac.body[i - 1].value, MacroTokenValue::Paste); + + match &mt.value { + MacroTokenValue::Paste => { + // Skip paste markers + i += 1; + continue; + } + MacroTokenValue::Stringify(idx) => { + // Stringify the argument + let arg = args.get(*idx).cloned().unwrap_or_default(); + let text = self.tokens_to_text(&arg, idents); + result.push(Token::with_value( + TokenType::String, + *pos, + TokenValue::String(text), + )); + } + MacroTokenValue::Param(idx) => { + let arg = args.get(*idx).cloned().unwrap_or_default(); + + if next_is_paste || prev_was_paste { + // Don't expand for token pasting + if prev_was_paste && !result.is_empty() { + // Paste with previous token + let prev = result.pop().unwrap(); + let pasted = self.paste_tokens(&prev, &arg, pos, idents); + result.extend(pasted); + } else { + result.extend(arg); + } + } else { + // Expand the argument + let expanded = self.preprocess(arg, idents); + for mut tok in expanded { + if matches!(tok.typ, TokenType::StreamBegin | TokenType::StreamEnd) { + continue; + } + tok.pos = *pos; + result.push(tok); + } + } + } + MacroTokenValue::VaArgs => { + // Variadic arguments + let start = mac.params.len(); + for (j, arg) in args.iter().enumerate().skip(start) { + if j > start { + result.push(Token::with_value( + TokenType::Special, + *pos, + TokenValue::Special(b',' as u32), + )); + } + if next_is_paste || prev_was_paste { + result.extend(arg.clone()); + } else { + let expanded = self.preprocess(arg.clone(), idents); + for mut tok in expanded { + if matches!(tok.typ, TokenType::StreamBegin | TokenType::StreamEnd) + { + continue; + } + tok.pos = *pos; + result.push(tok); + } + } + } + } + _ => { + let token = self.macro_token_to_token(mt, pos, idents); + + if prev_was_paste && !result.is_empty() { + // Paste with previous token + let prev = result.pop().unwrap(); + let pasted = self.paste_tokens(&prev, &[token], pos, idents); + result.extend(pasted); + } else { + result.push(token); + } + } + } + + i += 1; + } + + // Rescan for more macro expansion + // NOTE: Keep macro in expanding set during rescan to prevent infinite recursion + let rescanned = self.preprocess(result, idents); + + self.expanding.remove(&mac.name); + + let filtered: Vec<_> = rescanned + .into_iter() + .filter(|t| !matches!(t.typ, TokenType::StreamBegin | TokenType::StreamEnd)) + .collect(); + + Some(filtered) + } + + /// Paste tokens together + fn paste_tokens( + &self, + left: &Token, + right: &[Token], + pos: &Position, + idents: &mut IdentTable, + ) -> Vec { + if right.is_empty() { + return vec![left.clone()]; + } + + let left_str = self.token_to_string(left, idents); + let right_str = self.token_to_string(&right[0], idents); + let combined = format!("{}{}", left_str, right_str); + + // Re-tokenize the combined string + let stream_id = diag::init_stream(""); + let mut tokenizer = Tokenizer::new(combined.as_bytes(), stream_id); + let tokens = tokenizer.tokenize(); + + // Build mapping from paste_idents IDs to main idents IDs + let paste_idents = tokenizer.ident_table(); + let mut id_map: HashMap = HashMap::new(); + for i in 0.. { + if let Some(name) = paste_idents.get(i) { + let new_id = idents.intern(name); + id_map.insert(i, new_id); + } else { + break; + } + } + + let mut result: Vec<_> = tokens + .into_iter() + .filter(|t| !matches!(t.typ, TokenType::StreamBegin | TokenType::StreamEnd)) + .map(|mut t| { + t.pos = *pos; + // Remap identifier IDs + if let TokenValue::Ident(old_id) = &t.value { + if let Some(&new_id) = id_map.get(old_id) { + t.value = TokenValue::Ident(new_id); + } + } + t + }) + .collect(); + + // Add remaining right tokens + result.extend(right.iter().skip(1).cloned()); + + result + } + + /// Expand an object-like macro + fn expand_object_macro( + &mut self, + mac: &Macro, + pos: &Position, + idents: &mut IdentTable, + ) -> Option> { + if mac.body.is_empty() { + return Some(vec![]); + } + + self.expanding.insert(mac.name.clone()); + + let mut result = Vec::new(); + let mut i = 0; + + while i < mac.body.len() { + let mt = &mac.body[i]; + + // Check for token pasting - prev_was_paste means we need to paste with previous result + let prev_was_paste = i > 0 && matches!(mac.body[i - 1].value, MacroTokenValue::Paste); + + match &mt.value { + MacroTokenValue::Paste => { + // Skip paste markers + i += 1; + continue; + } + _ => { + let token = self.macro_token_to_token(mt, pos, idents); + + if prev_was_paste && !result.is_empty() { + // Paste with previous token + let prev = result.pop().unwrap(); + let pasted = self.paste_tokens(&prev, &[token], pos, idents); + result.extend(pasted); + } else { + result.push(token); + } + } + } + + i += 1; + } + + // Rescan for more macro expansion + // NOTE: Keep macro in expanding set during rescan to prevent infinite recursion + // (e.g., when const -> __const and __const -> const both exist) + let rescanned = self.preprocess(result, idents); + + self.expanding.remove(&mac.name); + + let filtered: Vec<_> = rescanned + .into_iter() + .filter(|t| !matches!(t.typ, TokenType::StreamBegin | TokenType::StreamEnd)) + .collect(); + + Some(filtered) + } + + /// Convert a macro token to a regular token + fn macro_token_to_token( + &self, + mt: &MacroToken, + pos: &Position, + idents: &mut IdentTable, + ) -> Token { + let mut new_pos = *pos; + new_pos.whitespace = mt.whitespace; + + let value = match &mt.value { + MacroTokenValue::Number(n) => TokenValue::Number(n.clone()), + MacroTokenValue::Ident(name) => { + let id = idents.intern(name); + TokenValue::Ident(id) + } + MacroTokenValue::String(s) => TokenValue::String(s.clone()), + MacroTokenValue::Char(c) => TokenValue::Char(c.clone()), + MacroTokenValue::Special(code) => TokenValue::Special(*code), + _ => TokenValue::None, + }; + + Token::with_value(mt.typ, new_pos, value) + } + + /// Expand a builtin macro + fn expand_builtin( + &mut self, + builtin: BuiltinMacro, + pos: &Position, + _mac: &Macro, + iter: &mut std::iter::Peekable, + idents: &mut IdentTable, + ) -> Option> + where + I: Iterator, + { + match builtin { + BuiltinMacro::Line => Some(vec![Token::with_value( + TokenType::Number, + *pos, + TokenValue::Number(pos.line.to_string()), + )]), + BuiltinMacro::File => Some(vec![Token::with_value( + TokenType::String, + *pos, + TokenValue::String(self.current_file.clone()), + )]), + BuiltinMacro::Date => Some(vec![Token::with_value( + TokenType::String, + *pos, + TokenValue::String(self.compile_date.clone()), + )]), + BuiltinMacro::Time => Some(vec![Token::with_value( + TokenType::String, + *pos, + TokenValue::String(self.compile_time.clone()), + )]), + BuiltinMacro::Counter => { + let val = self.counter; + self.counter += 1; + Some(vec![Token::with_value( + TokenType::Number, + *pos, + TokenValue::Number(val.to_string()), + )]) + } + BuiltinMacro::HasAttribute + | BuiltinMacro::HasBuiltin + | BuiltinMacro::HasFeature + | BuiltinMacro::HasExtension => { + // Consume the arguments + if let Some(next) = iter.peek() { + if let TokenValue::Special(code) = &next.value { + if *code == b'(' as u32 { + iter.next(); + let args = self.collect_macro_args(iter, idents); + let result = self.eval_has_builtin(builtin, &args, idents); + return Some(vec![Token::with_value( + TokenType::Number, + *pos, + TokenValue::Number(if result { "1" } else { "0" }.to_string()), + )]); + } + } + } + Some(vec![Token::with_value( + TokenType::Number, + *pos, + TokenValue::Number("0".to_string()), + )]) + } + BuiltinMacro::HasInclude | BuiltinMacro::HasIncludeNext => { + // Consume the arguments + if let Some(next) = iter.peek() { + if let TokenValue::Special(code) = &next.value { + if *code == b'(' as u32 { + iter.next(); + let args = self.collect_macro_args(iter, idents); + let result = self.eval_has_include(&args, idents); + return Some(vec![Token::with_value( + TokenType::Number, + *pos, + TokenValue::Number(if result { "1" } else { "0" }.to_string()), + )]); + } + } + } + Some(vec![Token::with_value( + TokenType::Number, + *pos, + TokenValue::Number("0".to_string()), + )]) + } + } + } + + /// Evaluate __has_attribute, __has_builtin, etc. + fn eval_has_builtin( + &self, + builtin: BuiltinMacro, + args: &[Vec], + idents: &IdentTable, + ) -> bool { + if args.is_empty() { + return false; + } + + // Get the argument name + let name = if let Some(tok) = args[0].first() { + self.token_to_string(tok, idents) + } else { + return false; + }; + + match builtin { + BuiltinMacro::HasAttribute => { + // Return true for common attributes we support + matches!( + name.as_str(), + "noreturn" + | "const" + | "pure" + | "unused" + | "used" + | "deprecated" + | "weak" + | "aligned" + | "packed" + | "format" + | "nonnull" + | "returns_nonnull" + | "warn_unused_result" + | "visibility" + | "always_inline" + | "noinline" + | "cold" + | "hot" + | "malloc" + | "constructor" + | "destructor" + ) + } + BuiltinMacro::HasBuiltin => { + // Return true for builtins we support + matches!( + name.as_str(), + "__builtin_va_list" + | "__builtin_va_start" + | "__builtin_va_end" + | "__builtin_va_arg" + | "__builtin_va_copy" + | "__builtin_bswap16" + | "__builtin_bswap32" + | "__builtin_bswap64" + | "__builtin_expect" + | "__builtin_unreachable" + | "__builtin_trap" + | "__builtin_constant_p" + | "__builtin_types_compatible_p" + | "__builtin_offsetof" + | "__builtin_alloca" + ) + } + BuiltinMacro::HasFeature => { + // C11/C99 features + matches!( + name.as_str(), + "c_alignas" + | "c_alignof" + | "c_static_assert" + | "c_generic_selections" + | "c_thread_local" + ) + } + BuiltinMacro::HasExtension => { + // Extensions we support (basically same as features for now) + matches!( + name.as_str(), + "c_alignas" + | "c_alignof" + | "c_static_assert" + | "c_generic_selections" + | "attribute_deprecated_with_message" + | "attribute_unavailable_with_message" + | "enumerator_attributes" + | "cxx_attributes" + ) + } + _ => false, + } + } + + /// Evaluate __has_include + fn eval_has_include(&self, args: &[Vec], idents: &IdentTable) -> bool { + if args.is_empty() { + return false; + } + + let (filename, is_system) = self.parse_include_path(&args[0], idents); + self.find_include_file(&filename, is_system, false) + .is_some() + } +} + +// ============================================================================ +// Expression Evaluator for #if +// ============================================================================ + +struct ExprEvaluator<'a, 'b> { + pp: &'a Preprocessor<'b>, + idents: &'a IdentTable, + tokens: Vec, + pos: usize, +} + +impl<'a, 'b> ExprEvaluator<'a, 'b> { + fn new(pp: &'a Preprocessor<'b>, idents: &'a IdentTable) -> Self { + Self { + pp, + idents, + tokens: Vec::new(), + pos: 0, + } + } + + fn evaluate(&mut self, tokens: &[Token]) -> i64 { + self.tokens = tokens.to_vec(); + self.pos = 0; + self.expr_or() + } + + fn current(&self) -> Option<&Token> { + self.tokens.get(self.pos) + } + + fn advance(&mut self) { + self.pos += 1; + } + + fn is_special(&self, expected: u32) -> bool { + if let Some(tok) = self.current() { + if let TokenValue::Special(code) = &tok.value { + return *code == expected; + } + } + false + } + + fn is_ident(&self, expected: &str) -> bool { + if let Some(tok) = self.current() { + if let TokenValue::Ident(id) = &tok.value { + if let Some(name) = self.idents.get(*id) { + return name == expected; + } + } + } + false + } + + fn get_ident(&self) -> Option { + if let Some(tok) = self.current() { + if let TokenValue::Ident(id) = &tok.value { + return self.idents.get(*id).map(|s| s.to_string()); + } + } + None + } + + // Operator precedence (lowest to highest): + // || + // && + // | + // ^ + // & + // == != + // < <= > >= + // << >> + // + - + // * / % + // ! ~ - + (unary) + // defined, primary + + fn expr_or(&mut self) -> i64 { + let mut left = self.expr_and(); + while self.is_special(SpecialToken::LogicalOr as u32) { + self.advance(); + let right = self.expr_and(); + left = if left != 0 || right != 0 { 1 } else { 0 }; + } + left + } + + fn expr_and(&mut self) -> i64 { + let mut left = self.expr_bitor(); + while self.is_special(SpecialToken::LogicalAnd as u32) { + self.advance(); + let right = self.expr_bitor(); + left = if left != 0 && right != 0 { 1 } else { 0 }; + } + left + } + + fn expr_bitor(&mut self) -> i64 { + let mut left = self.expr_bitxor(); + while self.is_special(b'|' as u32) { + self.advance(); + let right = self.expr_bitxor(); + left |= right; + } + left + } + + fn expr_bitxor(&mut self) -> i64 { + let mut left = self.expr_bitand(); + while self.is_special(b'^' as u32) { + self.advance(); + let right = self.expr_bitand(); + left ^= right; + } + left + } + + fn expr_bitand(&mut self) -> i64 { + let mut left = self.expr_equality(); + while self.is_special(b'&' as u32) { + self.advance(); + let right = self.expr_equality(); + left &= right; + } + left + } + + fn expr_equality(&mut self) -> i64 { + let mut left = self.expr_relational(); + loop { + if self.is_special(SpecialToken::Equal as u32) { + self.advance(); + let right = self.expr_relational(); + left = if left == right { 1 } else { 0 }; + } else if self.is_special(SpecialToken::NotEqual as u32) { + self.advance(); + let right = self.expr_relational(); + left = if left != right { 1 } else { 0 }; + } else { + break; + } + } + left + } + + fn expr_relational(&mut self) -> i64 { + let mut left = self.expr_shift(); + loop { + if self.is_special(b'<' as u32) { + self.advance(); + let right = self.expr_shift(); + left = if left < right { 1 } else { 0 }; + } else if self.is_special(b'>' as u32) { + self.advance(); + let right = self.expr_shift(); + left = if left > right { 1 } else { 0 }; + } else if self.is_special(SpecialToken::Lte as u32) { + self.advance(); + let right = self.expr_shift(); + left = if left <= right { 1 } else { 0 }; + } else if self.is_special(SpecialToken::Gte as u32) { + self.advance(); + let right = self.expr_shift(); + left = if left >= right { 1 } else { 0 }; + } else { + break; + } + } + left + } + + fn expr_shift(&mut self) -> i64 { + let mut left = self.expr_additive(); + loop { + if self.is_special(SpecialToken::LeftShift as u32) { + self.advance(); + let right = self.expr_additive(); + left <<= right; + } else if self.is_special(SpecialToken::RightShift as u32) { + self.advance(); + let right = self.expr_additive(); + left >>= right; + } else { + break; + } + } + left + } + + fn expr_additive(&mut self) -> i64 { + let mut left = self.expr_multiplicative(); + loop { + if self.is_special(b'+' as u32) { + self.advance(); + let right = self.expr_multiplicative(); + left += right; + } else if self.is_special(b'-' as u32) { + self.advance(); + let right = self.expr_multiplicative(); + left -= right; + } else { + break; + } + } + left + } + + fn expr_multiplicative(&mut self) -> i64 { + let mut left = self.expr_unary(); + loop { + if self.is_special(b'*' as u32) { + self.advance(); + let right = self.expr_unary(); + left *= right; + } else if self.is_special(b'/' as u32) { + self.advance(); + let right = self.expr_unary(); + if right != 0 { + left /= right; + } + } else if self.is_special(b'%' as u32) { + self.advance(); + let right = self.expr_unary(); + if right != 0 { + left %= right; + } + } else { + break; + } + } + left + } + + fn expr_unary(&mut self) -> i64 { + if self.is_special(b'!' as u32) { + self.advance(); + let val = self.expr_unary(); + return if val == 0 { 1 } else { 0 }; + } + if self.is_special(b'~' as u32) { + self.advance(); + let val = self.expr_unary(); + return !val; + } + if self.is_special(b'-' as u32) { + self.advance(); + let val = self.expr_unary(); + return -val; + } + if self.is_special(b'+' as u32) { + self.advance(); + return self.expr_unary(); + } + self.expr_primary() + } + + fn expr_primary(&mut self) -> i64 { + // Handle defined(X) or defined X + if self.is_ident("defined") { + self.advance(); + return self.eval_defined(); + } + + // Handle parenthesized expression + if self.is_special(b'(' as u32) { + self.advance(); + let val = self.expr_or(); + if self.is_special(b')' as u32) { + self.advance(); + } + return val; + } + + // Handle number + if let Some(tok) = self.current() { + if let TokenValue::Number(n) = &tok.value { + let num_str = n.clone(); + self.advance(); + return self.parse_number(&num_str); + } + } + + // Handle character literal + if let Some(tok) = self.current() { + if let TokenValue::Char(c) = &tok.value { + let char_str = c.clone(); + self.advance(); + if char_str.is_empty() { + return 0; + } + return char_str.chars().next().unwrap_or('\0') as i64; + } + } + + // Handle identifier (undefined macro = 0) + if let Some(tok) = self.current() { + if let TokenValue::Ident(id) = &tok.value { + let ident_id = *id; + self.advance(); + // Check if it's a defined macro with a value + if let Some(name) = self.idents.get(ident_id) { + if let Some(mac) = self.pp.get_macro(name) { + if let Some(mt) = mac.body.first() { + if let MacroTokenValue::Number(n) = &mt.value { + return self.parse_number(n); + } + } + } + } + return 0; // Undefined identifier = 0 + } + } + + 0 + } + + fn eval_defined(&mut self) -> i64 { + // defined(X) or defined X + let mut need_paren = false; + if self.is_special(b'(' as u32) { + self.advance(); + need_paren = true; + } + + let result = if let Some(name) = self.get_ident() { + self.advance(); + if self.pp.is_defined(&name) { + 1 + } else { + 0 + } + } else { + 0 + }; + + if need_paren && self.is_special(b')' as u32) { + self.advance(); + } + + result + } + + fn parse_number(&self, s: &str) -> i64 { + let s = s.trim_end_matches(['u', 'U', 'l', 'L']); + + if s.starts_with("0x") || s.starts_with("0X") { + i64::from_str_radix(&s[2..], 16).unwrap_or(0) + } else if s.starts_with("0b") || s.starts_with("0B") { + i64::from_str_radix(&s[2..], 2).unwrap_or(0) + } else if s.starts_with('0') && s.len() > 1 && s.chars().nth(1).unwrap().is_ascii_digit() { + i64::from_str_radix(&s[1..], 8).unwrap_or(0) + } else { + s.parse().unwrap_or(0) + } + } +} + +// ============================================================================ +// Public API +// ============================================================================ + +/// Preprocess tokens +/// +/// This is the main entry point for preprocessing. +/// Takes lexer output and returns preprocessed tokens. +pub fn preprocess( + tokens: Vec, + target: &Target, + idents: &mut IdentTable, + filename: &str, +) -> Vec { + let mut pp = Preprocessor::new(target, filename); + pp.preprocess(tokens, idents) +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::token::lexer::Tokenizer; + + fn preprocess_str(input: &str) -> (Vec, IdentTable) { + let target = Target::host(); + let mut tokenizer = Tokenizer::new(input.as_bytes(), 0); + let tokens = tokenizer.tokenize(); + let result = { + let idents = tokenizer.ident_table_mut(); + preprocess(tokens, &target, idents, "") + }; + (result, tokenizer.into_ident_table()) + } + + fn get_token_strings(tokens: &[Token], idents: &IdentTable) -> Vec { + tokens + .iter() + .filter_map(|t| match &t.typ { + TokenType::Ident => { + if let TokenValue::Ident(id) = &t.value { + idents.get(*id).map(|s| s.to_string()) + } else { + None + } + } + TokenType::Number => { + if let TokenValue::Number(n) = &t.value { + Some(n.clone()) + } else { + None + } + } + TokenType::String => { + if let TokenValue::String(s) = &t.value { + Some(format!("\"{}\"", s)) + } else { + None + } + } + TokenType::Special => { + if let TokenValue::Special(code) = &t.value { + if *code < 256 { + Some((*code as u8 as char).to_string()) + } else { + None + } + } else { + None + } + } + _ => None, + }) + .collect() + } + + #[test] + fn test_simple_define() { + let (tokens, idents) = preprocess_str("#define FOO 42\nFOO"); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"42".to_string())); + } + + #[test] + fn test_undef() { + let (tokens, idents) = preprocess_str("#define FOO 42\n#undef FOO\nFOO"); + let strs = get_token_strings(&tokens, &idents); + // FOO should not be expanded after undef + assert!(strs.contains(&"FOO".to_string())); + } + + #[test] + fn test_ifdef_true() { + let (tokens, idents) = preprocess_str("#define FOO\n#ifdef FOO\nyes\n#endif"); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + #[test] + fn test_ifdef_false() { + let (tokens, idents) = preprocess_str("#ifdef FOO\nyes\n#endif\nno"); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"yes".to_string())); + assert!(strs.contains(&"no".to_string())); + } + + #[test] + fn test_ifndef_true() { + let (tokens, idents) = preprocess_str("#ifndef FOO\nyes\n#endif"); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + #[test] + fn test_ifndef_false() { + let (tokens, idents) = preprocess_str("#define FOO\n#ifndef FOO\nyes\n#endif\nno"); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"yes".to_string())); + assert!(strs.contains(&"no".to_string())); + } + + #[test] + fn test_ifdef_else() { + let (tokens, idents) = preprocess_str("#ifdef FOO\nyes\n#else\nno\n#endif"); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"yes".to_string())); + assert!(strs.contains(&"no".to_string())); + } + + #[test] + fn test_nested_ifdef() { + let (tokens, idents) = + preprocess_str("#define A\n#ifdef A\n#ifdef B\ninner\n#endif\nouter\n#endif"); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"inner".to_string())); // B not defined + assert!(strs.contains(&"outer".to_string())); // A is defined + } + + #[test] + fn test_if_true() { + let (tokens, idents) = preprocess_str("#if 1\nyes\n#endif"); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + #[test] + fn test_if_false() { + let (tokens, idents) = preprocess_str("#if 0\nyes\n#endif\nno"); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"yes".to_string())); + assert!(strs.contains(&"no".to_string())); + } + + #[test] + fn test_if_defined() { + let (tokens, idents) = preprocess_str("#define FOO\n#if defined(FOO)\nyes\n#endif"); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + #[test] + fn test_elif() { + let (tokens, idents) = preprocess_str("#if 0\none\n#elif 1\ntwo\n#else\nthree\n#endif"); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"one".to_string())); + assert!(strs.contains(&"two".to_string())); + assert!(!strs.contains(&"three".to_string())); + } + + #[test] + fn test_predefined_stdc() { + let target = Target::host(); + let pp = Preprocessor::new(&target, "test.c"); + assert!(pp.is_defined("__STDC__")); + assert!(pp.is_defined("__STDC_VERSION__")); + } + + #[test] + fn test_predefined_arch() { + let target = Target::host(); + let pp = Preprocessor::new(&target, "test.c"); + + // Should have either x86_64 or aarch64 defined + assert!(pp.is_defined("__x86_64__") || pp.is_defined("__aarch64__")); + } + + #[test] + fn test_line_macro() { + let (tokens, _idents) = preprocess_str("__LINE__"); + // Should have a number token + assert!(tokens.iter().any(|t| t.typ == TokenType::Number)); + } + + #[test] + fn test_counter_macro() { + let (tokens, _idents) = preprocess_str("__COUNTER__ __COUNTER__ __COUNTER__"); + let nums: Vec<_> = tokens + .iter() + .filter_map(|t| { + if let TokenValue::Number(n) = &t.value { + Some(n.clone()) + } else { + None + } + }) + .collect(); + // Should have 0, 1, 2 + assert_eq!(nums, vec!["0", "1", "2"]); + } + + #[test] + fn test_deeply_nested_conditionals() { + let input = r#" +#define A +#ifdef A + level1 + #ifdef B + level2a + #else + level2b + #ifdef A + level3 + #endif + #endif +#endif +"#; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + + assert!(strs.contains(&"level1".to_string())); + assert!(!strs.contains(&"level2a".to_string())); // B not defined + assert!(strs.contains(&"level2b".to_string())); // else branch + assert!(strs.contains(&"level3".to_string())); // A still defined + } + + #[test] + fn test_else_basic() { + // Ensure #else works correctly when condition is false + let (tokens, idents) = preprocess_str("#if 0\nyes\n#else\nno\n#endif"); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"yes".to_string())); + assert!(strs.contains(&"no".to_string())); + } + + #[test] + fn test_endif_basic() { + // Ensure #endif properly closes conditional blocks + let (tokens, idents) = preprocess_str("#ifdef FOO\nskipped\n#endif\nafter"); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"skipped".to_string())); + assert!(strs.contains(&"after".to_string())); + } + + #[test] + fn test_include_skipped_in_false_branch() { + // #include in a false branch should be skipped + let (tokens, idents) = preprocess_str("#if 0\n#include \n#endif\ncode"); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"code".to_string())); + // No error from trying to include stdio.h + } + + #[test] + fn test_error_skipped_in_false_branch() { + // #error in a false branch should not trigger + let (tokens, idents) = + preprocess_str("#if 0\n#error This should not trigger\n#endif\ncode"); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"code".to_string())); + } + + #[test] + fn test_warning_skipped_in_false_branch() { + // #warning in a false branch should not trigger + let (tokens, idents) = + preprocess_str("#if 0\n#warning This should not trigger\n#endif\ncode"); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"code".to_string())); + } + + #[test] + fn test_pragma_ignored() { + // #pragma should be silently ignored + let (tokens, idents) = preprocess_str("#pragma once\ncode"); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"code".to_string())); + } + + #[test] + fn test_line_directive_ignored() { + // #line should be processed (currently ignored) + let (tokens, idents) = preprocess_str("#line 100\ncode"); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"code".to_string())); + } + + #[test] + fn test_define_with_value() { + // Test #define with a specific value + let (tokens, idents) = preprocess_str("#define VALUE 123\nVALUE"); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"123".to_string())); + } + + #[test] + fn test_define_empty() { + // Test #define without value (flag-style macro) + let (tokens, idents) = preprocess_str("#define FLAG\n#ifdef FLAG\nyes\n#endif"); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + #[test] + fn test_undef_removes_macro() { + // Verify #undef removes a macro so #ifdef fails + let (tokens, idents) = + preprocess_str("#define FOO\n#undef FOO\n#ifdef FOO\nyes\n#endif\nno"); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"yes".to_string())); + assert!(strs.contains(&"no".to_string())); + } + + #[test] + fn test_function_like_macro() { + let (tokens, idents) = preprocess_str("#define ADD(a, b) a + b\nADD(1, 2)"); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"1".to_string())); + assert!(strs.contains(&"+".to_string())); + assert!(strs.contains(&"2".to_string())); + } + + #[test] + fn test_if_logical_and() { + let (tokens, idents) = preprocess_str("#if 1 && 1\nyes\n#endif"); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + + let (tokens, idents) = preprocess_str("#if 1 && 0\nyes\n#endif\nno"); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"yes".to_string())); + assert!(strs.contains(&"no".to_string())); + } + + #[test] + fn test_if_logical_or() { + let (tokens, idents) = preprocess_str("#if 0 || 1\nyes\n#endif"); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + + let (tokens, idents) = preprocess_str("#if 0 || 0\nyes\n#endif\nno"); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"yes".to_string())); + assert!(strs.contains(&"no".to_string())); + } + + #[test] + fn test_if_not() { + let (tokens, idents) = preprocess_str("#if !0\nyes\n#endif"); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + + let (tokens, idents) = preprocess_str("#if !1\nyes\n#endif\nno"); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"yes".to_string())); + assert!(strs.contains(&"no".to_string())); + } + + #[test] + fn test_if_comparison() { + let (tokens, idents) = preprocess_str("#if 5 > 3\nyes\n#endif"); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + + let (tokens, idents) = preprocess_str("#if 5 < 3\nyes\n#endif\nno"); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"yes".to_string())); + assert!(strs.contains(&"no".to_string())); + } + + // ======================================================================== + // Header guard tests + // ======================================================================== + + #[test] + fn test_header_guard_basic() { + // Simulates typical header guard pattern + let input = r#" +#ifndef MY_HEADER_H +#define MY_HEADER_H +first_include +#endif +#ifndef MY_HEADER_H +#define MY_HEADER_H +second_include +#endif +"#; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"first_include".to_string())); + assert!(!strs.contains(&"second_include".to_string())); + } + + #[test] + fn test_header_guard_ifdef_style() { + // Alternative header guard using #ifdef + let input = r#" +#ifdef GUARD +#else +#define GUARD +first +#endif +#ifdef GUARD +second +#endif +"#; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"first".to_string())); + assert!(strs.contains(&"second".to_string())); + } + + // ======================================================================== + // Multiple elif chain tests + // ======================================================================== + + #[test] + fn test_multiple_elif_first() { + let input = "#if 1\none\n#elif 1\ntwo\n#elif 1\nthree\n#else\nfour\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"one".to_string())); + assert!(!strs.contains(&"two".to_string())); + assert!(!strs.contains(&"three".to_string())); + assert!(!strs.contains(&"four".to_string())); + } + + #[test] + fn test_multiple_elif_middle() { + let input = "#if 0\none\n#elif 0\ntwo\n#elif 1\nthree\n#else\nfour\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"one".to_string())); + assert!(!strs.contains(&"two".to_string())); + assert!(strs.contains(&"three".to_string())); + assert!(!strs.contains(&"four".to_string())); + } + + #[test] + fn test_multiple_elif_else() { + let input = "#if 0\none\n#elif 0\ntwo\n#elif 0\nthree\n#else\nfour\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"one".to_string())); + assert!(!strs.contains(&"two".to_string())); + assert!(!strs.contains(&"three".to_string())); + assert!(strs.contains(&"four".to_string())); + } + + // ======================================================================== + // Defined operator tests + // ======================================================================== + + #[test] + fn test_defined_without_parens() { + let input = "#define FOO\n#if defined FOO\nyes\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + #[test] + fn test_defined_not_defined() { + let input = "#if defined(BAR)\nyes\n#endif\nno"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"yes".to_string())); + assert!(strs.contains(&"no".to_string())); + } + + #[test] + fn test_defined_negated() { + let input = "#if !defined(FOO)\nyes\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + #[test] + fn test_defined_in_complex_expr() { + let input = "#define A\n#if defined(A) && !defined(B)\nyes\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + // ======================================================================== + // Macro expansion tests + // ======================================================================== + + #[test] + fn test_multi_token_macro() { + let input = "#define EXPR 1 + 2 + 3\nEXPR"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"1".to_string())); + assert!(strs.contains(&"2".to_string())); + assert!(strs.contains(&"3".to_string())); + } + + #[test] + fn test_nested_macro_expansion() { + let input = "#define A B\n#define B 42\nA"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"42".to_string())); + } + + #[test] + fn test_macro_in_if_expr() { + let input = "#define VAL 5\n#if VAL > 3\nyes\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + #[test] + fn test_macro_redefinition() { + // Macro redefinition should use latest value + let input = "#define X 1\n#define X 2\nX"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"2".to_string())); + assert!(!strs.contains(&"1".to_string())); + } + + // ======================================================================== + // Arithmetic in #if expressions + // ======================================================================== + + #[test] + fn test_if_addition() { + let input = "#if 2 + 3 == 5\nyes\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + #[test] + fn test_if_subtraction() { + let input = "#if 10 - 3 == 7\nyes\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + #[test] + fn test_if_multiplication() { + let input = "#if 3 * 4 == 12\nyes\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + #[test] + fn test_if_division() { + let input = "#if 12 / 4 == 3\nyes\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + #[test] + fn test_if_modulo() { + let input = "#if 10 % 3 == 1\nyes\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + #[test] + fn test_if_parentheses() { + let input = "#if (2 + 3) * 2 == 10\nyes\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + // ======================================================================== + // Comparison operators in #if + // ======================================================================== + + #[test] + fn test_if_equal() { + let input = "#if 5 == 5\nyes\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + #[test] + fn test_if_not_equal() { + let input = "#if 5 != 3\nyes\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + #[test] + fn test_if_less_equal() { + let input = "#if 3 <= 3\nyes\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + #[test] + fn test_if_greater_equal() { + let input = "#if 5 >= 5\nyes\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + // ======================================================================== + // Bitwise operators in #if + // ======================================================================== + + #[test] + fn test_if_bitwise_and() { + let input = "#if 0xFF & 0x0F == 0x0F\nyes\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + #[test] + fn test_if_bitwise_or() { + let input = "#if (0xF0 | 0x0F) == 0xFF\nyes\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + // ======================================================================== + // Edge cases + // ======================================================================== + + #[test] + fn test_empty_if_block() { + let input = "#if 1\n#endif\nafter"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"after".to_string())); + } + + #[test] + fn test_empty_else_block() { + let input = "#if 0\nskipped\n#else\n#endif\nafter"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"skipped".to_string())); + assert!(strs.contains(&"after".to_string())); + } + + #[test] + fn test_consecutive_conditionals() { + let input = "#if 1\nfirst\n#endif\n#if 1\nsecond\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"first".to_string())); + assert!(strs.contains(&"second".to_string())); + } + + #[test] + fn test_undefined_macro_is_zero() { + // Undefined macros evaluate to 0 in #if expressions + let input = "#if UNDEFINED\nyes\n#endif\nno"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"yes".to_string())); + assert!(strs.contains(&"no".to_string())); + } + + #[test] + fn test_ternary_in_if() { + let input = "#if 1 ? 1 : 0\nyes\n#endif"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"yes".to_string())); + } + + // Tests for nested conditional skipping (bug fix) + #[test] + fn test_nested_if_else_in_skipped_block() { + // When outer #ifndef is false (guard defined), inner #if/#else should not activate + let input = r#" +#define GUARD +#ifndef GUARD +outer_skipped +#if 0 +inner_if_skipped +#else +inner_else_should_also_skip +#endif +#endif +after +"#; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"outer_skipped".to_string())); + assert!(!strs.contains(&"inner_if_skipped".to_string())); + assert!(!strs.contains(&"inner_else_should_also_skip".to_string())); + assert!(strs.contains(&"after".to_string())); + } + + #[test] + fn test_nested_elif_in_skipped_block() { + // When outer block is skipped, nested #elif should not activate + let input = r#" +#define GUARD +#ifndef GUARD +#if 0 +a +#elif 1 +b_should_not_appear +#else +c +#endif +#endif +done +"#; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"a".to_string())); + assert!(!strs.contains(&"b_should_not_appear".to_string())); + assert!(!strs.contains(&"c".to_string())); + assert!(strs.contains(&"done".to_string())); + } + + #[test] + fn test_deeply_nested_skipped_conditionals() { + // Multiple levels of nesting inside a skipped block + let input = r#" +#if 0 +level1 +#if 1 +level2_should_skip +#if 1 +level3_should_skip +#else +level3_else_should_skip +#endif +#endif +#endif +visible +"#; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(!strs.contains(&"level1".to_string())); + assert!(!strs.contains(&"level2_should_skip".to_string())); + assert!(!strs.contains(&"level3_should_skip".to_string())); + assert!(!strs.contains(&"level3_else_should_skip".to_string())); + assert!(strs.contains(&"visible".to_string())); + } + + // Tests for token pasting in object-like macros (bug fix) + #[test] + fn test_token_paste_object_macro() { + let input = "#define CONCAT a ## b\nCONCAT"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"ab".to_string())); + } + + #[test] + fn test_token_paste_object_macro_numbers() { + let input = "#define NUM 1 ## 2 ## 3\nNUM"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"123".to_string())); + } + + #[test] + fn test_token_paste_object_macro_mixed() { + let input = "#define PREFIX foo ## 123\nPREFIX"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"foo123".to_string())); + } + + // Tests for token pasting in function-like macros + #[test] + fn test_token_paste_function_macro() { + let input = "#define CONCAT(a, b) a ## b\nCONCAT(foo, bar)"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"foobar".to_string())); + } + + #[test] + fn test_token_paste_function_macro_prefix() { + let input = "#define MAKE_ID(x) id_ ## x\nMAKE_ID(test)"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"id_test".to_string())); + } + + #[test] + fn test_token_paste_function_macro_suffix() { + let input = "#define MAKE_FUNC(x) x ## _func\nMAKE_FUNC(my)"; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + assert!(strs.contains(&"my_func".to_string())); + } + + #[test] + fn test_token_paste_creates_identifier() { + // Pasting should create a new identifier that can be used + let input = r#" +#define PASTE(a, b) a ## b +#define foobar 42 +PASTE(foo, bar) +"#; + let (tokens, idents) = preprocess_str(input); + let strs = get_token_strings(&tokens, &idents); + // foobar should expand to 42 + assert!(strs.contains(&"42".to_string())); + } +} diff --git a/cc/types.rs b/cc/types.rs new file mode 100644 index 000000000..b248d20b7 --- /dev/null +++ b/cc/types.rs @@ -0,0 +1,897 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Type system for pcc C99 compiler +// Based on sparse's compositional type model +// + +use std::fmt; + +// ============================================================================ +// Composite Type Components +// ============================================================================ + +/// A struct/union member +#[derive(Debug, Clone, PartialEq)] +pub struct StructMember { + /// Member name (empty string for unnamed bitfields) + pub name: String, + /// Member type + pub typ: Type, + /// Byte offset within struct (0 for unions, offset of storage unit for bitfields) + pub offset: usize, + /// For bitfields: bit offset within storage unit (0 = LSB) + pub bit_offset: Option, + /// For bitfields: bit width + pub bit_width: Option, + /// For bitfields: size of storage unit in bytes + pub storage_unit_size: Option, +} + +/// Information about a struct/union member lookup +#[derive(Debug, Clone)] +pub struct MemberInfo { + /// Byte offset within struct + pub offset: usize, + /// Member type + pub typ: Type, + /// For bitfields: bit offset within storage unit + pub bit_offset: Option, + /// For bitfields: bit width + pub bit_width: Option, + /// For bitfields: storage unit size in bytes + pub storage_unit_size: Option, +} + +/// An enum constant +#[derive(Debug, Clone, PartialEq)] +pub struct EnumConstant { + /// Constant name + pub name: String, + /// Constant value + pub value: i64, +} + +/// Composite type definition (struct, union, or enum) +#[derive(Debug, Clone, PartialEq)] +pub struct CompositeType { + /// Tag name (e.g., "point" in "struct point") + pub tag: Option, + /// Members for struct/union + pub members: Vec, + /// Constants for enum + pub enum_constants: Vec, + /// Total size in bytes + pub size: usize, + /// Alignment requirement in bytes + pub align: usize, + /// False for forward declarations + pub is_complete: bool, +} + +impl CompositeType { + /// Create a new empty composite type (forward declaration) + pub fn incomplete(tag: Option) -> Self { + Self { + tag, + members: Vec::new(), + enum_constants: Vec::new(), + size: 0, + align: 1, + is_complete: false, + } + } + + /// Compute struct layout with natural alignment, including bitfield packing + /// Returns (total_size, alignment) + pub fn compute_struct_layout(members: &mut [StructMember]) -> (usize, usize) { + let mut offset = 0usize; + let mut max_align = 1usize; + let mut current_bit_offset = 0u32; + let mut current_storage_unit_size = 0u32; // 0 means no active bitfield storage + + for member in members.iter_mut() { + if let Some(bit_width) = member.bit_width { + // This is a bitfield + let storage_size = member.typ.size_bytes() as u32; + let storage_bits = storage_size * 8; + + if bit_width == 0 { + // Zero-width bitfield: force alignment to next storage unit + if current_storage_unit_size > 0 { + offset += current_storage_unit_size as usize; + current_bit_offset = 0; + current_storage_unit_size = 0; + } + member.offset = offset; + member.bit_offset = None; + member.storage_unit_size = None; + continue; + } + + // Check if we need a new storage unit: + // - No active storage unit + // - Storage unit type changed + // - Bitfield doesn't fit in remaining space + let need_new_unit = current_storage_unit_size == 0 + || current_storage_unit_size != storage_size + || current_bit_offset + bit_width > storage_bits; + + if need_new_unit { + // Close current storage unit if active + if current_storage_unit_size > 0 { + offset += current_storage_unit_size as usize; + } + // Align to storage unit size + let align = storage_size as usize; + offset = (offset + align - 1) & !(align - 1); + max_align = max_align.max(align); + current_bit_offset = 0; + current_storage_unit_size = storage_size; + } + + member.offset = offset; + member.bit_offset = Some(current_bit_offset); + member.storage_unit_size = Some(storage_size); + current_bit_offset += bit_width; + } else { + // Regular member - close any active bitfield storage unit + if current_storage_unit_size > 0 { + offset += current_storage_unit_size as usize; + current_bit_offset = 0; + current_storage_unit_size = 0; + } + + let align = member.typ.alignment(); + max_align = max_align.max(align); + + // Align offset to member's alignment + offset = (offset + align - 1) & !(align - 1); + member.offset = offset; + member.bit_offset = None; + member.storage_unit_size = None; + + // Advance by member size + offset += member.typ.size_bytes(); + } + } + + // Close final bitfield storage unit if active + if current_storage_unit_size > 0 { + offset += current_storage_unit_size as usize; + } + + // Pad struct size to alignment + let size = if max_align > 1 { + (offset + max_align - 1) & !(max_align - 1) + } else { + offset + }; + (size, max_align) + } + + /// Compute union layout: all members at offset 0, size = max member size + /// Returns (total_size, alignment) + pub fn compute_union_layout(members: &mut [StructMember]) -> (usize, usize) { + let mut max_size = 0usize; + let mut max_align = 1usize; + + for member in members.iter_mut() { + member.offset = 0; // All union members at offset 0 + max_size = max_size.max(member.typ.size_bytes()); + max_align = max_align.max(member.typ.alignment()); + } + + // Pad size to alignment + let size = if max_align > 1 { + (max_size + max_align - 1) & !(max_align - 1) + } else { + max_size + }; + (size, max_align) + } +} + +// ============================================================================ +// Type Modifiers +// ============================================================================ + +bitflags::bitflags! { + /// Type modifiers (storage class, qualifiers, signedness) + #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] + pub struct TypeModifiers: u32 { + // Storage class specifiers + const STATIC = 1 << 0; + const EXTERN = 1 << 1; + const REGISTER = 1 << 2; + const AUTO = 1 << 3; + const TYPEDEF = 1 << 13; // Not a storage class semantically, but syntactically + + // Type qualifiers + const CONST = 1 << 4; + const VOLATILE = 1 << 5; + const RESTRICT = 1 << 6; + + // Signedness + const SIGNED = 1 << 7; + const UNSIGNED = 1 << 8; + + // Size modifiers + const SHORT = 1 << 9; + const LONG = 1 << 10; + const LONGLONG = 1 << 11; + + // Inline + const INLINE = 1 << 12; + } +} + +// ============================================================================ +// Type Kinds +// ============================================================================ + +/// Basic type kinds (mirrors sparse's SYM_* for types) +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TypeKind { + // Basic types + Void, + Bool, + Char, + Short, + Int, + Long, + LongLong, + Float, + Double, + LongDouble, + + // Derived types + Pointer, + Array, + Function, + + // Composite types (for future expansion) + Struct, + Union, + Enum, + + // Compiler builtin types + /// __builtin_va_list - platform-specific variadic argument list type + /// x86-64: 24-byte struct (1 element array of struct with 4 fields) + /// aarch64-linux: 32-byte struct + /// aarch64-macos: char* (8 bytes, same as pointer) + VaList, +} + +impl fmt::Display for TypeKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TypeKind::Void => write!(f, "void"), + TypeKind::Bool => write!(f, "_Bool"), + TypeKind::Char => write!(f, "char"), + TypeKind::Short => write!(f, "short"), + TypeKind::Int => write!(f, "int"), + TypeKind::Long => write!(f, "long"), + TypeKind::LongLong => write!(f, "long long"), + TypeKind::Float => write!(f, "float"), + TypeKind::Double => write!(f, "double"), + TypeKind::LongDouble => write!(f, "long double"), + TypeKind::Pointer => write!(f, "pointer"), + TypeKind::Array => write!(f, "array"), + TypeKind::Function => write!(f, "function"), + TypeKind::Struct => write!(f, "struct"), + TypeKind::Union => write!(f, "union"), + TypeKind::Enum => write!(f, "enum"), + TypeKind::VaList => write!(f, "__builtin_va_list"), + } + } +} + +// ============================================================================ +// Type Representation +// ============================================================================ + +/// A C type (compositional structure like sparse) +/// +/// Types are built compositionally: +/// - `int *p` -> Pointer { base: Int } +/// - `int arr[10]` -> Array { base: Int, size: 10 } +/// - `int (*fp)(int)` -> Pointer { base: Function { return: Int, params: [Int] } } +#[derive(Debug, Clone, PartialEq)] +pub struct Type { + /// The kind of type + pub kind: TypeKind, + + /// Type modifiers (const, volatile, signed, unsigned, etc.) + pub modifiers: TypeModifiers, + + /// Base type for pointers, arrays, and function return types + pub base: Option>, + + /// Array size (for arrays) + pub array_size: Option, + + /// Function parameter types (for functions) + pub params: Option>, + + /// Is this function variadic? (for functions) + pub variadic: bool, + + /// Composite type data (for struct, union, enum) + pub composite: Option>, +} + +impl Default for Type { + fn default() -> Self { + Self { + kind: TypeKind::Int, + modifiers: TypeModifiers::empty(), + base: None, + array_size: None, + params: None, + variadic: false, + composite: None, + } + } +} + +impl Type { + /// Create a new basic type + pub fn basic(kind: TypeKind) -> Self { + Self { + kind, + ..Default::default() + } + } + + /// Create a type with modifiers + pub fn with_modifiers(kind: TypeKind, modifiers: TypeModifiers) -> Self { + Self { + kind, + modifiers, + ..Default::default() + } + } + + /// Create a pointer type + pub fn pointer(base: Type) -> Self { + Self { + kind: TypeKind::Pointer, + modifiers: TypeModifiers::empty(), + base: Some(Box::new(base)), + array_size: None, + params: None, + variadic: false, + composite: None, + } + } + + /// Create an array type + pub fn array(base: Type, size: usize) -> Self { + Self { + kind: TypeKind::Array, + modifiers: TypeModifiers::empty(), + base: Some(Box::new(base)), + array_size: Some(size), + params: None, + variadic: false, + composite: None, + } + } + + /// Create a function type + pub fn function(return_type: Type, params: Vec, variadic: bool) -> Self { + Self { + kind: TypeKind::Function, + modifiers: TypeModifiers::empty(), + base: Some(Box::new(return_type)), + array_size: None, + params: Some(params), + variadic, + composite: None, + } + } + + /// Create a struct type + pub fn struct_type(composite: CompositeType) -> Self { + Self { + kind: TypeKind::Struct, + modifiers: TypeModifiers::empty(), + base: None, + array_size: None, + params: None, + variadic: false, + composite: Some(Box::new(composite)), + } + } + + /// Create a union type + pub fn union_type(composite: CompositeType) -> Self { + Self { + kind: TypeKind::Union, + modifiers: TypeModifiers::empty(), + base: None, + array_size: None, + params: None, + variadic: false, + composite: Some(Box::new(composite)), + } + } + + /// Create an enum type + pub fn enum_type(composite: CompositeType) -> Self { + Self { + kind: TypeKind::Enum, + modifiers: TypeModifiers::empty(), + base: None, + array_size: None, + params: None, + variadic: false, + composite: Some(Box::new(composite)), + } + } + + /// Create an incomplete (forward-declared) struct type + pub fn incomplete_struct(tag: String) -> Self { + Self::struct_type(CompositeType::incomplete(Some(tag))) + } + + /// Create an incomplete (forward-declared) union type + pub fn incomplete_union(tag: String) -> Self { + Self::union_type(CompositeType::incomplete(Some(tag))) + } + + /// Create an incomplete (forward-declared) enum type + pub fn incomplete_enum(tag: String) -> Self { + Self::enum_type(CompositeType::incomplete(Some(tag))) + } + + /// Check if this is an integer type + pub fn is_integer(&self) -> bool { + matches!( + self.kind, + TypeKind::Bool + | TypeKind::Char + | TypeKind::Short + | TypeKind::Int + | TypeKind::Long + | TypeKind::LongLong + ) + } + + /// Check if this is a floating point type + pub fn is_float(&self) -> bool { + matches!( + self.kind, + TypeKind::Float | TypeKind::Double | TypeKind::LongDouble + ) + } + + /// Check if this is an arithmetic type (integer or float) + pub fn is_arithmetic(&self) -> bool { + self.is_integer() || self.is_float() + } + + /// Check if this is a scalar type (arithmetic or pointer) + pub fn is_scalar(&self) -> bool { + self.is_arithmetic() || self.kind == TypeKind::Pointer + } + + /// Check if this type is unsigned + pub fn is_unsigned(&self) -> bool { + self.modifiers.contains(TypeModifiers::UNSIGNED) + } + + /// Check if this is a plain char (no explicit signed/unsigned) + /// Plain char has platform-dependent signedness + pub fn is_plain_char(&self) -> bool { + self.kind == TypeKind::Char + && !self.modifiers.contains(TypeModifiers::SIGNED) + && !self.modifiers.contains(TypeModifiers::UNSIGNED) + } + + /// Get the base type (for pointers, arrays, functions) + pub fn get_base(&self) -> Option<&Type> { + self.base.as_deref() + } + + /// Get the size of this type in bits (for a typical 64-bit target) + /// This is used by the IR to size operations + pub fn size_bits(&self) -> u32 { + match self.kind { + TypeKind::Void => 0, + TypeKind::Bool => 8, + TypeKind::Char => 8, + TypeKind::Short => 16, + TypeKind::Int => 32, + TypeKind::Long => 64, // LP64 model (macOS, Linux x86-64) + TypeKind::LongLong => 64, + TypeKind::Float => 32, + TypeKind::Double => 64, + TypeKind::LongDouble => 128, // x86-64 + TypeKind::Pointer => 64, // 64-bit pointers + TypeKind::Array => { + let base_size = self.base.as_ref().map(|b| b.size_bits()).unwrap_or(0); + let count = self.array_size.unwrap_or(0) as u32; + base_size * count + } + TypeKind::Function => 0, // Functions don't have a size + TypeKind::Struct | TypeKind::Union => (self.size_bytes() * 8) as u32, + TypeKind::Enum => 32, // Enums are int-sized + // va_list size is platform-specific: + // - x86-64: 24 bytes (192 bits) - struct with 4 fields + // - aarch64-macos: 8 bytes (64 bits) - just a char* + // - aarch64-linux: 32 bytes (256 bits) - struct with 5 fields + TypeKind::VaList => { + #[cfg(target_arch = "x86_64")] + { + 192 + } + #[cfg(all(target_arch = "aarch64", target_os = "macos"))] + { + 64 // On Apple ARM64, va_list is just char* + } + #[cfg(all(target_arch = "aarch64", not(target_os = "macos")))] + { + 256 // On Linux ARM64, va_list is a 32-byte struct + } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + 64 // Fallback to pointer size + } + } + } + } + + /// Get the size of this type in bytes + pub fn size_bytes(&self) -> usize { + match self.kind { + TypeKind::Struct | TypeKind::Union => { + self.composite.as_ref().map(|c| c.size).unwrap_or(0) + } + TypeKind::Enum => 4, + _ => (self.size_bits() / 8) as usize, + } + } + + /// Get natural alignment for this type in bytes + pub fn alignment(&self) -> usize { + match self.kind { + TypeKind::Void => 1, + TypeKind::Bool | TypeKind::Char => 1, + TypeKind::Short => 2, + TypeKind::Int | TypeKind::Float => 4, + TypeKind::Long | TypeKind::LongLong | TypeKind::Double | TypeKind::Pointer => 8, + TypeKind::LongDouble => 16, + TypeKind::Struct | TypeKind::Union => { + self.composite.as_ref().map(|c| c.align).unwrap_or(1) + } + TypeKind::Enum => 4, // Enums are int-aligned + TypeKind::Array => self.base.as_ref().map(|b| b.alignment()).unwrap_or(1), + TypeKind::Function => 1, + TypeKind::VaList => 8, // All platforms use 8-byte alignment for va_list + } + } + + /// Find a member in a struct/union type + /// Returns MemberInfo with full bitfield details if found + pub fn find_member(&self, name: &str) -> Option { + if let Some(ref composite) = self.composite { + for member in &composite.members { + if member.name == name { + return Some(MemberInfo { + offset: member.offset, + typ: member.typ.clone(), + bit_offset: member.bit_offset, + bit_width: member.bit_width, + storage_unit_size: member.storage_unit_size, + }); + } + } + } + None + } + + /// Check if two types are compatible (for __builtin_types_compatible_p) + /// This ignores top-level qualifiers (const, volatile, restrict) + /// but otherwise requires types to be identical. + /// Note: Different enum types are NOT compatible, even if they have + /// the same underlying integer type. + pub fn types_compatible(&self, other: &Type) -> bool { + // Top-level qualifiers to ignore + const QUALIFIERS: TypeModifiers = TypeModifiers::CONST + .union(TypeModifiers::VOLATILE) + .union(TypeModifiers::RESTRICT); + + // Compare kinds first + if self.kind != other.kind { + return false; + } + + // Compare modifiers (ignoring top-level qualifiers) + let self_mods = self.modifiers.difference(QUALIFIERS); + let other_mods = other.modifiers.difference(QUALIFIERS); + if self_mods != other_mods { + return false; + } + + // Compare array sizes + if self.array_size != other.array_size { + return false; + } + + // Compare variadic flag + if self.variadic != other.variadic { + return false; + } + + // Compare base types (for pointers, arrays, functions) + match (&self.base, &other.base) { + (Some(a), Some(b)) => { + if !a.types_compatible(b) { + return false; + } + } + (None, None) => {} + _ => return false, + } + + // Compare function parameters + match (&self.params, &other.params) { + (Some(a), Some(b)) => { + if a.len() != b.len() { + return false; + } + for (pa, pb) in a.iter().zip(b.iter()) { + if !pa.types_compatible(pb) { + return false; + } + } + } + (None, None) => {} + _ => return false, + } + + // Compare composite types (struct, union, enum) + // For these, we require the exact same composite definition + // (different enum types are NOT compatible even with same underlying type) + match (&self.composite, &other.composite) { + (Some(a), Some(b)) => a == b, + (None, None) => true, + _ => false, + } + } +} + +impl fmt::Display for Type { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Print modifiers + if self.modifiers.contains(TypeModifiers::CONST) { + write!(f, "const ")?; + } + if self.modifiers.contains(TypeModifiers::VOLATILE) { + write!(f, "volatile ")?; + } + if self.modifiers.contains(TypeModifiers::UNSIGNED) { + write!(f, "unsigned ")?; + } else if self.modifiers.contains(TypeModifiers::SIGNED) && self.kind == TypeKind::Char { + write!(f, "signed ")?; + } + + match self.kind { + TypeKind::Pointer => { + if let Some(base) = &self.base { + write!(f, "{}*", base) + } else { + write!(f, "*") + } + } + TypeKind::Array => { + if let Some(base) = &self.base { + if let Some(size) = self.array_size { + write!(f, "{}[{}]", base, size) + } else { + write!(f, "{}[]", base) + } + } else { + write!(f, "[]") + } + } + TypeKind::Function => { + if let Some(ret) = &self.base { + write!(f, "{}(", ret)?; + if let Some(params) = &self.params { + for (i, param) in params.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", param)?; + } + if self.variadic { + if !params.is_empty() { + write!(f, ", ")?; + } + write!(f, "...")?; + } + } + write!(f, ")") + } else { + write!(f, "()") + } + } + _ => write!(f, "{}", self.kind), + } + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_basic_types() { + let int_type = Type::basic(TypeKind::Int); + assert!(int_type.is_integer()); + assert!(int_type.is_arithmetic()); + assert!(int_type.is_scalar()); + assert!(!int_type.is_float()); + } + + #[test] + fn test_pointer_type() { + let int_ptr = Type::pointer(Type::basic(TypeKind::Int)); + assert_eq!(int_ptr.kind, TypeKind::Pointer); + assert!(int_ptr.is_scalar()); + assert!(!int_ptr.is_integer()); + + let base = int_ptr.get_base().unwrap(); + assert_eq!(base.kind, TypeKind::Int); + } + + #[test] + fn test_array_type() { + let int_arr = Type::array(Type::basic(TypeKind::Int), 10); + assert_eq!(int_arr.kind, TypeKind::Array); + assert_eq!(int_arr.array_size, Some(10)); + + let base = int_arr.get_base().unwrap(); + assert_eq!(base.kind, TypeKind::Int); + } + + #[test] + fn test_function_type() { + let func = Type::function( + Type::basic(TypeKind::Int), + vec![Type::basic(TypeKind::Int), Type::basic(TypeKind::Char)], + false, + ); + assert_eq!(func.kind, TypeKind::Function); + assert!(!func.variadic); + + let params = func.params.as_ref().unwrap(); + assert_eq!(params.len(), 2); + assert_eq!(params[0].kind, TypeKind::Int); + assert_eq!(params[1].kind, TypeKind::Char); + } + + #[test] + fn test_unsigned_modifier() { + let uint = Type::with_modifiers(TypeKind::Int, TypeModifiers::UNSIGNED); + assert!(uint.is_unsigned()); + } + + #[test] + fn test_type_display() { + let int_type = Type::basic(TypeKind::Int); + assert_eq!(format!("{}", int_type), "int"); + + let const_int = Type::with_modifiers(TypeKind::Int, TypeModifiers::CONST); + assert_eq!(format!("{}", const_int), "const int"); + + let uint = Type::with_modifiers(TypeKind::Int, TypeModifiers::UNSIGNED); + assert_eq!(format!("{}", uint), "unsigned int"); + + let int_ptr = Type::pointer(Type::basic(TypeKind::Int)); + assert_eq!(format!("{}", int_ptr), "int*"); + } + + #[test] + fn test_nested_pointer() { + // int **pp + let int_ptr_ptr = Type::pointer(Type::pointer(Type::basic(TypeKind::Int))); + assert_eq!(int_ptr_ptr.kind, TypeKind::Pointer); + + let inner = int_ptr_ptr.get_base().unwrap(); + assert_eq!(inner.kind, TypeKind::Pointer); + + let innermost = inner.get_base().unwrap(); + assert_eq!(innermost.kind, TypeKind::Int); + } + + #[test] + fn test_pointer_to_array() { + // int (*p)[10] - pointer to array of 10 ints + let arr_type = Type::array(Type::basic(TypeKind::Int), 10); + let ptr_to_arr = Type::pointer(arr_type); + + assert_eq!(ptr_to_arr.kind, TypeKind::Pointer); + let base = ptr_to_arr.get_base().unwrap(); + assert_eq!(base.kind, TypeKind::Array); + assert_eq!(base.array_size, Some(10)); + } + + #[test] + fn test_types_compatible_same_type() { + let int1 = Type::basic(TypeKind::Int); + let int2 = Type::basic(TypeKind::Int); + assert!(int1.types_compatible(&int2)); + + let char1 = Type::basic(TypeKind::Char); + let char2 = Type::basic(TypeKind::Char); + assert!(char1.types_compatible(&char2)); + } + + #[test] + fn test_types_compatible_different_types() { + let int_type = Type::basic(TypeKind::Int); + let char_type = Type::basic(TypeKind::Char); + assert!(!int_type.types_compatible(&char_type)); + + let long_type = Type::basic(TypeKind::Long); + assert!(!int_type.types_compatible(&long_type)); + } + + #[test] + fn test_types_compatible_qualifiers_ignored() { + let int_type = Type::basic(TypeKind::Int); + let const_int = Type::with_modifiers(TypeKind::Int, TypeModifiers::CONST); + let volatile_int = Type::with_modifiers(TypeKind::Int, TypeModifiers::VOLATILE); + let cv_int = Type::with_modifiers( + TypeKind::Int, + TypeModifiers::CONST | TypeModifiers::VOLATILE, + ); + + // All should be compatible with plain int + assert!(int_type.types_compatible(&const_int)); + assert!(int_type.types_compatible(&volatile_int)); + assert!(int_type.types_compatible(&cv_int)); + assert!(const_int.types_compatible(&volatile_int)); + } + + #[test] + fn test_types_compatible_signedness_matters() { + let int_type = Type::basic(TypeKind::Int); + let uint_type = Type::with_modifiers(TypeKind::Int, TypeModifiers::UNSIGNED); + // Signedness is NOT a qualifier, so these are NOT compatible + assert!(!int_type.types_compatible(&uint_type)); + } + + #[test] + fn test_types_compatible_pointers() { + let int_ptr = Type::pointer(Type::basic(TypeKind::Int)); + let int_ptr2 = Type::pointer(Type::basic(TypeKind::Int)); + assert!(int_ptr.types_compatible(&int_ptr2)); + + let char_ptr = Type::pointer(Type::basic(TypeKind::Char)); + assert!(!int_ptr.types_compatible(&char_ptr)); + } + + #[test] + fn test_types_compatible_arrays() { + let arr10 = Type::array(Type::basic(TypeKind::Int), 10); + let arr10_2 = Type::array(Type::basic(TypeKind::Int), 10); + assert!(arr10.types_compatible(&arr10_2)); + + let arr20 = Type::array(Type::basic(TypeKind::Int), 20); + assert!(!arr10.types_compatible(&arr20)); + } +}