diff --git a/cc/arch/aarch64/codegen.rs b/cc/arch/aarch64/codegen.rs
index 68b6b52e..4c3a4a06 100644
--- a/cc/arch/aarch64/codegen.rs
+++ b/cc/arch/aarch64/codegen.rs
@@ -1397,6 +1397,7 @@ impl Aarch64CodeGen {
     }
 
     pub(super) fn emit_move(&mut self, src: PseudoId, dst: Reg, size: u32, frame_size: i32) {
+        let actual_size = size; // Keep original size for sub-32-bit stack loads
         let size = size.max(32);
         let loc = self.get_location(src);
         let op_size = OperandSize::from_bits(size);
@@ -1413,9 +1414,12 @@ impl Aarch64CodeGen {
             }
             Loc::Stack(offset) => {
                 let actual_offset = self.stack_offset(frame_size, offset);
+                // For sub-32-bit values, use sized load (ldrb/ldrh) which zero-extends.
+                // This avoids reading garbage from adjacent stack bytes.
+                let load_size = OperandSize::from_bits(actual_size.max(8));
                 // LIR: load from stack (FP-relative for alloca safety)
                 self.push_lir(Aarch64Inst::Ldr {
-                    size: op_size,
+                    size: load_size,
                     addr: MemAddr::BaseOffset {
                         base: Reg::X29,
                         offset: actual_offset,
diff --git a/cc/arch/x86_64/codegen.rs b/cc/arch/x86_64/codegen.rs
index 9bdd7d5e..fff14b2f 100644
--- a/cc/arch/x86_64/codegen.rs
+++ b/cc/arch/x86_64/codegen.rs
@@ -994,6 +994,7 @@ impl X86_64CodeGen {
     }
 
     pub(super) fn emit_move(&mut self, src: PseudoId, dst: Reg, size: u32) {
+        let actual_size = size; // Keep original size for sub-32-bit handling
         let size = size.max(32);
         let op_size = OperandSize::from_bits(size);
         let loc = self.get_location(src);
@@ -1011,26 +1012,54 @@ impl X86_64CodeGen {
             }
             Loc::Stack(offset) => {
                 let adjusted = offset + self.callee_saved_offset;
-                // LIR: memory-to-register move
-                self.push_lir(X86Inst::Mov {
-                    size: op_size,
-                    src: GpOperand::Mem(MemAddr::BaseOffset {
-                        base: Reg::Rbp,
-                        offset: -adjusted,
-                    }),
-                    dst: GpOperand::Reg(dst),
-                });
+                // For sub-32-bit values, use zero-extending load to avoid garbage in upper bits
+                if actual_size < 32 {
+                    // LIR: zero-extending memory-to-register move
+                    self.push_lir(X86Inst::Movzx {
+                        src_size: OperandSize::from_bits(actual_size),
+                        dst_size: OperandSize::B32,
+                        src: GpOperand::Mem(MemAddr::BaseOffset {
+                            base: Reg::Rbp,
+                            offset: -adjusted,
+                        }),
+                        dst,
+                    });
+                } else {
+                    // LIR: memory-to-register move
+                    self.push_lir(X86Inst::Mov {
+                        size: op_size,
+                        src: GpOperand::Mem(MemAddr::BaseOffset {
+                            base: Reg::Rbp,
+                            offset: -adjusted,
+                        }),
+                        dst: GpOperand::Reg(dst),
+                    });
+                }
             }
             Loc::IncomingArg(offset) => {
-                // LIR: memory-to-register move from incoming stack arg
-                self.push_lir(X86Inst::Mov {
-                    size: op_size,
-                    src: GpOperand::Mem(MemAddr::BaseOffset {
-                        base: Reg::Rbp,
-                        offset,
-                    }),
-                    dst: GpOperand::Reg(dst),
-                });
+                // For sub-32-bit values, use zero-extending load
+                if actual_size < 32 {
+                    // LIR: zero-extending memory-to-register move from incoming stack arg
+                    self.push_lir(X86Inst::Movzx {
+                        src_size: OperandSize::from_bits(actual_size),
+                        dst_size: OperandSize::B32,
+                        src: GpOperand::Mem(MemAddr::BaseOffset {
+                            base: Reg::Rbp,
+                            offset,
+                        }),
+                        dst,
+                    });
+                } else {
+                    // LIR: memory-to-register move from incoming stack arg
+                    self.push_lir(X86Inst::Mov {
+                        size: op_size,
+                        src: GpOperand::Mem(MemAddr::BaseOffset {
+                            base: Reg::Rbp,
+                            offset,
+                        }),
+                        dst: GpOperand::Reg(dst),
+                    });
+                }
             }
             Loc::Imm(v) => {
                 // x86-64: movl sign-extends to 64-bit, movq only works with 32-bit signed immediates
@@ -1114,18 +1143,47 @@ impl X86_64CodeGen {
                 });
             }
             Loc::Stack(offset) => {
-                // Use actual size for memory stores (8, 16, 32, 64 bits)
                 let adjusted = offset + self.callee_saved_offset;
-                let op_size = OperandSize::from_bits(size);
-                // LIR: register-to-memory move
-                self.push_lir(X86Inst::Mov {
-                    size: op_size,
-                    src: GpOperand::Reg(src),
-                    dst: GpOperand::Mem(MemAddr::BaseOffset {
-                        base: Reg::Rbp,
-                        offset: -adjusted,
-                    }),
-                });
+                // For sub-32-bit values, first zero-extend to 32 bits then store 32 bits.
+                // This ensures that subsequent 32-bit loads get correct values.
+                if size < 32 {
+                    // Use scratch register to avoid modifying source
+                    let scratch = if src == Reg::R10 { Reg::R11 } else { Reg::R10 };
+                    // Copy to scratch
+                    self.push_lir(X86Inst::Mov {
+                        size: OperandSize::B32,
+                        src: GpOperand::Reg(src),
+                        dst: GpOperand::Reg(scratch),
+                    });
+                    // Zero-extend by masking to the appropriate size
+                    let mask = (1i64 << size) - 1;
+                    self.push_lir(X86Inst::And {
+                        size: OperandSize::B32,
+                        src: GpOperand::Imm(mask),
+                        dst: scratch,
+                    });
+                    // Store 32 bits
+                    self.push_lir(X86Inst::Mov {
+                        size: OperandSize::B32,
+                        src: GpOperand::Reg(scratch),
+                        dst: GpOperand::Mem(MemAddr::BaseOffset {
+                            base: Reg::Rbp,
+                            offset: -adjusted,
+                        }),
+                    });
+                } else {
+                    // Store with actual size
+                    let op_size = OperandSize::from_bits(size);
+                    // LIR: register-to-memory move
+                    self.push_lir(X86Inst::Mov {
+                        size: op_size,
+                        src: GpOperand::Reg(src),
+                        dst: GpOperand::Mem(MemAddr::BaseOffset {
+                            base: Reg::Rbp,
+                            offset: -adjusted,
+                        }),
+                    });
+                }
             }
             _ => {}
         }
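
Note (illustration, not part of the patch): the sub-32-bit store path above relies on `(1i64 << size) - 1` producing an all-ones mask of `size` bits, so the 32-bit word written back already has zeroed upper bits. A standalone Rust sketch of that arithmetic:

    // Illustrative only -- mirrors the mask computation in the store path above, not compiler code.
    fn main() {
        for size in [8u32, 16u32] {
            let mask = (1i64 << size) - 1; // 0xff for 8-bit values, 0xffff for 16-bit values
            // Pretend register contents whose upper bits hold garbage.
            let spilled = 0xdead_beef_u32 as i64;
            // Only the low `size` bits survive the AND, so the stored 32-bit word is zero-extended.
            println!("size={size:>2} mask={mask:#x} stored={:#010x}", spilled & mask);
        }
    }
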
diff --git a/cc/ir/linearize.rs b/cc/ir/linearize.rs
index 4bbf59fe..970b51f0 100644
--- a/cc/ir/linearize.rs
+++ b/cc/ir/linearize.rs
@@ -725,7 +725,12 @@ impl<'a> Linearizer<'a> {
             }
 
             TypeKind::Struct | TypeKind::Union => {
-                if let Some(composite) = self.types.get(typ).composite.as_ref() {
+                // Resolve incomplete struct types to their complete definitions
+                // This is needed when a typedef to an incomplete struct is used
+                // before the struct is fully defined (forward declaration pattern)
+                let resolved_typ = self.resolve_struct_type(typ);
+                let resolved_size = (self.types.size_bits(resolved_typ) / 8) as usize;
+                if let Some(composite) = self.types.get(resolved_typ).composite.as_ref() {
                     let members = &composite.members;
                     let mut init_fields = Vec::new();
                     let mut current_field_idx = 0;
@@ -765,7 +770,7 @@ impl<'a> Linearizer<'a> {
                     init_fields.sort_by_key(|(offset, _, _)| *offset);
 
                     Initializer::Struct {
-                        total_size,
+                        total_size: resolved_size,
                        fields: init_fields,
                     }
                 } else {
@@ -1691,8 +1696,12 @@ impl<'a> Linearizer<'a> {
                 }
             }
             TypeKind::Struct | TypeKind::Union => {
+                // Resolve incomplete struct types to their complete definitions
+                // This is needed when a typedef to an incomplete struct is used
+                // before the struct is fully defined (forward declaration pattern)
+                let resolved_typ = self.resolve_struct_type(typ);
                 // Get struct fields from the type's composite data
-                if let Some(composite) = self.types.get(typ).composite.as_ref() {
+                if let Some(composite) = self.types.get(resolved_typ).composite.as_ref() {
                     // Clone members to avoid borrow issues
                     let members: Vec<_> = composite.members.clone();
 
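
Note (illustration, not part of the patch): `resolve_struct_type()` is called above but its body is not included in this diff. Going by the comments here and the identifiers exercised in the new test below (`composite.tag`, `is_complete`, `SymbolTable::lookup_tag`), a minimal sketch of the shape it presumably has — the signature, field names, and `TypeId` are assumptions, not the actual implementation:

    // Hypothetical sketch only; the real helper lives in linearize.rs and is not shown in this diff.
    // Assumes the linearizer holds `self.types: &TypeTable` and `self.symbols: &SymbolTable`.
    fn resolve_struct_type(&self, typ: TypeId) -> TypeId {
        if let Some(composite) = self.types.get(typ).composite.as_ref() {
            if !composite.is_complete {
                if let Some(tag) = composite.tag {
                    // The complete definition, if one exists, was registered under the same tag.
                    if let Some(sym) = self.symbols.lookup_tag(tag) {
                        return sym.typ;
                    }
                }
            }
        }
        typ
    }
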
diff --git a/cc/ir/test_linearize.rs b/cc/ir/test_linearize.rs
index a760e007..ffe7c6de 100644
--- a/cc/ir/test_linearize.rs
+++ b/cc/ir/test_linearize.rs
@@ -13,10 +13,11 @@ use super::*;
 
 use crate::parse::ast::{
     AssignOp, BlockItem, Declaration, ExprKind, ExternalDecl, FunctionDef, InitDeclarator,
-    Parameter, UnaryOp,
+    InitElement, Parameter, UnaryOp,
 };
 use crate::strings::StringTable;
-use crate::types::Type;
+use crate::symbol::Symbol;
+use crate::types::{CompositeType, StructMember, Type};
 
 /// Create a default position for test code
 fn test_pos() -> Position {
@@ -2533,3 +2534,158 @@ fn test_string_literal_char_pointer_init() {
         ir
     );
 }
+
+// ============================================================================
+// Incomplete struct type resolution test (regression test for forward declarations)
+// Tests the fix in resolve_struct_type() that resolves incomplete struct types
+// to their complete definitions when processing initializers.
+// ============================================================================
+
+/// Helper to linearize with a custom symbol table (for testing struct resolution)
+fn test_linearize_with_symbols(
+    tu: &TranslationUnit,
+    symbols: &SymbolTable,
+    types: &TypeTable,
+    strings: &StringTable,
+) -> Module {
+    let target = Target::host();
+    linearize(tu, symbols, types, strings, &target)
+}
+
+#[test]
+fn test_incomplete_struct_type_resolution() {
+    // This test verifies that when a typedef refers to an incomplete struct,
+    // the linearizer correctly resolves it to the complete struct definition
+    // when processing struct initializers.
+    //
+    // Pattern being tested:
+    //   typedef struct foo foo_t;      // incomplete at this point
+    //   struct foo { int x; int y; };  // complete definition
+    //   foo_t f = {1, 2};              // should use complete struct's size (8 bytes), not 0
+    //
+    let mut strings = StringTable::new();
+    let mut types = TypeTable::new(64);
+    let mut symbols = SymbolTable::new();
+    let test_id = strings.intern("test");
+    let foo_tag = strings.intern("foo");
+    let f_id = strings.intern("f");
+    let x_id = strings.intern("x");
+    let y_id = strings.intern("y");
+    let int_type = types.int_id;
+
+    // Create the complete struct type: struct foo { int x; int y; }
+    let complete_composite = CompositeType {
+        tag: Some(foo_tag),
+        members: vec![
+            StructMember {
+                name: x_id,
+                typ: int_type,
+                offset: 0,
+                bit_offset: None,
+                bit_width: None,
+                storage_unit_size: None,
+            },
+            StructMember {
+                name: y_id,
+                typ: int_type,
+                offset: 4, // Second int at offset 4 bytes
+                bit_offset: None,
+                bit_width: None,
+                storage_unit_size: None,
+            },
+        ],
+        enum_constants: vec![],
+        size: 8,  // 2 ints = 8 bytes
+        align: 4, // int alignment
+        is_complete: true,
+    };
+    let complete_struct_type = types.intern(Type::struct_type(complete_composite));
+
+    // Register the complete struct in the symbol table as a tag
+    symbols
+        .declare(Symbol::tag(foo_tag, complete_struct_type, 0))
+        .expect("Failed to declare tag");
+
+    // Verify the symbol table is correctly set up
+    let looked_up = symbols.lookup_tag(foo_tag);
+    assert!(
+        looked_up.is_some(),
+        "Symbol table should contain tag for 'foo'"
+    );
+    assert_eq!(
+        looked_up.unwrap().typ,
+        complete_struct_type,
+        "Tag should point to complete struct type"
+    );
+
+    // Create an incomplete struct type with the same tag
+    // This simulates what happens with: typedef struct foo foo_t; (before struct foo is defined)
+    let incomplete_composite = CompositeType::incomplete(Some(foo_tag));
+    let incomplete_struct_type = types.intern(Type::struct_type(incomplete_composite));
+
+    // Verify the incomplete type has size 0 before resolution
+    assert_eq!(
+        types.size_bytes(incomplete_struct_type),
+        0,
+        "Incomplete struct should have size 0"
+    );
+
+    // Create an initializer list: {1, 2}
+    let init_list = Expr::typed_unpositioned(
+        ExprKind::InitList {
+            elements: vec![
+                InitElement {
+                    designators: vec![],
+                    value: Box::new(Expr::int(1, &types)),
+                },
+                InitElement {
+                    designators: vec![],
+                    value: Box::new(Expr::int(2, &types)),
+                },
+            ],
+        },
+        incomplete_struct_type,
+    );
+
+    // Create function: void test() { foo_t f = {1, 2}; }
+    // Using the incomplete struct type for the declaration
+    let func = FunctionDef {
+        return_type: types.void_id,
+        name: test_id,
+        params: vec![],
+        body: Stmt::Block(vec![BlockItem::Declaration(Declaration {
+            declarators: vec![InitDeclarator {
+                name: f_id,
+                typ: incomplete_struct_type,
+                init: Some(init_list),
+                vla_sizes: vec![],
+            }],
+        })]),
+        pos: test_pos(),
+    };
+    let tu = TranslationUnit {
+        items: vec![ExternalDecl::FunctionDef(func)],
+    };
+
+    // Linearize with the symbol table that has the complete struct registered
+    let module = test_linearize_with_symbols(&tu, &symbols, &types, &strings);
+    let ir = format!("{}", module);
+
+    // The IR should show stores to the struct fields at proper offsets
+    // Without the fix, the initializer would have total_size=0 and generate no stores
+    assert!(
+        ir.contains("store"),
+        "Struct initializer should generate store instructions. \
+         This would fail if incomplete struct type was not resolved. IR:\n{}",
+        ir
+    );
+
+    // Should have at least 2 stores (one for each field: x and y)
+    let store_count = ir.matches("store").count();
+    assert!(
+        store_count >= 2,
+        "Should have at least 2 stores for struct fields (x, y), got {}: {}",
+        store_count,
+        ir
+    );
+}
diff --git a/cc/tests/datatypes/char.rs b/cc/tests/datatypes/char.rs
index a4ea8118..4791b2ff 100644
--- a/cc/tests/datatypes/char.rs
+++ b/cc/tests/datatypes/char.rs
@@ -990,3 +990,74 @@ int main(void) {
 "#;
     assert_eq!(compile_and_run("char_advanced", code), 0);
 }
+
+// ============================================================================
+// Char: Stack Spill/Reload Regression Test
+// Tests that char values passed through stack or spilled to stack preserve
+// correct values without garbage in upper register bits (regression test for
+// movzx fix in x86_64 codegen)
+// ============================================================================
+
+#[test]
+fn char_stack_spill_reload() {
+    let code = r#"
+// Force char/unsigned char values through the stack by having many parameters
+// On x86_64: first 6 integer args go to registers, rest go to stack
+// This tests that char values are properly zero-extended when reloaded
+
+char add_many_chars(char a, char b, char c, char d, char e, char f,
+                    char g, char h, char i, char j, char k, char l) {
+    // g through l should be on the stack on x86_64
+    return a + b + c + d + e + f + g + h + i + j + k + l;
+}
+
+unsigned char add_many_uchars(unsigned char a, unsigned char b, unsigned char c,
+                              unsigned char d, unsigned char e, unsigned char f,
+                              unsigned char g, unsigned char h, unsigned char i,
+                              unsigned char j, unsigned char k, unsigned char l) {
+    return a + b + c + d + e + f + g + h + i + j + k + l;
+}
+
+// Test that char local variables spilled across function calls are preserved
+char test_char_spill(char x) {
+    // Force x to be spilled by calling a function
+    char saved = x;
+    // dummy call that might clobber registers
+    char y = add_many_chars(1,2,3,4,5,6,7,8,9,10,11,12);
+    // Use saved value - if upper bits were garbage, comparison would fail
+    return saved + y;
+}
+
+int main(void) {
+    // Test 1: Many char args (sum = 1+2+...+12 = 78)
+    char result1 = add_many_chars(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+    if (result1 != 78) return 1;
+
+    // Test 2: Many unsigned char args (same sum)
+    unsigned char result2 = add_many_uchars(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+    if (result2 != 78) return 2;
+
+    // Test 3: Char spill across function call
+    char result3 = test_char_spill(10);
+    // 10 + 78 = 88
+    if (result3 != 88) return 3;
+
+    // Test 4: Verify specific char values from stack args
+    // Use values that would be corrupted if high bits contain garbage
+    // e.g., if loaded as 32-bit without zero-extend, upper bits might be 0xFF
+    char result4 = add_many_chars(0, 0, 0, 0, 0, 0,
+                                  1, 2, 3, 4, 5, 6); // g-l on stack
+    // sum = 1+2+3+4+5+6 = 21
+    if (result4 != 21) return 4;
+
+    // Test 5: Large unsigned char values (test zero-extension, not sign-extension)
+    unsigned char result5 = add_many_uchars(0, 0, 0, 0, 0, 0,
+                                            200, 10, 5, 5, 0, 0);
+    // sum = 200+10+5+5 = 220
+    if (result5 != 220) return 5;
+
+    return 0;
+}
+"#;
+    assert_eq!(compile_and_run("char_stack_spill", code), 0);
+}
diff --git a/cc/tests/datatypes/short.rs b/cc/tests/datatypes/short.rs
index 5af800d1..3ab48886 100644
--- a/cc/tests/datatypes/short.rs
+++ b/cc/tests/datatypes/short.rs
@@ -885,3 +885,80 @@ int main(void) {
 "#;
     assert_eq!(compile_and_run("short_advanced", code), 0);
 }
+
+// ============================================================================
+// Short: Stack Spill/Reload Regression Test
+// Tests that short values passed through stack or spilled to stack preserve
+// correct values without garbage in upper register bits (regression test for
+// movzx fix in x86_64 codegen)
+// ============================================================================
+
+#[test]
+fn short_stack_spill_reload() {
+    let code = r#"
+// Force short/unsigned short values through the stack by having many parameters
+// On x86_64: first 6 integer args go to registers, rest go to stack
+// This tests that short values are properly zero-extended when reloaded
+
+int add_many_shorts(short a, short b, short c, short d, short e, short f,
+                    short g, short h, short i, short j, short k, short l) {
+    // g through l should be on the stack on x86_64
+    return a + b + c + d + e + f + g + h + i + j + k + l;
+}
+
+unsigned int add_many_ushorts(unsigned short a, unsigned short b, unsigned short c,
+                              unsigned short d, unsigned short e, unsigned short f,
+                              unsigned short g, unsigned short h, unsigned short i,
+                              unsigned short j, unsigned short k, unsigned short l) {
+    return a + b + c + d + e + f + g + h + i + j + k + l;
+}
+
+// Test that short local variables spilled across function calls are preserved
+int test_short_spill(short x) {
+    // Force x to be spilled by calling a function
+    short saved = x;
+    // dummy call that might clobber registers
+    int y = add_many_shorts(100, 200, 300, 400, 500, 600,
+                            700, 800, 900, 1000, 1100, 1200);
+    // Use saved value - if upper bits were garbage, comparison would fail
+    return saved + y;
+}
+
+int main(void) {
+    // Test 1: Many short args (sum = 100+200+...+1200 = 7800)
+    int result1 = add_many_shorts(100, 200, 300, 400, 500, 600,
+                                  700, 800, 900, 1000, 1100, 1200);
+    if (result1 != 7800) return 1;
+
+    // Test 2: Many unsigned short args (same sum)
+    unsigned int result2 = add_many_ushorts(100, 200, 300, 400, 500, 600,
+                                            700, 800, 900, 1000, 1100, 1200);
+    if (result2 != 7800) return 2;
+
+    // Test 3: Short spill across function call
+    int result3 = test_short_spill(1000);
+    // 1000 + 7800 = 8800
+    if (result3 != 8800) return 3;
+
+    // Test 4: Verify stack args preserve values
+    int result4 = add_many_shorts(0, 0, 0, 0, 0, 0,
+                                  1000, 2000, 3000, 4000, 5000, 6000); // g-l on stack
+    // sum = 1000+2000+3000+4000+5000+6000 = 21000
+    if (result4 != 21000) return 4;
+
+    // Test 5: Large unsigned short values (test zero-extension)
+    unsigned int result5 = add_many_ushorts(0, 0, 0, 0, 0, 0,
+                                            50000, 10000, 5000, 100, 0, 0);
+    // sum = 50000+10000+5000+100 = 65100
+    if (result5 != 65100) return 5;
+
+    // Test 6: Negative short values (test sign preservation when widened)
+    int result6 = add_many_shorts(-100, -200, -300, -400, -500, -600,
+                                  -700, -800, -900, -1000, -1100, -1200);
+    if (result6 != -7800) return 6;
+
+    return 0;
+}
+"#;
+    assert_eq!(compile_and_run("short_stack_spill", code), 0);
+}