Skip to content

Commit e743ea1

Browse files
committed
[cc] more fixes
1 parent 3ddbe97 commit e743ea1

File tree

19 files changed

+1033
-101
lines changed

19 files changed

+1033
-101
lines changed

cc/arch/aarch64/codegen.rs

Lines changed: 97 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1106,9 +1106,11 @@ impl Aarch64CodeGen {
11061106
Opcode::SymAddr => {
11071107
if let (Some(target), Some(&src)) = (insn.target, insn.src.first()) {
11081108
let dst_loc = self.get_location(target);
1109+
// Use X16 as scratch to avoid clobbering live values
1110+
// X16 is the intra-procedure-call scratch register (IP0)
11091111
let dst_reg = match &dst_loc {
11101112
Loc::Reg(r) => *r,
1111-
_ => Reg::X9,
1113+
_ => Reg::X16,
11121114
};
11131115
let src_loc = self.get_location(src);
11141116
match src_loc {
@@ -1693,6 +1695,13 @@ impl Aarch64CodeGen {
16931695

16941696
// Get source address (where the struct data is)
16951697
let value_loc = self.get_location(value);
1698+
1699+
// Special case: if value is immediate 0, zero the struct instead of copying
1700+
if let Loc::Imm(0) = value_loc {
1701+
self.emit_struct_zero(insn, addr, num_qwords, frame_size);
1702+
return;
1703+
}
1704+
16961705
// Get destination address
16971706
let addr_loc = self.get_location(addr);
16981707

@@ -1777,6 +1786,71 @@ impl Aarch64CodeGen {
17771786
}
17781787
}
17791788

1789+
/// Emit code to zero a struct (for struct = {0} initialization)
1790+
fn emit_struct_zero(
1791+
&mut self,
1792+
insn: &Instruction,
1793+
addr: PseudoId,
1794+
num_qwords: u32,
1795+
frame_size: i32,
1796+
) {
1797+
let addr_loc = self.get_location(addr);
1798+
1799+
// Load destination address into X17
1800+
match addr_loc {
1801+
Loc::Stack(offset) => {
1802+
let total_offset = self.stack_offset(frame_size, offset) + insn.offset as i32;
1803+
self.push_lir(Aarch64Inst::Add {
1804+
size: OperandSize::B64,
1805+
src1: Reg::X29,
1806+
src2: GpOperand::Imm(total_offset as i64),
1807+
dst: Reg::X17,
1808+
});
1809+
}
1810+
Loc::Reg(r) => {
1811+
if insn.offset != 0 {
1812+
self.push_lir(Aarch64Inst::Add {
1813+
size: OperandSize::B64,
1814+
src1: r,
1815+
src2: GpOperand::Imm(insn.offset),
1816+
dst: Reg::X17,
1817+
});
1818+
} else if r != Reg::X17 {
1819+
self.push_lir(Aarch64Inst::Mov {
1820+
size: OperandSize::B64,
1821+
src: GpOperand::Reg(r),
1822+
dst: Reg::X17,
1823+
});
1824+
}
1825+
}
1826+
Loc::Global(ref name) => {
1827+
self.emit_load_addr(name, Reg::X17);
1828+
if insn.offset != 0 {
1829+
self.push_lir(Aarch64Inst::Add {
1830+
size: OperandSize::B64,
1831+
src1: Reg::X17,
1832+
src2: GpOperand::Imm(insn.offset),
1833+
dst: Reg::X17,
1834+
});
1835+
}
1836+
}
1837+
_ => return,
1838+
}
1839+
1840+
// Store zeros using XZR (zero register) - aarch64 has hardware zero reg!
1841+
for i in 0..num_qwords {
1842+
let byte_offset = (i * 8) as i32;
1843+
self.push_lir(Aarch64Inst::Str {
1844+
size: OperandSize::B64,
1845+
src: Reg::Xzr,
1846+
addr: MemAddr::BaseOffset {
1847+
base: Reg::X17,
1848+
offset: byte_offset,
1849+
},
1850+
});
1851+
}
1852+
}
1853+
17801854
fn emit_call(&mut self, insn: &Instruction, frame_size: i32, types: &TypeTable) {
17811855
// Get function name (or placeholder for indirect calls)
17821856
let func_name = if insn.indirect_target.is_some() {
@@ -1830,29 +1904,42 @@ impl Aarch64CodeGen {
18301904
let size = insn.size.max(32);
18311905
let op_size = OperandSize::from_bits(size);
18321906
let dst_loc = self.get_location(target);
1907+
// Use X16 as default scratch to avoid clobbering live values
18331908
let dst_reg = match &dst_loc {
18341909
Loc::Reg(r) => *r,
1835-
_ => Reg::X9,
1910+
_ => Reg::X16,
1911+
};
1912+
1913+
// Pick non-conflicting temp registers for cond/then/else values
1914+
// If dst_reg is one of our default temps, shift allocation to avoid conflicts
1915+
let (cond_reg, then_reg, else_reg) = if dst_reg == Reg::X10 {
1916+
(Reg::X11, Reg::X12, Reg::X13)
1917+
} else if dst_reg == Reg::X11 {
1918+
(Reg::X10, Reg::X12, Reg::X13)
1919+
} else if dst_reg == Reg::X12 {
1920+
(Reg::X10, Reg::X11, Reg::X13)
1921+
} else {
1922+
(Reg::X10, Reg::X11, Reg::X12) // Original allocation
18361923
};
18371924

18381925
// Load condition, then and else values
1839-
self.emit_move(cond, Reg::X10, 64, frame_size);
1840-
self.emit_move(then_val, Reg::X11, size, frame_size);
1841-
self.emit_move(else_val, Reg::X12, size, frame_size);
1926+
self.emit_move(cond, cond_reg, 64, frame_size);
1927+
self.emit_move(then_val, then_reg, size, frame_size);
1928+
self.emit_move(else_val, else_reg, size, frame_size);
18421929

18431930
// LIR: compare condition with zero
18441931
self.push_lir(Aarch64Inst::Cmp {
18451932
size: OperandSize::B64,
1846-
src1: Reg::X10,
1933+
src1: cond_reg,
18471934
src2: GpOperand::Imm(0),
18481935
});
18491936

18501937
// Use csel: if cond != 0, select then_val, else select else_val
18511938
self.push_lir(Aarch64Inst::Csel {
18521939
size: op_size,
18531940
cond: CondCode::Ne,
1854-
src_true: Reg::X11,
1855-
src_false: Reg::X12,
1941+
src_true: then_reg,
1942+
src_false: else_reg,
18561943
dst: dst_reg,
18571944
});
18581945

@@ -2146,6 +2233,7 @@ fn asm_reg_name_64(reg: Reg) -> &'static str {
21462233
Reg::X29 => "x29",
21472234
Reg::X30 => "x30",
21482235
Reg::SP => "sp",
2236+
Reg::Xzr => "xzr",
21492237
}
21502238
}
21512239

@@ -2183,6 +2271,7 @@ fn asm_reg_name_32(reg: Reg) -> &'static str {
21832271
Reg::X29 => "w29",
21842272
Reg::X30 => "w30",
21852273
Reg::SP => "wsp",
2274+
Reg::Xzr => "wzr",
21862275
}
21872276
}
21882277

cc/arch/aarch64/expression.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ impl Aarch64CodeGen {
3131
let dst_loc = self.get_location(target);
3232
let work_reg = match &dst_loc {
3333
Loc::Reg(r) => *r,
34-
_ => Reg::X9,
34+
_ => Reg::X16,
3535
};
3636

3737
// Load first operand
@@ -120,7 +120,7 @@ impl Aarch64CodeGen {
120120
let dst_loc = self.get_location(target);
121121
let work_reg = match &dst_loc {
122122
Loc::Reg(r) => *r,
123-
_ => Reg::X9,
123+
_ => Reg::X16,
124124
};
125125

126126
self.emit_move(src, work_reg, size, frame_size);
@@ -156,7 +156,7 @@ impl Aarch64CodeGen {
156156
let dst_loc = self.get_location(target);
157157
let dst_reg = match &dst_loc {
158158
Loc::Reg(r) => *r,
159-
_ => Reg::X9,
159+
_ => Reg::X16,
160160
};
161161

162162
self.emit_move(src1, Reg::X10, size, frame_size);
@@ -192,7 +192,7 @@ impl Aarch64CodeGen {
192192
let dst_loc = self.get_location(target);
193193
let dst_reg = match &dst_loc {
194194
Loc::Reg(r) => *r,
195-
_ => Reg::X9,
195+
_ => Reg::X16,
196196
};
197197

198198
// Division instruction
@@ -263,7 +263,7 @@ impl Aarch64CodeGen {
263263
let dst_loc = self.get_location(target);
264264
let dst_reg = match &dst_loc {
265265
Loc::Reg(r) => *r,
266-
_ => Reg::X9,
266+
_ => Reg::X16,
267267
};
268268

269269
// Use cset to set register based on condition
@@ -300,7 +300,7 @@ impl Aarch64CodeGen {
300300
let dst_loc = self.get_location(target);
301301
let dst_reg = match &dst_loc {
302302
Loc::Reg(r) => *r,
303-
_ => Reg::X9,
303+
_ => Reg::X16,
304304
};
305305

306306
match insn.op {

cc/arch/aarch64/float.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@ impl Aarch64CodeGen {
388388
let dst_loc = self.get_location(target);
389389
let dst_reg = match &dst_loc {
390390
Loc::Reg(r) => *r,
391-
_ => Reg::X9,
391+
_ => Reg::X16,
392392
};
393393

394394
// Set result based on condition
@@ -481,7 +481,7 @@ impl Aarch64CodeGen {
481481
let dst_loc = self.get_location(target);
482482
let dst_reg = match &dst_loc {
483483
Loc::Reg(r) => *r,
484-
_ => Reg::X9,
484+
_ => Reg::X16,
485485
};
486486

487487
// Load source to FP register

cc/arch/aarch64/regalloc.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ pub enum Reg {
9494
X30,
9595
// Stack pointer (special, shares encoding with XZR in some contexts)
9696
SP,
97+
// Zero register (always reads as zero, discards writes)
98+
Xzr,
9799
}
98100

99101
impl Reg {
@@ -131,6 +133,7 @@ impl Reg {
131133
Reg::X29 => "x29",
132134
Reg::X30 => "x30",
133135
Reg::SP => "sp",
136+
Reg::Xzr => "xzr",
134137
}
135138
}
136139

@@ -168,6 +171,7 @@ impl Reg {
168171
Reg::X29 => "w29",
169172
Reg::X30 => "w30",
170173
Reg::SP => "sp", // SP doesn't have a 32-bit form in normal use
174+
Reg::Xzr => "wzr",
171175
}
172176
}
173177

cc/arch/x86_64/codegen.rs

Lines changed: 99 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -864,9 +864,10 @@ impl X86_64CodeGen {
864864
Opcode::SymAddr => {
865865
if let (Some(target), Some(&src)) = (insn.target, insn.src.first()) {
866866
let dst_loc = self.get_location(target);
867+
// Use R10 as scratch to avoid clobbering live values in Rax
867868
let dst_reg = match &dst_loc {
868869
Loc::Reg(r) => *r,
869-
_ => Reg::Rax,
870+
_ => Reg::R10,
870871
};
871872
let src_loc = self.get_location(src);
872873
match src_loc {
@@ -1590,6 +1591,12 @@ impl X86_64CodeGen {
15901591
// Get destination address
15911592
let addr_loc = self.get_location(addr);
15921593

1594+
// Special case: if value is immediate 0, zero the struct instead of copying
1595+
if let Loc::Imm(0) = value_loc {
1596+
self.emit_struct_zero(insn, addr, num_qwords);
1597+
return;
1598+
}
1599+
15931600
// Load source address into R10
15941601
match value_loc {
15951602
Loc::Stack(offset) => {
@@ -1717,6 +1724,89 @@ impl X86_64CodeGen {
17171724
}
17181725
}
17191726

1727+
/// Emit code to zero a struct (for struct = {0} initialization)
1728+
fn emit_struct_zero(&mut self, insn: &Instruction, addr: PseudoId, num_qwords: u32) {
1729+
let addr_loc = self.get_location(addr);
1730+
1731+
// Load destination address into R11
1732+
match addr_loc {
1733+
Loc::Stack(offset) => {
1734+
let adjusted = offset - insn.offset as i32 + self.callee_saved_offset;
1735+
self.push_lir(X86Inst::Lea {
1736+
addr: MemAddr::BaseOffset {
1737+
base: Reg::Rbp,
1738+
offset: -adjusted,
1739+
},
1740+
dst: Reg::R11,
1741+
});
1742+
}
1743+
Loc::Reg(r) => {
1744+
if insn.offset != 0 {
1745+
self.push_lir(X86Inst::Lea {
1746+
addr: MemAddr::BaseOffset {
1747+
base: r,
1748+
offset: insn.offset as i32,
1749+
},
1750+
dst: Reg::R11,
1751+
});
1752+
} else if r != Reg::R11 {
1753+
self.push_lir(X86Inst::Mov {
1754+
size: OperandSize::B64,
1755+
src: GpOperand::Reg(r),
1756+
dst: GpOperand::Reg(Reg::R11),
1757+
});
1758+
}
1759+
}
1760+
Loc::Global(ref name) => {
1761+
if self.needs_got_access(name) {
1762+
self.push_lir(X86Inst::Mov {
1763+
size: OperandSize::B64,
1764+
src: GpOperand::Mem(MemAddr::GotPcrel(Symbol::extern_sym(name.clone()))),
1765+
dst: GpOperand::Reg(Reg::R11),
1766+
});
1767+
} else {
1768+
let symbol = if name.starts_with('.') {
1769+
Symbol::local(name.clone())
1770+
} else {
1771+
Symbol::global(name.clone())
1772+
};
1773+
self.push_lir(X86Inst::Lea {
1774+
addr: MemAddr::RipRelative(symbol),
1775+
dst: Reg::R11,
1776+
});
1777+
}
1778+
if insn.offset != 0 {
1779+
self.push_lir(X86Inst::Add {
1780+
size: OperandSize::B64,
1781+
src: GpOperand::Imm(insn.offset),
1782+
dst: Reg::R11,
1783+
});
1784+
}
1785+
}
1786+
_ => return,
1787+
}
1788+
1789+
// Load 0 into R10 once
1790+
self.push_lir(X86Inst::Mov {
1791+
size: OperandSize::B64,
1792+
src: GpOperand::Imm(0),
1793+
dst: GpOperand::Reg(Reg::R10),
1794+
});
1795+
1796+
// Store zeros to each qword
1797+
for i in 0..num_qwords {
1798+
let byte_offset = (i * 8) as i32;
1799+
self.push_lir(X86Inst::Mov {
1800+
size: OperandSize::B64,
1801+
src: GpOperand::Reg(Reg::R10),
1802+
dst: GpOperand::Mem(MemAddr::BaseOffset {
1803+
base: Reg::R11,
1804+
offset: byte_offset,
1805+
}),
1806+
});
1807+
}
1808+
}
1809+
17201810
fn emit_call(&mut self, insn: &Instruction, types: &TypeTable) {
17211811
// Get function name (or placeholder for indirect calls)
17221812
let func_name = if insn.indirect_target.is_some() {
@@ -1811,12 +1901,18 @@ impl X86_64CodeGen {
18111901
});
18121902
}
18131903
}
1814-
self.emit_move(then_val, Reg::R10, size);
1904+
// Use R11 for then_val when dst_reg is R10 to avoid clobbering else value
1905+
let then_reg = if dst_reg == Reg::R10 {
1906+
Reg::R11
1907+
} else {
1908+
Reg::R10
1909+
};
1910+
self.emit_move(then_val, then_reg, size);
18151911
// LIR: conditional move if not equal (non-zero)
18161912
self.push_lir(X86Inst::CMov {
18171913
cc: CondCode::Ne,
18181914
size: op_size,
1819-
src: GpOperand::Reg(Reg::R10),
1915+
src: GpOperand::Reg(then_reg),
18201916
dst: dst_reg,
18211917
});
18221918
if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) {

0 commit comments

Comments
 (0)