Skip to content

Commit c230aeb

Browse files
authored
Merge pull request #491 from rustcoreutils/updates
[cc] Optimization fixes
2 parents 6651c5f + dec761b commit c230aeb

File tree

17 files changed

+1990
-727
lines changed

17 files changed

+1990
-727
lines changed

cc/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ plib = { path = "../plib" }
1616
[dev-dependencies]
1717
tempfile = "3"
1818

19+
[features]
20+
# Enable full compile matrix (4 configs) instead of default single config (-O -g)
21+
test_matrix = []
22+
1923
[lib]
2024
name = "posixutils_cc"
2125
path = "./lib.rs"

cc/arch/aarch64/codegen.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1894,6 +1894,8 @@ impl Aarch64CodeGen {
18941894
self.handle_call_return_value(insn, types, frame_size);
18951895
}
18961896

1897+
/// Emit a select (ternary) instruction using CSEL
1898+
/// This is used for pure ternary expressions: cond ? a : b
18971899
fn emit_select(&mut self, insn: &Instruction, frame_size: i32) {
18981900
let (cond, then_val, else_val) = match (insn.src.first(), insn.src.get(1), insn.src.get(2))
18991901
{
@@ -1930,7 +1932,7 @@ impl Aarch64CodeGen {
19301932
self.emit_move(then_val, then_reg, size, frame_size);
19311933
self.emit_move(else_val, else_reg, size, frame_size);
19321934

1933-
// LIR: compare condition with zero
1935+
// Compare condition with zero
19341936
self.push_lir(Aarch64Inst::Cmp {
19351937
size: OperandSize::B64,
19361938
src1: cond_reg,

cc/arch/aarch64/expression.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ impl Aarch64CodeGen {
307307
Opcode::Zext => {
308308
// Zero extend: use uxtb, uxth, or just mov for 32->64
309309
self.emit_move(src, dst_reg, 64, frame_size);
310-
match insn.size {
310+
match insn.src_size {
311311
8 => {
312312
self.push_lir(Aarch64Inst::Uxtb {
313313
src: dst_reg,

cc/arch/x86_64/codegen.rs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1850,6 +1850,8 @@ impl X86_64CodeGen {
18501850
self.handle_call_return_value(insn, types);
18511851
}
18521852

1853+
/// Emit a select (ternary) instruction using CMOVcc
1854+
/// This is used for pure ternary expressions: cond ? a : b
18531855
fn emit_select(&mut self, insn: &Instruction) {
18541856
let (cond, then_val, else_val) = match (insn.src.first(), insn.src.get(1), insn.src.get(2))
18551857
{
@@ -1867,54 +1869,63 @@ impl X86_64CodeGen {
18671869
Loc::Reg(r) => *r,
18681870
_ => Reg::R10, // Use scratch register R10
18691871
};
1872+
1873+
// Move else value into destination first (default if condition is false)
18701874
self.emit_move(else_val, dst_reg, size);
1875+
1876+
// Test condition
18711877
let cond_loc = self.get_location(cond);
18721878
match &cond_loc {
18731879
Loc::Reg(r) => {
1874-
// LIR: test register with itself
1880+
// Test register with itself
18751881
self.push_lir(X86Inst::Test {
18761882
size: OperandSize::B64,
18771883
src: GpOperand::Reg(*r),
18781884
dst: GpOperand::Reg(*r),
18791885
});
18801886
}
18811887
Loc::Imm(v) => {
1888+
// Constant condition - just use appropriate value
18821889
if *v != 0 {
18831890
self.emit_move(then_val, dst_reg, size);
18841891
if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) {
18851892
self.emit_move_to_loc(dst_reg, &dst_loc, size);
18861893
}
18871894
return;
18881895
}
1896+
// else_val already in dst_reg
18891897
if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) {
18901898
self.emit_move_to_loc(dst_reg, &dst_loc, size);
18911899
}
18921900
return;
18931901
}
18941902
_ => {
1903+
// Load condition to scratch register and test
18951904
self.emit_move(cond, Reg::R11, 64);
1896-
// LIR: test R11 with itself
18971905
self.push_lir(X86Inst::Test {
18981906
size: OperandSize::B64,
18991907
src: GpOperand::Reg(Reg::R11),
19001908
dst: GpOperand::Reg(Reg::R11),
19011909
});
19021910
}
19031911
}
1912+
1913+
// Conditional move: if condition is non-zero (NE), use then_val
19041914
// Use R11 for then_val when dst_reg is R10 to avoid clobbering else value
19051915
let then_reg = if dst_reg == Reg::R10 {
19061916
Reg::R11
19071917
} else {
19081918
Reg::R10
19091919
};
19101920
self.emit_move(then_val, then_reg, size);
1911-
// LIR: conditional move if not equal (non-zero)
19121921
self.push_lir(X86Inst::CMov {
19131922
cc: CondCode::Ne,
19141923
size: op_size,
19151924
src: GpOperand::Reg(then_reg),
19161925
dst: dst_reg,
19171926
});
1927+
1928+
// Move to final destination if needed
19181929
if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) {
19191930
self.emit_move_to_loc(dst_reg, &dst_loc, size);
19201931
}

cc/arch/x86_64/expression.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,12 @@ impl X86_64CodeGen {
397397
};
398398
match insn.op {
399399
Opcode::Zext => {
400-
self.emit_move(src, dst_reg, insn.size);
400+
// Move source at its original size - this ensures we only load
401+
// the valid bits from stack/memory. On x86-64, 32-bit register
402+
// writes automatically zero the upper 32 bits.
403+
// For 8/16-bit sources, emit_move uses movzbl/movzwl which
404+
// zero-extends to 32 bits (and thus to 64 bits).
405+
self.emit_move(src, dst_reg, insn.src_size.max(32));
401406
}
402407
Opcode::Sext => {
403408
// Move source at its original size, then sign-extend

cc/arch/x86_64/regalloc.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -260,9 +260,10 @@ pub fn opcode_constraints(op: Opcode) -> RegConstraints {
260260
inputs: &[Reg::Rax],
261261
},
262262
// Variable shifts: count must be in Cl (Rcx)
263-
// Note: shifts don't clobber Rcx, they just require it as input
263+
// The codegen moves the shift count INTO Rcx, clobbering it.
264+
// For immediate shifts, this constraint is conservative but safe.
264265
Opcode::Shl | Opcode::Lsr | Opcode::Asr => RegConstraints {
265-
clobbers: &[],
266+
clobbers: &[Reg::Rcx],
266267
inputs: &[Reg::Rcx],
267268
},
268269
// VaArg: emit_va_arg_int uses Rax for reg_save_area/overflow pointer,

cc/ir/dce.rs

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ fn is_root(op: Opcode) -> bool {
6464
| Opcode::Alloca
6565
| Opcode::Setjmp // Has side effects (saves context)
6666
| Opcode::Longjmp // Never returns (noreturn)
67+
| Opcode::Asm // Inline assembly has side effects
6768
)
6869
}
6970

@@ -91,20 +92,30 @@ fn get_uses(insn: &Instruction) -> Vec<PseudoId> {
9192
uses.push(indirect);
9293
}
9394

95+
// Inline assembly inputs (the pseudos that the asm reads)
96+
if let Some(ref asm_data) = insn.asm_data {
97+
for input in &asm_data.inputs {
98+
uses.push(input.pseudo);
99+
}
100+
}
101+
94102
uses
95103
}
96104

97-
/// Find the instruction that defines a pseudo.
98-
/// Returns (block_index, instruction_index) if found.
99-
fn find_def(func: &Function, id: PseudoId) -> Option<(usize, usize)> {
105+
/// Find all instructions that define a pseudo.
106+
/// Returns vec of (block_index, instruction_index) for each definition.
107+
/// After inlining, a pseudo may have multiple definitions from different branches
108+
/// (e.g., the return target is written to from multiple return paths).
109+
fn find_all_defs(func: &Function, id: PseudoId) -> Vec<(usize, usize)> {
110+
let mut defs = Vec::new();
100111
for (bb_idx, bb) in func.blocks.iter().enumerate() {
101112
for (insn_idx, insn) in bb.insns.iter().enumerate() {
102113
if insn.target == Some(id) {
103-
return Some((bb_idx, insn_idx));
114+
defs.push((bb_idx, insn_idx));
104115
}
105116
}
106117
}
107-
None
118+
defs
108119
}
109120

110121
/// Eliminate dead code using mark-sweep algorithm.
@@ -128,8 +139,9 @@ fn eliminate_dead_code(func: &mut Function) -> bool {
128139

129140
// Phase 2: Propagate liveness transitively
130141
while let Some(id) = worklist.pop_front() {
131-
// Find the instruction that defines this pseudo
132-
if let Some((bb_idx, insn_idx)) = find_def(func, id) {
142+
// Find all instructions that define this pseudo
143+
// (there may be multiple after inlining, e.g., return target written from multiple paths)
144+
for (bb_idx, insn_idx) in find_all_defs(func, id) {
133145
let insn = &func.blocks[bb_idx].insns[insn_idx];
134146

135147
// Mark all operands of the defining instruction as live

cc/ir/dominate.rs

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,11 @@ pub fn domtree_build(func: &mut Function) {
149149
// Find new idom as intersection of all processed predecessors
150150
let mut new_idom: Option<usize> = None;
151151
for parent_id in parents {
152-
let parent_nr = postorder_nr[&parent_id];
152+
// Skip predecessors that weren't reached during DFS (unreachable blocks)
153+
let parent_nr = match postorder_nr.get(&parent_id) {
154+
Some(&nr) => nr,
155+
None => continue,
156+
};
153157
if doms[parent_nr].is_none() {
154158
continue;
155159
}
@@ -634,4 +638,62 @@ mod tests {
634638
assert!(func.dominates(BasicBlockId(3), BasicBlockId(4)));
635639
assert!(!func.dominates(BasicBlockId(3), BasicBlockId(1)));
636640
}
641+
642+
/// Test that domtree_build handles unreachable blocks gracefully.
643+
/// Unreachable blocks have predecessors that weren't visited during DFS.
644+
#[test]
645+
fn test_domtree_with_unreachable_block() {
646+
// Create a CFG with an unreachable block:
647+
// entry(0)
648+
// |
649+
// v
650+
// bb1(1)
651+
// |
652+
// v
653+
// exit(2)
654+
//
655+
// unreachable(3) <-- has edge from nowhere reachable
656+
// |
657+
// v
658+
// bb1(1) <-- unreachable points TO bb1, making bb1 have unreachable as predecessor
659+
660+
let types = TypeTable::new(64);
661+
let mut func = Function::new("test", types.void_id);
662+
663+
let mut entry = BasicBlock::new(BasicBlockId(0));
664+
entry.children = vec![BasicBlockId(1)];
665+
entry.add_insn(Instruction::new(Opcode::Entry));
666+
entry.add_insn(Instruction::br(BasicBlockId(1)));
667+
668+
let mut bb1 = BasicBlock::new(BasicBlockId(1));
669+
// bb1 has both entry AND unreachable as predecessors
670+
bb1.parents = vec![BasicBlockId(0), BasicBlockId(3)];
671+
bb1.children = vec![BasicBlockId(2)];
672+
bb1.add_insn(Instruction::br(BasicBlockId(2)));
673+
674+
let mut exit = BasicBlock::new(BasicBlockId(2));
675+
exit.parents = vec![BasicBlockId(1)];
676+
exit.add_insn(Instruction::ret(None));
677+
678+
// Unreachable block - not reachable from entry
679+
let mut unreachable = BasicBlock::new(BasicBlockId(3));
680+
unreachable.children = vec![BasicBlockId(1)];
681+
unreachable.add_insn(Instruction::br(BasicBlockId(1)));
682+
683+
func.entry = BasicBlockId(0);
684+
func.blocks = vec![entry, bb1, exit, unreachable];
685+
686+
// This should not panic - the fix skips predecessors not reached during DFS
687+
domtree_build(&mut func);
688+
689+
// Verify the reachable blocks have correct dominators
690+
let entry_block = func.get_block(BasicBlockId(0)).unwrap();
691+
assert!(entry_block.idom.is_none());
692+
693+
let bb1_block = func.get_block(BasicBlockId(1)).unwrap();
694+
assert_eq!(bb1_block.idom, Some(BasicBlockId(0)));
695+
696+
let exit_block = func.get_block(BasicBlockId(2)).unwrap();
697+
assert_eq!(exit_block.idom, Some(BasicBlockId(1)));
698+
}
637699
}

0 commit comments

Comments
 (0)