Skip to content

Commit 7b5090c

Browse files
committed
[cc] IR fixes
Summary of Fixes 1. Block reordering for register allocator liveness (cc/ir/inline.rs) - Problem: After inlining, blocks were appended in arbitrary order to func.blocks. The register allocator's liveness analysis assumes blocks are in control flow order. - Fix: Added reorder_blocks_topologically() function that reorders blocks using BFS from the entry block after inlining completes. 2. Inliner continuation block children detection (cc/ir/inline.rs) - Problem: When checking if a block branches to the continuation, the code checked only the last instruction. But if the original callee had return followed by __builtin_unreachable(), the unreachable was cloned after the branch, making the check fail. - Fix: Changed to check if any instruction in the block branches to continuation, not just the last. 3. DCE treating Asm as root (cc/ir/dce.rs) - Problem: Opcode::Asm was not in the is_root() list, so DCE could eliminate inline assembly instructions. - Fix: Added Opcode::Asm to the root opcodes list. 4. DCE tracking Asm input operands (cc/ir/dce.rs) - Problem: The get_uses() function didn't include pseudos from inline assembly inputs, so DCE could eliminate code that produces values used by inline assembly. - Fix: Added code to iterate through asm_data.inputs and add their pseudos to the uses list. 5. Global symbol remapping for inliner (cc/ir/inline.rs - from earlier) - Added global_sym_map to ensure that when the same global symbol is referenced multiple times in an inlined function, they all map to the same caller pseudo ID.
1 parent 45fc688 commit 7b5090c

File tree

6 files changed

+156
-24
lines changed

6 files changed

+156
-24
lines changed

cc/ir/dce.rs

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ fn is_root(op: Opcode) -> bool {
6464
| Opcode::Alloca
6565
| Opcode::Setjmp // Has side effects (saves context)
6666
| Opcode::Longjmp // Never returns (noreturn)
67+
| Opcode::Asm // Inline assembly has side effects
6768
)
6869
}
6970

@@ -91,20 +92,30 @@ fn get_uses(insn: &Instruction) -> Vec<PseudoId> {
9192
uses.push(indirect);
9293
}
9394

95+
// Inline assembly inputs (the pseudos that the asm reads)
96+
if let Some(ref asm_data) = insn.asm_data {
97+
for input in &asm_data.inputs {
98+
uses.push(input.pseudo);
99+
}
100+
}
101+
94102
uses
95103
}
96104

97-
/// Find the instruction that defines a pseudo.
98-
/// Returns (block_index, instruction_index) if found.
99-
fn find_def(func: &Function, id: PseudoId) -> Option<(usize, usize)> {
105+
/// Find all instructions that define a pseudo.
106+
/// Returns vec of (block_index, instruction_index) for each definition.
107+
/// After inlining, a pseudo may have multiple definitions from different branches
108+
/// (e.g., the return target is written to from multiple return paths).
109+
fn find_all_defs(func: &Function, id: PseudoId) -> Vec<(usize, usize)> {
110+
let mut defs = Vec::new();
100111
for (bb_idx, bb) in func.blocks.iter().enumerate() {
101112
for (insn_idx, insn) in bb.insns.iter().enumerate() {
102113
if insn.target == Some(id) {
103-
return Some((bb_idx, insn_idx));
114+
defs.push((bb_idx, insn_idx));
104115
}
105116
}
106117
}
107-
None
118+
defs
108119
}
109120

110121
/// Eliminate dead code using mark-sweep algorithm.
@@ -128,8 +139,9 @@ fn eliminate_dead_code(func: &mut Function) -> bool {
128139

129140
// Phase 2: Propagate liveness transitively
130141
while let Some(id) = worklist.pop_front() {
131-
// Find the instruction that defines this pseudo
132-
if let Some((bb_idx, insn_idx)) = find_def(func, id) {
142+
// Find all instructions that define this pseudo
143+
// (there may be multiple after inlining, e.g., return target written from multiple paths)
144+
for (bb_idx, insn_idx) in find_all_defs(func, id) {
133145
let insn = &func.blocks[bb_idx].insns[insn_idx];
134146

135147
// Mark all operands of the defining instruction as live

cc/ir/inline.rs

Lines changed: 112 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,9 @@ struct InlineContext {
200200
pseudo_map: HashMap<PseudoId, PseudoId>,
201201
/// Map from callee BasicBlockId to caller BasicBlockId
202202
bb_map: HashMap<BasicBlockId, BasicBlockId>,
203+
/// Map from global symbol names to their caller pseudo IDs
204+
/// Multiple callee pseudos with the same global symbol name should map to the same caller pseudo
205+
global_sym_map: HashMap<String, PseudoId>,
203206
/// Next pseudo ID available in the caller
204207
next_pseudo_id: u32,
205208
/// Next basic block ID available in the caller
@@ -239,8 +242,8 @@ impl InlineContext {
239242
return_continuation_bb: BasicBlockId,
240243
return_target: Option<PseudoId>,
241244
) -> Self {
242-
// Find max pseudo ID and BB ID in caller
243-
let max_pseudo = caller.pseudos.iter().map(|p| p.id.0).max().unwrap_or(0);
245+
// Use caller.next_pseudo instead of scanning pseudos list
246+
// (the list may not contain all pseudo IDs, e.g., phi nodes from SSA)
244247
let max_bb = caller.blocks.iter().map(|b| b.id.0).max().unwrap_or(0);
245248

246249
// Collect callee's local variable names - these need to be renamed
@@ -249,7 +252,8 @@ impl InlineContext {
249252
Self {
250253
pseudo_map: HashMap::new(),
251254
bb_map: HashMap::new(),
252-
next_pseudo_id: max_pseudo + 1,
255+
global_sym_map: HashMap::new(),
256+
next_pseudo_id: caller.next_pseudo,
253257
next_bb_id: max_bb + 1,
254258
call_args,
255259
return_continuation_bb,
@@ -308,6 +312,18 @@ impl InlineContext {
308312
self.alloc_pseudo_id()
309313
}
310314
}
315+
// Global symbols: ensure same name maps to same ID across multiple inlinings
316+
// This is critical when a function is inlined multiple times and references
317+
// the same global variable - all references must point to the same pseudo
318+
Some(PseudoKind::Sym(name)) if !self.callee_locals.contains(name) => {
319+
if let Some(&existing_id) = self.global_sym_map.get(name) {
320+
self.pseudo_map.insert(callee_id, existing_id);
321+
return existing_id;
322+
}
323+
let new_id = self.alloc_pseudo_id();
324+
self.global_sym_map.insert(name.clone(), new_id);
325+
new_id
326+
}
311327
// For all other kinds, allocate new ID
312328
_ => self.alloc_pseudo_id(),
313329
};
@@ -636,12 +652,14 @@ fn inline_call_site(
636652
}
637653

638654
// Blocks with Ret (now Br to continuation): update children
639-
if block
655+
// Note: We check for ANY instruction (not just last) that branches to continuation,
656+
// because the original callee block may have had unreachable instructions after ret
657+
// (e.g., __builtin_unreachable() after return), which get cloned after the branch.
658+
let has_branch_to_continuation = block
640659
.insns
641-
.last()
642-
.map(|i| i.op == Opcode::Br && i.bb_true == Some(continuation_bb_id))
643-
.unwrap_or(false)
644-
{
660+
.iter()
661+
.any(|i| i.op == Opcode::Br && i.bb_true == Some(continuation_bb_id));
662+
if has_branch_to_continuation {
645663
block.children = vec![continuation_bb_id];
646664
continuation_bb.parents.push(block.id);
647665
}
@@ -681,20 +699,96 @@ fn inline_call_site(
681699
}
682700
}
683701

684-
// Update old children's parent references to point to continuation
702+
// Update old children's parent references and phi nodes to point to continuation
685703
for &child_id in &old_children {
686704
if let Some(child) = caller.blocks.iter_mut().find(|b| b.id == child_id) {
705+
// Update parent list
687706
for parent in &mut child.parents {
688707
if *parent == call_bb_id {
689708
*parent = continuation_bb_id;
690709
}
691710
}
711+
712+
// Update phi nodes: replace call_bb_id with continuation_bb_id
713+
for insn in &mut child.insns {
714+
if insn.op == Opcode::Phi {
715+
for (pred_bb, _) in &mut insn.phi_list {
716+
if *pred_bb == call_bb_id {
717+
*pred_bb = continuation_bb_id;
718+
}
719+
}
720+
}
721+
}
692722
}
693723
}
694724

725+
// Update caller's next_pseudo to avoid ID collisions with later allocations
726+
caller.next_pseudo = ctx.next_pseudo_id;
727+
695728
true
696729
}
697730

731+
// ============================================================================
732+
// Block Reordering (for correct liveness analysis in regalloc)
733+
// ============================================================================
734+
735+
/// Reorder blocks in control flow (topological) order.
736+
/// This is critical for the linear scan register allocator, which assumes
737+
/// blocks are in execution order when computing live intervals.
738+
fn reorder_blocks_topologically(func: &mut Function) {
739+
if func.blocks.is_empty() {
740+
return;
741+
}
742+
743+
// Build a map of block ID to block
744+
let block_map: HashMap<BasicBlockId, usize> = func
745+
.blocks
746+
.iter()
747+
.enumerate()
748+
.map(|(i, b)| (b.id, i))
749+
.collect();
750+
751+
// BFS from entry to get reachable blocks in control flow order
752+
let mut visited: HashSet<BasicBlockId> = HashSet::new();
753+
let mut order: Vec<BasicBlockId> = Vec::new();
754+
let mut queue: std::collections::VecDeque<BasicBlockId> = std::collections::VecDeque::new();
755+
756+
queue.push_back(func.entry);
757+
758+
while let Some(bb_id) = queue.pop_front() {
759+
if visited.contains(&bb_id) {
760+
continue;
761+
}
762+
visited.insert(bb_id);
763+
order.push(bb_id);
764+
765+
// Get successors from the block's children
766+
if let Some(&idx) = block_map.get(&bb_id) {
767+
for &child_id in &func.blocks[idx].children {
768+
if !visited.contains(&child_id) {
769+
queue.push_back(child_id);
770+
}
771+
}
772+
}
773+
}
774+
775+
// Include any unreachable blocks at the end (shouldn't happen, but be safe)
776+
for block in &func.blocks {
777+
if !visited.contains(&block.id) {
778+
order.push(block.id);
779+
}
780+
}
781+
782+
// Reorder blocks according to the computed order
783+
let mut new_blocks: Vec<BasicBlock> = Vec::with_capacity(func.blocks.len());
784+
for bb_id in order {
785+
if let Some(&idx) = block_map.get(&bb_id) {
786+
new_blocks.push(func.blocks[idx].clone());
787+
}
788+
}
789+
func.blocks = new_blocks;
790+
}
791+
698792
// ============================================================================
699793
// Main Inlining Pass
700794
// ============================================================================
@@ -768,6 +862,15 @@ pub fn run(module: &mut Module, opt_level: u32) -> bool {
768862
}
769863
}
770864

865+
// Reorder blocks in all functions that were modified
866+
// This ensures blocks are in control flow order for the register allocator's
867+
// linear scan algorithm, which relies on position-based liveness analysis
868+
if any_changed {
869+
for func in &mut module.functions {
870+
reorder_blocks_topologically(func);
871+
}
872+
}
873+
771874
// Remove dead static functions that were fully inlined
772875
if any_changed {
773876
remove_dead_functions(module);

cc/ir/instcombine.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -612,10 +612,14 @@ mod tests {
612612
let types = TypeTable::new(64);
613613
let mut func = Function::new("test", types.int_id);
614614

615-
for p in pseudos {
616-
func.add_pseudo(p);
615+
for p in &pseudos {
616+
func.add_pseudo(p.clone());
617617
}
618618

619+
// Set next_pseudo to be after the highest pseudo ID
620+
let max_id = pseudos.iter().map(|p| p.id.0).max().unwrap_or(0);
621+
func.next_pseudo = max_id + 1;
622+
619623
let mut bb = BasicBlock::new(BasicBlockId(0));
620624
bb.add_insn(Instruction::new(Opcode::Entry));
621625
bb.add_insn(insn);

cc/ir/linearize.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,6 +1050,12 @@ impl<'a> Linearizer<'a> {
10501050
if self.run_ssa {
10511051
if let Some(ref mut ir_func) = self.current_func {
10521052
ssa_convert(ir_func, self.types);
1053+
// Note: ssa_convert sets ir_func.next_pseudo to account for phi nodes
1054+
}
1055+
} else {
1056+
// Only set next_pseudo if SSA was NOT run (SSA sets its own)
1057+
if let Some(ref mut ir_func) = self.current_func {
1058+
ir_func.next_pseudo = self.next_pseudo;
10531059
}
10541060
}
10551061

cc/ir/mod.rs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,6 +1054,8 @@ pub struct Function {
10541054
pub entry: BasicBlockId,
10551055
/// All pseudos indexed by PseudoId
10561056
pub pseudos: Vec<Pseudo>,
1057+
/// Next pseudo ID to allocate (monotonically increasing)
1058+
pub next_pseudo: u32,
10571059
/// Local variables (name -> info), used for SSA conversion
10581060
pub locals: HashMap<String, LocalVar>,
10591061
/// Maximum dominator tree depth (computed by dominate.rs)
@@ -1075,6 +1077,7 @@ impl Default for Function {
10751077
blocks: Vec::new(),
10761078
entry: BasicBlockId(0),
10771079
pseudos: Vec::new(),
1080+
next_pseudo: 0,
10781081
locals: HashMap::new(),
10791082
max_dom_level: 0,
10801083
is_static: false,
@@ -1144,17 +1147,18 @@ impl Function {
11441147
self.locals.get(name)
11451148
}
11461149

1147-
/// Compute the next available pseudo ID
1148-
/// This scans all existing pseudos to find the maximum ID, then returns max + 1
1149-
pub fn next_pseudo_id(&self) -> PseudoId {
1150-
let max_id = self.pseudos.iter().map(|p| p.id.0).max().unwrap_or(0);
1151-
PseudoId(max_id + 1)
1150+
/// Allocate a new pseudo ID
1151+
/// Returns a unique ID and increments the counter
1152+
pub fn alloc_pseudo(&mut self) -> PseudoId {
1153+
let id = PseudoId(self.next_pseudo);
1154+
self.next_pseudo += 1;
1155+
id
11521156
}
11531157

11541158
/// Create a new constant integer pseudo and return its ID
11551159
/// The pseudo is added to self.pseudos
11561160
pub fn create_const_pseudo(&mut self, value: i64) -> PseudoId {
1157-
let id = self.next_pseudo_id();
1161+
let id = self.alloc_pseudo();
11581162
let pseudo = Pseudo::val(id, value);
11591163
self.add_pseudo(pseudo);
11601164
id

cc/ir/ssa.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,9 @@ pub fn ssa_convert(func: &mut Function, types: &TypeTable) {
666666
// Phase 3: Remove dead stores
667667
let dead_stores = std::mem::take(&mut converter.dead_stores);
668668
remove_dead_stores(converter.func, &dead_stores);
669+
670+
// Update function's next_pseudo to avoid ID collisions with later allocations
671+
converter.func.next_pseudo = converter.next_pseudo_id;
669672
}
670673

671674
// ============================================================================

0 commit comments

Comments
 (0)