Skip to content

Commit b4327a0

Browse files
topperc authored and vladimirradosavljevic committed
[MCP] Remove dead copies from basic blocks with successors. (#86973)
Previously we wouldn't remove dead copies from basic blocks with successors. The comment said we didn't want to trust the live-in lists. The comment is very old so I'm not sure if that's still a concern today. This patch checks the live-in lists and removes copies from MaybeDeadCopies if they are referenced by any live-ins in any successors. We only do this if the tracksLiveness property is set. If that property is not set, we retain the old behavior.
1 parent 68b69f5 commit b4327a0

11 files changed

+38
-37
lines changed

llvm/lib/CodeGen/MachineCopyPropagation.cpp

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,7 @@ class MachineCopyPropagation : public MachineFunctionPass {
370370
typedef enum { DebugUse = false, RegularUse = true } DebugType;
371371

372372
void ReadRegister(MCRegister Reg, MachineInstr &Reader, DebugType DT);
373+
void readSuccessorLiveIns(const MachineBasicBlock &MBB);
373374
void ForwardCopyPropagateBlock(MachineBasicBlock &MBB);
374375
void BackwardCopyPropagateBlock(MachineBasicBlock &MBB);
375376
void EliminateSpillageCopies(MachineBasicBlock &MBB);
@@ -422,6 +423,22 @@ void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader,
422423
}
423424
}
424425

426+
void MachineCopyPropagation::readSuccessorLiveIns(
427+
const MachineBasicBlock &MBB) {
428+
if (MaybeDeadCopies.empty())
429+
return;
430+
431+
// If a copy result is livein to a successor, it is not dead.
432+
for (const MachineBasicBlock *Succ : MBB.successors()) {
433+
for (const auto &LI : Succ->liveins()) {
434+
for (MCRegUnit Unit : TRI->regunits(LI.PhysReg)) {
435+
if (MachineInstr *Copy = Tracker.findCopyForUnit(Unit, *TRI))
436+
MaybeDeadCopies.remove(Copy);
437+
}
438+
}
439+
}
440+
}
441+
425442
/// Return true if \p PreviousCopy did copy register \p Src to register \p Def.
426443
/// This fact may have been obscured by sub register usage or may not be true at
427444
/// all even though Src and Def are subregisters of the registers used in
@@ -873,10 +890,17 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
873890
Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
874891
}
875892

876-
// If MBB doesn't have successors, delete the copies whose defs are not used.
877-
// If MBB does have successors, then conservative assume the defs are live-out
878-
// since we don't want to trust live-in lists.
879-
if (MBB.succ_empty()) {
893+
bool TracksLiveness = MRI->tracksLiveness();
894+
895+
// If liveness is tracked, we can use the live-in lists to know which
896+
// copies aren't dead.
897+
if (TracksLiveness)
898+
readSuccessorLiveIns(MBB);
899+
900+
// If MBB doesn't have successors, delete copies whose defs are not used.
901+
// If MBB does have successors, we can only delete copies if we are able to
902+
// use liveness information from successors to confirm they are really dead.
903+
if (MBB.succ_empty() || TracksLiveness) {
880904
for (MachineInstr *MaybeDead : MaybeDeadCopies) {
881905
LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: ";
882906
MaybeDead->dump());

llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,6 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
468468
; GFX1032-NEXT: s_cbranch_execz .LBB1_3
469469
; GFX1032-NEXT: ; %bb.2:
470470
; GFX1032-NEXT: v_mov_b32_e32 v0, s11
471-
; GFX1032-NEXT: s_mov_b32 s10, s11
472471
; GFX1032-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc
473472
; GFX1032-NEXT: .LBB1_3:
474473
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
@@ -604,7 +603,6 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
604603
; GFX1132-NEXT: s_cbranch_execz .LBB1_3
605604
; GFX1132-NEXT: ; %bb.2:
606605
; GFX1132-NEXT: v_mov_b32_e32 v0, s11
607-
; GFX1132-NEXT: s_mov_b32 s10, s11
608606
; GFX1132-NEXT: buffer_atomic_add_u32 v0, off, s[4:7], 0 glc
609607
; GFX1132-NEXT: .LBB1_3:
610608
; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s9

llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ define dso_local signext i32 @main(i32 signext %argc, ptr nocapture readnone %ar
3030
; CHECK-NEXT: # kill: def $r3 killed $r3 killed $x3
3131
; CHECK-NEXT: cmpwi 3, 0
3232
; CHECK-NEXT: crorc 20, 10, 2
33-
; CHECK-NEXT: crmove 21, 2
3433
; CHECK-NEXT: bc 4, 20, .LBB0_4
3534
; CHECK-NEXT: # %bb.2: # %if.end5
3635
; CHECK-NEXT: addis 3, 2, .L.str@toc@ha
@@ -75,11 +74,9 @@ define dso_local signext i32 @main(i32 signext %argc, ptr nocapture readnone %ar
7574
; BE-NEXT: addi 3, 31, 128
7675
; BE-NEXT: bl _setjmp
7776
; BE-NEXT: nop
78-
; BE-NEXT: crmove 20, 10
7977
; BE-NEXT: # kill: def $r3 killed $r3 killed $x3
8078
; BE-NEXT: cmpwi 3, 0
8179
; BE-NEXT: crorc 20, 10, 2
82-
; BE-NEXT: crmove 21, 2
8380
; BE-NEXT: bc 4, 20, .LBB0_4
8481
; BE-NEXT: # %bb.2: # %if.end5
8582
; BE-NEXT: addis 3, 2, .L.str@toc@ha

llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,8 @@ define i64 @loopif(ptr nocapture readonly %x, i32 %y, i32 %n) {
4242
; CHECK-NEXT: cmp r2, #1
4343
; CHECK-NEXT: blt .LBB1_4
4444
; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph
45-
; CHECK-NEXT: mov lr, r2
46-
; CHECK-NEXT: mov r12, r0
4745
; CHECK-NEXT: dls lr, r2
46+
; CHECK-NEXT: mov r12, r0
4847
; CHECK-NEXT: movs r0, #0
4948
; CHECK-NEXT: movs r3, #0
5049
; CHECK-NEXT: .p2align 2

llvm/test/CodeGen/Thumb2/mve-gather-increment.ll

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -542,9 +542,7 @@ define arm_aapcs_vfpcc void @gather_inc_v8i16_simple(ptr noalias nocapture reado
542542
; CHECK-NEXT: .pad #28
543543
; CHECK-NEXT: sub sp, #28
544544
; CHECK-NEXT: cmp r2, #1
545-
; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill
546-
; CHECK-NEXT: mov r1, r2
547-
; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill
545+
; CHECK-NEXT: strd r1, r2, [sp, #4] @ 8-byte Folded Spill
548546
; CHECK-NEXT: blt .LBB11_5
549547
; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader
550548
; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
@@ -661,9 +659,7 @@ define arm_aapcs_vfpcc void @gather_inc_v8i16_complex(ptr noalias nocapture read
661659
; CHECK-NEXT: .pad #136
662660
; CHECK-NEXT: sub sp, #136
663661
; CHECK-NEXT: cmp r2, #1
664-
; CHECK-NEXT: str r1, [sp, #64] @ 4-byte Spill
665-
; CHECK-NEXT: mov r1, r2
666-
; CHECK-NEXT: str r2, [sp, #68] @ 4-byte Spill
662+
; CHECK-NEXT: strd r1, r2, [sp, #64] @ 8-byte Folded Spill
667663
; CHECK-NEXT: blt.w .LBB12_5
668664
; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader
669665
; CHECK-NEXT: ldr r1, [sp, #68] @ 4-byte Reload
@@ -952,11 +948,9 @@ define arm_aapcs_vfpcc void @gather_inc_v16i8_complex(ptr noalias nocapture read
952948
; CHECK-NEXT: vstrw.32 q1, [sp, #152] @ 16-byte Spill
953949
; CHECK-NEXT: vldrw.u32 q1, [sp, #296] @ 16-byte Reload
954950
; CHECK-NEXT: vstrw.32 q0, [sp, #168] @ 16-byte Spill
955-
; CHECK-NEXT: vmov q0, q2
956-
; CHECK-NEXT: vmov q3, q5
957-
; CHECK-NEXT: vadd.i32 q1, q1, r0
958951
; CHECK-NEXT: vldrw.u32 q0, [sp, #248] @ 16-byte Reload
959952
; CHECK-NEXT: vldrw.u32 q3, [sp, #216] @ 16-byte Reload
953+
; CHECK-NEXT: vadd.i32 q1, q1, r0
960954
; CHECK-NEXT: vstrw.32 q5, [sp, #120] @ 16-byte Spill
961955
; CHECK-NEXT: vadd.i32 q0, q0, r0
962956
; CHECK-NEXT: subs.w r11, r11, #16
@@ -1243,9 +1237,7 @@ define arm_aapcs_vfpcc void @gather_inc_v16i8_simple(ptr noalias nocapture reado
12431237
; CHECK-NEXT: .pad #64
12441238
; CHECK-NEXT: sub sp, #64
12451239
; CHECK-NEXT: cmp r2, #1
1246-
; CHECK-NEXT: str r1, [sp, #56] @ 4-byte Spill
1247-
; CHECK-NEXT: mov r1, r2
1248-
; CHECK-NEXT: str r2, [sp, #60] @ 4-byte Spill
1240+
; CHECK-NEXT: strd r1, r2, [sp, #56] @ 8-byte Folded Spill
12491241
; CHECK-NEXT: blt.w .LBB14_5
12501242
; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader
12511243
; CHECK-NEXT: adr r5, .LCPI14_3

llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -608,7 +608,6 @@ define dso_local void @arm_mat_mult_q15(i16* noalias nocapture readonly %A, i16*
608608
; CHECK-NEXT: strd r0, r2, [sp, #24] @ 8-byte Folded Spill
609609
; CHECK-NEXT: cmp r3, #0
610610
; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
611-
; CHECK-NEXT: mov r0, r3
612611
; CHECK-NEXT: itt ne
613612
; CHECK-NEXT: ldrne r0, [sp, #136]
614613
; CHECK-NEXT: cmpne r0, #0

llvm/test/CodeGen/Thumb2/mve-laneinterleaving-reduct.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,7 @@ define void @correlate(ptr nocapture noundef readonly %ID, ptr nocapture noundef
118118
; CHECK-NEXT: .pad #12
119119
; CHECK-NEXT: sub sp, #12
120120
; CHECK-NEXT: cmp r3, #1
121-
; CHECK-NEXT: strd r0, r1, [sp] @ 8-byte Folded Spill
122-
; CHECK-NEXT: mov r1, r3
123-
; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
121+
; CHECK-NEXT: stm.w sp, {r0, r1, r3} @ 12-byte Folded Spill
124122
; CHECK-NEXT: blt .LBB4_12
125123
; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph
126124
; CHECK-NEXT: ldr r1, [sp, #48]

llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,9 +1062,8 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf
10621062
; CHECK-NEXT: .pad #40
10631063
; CHECK-NEXT: sub sp, #40
10641064
; CHECK-NEXT: cmp r2, #8
1065-
; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
10661065
; CHECK-NEXT: vstr s0, [sp] @ 4-byte Spill
1067-
; CHECK-NEXT: mov r1, r2
1066+
; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
10681067
; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
10691068
; CHECK-NEXT: blo .LBB7_9
10701069
; CHECK-NEXT: @ %bb.1:

llvm/test/CodeGen/Thumb2/mve-vldst4.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,14 +95,13 @@ define void @vldst4(ptr nocapture readonly %pIn, ptr nocapture %pOut, i32 %numRo
9595
; CHECK-NEXT: vmovx.f16 s8, s27
9696
; CHECK-NEXT: vins.f16 s12, s24
9797
; CHECK-NEXT: vins.f16 s13, s25
98+
; CHECK-NEXT: vins.f16 s2, s10
9899
; CHECK-NEXT: vins.f16 s3, s11
99100
; CHECK-NEXT: vins.f16 s1, s9
100-
; CHECK-NEXT: vins.f16 s2, s10
101101
; CHECK-NEXT: vins.f16 s22, s8
102102
; CHECK-NEXT: vmov q2, q3
103-
; CHECK-NEXT: vmov.f32 s17, s0
104-
; CHECK-NEXT: vmov.f32 s10, s4
105103
; CHECK-NEXT: vmov q6, q0
104+
; CHECK-NEXT: vmov.f32 s10, s4
106105
; CHECK-NEXT: vmov.f32 s11, s7
107106
; CHECK-NEXT: vmov.f32 s9, s0
108107
; CHECK-NEXT: vmov.f32 s17, s2

llvm/test/CodeGen/X86/optimize-max-0.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,6 @@ define void @bar(ptr %r, i32 %s, i32 %w, i32 %x, ptr %j, i32 %d) nounwind {
492492
; CHECK-NEXT: jb LBB1_4
493493
; CHECK-NEXT: ## %bb.5: ## %bb9
494494
; CHECK-NEXT: ## in Loop: Header=BB1_4 Depth=1
495-
; CHECK-NEXT: movl %edi, %ebx
496495
; CHECK-NEXT: incl %ecx
497496
; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax
498497
; CHECK-NEXT: addl %edi, %edx

0 commit comments

Comments
 (0)