Skip to content

Commit 1ceccbb

Browse files
authored
VirtRegRewriter: Add implicit register defs for live out undef lanes (#112679)
If an undef subregister def is live into another block, we need to maintain a physreg def to track the liveness of those lanes. This would manifest as a verifier error after branch folding, when the use in the cloned tail block no longer had a def. We need to detect interference with other assigned intervals to avoid clobbering the undef lanes defined in other intervals, since the undef def didn't count as interference. This is pretty ugly and adds a new dependency on LiveRegMatrix, keeping it live for one more pass. It also adds a lot of implicit operand spam (we really should have a better representation for this). There is a missing verifier check for this situation. Added an xfailed test that demonstrates this. We may also be able to revert the changes in 47d3cbc. It might be better to insert an IMPLICIT_DEF before the instruction rather than using the implicit-def operand. Fixes #98474
1 parent 61353cc commit 1ceccbb

10 files changed

+643
-39
lines changed

llvm/include/llvm/CodeGen/LiveRegMatrix.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,16 @@ class LiveRegMatrix {
118118
/// the segment [Start, End).
119119
bool checkInterference(SlotIndex Start, SlotIndex End, MCRegister PhysReg);
120120

121+
/// Check for interference in the segment [Start, End) that may prevent
122+
/// assignment to PhysReg, like checkInterference. Returns a lane mask of
123+
/// which lanes of the physical register interfere in the segment [Start, End)
124+
/// of some other interval already assigned to PhysReg.
125+
///
126+
/// If this function returns LaneBitmask::getNone(), PhysReg is completely
127+
/// free at the segment [Start, End).
128+
LaneBitmask checkInterferenceLanes(SlotIndex Start, SlotIndex End,
129+
MCRegister PhysReg);
130+
121131
/// Assign VirtReg to PhysReg.
122132
/// This will mark VirtReg's live range as occupied in the LiveRegMatrix and
123133
/// update VirtRegMap. The live range is expected to be available in PhysReg.

llvm/lib/CodeGen/LiveRegMatrix.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,41 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
244244
return false;
245245
}
246246

247+
LaneBitmask LiveRegMatrix::checkInterferenceLanes(SlotIndex Start,
248+
SlotIndex End,
249+
MCRegister PhysReg) {
250+
// Construct artificial live range containing only one segment [Start, End).
251+
VNInfo valno(0, Start);
252+
LiveRange::Segment Seg(Start, End, &valno);
253+
LiveRange LR;
254+
LR.addSegment(Seg);
255+
256+
LaneBitmask InterferingLanes;
257+
258+
// Check for interference with that segment
259+
for (MCRegUnitMaskIterator MCRU(PhysReg, TRI); MCRU.isValid(); ++MCRU) {
260+
auto [Unit, Lanes] = *MCRU;
261+
// LR is stack-allocated. LiveRegMatrix caches queries by a key that
262+
// includes the address of the live range. If (for the same reg unit) this
263+
// checkInterference overload is called twice, without any other query()
264+
// calls in between (on heap-allocated LiveRanges) - which would invalidate
265+
// the cached query - the LR address seen the second time may well be the
266+
// same as that seen the first time, while the Start/End/valno may not - yet
267+
// the same cached result would be fetched. To avoid that, we don't cache
268+
// this query.
269+
//
270+
// FIXME: the usability of the Query API needs to be improved to avoid
271+
// subtle bugs due to query identity. Avoiding caching, for example, would
272+
// greatly simplify things.
273+
LiveIntervalUnion::Query Q;
274+
Q.reset(UserTag, LR, Matrix[Unit]);
275+
if (Q.checkInterference())
276+
InterferingLanes |= Lanes;
277+
}
278+
279+
return InterferingLanes;
280+
}
281+
247282
Register LiveRegMatrix::getOneVReg(unsigned PhysReg) const {
248283
const LiveInterval *VRegInterval = nullptr;
249284
for (MCRegUnit Unit : TRI->regunits(PhysReg)) {

llvm/lib/CodeGen/VirtRegMap.cpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/CodeGen/LiveDebugVariables.h"
2222
#include "llvm/CodeGen/LiveInterval.h"
2323
#include "llvm/CodeGen/LiveIntervals.h"
24+
#include "llvm/CodeGen/LiveRegMatrix.h"
2425
#include "llvm/CodeGen/LiveStacks.h"
2526
#include "llvm/CodeGen/MachineBasicBlock.h"
2627
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -203,6 +204,7 @@ class VirtRegRewriter : public MachineFunctionPass {
203204
MachineRegisterInfo *MRI = nullptr;
204205
SlotIndexes *Indexes = nullptr;
205206
LiveIntervals *LIS = nullptr;
207+
LiveRegMatrix *LRM = nullptr;
206208
VirtRegMap *VRM = nullptr;
207209
LiveDebugVariables *DebugVars = nullptr;
208210
DenseSet<Register> RewriteRegs;
@@ -215,6 +217,9 @@ class VirtRegRewriter : public MachineFunctionPass {
215217
void handleIdentityCopy(MachineInstr &MI);
216218
void expandCopyBundle(MachineInstr &MI) const;
217219
bool subRegLiveThrough(const MachineInstr &MI, MCRegister SuperPhysReg) const;
220+
LaneBitmask liveOutUndefPhiLanesForUndefSubregDef(
221+
const LiveInterval &LI, const MachineBasicBlock &MBB, unsigned SubReg,
222+
MCPhysReg PhysReg, const MachineInstr &MI) const;
218223

219224
public:
220225
static char ID;
@@ -247,6 +252,7 @@ INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
247252
INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
248253
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
249254
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
255+
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
250256
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
251257
INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
252258
INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
@@ -262,6 +268,7 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
262268
AU.addRequired<LiveStacks>();
263269
AU.addPreserved<LiveStacks>();
264270
AU.addRequired<VirtRegMapWrapperLegacy>();
271+
AU.addRequired<LiveRegMatrixWrapperLegacy>();
265272

266273
if (!ClearVirtRegs)
267274
AU.addPreserved<LiveDebugVariables>();
@@ -276,6 +283,7 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
276283
MRI = &MF->getRegInfo();
277284
Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
278285
LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
286+
LRM = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
279287
VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
280288
DebugVars = &getAnalysis<LiveDebugVariables>();
281289
LLVM_DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
@@ -548,6 +556,40 @@ bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
548556
return false;
549557
}
550558

559+
/// Compute a lanemask for undef lanes which need to be preserved out of the
560+
/// defining block for a register assignment for a subregister def. \p PhysReg
561+
/// is assigned to \p LI, which is the main range.
562+
LaneBitmask VirtRegRewriter::liveOutUndefPhiLanesForUndefSubregDef(
563+
const LiveInterval &LI, const MachineBasicBlock &MBB, unsigned SubReg,
564+
MCPhysReg PhysReg, const MachineInstr &MI) const {
565+
LaneBitmask UndefMask = ~TRI->getSubRegIndexLaneMask(SubReg);
566+
LaneBitmask LiveOutUndefLanes;
567+
568+
for (const LiveInterval::SubRange &SR : LI.subranges()) {
569+
// Figure out which lanes are undef live into a successor.
570+
LaneBitmask NeedImpDefLanes = UndefMask & SR.LaneMask;
571+
if (NeedImpDefLanes.any() && !LIS->isLiveOutOfMBB(SR, &MBB)) {
572+
for (const MachineBasicBlock *Succ : MBB.successors()) {
573+
if (LIS->isLiveInToMBB(SR, Succ))
574+
LiveOutUndefLanes |= NeedImpDefLanes;
575+
}
576+
}
577+
}
578+
579+
SlotIndex MIIndex = LIS->getInstructionIndex(MI);
580+
SlotIndex BeforeMIUses = MIIndex.getBaseIndex();
581+
LaneBitmask InterferingLanes =
582+
LRM->checkInterferenceLanes(BeforeMIUses, MIIndex.getRegSlot(), PhysReg);
583+
LiveOutUndefLanes &= ~InterferingLanes;
584+
585+
LLVM_DEBUG(if (LiveOutUndefLanes.any()) {
586+
dbgs() << "Need live out undef defs for " << printReg(PhysReg)
587+
<< LiveOutUndefLanes << " from " << printMBBReference(MBB) << '\n';
588+
});
589+
590+
return LiveOutUndefLanes;
591+
}
592+
551593
void VirtRegRewriter::rewrite() {
552594
bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
553595
SmallVector<Register, 8> SuperDeads;
@@ -602,6 +644,32 @@ void VirtRegRewriter::rewrite() {
602644
MO.setIsUndef(true);
603645
} else if (!MO.isDead()) {
604646
assert(MO.isDef());
647+
if (MO.isUndef()) {
648+
const LiveInterval &LI = LIS->getInterval(VirtReg);
649+
650+
LaneBitmask LiveOutUndefLanes =
651+
liveOutUndefPhiLanesForUndefSubregDef(LI, *MBBI, SubReg,
652+
PhysReg, MI);
653+
if (LiveOutUndefLanes.any()) {
654+
SmallVector<unsigned, 16> CoveringIndexes;
655+
656+
// TODO: Just use one super register def if none of the lanes
657+
// are needed?
658+
if (!TRI->getCoveringSubRegIndexes(
659+
*MRI, MRI->getRegClass(VirtReg), LiveOutUndefLanes,
660+
CoveringIndexes))
661+
llvm_unreachable(
662+
"cannot represent required subregister defs");
663+
664+
// Try to represent the minimum needed live out def as a
665+
// sequence of subregister defs.
666+
//
667+
// FIXME: It would be better if we could directly represent
668+
// liveness with a lanemask instead of spamming operands.
669+
for (unsigned SubIdx : CoveringIndexes)
670+
SuperDefs.push_back(TRI->getSubReg(PhysReg, SubIdx));
671+
}
672+
}
605673
}
606674
}
607675

llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll

Lines changed: 30 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -38,24 +38,19 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
3838
; GFX90A-NEXT: {{ $}}
3939
; GFX90A-NEXT: renamable $sgpr30_sgpr31 = S_MOV_B64 0
4040
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr26_sgpr27, implicit-def dead $scc
41-
; GFX90A-NEXT: $vgpr22 = IMPLICIT_DEF
42-
; GFX90A-NEXT: $vgpr10 = IMPLICIT_DEF
43-
; GFX90A-NEXT: $vgpr24 = IMPLICIT_DEF
44-
; GFX90A-NEXT: $vgpr18 = IMPLICIT_DEF
45-
; GFX90A-NEXT: $vgpr20 = IMPLICIT_DEF
4641
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.59, implicit $vcc
4742
; GFX90A-NEXT: {{ $}}
4843
; GFX90A-NEXT: bb.2:
4944
; GFX90A-NEXT: successors: %bb.3(0x80000000)
50-
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr22, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3, $vgpr10, $vgpr24, $vgpr18, $vgpr20
45+
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3
5146
; GFX90A-NEXT: {{ $}}
5247
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
5348
; GFX90A-NEXT: renamable $sgpr23 = IMPLICIT_DEF
54-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
55-
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF
56-
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF
57-
; GFX90A-NEXT: renamable $vgpr23 = IMPLICIT_DEF
58-
; GFX90A-NEXT: renamable $vgpr25 = IMPLICIT_DEF
49+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
50+
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
51+
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
52+
; GFX90A-NEXT: renamable $vgpr23 = IMPLICIT_DEF implicit-def $vgpr22
53+
; GFX90A-NEXT: renamable $vgpr25 = IMPLICIT_DEF implicit-def $vgpr24
5954
; GFX90A-NEXT: renamable $sgpr28_sgpr29 = S_MOV_B64 0
6055
; GFX90A-NEXT: {{ $}}
6156
; GFX90A-NEXT: bb.3.Flow17:
@@ -111,8 +106,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
111106
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
112107
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
113108
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
114-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
115-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
109+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
110+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
116111
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
117112
; GFX90A-NEXT: {{ $}}
118113
; GFX90A-NEXT: bb.6.Flow20:
@@ -395,8 +390,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
395390
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
396391
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
397392
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
398-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
399-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
393+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
394+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
400395
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
401396
; GFX90A-NEXT: $sgpr30_sgpr31 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
402397
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.37, implicit $exec
@@ -434,8 +429,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
434429
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
435430
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
436431
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
437-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
438-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
432+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
433+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
439434
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
440435
; GFX90A-NEXT: $sgpr36_sgpr37 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
441436
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.39, implicit $exec
@@ -484,8 +479,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
484479
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
485480
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
486481
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
487-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
488-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
482+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
483+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
489484
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
490485
; GFX90A-NEXT: $sgpr38_sgpr39 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
491486
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.41, implicit $exec
@@ -535,8 +530,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
535530
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
536531
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
537532
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
538-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
539-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
533+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
534+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
540535
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
541536
; GFX90A-NEXT: $sgpr40_sgpr41 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
542537
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.47, implicit $exec
@@ -589,8 +584,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
589584
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
590585
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
591586
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
592-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
593-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
587+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
588+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
594589
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
595590
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0
596591
; GFX90A-NEXT: {{ $}}
@@ -643,8 +638,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
643638
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
644639
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
645640
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
646-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
647-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
641+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
642+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
648643
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
649644
; GFX90A-NEXT: $sgpr16_sgpr17 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
650645
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.43, implicit $exec
@@ -689,8 +684,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
689684
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
690685
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
691686
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
692-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
693-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
687+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
688+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
694689
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
695690
; GFX90A-NEXT: S_BRANCH %bb.45
696691
; GFX90A-NEXT: {{ $}}
@@ -719,8 +714,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
719714
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
720715
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
721716
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
722-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
723-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
717+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
718+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
724719
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
725720
; GFX90A-NEXT: S_BRANCH %bb.46
726721
; GFX90A-NEXT: {{ $}}
@@ -748,8 +743,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
748743
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
749744
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
750745
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
751-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
752-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
746+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
747+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
753748
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
754749
; GFX90A-NEXT: S_BRANCH %bb.62
755750
; GFX90A-NEXT: {{ $}}
@@ -773,8 +768,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
773768
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
774769
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
775770
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
776-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
777-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
771+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
772+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
778773
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
779774
; GFX90A-NEXT: $sgpr58_sgpr59 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
780775
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.53, implicit $exec
@@ -880,8 +875,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
880875
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
881876
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
882877
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
883-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
884-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
878+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
879+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
885880
; GFX90A-NEXT: $sgpr50_sgpr51 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
886881
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.57, implicit $exec
887882
; GFX90A-NEXT: {{ $}}

0 commit comments

Comments
 (0)