Skip to content

Commit f80bb29

Browse files
committed
Use LiveRegMatrix and only add necessary impdefs
We apparently need to detect interference with other assigned intervals to avoid clobbering the undef lanes defined in other intervals, because an undef def does not count as interference. This is pretty ugly and adds a new dependency on LiveRegMatrix, keeping it live for one more pass. It also adds a lot of implicit operand spam (we really should have a better representation for this).
1 parent 41d4717 commit f80bb29

10 files changed

+253
-72
lines changed

llvm/include/llvm/CodeGen/LiveRegMatrix.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,16 @@ class LiveRegMatrix {
118118
/// the segment [Start, End).
119119
bool checkInterference(SlotIndex Start, SlotIndex End, MCRegister PhysReg);
120120

121+
/// Check for interference in the segment [Start, End) that may prevent
122+
/// assignment to PhysReg, like checkInterference. Returns a lane mask of
123+
/// which lanes of the physical register interfere in the segment [Start, End)
124+
/// of some other interval already assigned to PhysReg.
125+
///
126+
/// If this function returns LaneBitmask::getNone(), PhysReg is completely
127+
/// free at the segment [Start, End).
128+
LaneBitmask checkInterferenceLanes(SlotIndex Start, SlotIndex End,
129+
MCRegister PhysReg);
130+
121131
/// Assign VirtReg to PhysReg.
122132
/// This will mark VirtReg's live range as occupied in the LiveRegMatrix and
123133
/// update VirtRegMap. The live range is expected to be available in PhysReg.

llvm/lib/CodeGen/LiveRegMatrix.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,41 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
244244
return false;
245245
}
246246

247+
LaneBitmask LiveRegMatrix::checkInterferenceLanes(SlotIndex Start,
248+
SlotIndex End,
249+
MCRegister PhysReg) {
250+
// Construct artificial live range containing only one segment [Start, End).
251+
VNInfo valno(0, Start);
252+
LiveRange::Segment Seg(Start, End, &valno);
253+
LiveRange LR;
254+
LR.addSegment(Seg);
255+
256+
LaneBitmask InterferingLanes;
257+
258+
// Check for interference with that segment
259+
for (MCRegUnitMaskIterator MCRU(PhysReg, TRI); MCRU.isValid(); ++MCRU) {
260+
auto [Unit, Lanes] = *MCRU;
261+
// LR is stack-allocated. LiveRegMatrix caches queries by a key that
262+
// includes the address of the live range. If (for the same reg unit) this
263+
// function is called twice, without any other query()
264+
// calls in between (on heap-allocated LiveRanges) - which would invalidate
265+
// the cached query - the LR address seen the second time may well be the
266+
// same as that seen the first time, while the Start/End/valno may not - yet
267+
// the same cached result would be fetched. To avoid that, we don't cache
268+
// this query.
269+
//
270+
// FIXME: the usability of the Query API needs to be improved to avoid
271+
// subtle bugs due to query identity. Avoiding caching, for example, would
272+
// greatly simplify things.
273+
LiveIntervalUnion::Query Q;
274+
Q.reset(UserTag, LR, Matrix[Unit]);
275+
if (Q.checkInterference())
276+
InterferingLanes |= Lanes;
277+
}
278+
279+
return InterferingLanes;
280+
}
281+
247282
Register LiveRegMatrix::getOneVReg(unsigned PhysReg) const {
248283
const LiveInterval *VRegInterval = nullptr;
249284
for (MCRegUnit Unit : TRI->regunits(PhysReg)) {

llvm/lib/CodeGen/VirtRegMap.cpp

Lines changed: 26 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/CodeGen/LiveDebugVariables.h"
2222
#include "llvm/CodeGen/LiveInterval.h"
2323
#include "llvm/CodeGen/LiveIntervals.h"
24+
#include "llvm/CodeGen/LiveRegMatrix.h"
2425
#include "llvm/CodeGen/LiveStacks.h"
2526
#include "llvm/CodeGen/MachineBasicBlock.h"
2627
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -203,6 +204,7 @@ class VirtRegRewriter : public MachineFunctionPass {
203204
MachineRegisterInfo *MRI = nullptr;
204205
SlotIndexes *Indexes = nullptr;
205206
LiveIntervals *LIS = nullptr;
207+
LiveRegMatrix *LRM = nullptr;
206208
VirtRegMap *VRM = nullptr;
207209
LiveDebugVariables *DebugVars = nullptr;
208210
DenseSet<Register> RewriteRegs;
@@ -215,9 +217,6 @@ class VirtRegRewriter : public MachineFunctionPass {
215217
void handleIdentityCopy(MachineInstr &MI);
216218
void expandCopyBundle(MachineInstr &MI) const;
217219
bool subRegLiveThrough(const MachineInstr &MI, MCRegister SuperPhysReg) const;
218-
bool needLiveOutUndefSubregDef(const LiveInterval &LI,
219-
const MachineBasicBlock &MBB, unsigned SubReg,
220-
MCPhysReg PhysReg) const;
221220
LaneBitmask liveOutUndefPhiLanesForUndefSubregDef(
222221
const LiveInterval &LI, const MachineBasicBlock &MBB, unsigned SubReg,
223222
MCPhysReg PhysReg, const MachineInstr &MI) const;
@@ -253,6 +252,7 @@ INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
253252
INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
254253
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
255254
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
255+
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
256256
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
257257
INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
258258
INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
@@ -268,6 +268,7 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
268268
AU.addRequired<LiveStacks>();
269269
AU.addPreserved<LiveStacks>();
270270
AU.addRequired<VirtRegMapWrapperLegacy>();
271+
AU.addRequired<LiveRegMatrixWrapperLegacy>();
271272

272273
if (!ClearVirtRegs)
273274
AU.addPreserved<LiveDebugVariables>();
@@ -282,6 +283,7 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
282283
MRI = &MF->getRegInfo();
283284
Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
284285
LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
286+
LRM = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
285287
VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
286288
DebugVars = &getAnalysis<LiveDebugVariables>();
287289
LLVM_DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
@@ -554,26 +556,6 @@ bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
554556
return false;
555557
}
556558

557-
/// Check if we need to maintain liveness for undef subregister lanes that are
558-
/// live out of a block.
559-
bool VirtRegRewriter::needLiveOutUndefSubregDef(const LiveInterval &LI,
560-
const MachineBasicBlock &MBB,
561-
unsigned SubReg,
562-
MCPhysReg PhysReg) const {
563-
LaneBitmask UndefMask = ~TRI->getSubRegIndexLaneMask(SubReg);
564-
for (const LiveInterval::SubRange &SR : LI.subranges()) {
565-
LaneBitmask NeedImpDefLanes = UndefMask & SR.LaneMask;
566-
if (NeedImpDefLanes.any() && !LIS->isLiveOutOfMBB(SR, &MBB)) {
567-
for (const MachineBasicBlock *Succ : MBB.successors()) {
568-
if (LIS->isLiveInToMBB(SR, Succ))
569-
return true;
570-
}
571-
}
572-
}
573-
574-
return false;
575-
}
576-
577559
/// Compute a lanemask for undef lanes which need to be preserved out of the
578560
/// defining block for a register assignment.
579561
LaneBitmask VirtRegRewriter::liveOutUndefPhiLanesForUndefSubregDef(
@@ -591,20 +573,17 @@ LaneBitmask VirtRegRewriter::liveOutUndefPhiLanesForUndefSubregDef(
591573
}
592574
}
593575
}
594-
if (LiveOutUndefLanes.none())
595-
return LiveOutUndefLanes;
596576

597577
SlotIndex MIIndex = LIS->getInstructionIndex(MI);
598578
SlotIndex BeforeMIUses = MIIndex.getBaseIndex();
599-
SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex();
600-
601-
for (MCRegUnitMaskIterator MCRU(PhysReg, TRI); MCRU.isValid(); ++MCRU) {
602-
auto [RU, PhysRegMask] = *MCRU;
579+
LaneBitmask InterferingLanes =
580+
LRM->checkInterferenceLanes(BeforeMIUses, MIIndex.getRegSlot(), PhysReg);
581+
LiveOutUndefLanes &= ~InterferingLanes;
603582

604-
const LiveRange &UnitRange = LIS->getRegUnit(RU);
605-
if (UnitRange.liveAt(AfterMIDefs) && UnitRange.liveAt(BeforeMIUses))
606-
LiveOutUndefLanes &= ~PhysRegMask;
607-
}
583+
LLVM_DEBUG(if (LiveOutUndefLanes.any()) {
584+
dbgs() << "Need live out undef defs for " << printReg(PhysReg)
585+
<< LiveOutUndefLanes << " from " << printMBBReference(MBB) << '\n';
586+
});
608587

609588
return LiveOutUndefLanes;
610589
}
@@ -672,33 +651,21 @@ void VirtRegRewriter::rewrite() {
672651
if (LiveOutUndefLanes.any()) {
673652
SmallVector<unsigned, 16> CoveringIndexes;
674653

675-
// TODO: Just use the super register if
676-
if (TRI->getCoveringSubRegIndexes(
654+
// TODO: Just use one super register def if none of the lanes
655+
// are needed?
656+
if (!TRI->getCoveringSubRegIndexes(
677657
*MRI, MRI->getRegClass(VirtReg), LiveOutUndefLanes,
678-
CoveringIndexes)) {
679-
// Try to represent the minimum needed live out def as a
680-
// sequence of subregister defs.
681-
//
682-
// FIXME: It would be better if we could directly represent
683-
// liveness with a lanemask instead of spamming operands.
684-
for (unsigned SubIdx : CoveringIndexes)
685-
SuperDefs.push_back(TRI->getSubReg(PhysReg, SubIdx));
686-
} else {
687-
// If we could not represent this as a sequence of
688-
// subregisters, it's safe to replace all the lanes with a
689-
// full def of the super register.
690-
SuperDefs.push_back(PhysReg);
691-
}
692-
}
693-
694-
if (false &&
695-
needLiveOutUndefSubregDef(LI, *MBBI, SubReg, PhysReg)) {
696-
SuperDefs.push_back(PhysReg);
697-
698-
for (MCRegister AssignedSubReg : TRI->subregs(PhysReg)) {
699-
if (subRegLiveThrough(MI, AssignedSubReg))
700-
SuperKills.push_back(AssignedSubReg);
701-
}
658+
CoveringIndexes))
659+
llvm_unreachable(
660+
"cannot represent required subregister defs");
661+
662+
// Try to represent the minimum needed live out def as a
663+
// sequence of subregister defs.
664+
//
665+
// FIXME: It would be better if we could directly represent
666+
// liveness with a lanemask instead of spamming operands.
667+
for (unsigned SubIdx : CoveringIndexes)
668+
SuperDefs.push_back(TRI->getSubReg(PhysReg, SubIdx));
702669
}
703670
}
704671
}

llvm/test/CodeGen/AMDGPU/indirect-call.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
603603
; GISEL-NEXT: s_mov_b32 s14, s43
604604
; GISEL-NEXT: s_mov_b32 s15, s42
605605
; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
606+
; GISEL-NEXT: v_mov_b32_e32 v1, v0
606607
; GISEL-NEXT: ; implicit-def: $vgpr0
607608
; GISEL-NEXT: ; implicit-def: $vgpr31
608609
; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
@@ -1383,6 +1384,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
13831384
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
13841385
; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
13851386
; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
1387+
; GISEL-NEXT: v_mov_b32_e32 v2, v0
13861388
; GISEL-NEXT: ; implicit-def: $vgpr1
13871389
; GISEL-NEXT: ; implicit-def: $vgpr0
13881390
; GISEL-NEXT: s_xor_b64 exec, exec, s[6:7]

llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ body: |
3232
; CHECK-NEXT: dead undef [[DEF2:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF
3333
; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
3434
; CHECK-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5)
35-
; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
35+
; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr25
3636
; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
3737
; CHECK-NEXT: $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
3838
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.6, implicit $exec
@@ -82,7 +82,7 @@ body: |
8282
; CHECK-NEXT: {{ $}}
8383
; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5)
8484
; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
85-
; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
85+
; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24
8686
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
8787
; CHECK-NEXT: S_BRANCH %bb.6
8888
; CHECK-NEXT: {{ $}}

llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ body: |
3030
; CHECK-NEXT: dead renamable $sgpr5 = IMPLICIT_DEF
3131
; CHECK-NEXT: dead undef [[DEF3:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
3232
; CHECK-NEXT: dead renamable $sgpr5 = IMPLICIT_DEF
33-
; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
33+
; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr25
3434
; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
3535
; CHECK-NEXT: $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
3636
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
@@ -78,7 +78,7 @@ body: |
7878
; CHECK-NEXT: {{ $}}
7979
; CHECK-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX8_IMM undef renamable $sgpr4_sgpr5, 32, 0 :: (invariant load (s256), addrspace 4)
8080
; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
81-
; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
81+
; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24
8282
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.6, implicit undef $vcc
8383
; CHECK-NEXT: S_BRANCH %bb.5
8484
; CHECK-NEXT: {{ $}}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -start-before=greedy,2 -stop-after=tailduplication -verify-machineinstrs -o - %s | FileCheck %s
3+
4+
---
5+
name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2_assigned_physreg_interference
6+
tracksRegLiveness: true
7+
machineFunctionInfo:
8+
isEntryFunction: true
9+
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
10+
stackPtrOffsetReg: '$sgpr32'
11+
body: |
12+
; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2_assigned_physreg_interference
13+
; CHECK: bb.0:
14+
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
15+
; CHECK-NEXT: liveins: $sgpr0, $vgpr2
16+
; CHECK-NEXT: {{ $}}
17+
; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
18+
; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc
19+
; CHECK-NEXT: {{ $}}
20+
; CHECK-NEXT: bb.1:
21+
; CHECK-NEXT: liveins: $vgpr2
22+
; CHECK-NEXT: {{ $}}
23+
; CHECK-NEXT: renamable $vgpr3 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr4_vgpr5
24+
; CHECK-NEXT: EXP 0, killed renamable $vgpr3, renamable $vgpr4, renamable $vgpr5, killed renamable $vgpr2, 0, 0, 0, implicit $exec
25+
; CHECK-NEXT: S_ENDPGM 0
26+
; CHECK-NEXT: {{ $}}
27+
; CHECK-NEXT: bb.2:
28+
; CHECK-NEXT: liveins: $vgpr2
29+
; CHECK-NEXT: {{ $}}
30+
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
31+
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
32+
; CHECK-NEXT: renamable $vgpr3_vgpr4_vgpr5 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
33+
; CHECK-NEXT: EXP 0, killed renamable $vgpr3, renamable $vgpr4, renamable $vgpr5, killed renamable $vgpr2, 0, 0, 0, implicit $exec
34+
; CHECK-NEXT: S_ENDPGM 0
35+
bb.0:
36+
liveins: $sgpr0, $vgpr2
37+
38+
%2:vgpr_32 = COPY $vgpr2
39+
S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
40+
S_CBRANCH_SCC0 %bb.2, implicit killed $scc
41+
42+
bb.1:
43+
undef %0.sub0:vreg_96 = V_MOV_B32_e32 0, implicit $exec
44+
S_BRANCH %bb.3
45+
46+
bb.2:
47+
S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
48+
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
49+
%0:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), addrspace 8)
50+
51+
bb.3:
52+
EXP 0, killed %0.sub0, killed %0.sub1, killed %0.sub2, %2:vgpr_32, 0, 0, 0, implicit $exec
53+
S_ENDPGM 0
54+
55+
...

0 commit comments

Comments
 (0)