Skip to content

Commit 1449859

Browse files
committed
Address feedback + fix failing test
1 parent 957c7da commit 1449859

File tree

5 files changed

+49
-62
lines changed

5 files changed

+49
-62
lines changed

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,6 @@
4040
#include "llvm/MC/MCSchedule.h"
4141
#include "llvm/MC/TargetRegistry.h"
4242
#include "llvm/Support/ErrorHandling.h"
43-
#include "llvm/Support/raw_ostream.h"
44-
#include <deque>
4543
#include <limits>
4644
#include <string>
4745

@@ -1121,16 +1119,16 @@ void PreRARematStage::printTargetRegions(bool PrintAll) const {
11211119

11221120
void PreRARematStage::RematReg::print(
11231121
const DenseMap<MachineInstr *, unsigned> &MIRegion) const {
1124-
REMAT_DEBUG(dbgs() << " [" << MIRegion.at(DefMI) << "] " << *DefMI);
1125-
REMAT_DEBUG(dbgs() << " -> used in [" << UseRegion << "] " << *UseMI);
1122+
dbgs() << REMAT_PREFIX << " [" << MIRegion.at(DefMI) << "] " << *DefMI;
1123+
dbgs() << REMAT_PREFIX << " -> used in [" << UseRegion << "] " << *UseMI;
11261124
const unsigned NumRegions = Live.size();
1127-
REMAT_DEBUG(dbgs() << " Guaranteed RP reduction in:");
1125+
dbgs() << REMAT_PREFIX << " Guaranteed RP reduction in:";
11281126
for (unsigned I = 0; I < NumRegions; ++I) {
11291127
if (isBeneficialRegion(I))
11301128
dbgs() << " [" << I << "]";
11311129
}
11321130
dbgs() << '\n';
1133-
REMAT_DEBUG(dbgs() << " Possible RP reduction in:");
1131+
dbgs() << REMAT_PREFIX << " Possible RP reduction in:";
11341132
for (unsigned I = 0; I < NumRegions; ++I) {
11351133
if (isMaybeBeneficialRegion(I))
11361134
dbgs() << " [" << I << "]";
@@ -1883,6 +1881,9 @@ bool PreRARematStage::collectRematRegs(ArrayRef<uint64_t> RegionFreq) {
18831881
// Set of registers already marked for potential rematerialization; used for
18841882
// remat chains checks.
18851883
DenseSet<Register> RematRegSet;
1884+
auto IsMORematable = [&RematRegSet](const MachineOperand &MO) -> bool {
1885+
return MO.isReg() && RematRegSet.contains(MO.getReg());
1886+
};
18861887

18871888
// Identify rematerializable instructions in the function.
18881889
for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
@@ -1917,11 +1918,8 @@ bool PreRARematStage::collectRematRegs(ArrayRef<uint64_t> RegionFreq) {
19171918
// either rematerializing the candidates in careful ordering, or
19181919
// deferring the MBB RP walk until the entire chain has been
19191920
// rematerialized.
1920-
MachineOperand &UseFirstMO = UseMI->getOperand(0);
1921-
if ((UseFirstMO.isReg() && RematRegSet.contains(UseFirstMO.getReg())) ||
1922-
llvm::any_of(DefMI.operands(), [&RematRegSet](MachineOperand &MO) {
1923-
return MO.isReg() && RematRegSet.contains(MO.getReg());
1924-
}))
1921+
const MachineOperand &UseMO = UseMI->getOperand(0);
1922+
if (IsMORematable(UseMO) || llvm::any_of(DefMI.operands(), IsMORematable))
19251923
continue;
19261924

19271925
// Do not rematerialize an instruction if it uses registers that aren't
@@ -1965,9 +1963,7 @@ PreRARematStage::RematReg::RematReg(
19651963
Live |= LiveOut;
19661964

19671965
// Store the register's lane bitmask.
1968-
unsigned SubReg = DefMI->getOperand(0).getSubReg();
1969-
Mask = SubReg ? DAG.TRI->getSubRegIndexLaneMask(SubReg)
1970-
: DAG.MRI.getMaxLaneMaskForVReg(Reg);
1966+
Mask = DAG.TRI->getSubRegIndexLaneMask(DefMI->getOperand(0).getSubReg());
19711967
}
19721968

19731969
MachineInstr *
@@ -2054,8 +2050,7 @@ MachineInstr *PreRARematStage::rematerialize(const RematReg &Remat,
20542050
const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
20552051
MachineInstr &DefMI = *Remat.DefMI;
20562052
Register Reg = DefMI.getOperand(0).getReg();
2057-
const TargetRegisterClass *RC = DAG.MRI.getRegClass(Reg);
2058-
Register NewReg = DAG.MRI.createVirtualRegister(RC);
2053+
Register NewReg = DAG.MRI.cloneVirtualRegister(Reg);
20592054

20602055
// Rematerialize the register in the region where it is used.
20612056
MachineBasicBlock::iterator InsertPos = Remat.UseMI;
@@ -2121,15 +2116,14 @@ void PreRARematStage::rollback(const RollbackReg &Rollback) const {
21212116
unsigned DefRegion = MIRegion.at(Remat->DefMI);
21222117
MachineBasicBlock *MBB = RegionBB[DefRegion];
21232118
Register Reg = RematMI->getOperand(0).getReg();
2124-
const TargetRegisterClass *RC = DAG.MRI.getRegClass(Reg);
2125-
Register NewReg = DAG.MRI.createVirtualRegister(RC);
2119+
Register NewReg = DAG.MRI.cloneVirtualRegister(Reg);
21262120

21272121
// Re-rematerialize MI in its original region. Note that it may not be
21282122
// rematerialized exactly in the same position as originally within the
21292123
// region, but it should not matter much.
21302124
MachineBasicBlock::iterator InsertPos(DAG.Regions[DefRegion].second);
21312125
TII->reMaterialize(*MBB, InsertPos, NewReg, 0, *RematMI, *DAG.TRI);
2132-
REMAT_DEBUG(dbgs() << "[" << DefRegion << "] Re-rematerialized as "
2126+
REMAT_DEBUG(dbgs() << '[' << DefRegion << "] Re-rematerialized as "
21332127
<< *std::prev(InsertPos));
21342128
Remat->UseMI->substituteRegister(Reg, NewReg, 0, *DAG.TRI);
21352129
DAG.deleteMI(Remat->UseRegion, RematMI);

llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir

Lines changed: 24 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -28,17 +28,6 @@
2828
# | 5 |
2929
# +---+
3030

31-
--- |
32-
define void @favor_always_benef() {
33-
ret void
34-
}
35-
define void @favor_live_through_in_high_freq_region() {
36-
ret void
37-
}
38-
define void @use_only_region_possible() {
39-
ret void
40-
}
41-
---
4231
# Rematerializing %32 is always beneficial because the defining and using
4332
# regions have the same frequency. It should be rematerialized first.
4433
name: favor_always_benef
@@ -51,12 +40,12 @@ body: |
5140
; CHECK-NEXT: successors: %bb.1(0x80000000)
5241
; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
5342
; CHECK-NEXT: {{ $}}
54-
; CHECK-NEXT: %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
55-
; CHECK-NEXT: %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
56-
; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
43+
; CHECK-NEXT: %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
44+
; CHECK-NEXT: %loop_if_bound:vgpr_32 = COPY $vgpr0
45+
; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
5746
; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
5847
; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
59-
; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound(s32), implicit $exec
48+
; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec
6049
; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
6150
; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
6251
; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
@@ -126,10 +115,10 @@ body: |
126115
bb.0:
127116
liveins: $vgpr0, $sgpr0_sgpr1
128117
129-
%mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
130-
%loop_if_bound:vgpr_32(s32) = COPY $vgpr0
131-
%mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
132-
%exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound(s32), implicit $exec
118+
%mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
119+
%loop_if_bound:vgpr_32 = COPY $vgpr0
120+
%mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
121+
%exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound, implicit $exec
133122
%loop_counter:sreg_32 = COPY %mem_data.sub1
134123
135124
%0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
@@ -217,12 +206,12 @@ body: |
217206
; CHECK-NEXT: successors: %bb.1(0x80000000)
218207
; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
219208
; CHECK-NEXT: {{ $}}
220-
; CHECK-NEXT: %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
221-
; CHECK-NEXT: %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
222-
; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
209+
; CHECK-NEXT: %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
210+
; CHECK-NEXT: %loop_if_bound:vgpr_32 = COPY $vgpr0
211+
; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
223212
; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
224213
; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
225-
; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound(s32), implicit $exec
214+
; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec
226215
; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
227216
; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
228217
; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
@@ -292,10 +281,10 @@ body: |
292281
bb.0:
293282
liveins: $vgpr0, $sgpr0_sgpr1
294283
295-
%mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
296-
%loop_if_bound:vgpr_32(s32) = COPY $vgpr0
297-
%mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
298-
%exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound(s32), implicit $exec
284+
%mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
285+
%loop_if_bound:vgpr_32 = COPY $vgpr0
286+
%mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
287+
%exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound, implicit $exec
299288
%loop_counter:sreg_32 = COPY %mem_data.sub1
300289
301290
%0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
@@ -381,9 +370,9 @@ body: |
381370
; CHECK-NEXT: successors: %bb.1(0x80000000)
382371
; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
383372
; CHECK-NEXT: {{ $}}
384-
; CHECK-NEXT: %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
385-
; CHECK-NEXT: %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
386-
; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
373+
; CHECK-NEXT: %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
374+
; CHECK-NEXT: %loop_if_bound:vgpr_32 = COPY $vgpr0
375+
; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
387376
; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
388377
; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
389378
; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
@@ -408,7 +397,7 @@ body: |
408397
; CHECK-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
409398
; CHECK-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
410399
; CHECK-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
411-
; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound(s32), implicit $exec
400+
; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec
412401
; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
413402
; CHECK-NEXT: {{ $}}
414403
; CHECK-NEXT: bb.1:
@@ -456,10 +445,10 @@ body: |
456445
bb.0:
457446
liveins: $vgpr0, $sgpr0_sgpr1
458447
459-
%mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
460-
%loop_if_bound:vgpr_32(s32) = COPY $vgpr0
461-
%mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
462-
%exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound(s32), implicit $exec
448+
%mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
449+
%loop_if_bound:vgpr_32 = COPY $vgpr0
450+
%mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
451+
%exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound, implicit $exec
463452
%loop_counter:sreg_32 = COPY %mem_data.sub1
464453
465454
%0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode

llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8695,7 +8695,6 @@ body: |
86958695
; GFX908-NEXT: {{ $}}
86968696
; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
86978697
; GFX908-NEXT: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
8698-
; GFX908-NEXT: dead [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
86998698
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
87008699
; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
87018700
; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
@@ -8737,8 +8736,9 @@ body: |
87378736
; GFX908-NEXT: [[V_CVT_I32_F64_e32_39:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 49, implicit $exec, implicit $mode, implicit-def $m0
87388737
; GFX908-NEXT: [[V_CVT_I32_F64_e32_40:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 50, implicit $exec, implicit $mode, implicit-def $m0
87398738
; GFX908-NEXT: [[V_CVT_I32_F64_e32_41:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 51, implicit $exec, implicit $mode, implicit-def $m0
8740-
; GFX908-NEXT: [[V_CVT_I32_F64_e32_42:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 52, implicit $exec, implicit $mode, implicit-def $m0
87418739
; GFX908-NEXT: undef [[V_RCP_F32_e32_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_RCP_F32_e32 [[DEF]].sub0, implicit $mode, implicit $exec
8740+
; GFX908-NEXT: [[V_CVT_I32_F64_e32_42:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 52, implicit $exec, implicit $mode, implicit-def $m0
8741+
; GFX908-NEXT: dead [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
87428742
; GFX908-NEXT: {{ $}}
87438743
; GFX908-NEXT: bb.1:
87448744
; GFX908-NEXT: successors: %bb.2(0x80000000)

llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,11 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
8383
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8
8484
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:12
8585
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:16
86-
; CHECK-NEXT: s_mov_b64 s[4:5], 0
87-
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
88-
; CHECK-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
86+
; CHECK-NEXT: s_mov_b64 s[4:5], exec
87+
; CHECK-NEXT: s_mov_b64 s[6:7], 0
88+
; CHECK-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7]
89+
; CHECK-NEXT: s_xor_b64 s[4:5], s[6:7], s[4:5]
90+
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
8991
; CHECK-NEXT: s_cbranch_execz .LBB0_2
9092
; CHECK-NEXT: ; %bb.1:
9193
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4

llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,11 @@ define dso_local i32 @check_boundaries() #0 {
2424
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8
2525
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:12
2626
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:16
27-
; CHECK-NEXT: s_mov_b64 s[4:5], 0
28-
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
29-
; CHECK-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
27+
; CHECK-NEXT: s_mov_b64 s[4:5], exec
28+
; CHECK-NEXT: s_mov_b64 s[6:7], 0
29+
; CHECK-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7]
30+
; CHECK-NEXT: s_xor_b64 s[4:5], s[6:7], s[4:5]
31+
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
3032
; CHECK-NEXT: s_cbranch_execz .LBB0_2
3133
; CHECK-NEXT: ; %bb.1:
3234
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4

0 commit comments

Comments
 (0)