Skip to content

Commit 112283d

Browse files
rampitec authored and mahesh-attarde committed
[AMDGPU] Select scale_offset for scratch instructions on gfx1250 (llvm#150111)
1 parent 6590e06 commit 112283d

File tree

7 files changed

+411
-114
lines changed

7 files changed

+411
-114
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2116,7 +2116,8 @@ bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
21162116

21172117
bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
21182118
SDValue &VAddr, SDValue &SAddr,
2119-
SDValue &Offset) const {
2119+
SDValue &Offset,
2120+
SDValue &CPol) const {
21202121
int64_t ImmOffset = 0;
21212122

21222123
SDValue LHS, RHS;
@@ -2148,6 +2149,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
21482149
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
21492150
return false;
21502151
Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
2152+
CPol = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
21512153
return true;
21522154
}
21532155
}
@@ -2181,6 +2183,10 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
21812183
return false;
21822184
SAddr = SelectSAddrFI(CurDAG, SAddr);
21832185
Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2186+
2187+
bool ScaleOffset = SelectScaleOffset(N, VAddr, true /* IsSigned */);
2188+
CPol = CurDAG->getTargetConstant(ScaleOffset ? AMDGPU::CPol::SCAL : 0,
2189+
SDLoc(), MVT::i32);
21842190
return true;
21852191
}
21862192

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -175,7 +175,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
175175
bool checkFlatScratchSVSSwizzleBug(SDValue VAddr, SDValue SAddr,
176176
uint64_t ImmOffset) const;
177177
bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
178-
SDValue &SAddr, SDValue &Offset) const;
178+
SDValue &SAddr, SDValue &Offset,
179+
SDValue &CPol) const;
179180

180181
bool SelectSMRDOffset(SDNode *N, SDValue ByteOffsetNode, SDValue *SOffset,
181182
SDValue *Offset, bool Imm32Only = false,

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5895,22 +5895,32 @@ AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
58955895
if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
58965896
return std::nullopt;
58975897

5898+
unsigned CPol = selectScaleOffset(Root, RHS, true /* IsSigned */)
5899+
? AMDGPU::CPol::SCAL
5900+
: 0;
5901+
58985902
if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
58995903
int FI = LHSDef->MI->getOperand(1).getIndex();
59005904
return {{
5901-
[=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
5905+
[=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
59025906
[=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
5903-
[=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
5907+
[=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }, // offset
5908+
[=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); } // cpol
59045909
}};
59055910
}
59065911

5912+
if (!isSGPR(LHS))
5913+
if (auto Def = getDefSrcRegIgnoringCopies(LHS, *MRI))
5914+
LHS = Def->Reg;
5915+
59075916
if (!isSGPR(LHS))
59085917
return std::nullopt;
59095918

59105919
return {{
5911-
[=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
5912-
[=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); }, // saddr
5913-
[=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
5920+
[=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
5921+
[=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); }, // saddr
5922+
[=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }, // offset
5923+
[=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); } // cpol
59145924
}};
59155925
}
59165926

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ let WantsRoot = true in {
1414
def GlobalSAddr : ComplexPattern<iPTR, 4, "SelectGlobalSAddr", [], [], -10>;
1515
def GlobalSAddrGLC : ComplexPattern<iPTR, 4, "SelectGlobalSAddrGLC", [], [], -10>;
1616
def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [], -10>;
17-
def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [], -10>;
17+
def ScratchSVAddr : ComplexPattern<iPTR, 4, "SelectScratchSVAddr", [], [], -10>;
1818
}
1919

2020
class True16D16Table <string hiOp, string loOp> {
@@ -1443,19 +1443,19 @@ class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
14431443
>;
14441444

14451445
class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1446-
(vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset))),
1447-
(inst $vaddr, $saddr, $offset, 0)
1446+
(vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset, CPol:$cpol))),
1447+
(inst $vaddr, $saddr, $offset, $cpol)
14481448
>;
14491449

14501450
class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
14511451
ValueType vt> : GCNPat <
1452-
(node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset)),
1453-
(inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset)
1452+
(node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset, CPol:$cpol)),
1453+
(inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset, $cpol)
14541454
>;
14551455

14561456
class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1457-
(vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset), vt:$in)),
1458-
(inst $vaddr, $saddr, $offset, 0, $in)
1457+
(vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset, CPol:$cpol), vt:$in)),
1458+
(inst $vaddr, $saddr, $offset, $cpol, $in)
14591459
>;
14601460

14611461
class ScratchLoadSVaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll

Lines changed: 24 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -257,20 +257,16 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
257257
; GFX12: ; %bb.0: ; %bb
258258
; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x0
259259
; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
260-
; GFX12-NEXT: v_mov_b32_e32 v2, 15
261-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
260+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
262261
; GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
262+
; GFX12-NEXT: v_mov_b32_e32 v2, 15
263263
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
264264
; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
265265
; GFX12-NEXT: s_wait_kmcnt 0x0
266266
; GFX12-NEXT: s_lshl_b32 s0, s0, 7
267-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
268-
; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
269-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
270-
; GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
271-
; GFX12-NEXT: scratch_store_b32 v0, v2, off scope:SCOPE_SYS
267+
; GFX12-NEXT: scratch_store_b32 v0, v2, s0 scope:SCOPE_SYS
272268
; GFX12-NEXT: s_wait_storecnt 0x0
273-
; GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
269+
; GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
274270
; GFX12-NEXT: s_wait_loadcnt 0x0
275271
; GFX12-NEXT: s_endpgm
276272
;
@@ -357,20 +353,16 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
357353
; UNALIGNED_GFX12: ; %bb.0: ; %bb
358354
; UNALIGNED_GFX12-NEXT: s_load_b32 s0, s[4:5], 0x0
359355
; UNALIGNED_GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
360-
; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
361-
; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
356+
; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
362357
; UNALIGNED_GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
358+
; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
363359
; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
364360
; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
365361
; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0
366362
; UNALIGNED_GFX12-NEXT: s_lshl_b32 s0, s0, 7
367-
; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
368-
; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
369-
; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
370-
; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
371-
; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, off scope:SCOPE_SYS
363+
; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, s0 scope:SCOPE_SYS
372364
; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
373-
; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
365+
; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
374366
; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0
375367
; UNALIGNED_GFX12-NEXT: s_endpgm
376368
bb:
@@ -937,19 +929,17 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
937929
; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
938930
; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
939931
; GFX12-NEXT: s_wait_loadcnt 0x0
940-
; GFX12-NEXT: v_mov_b32_e32 v2, 15
941932
; GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
933+
; GFX12-NEXT: v_mov_b32_e32 v2, 15
942934
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
943-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
935+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
944936
; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
945937
; GFX12-NEXT: s_wait_kmcnt 0x0
946938
; GFX12-NEXT: scratch_store_b32 v0, v2, off offset:384 scope:SCOPE_SYS
947939
; GFX12-NEXT: s_wait_storecnt 0x0
948940
; GFX12-NEXT: s_lshl_b32 s0, s0, 7
949941
; GFX12-NEXT: s_add_co_u32 s0, 0x100, s0
950-
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
951-
; GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
952-
; GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
942+
; GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
953943
; GFX12-NEXT: s_wait_loadcnt 0x0
954944
; GFX12-NEXT: s_endpgm
955945
;
@@ -1048,19 +1038,17 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
10481038
; UNALIGNED_GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
10491039
; UNALIGNED_GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
10501040
; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0
1051-
; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
10521041
; UNALIGNED_GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
1042+
; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
10531043
; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1054-
; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
1044+
; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
10551045
; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
10561046
; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0
10571047
; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, off offset:384 scope:SCOPE_SYS
10581048
; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
10591049
; UNALIGNED_GFX12-NEXT: s_lshl_b32 s0, s0, 7
10601050
; UNALIGNED_GFX12-NEXT: s_add_co_u32 s0, 0x100, s0
1061-
; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1062-
; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
1063-
; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
1051+
; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
10641052
; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0
10651053
; UNALIGNED_GFX12-NEXT: s_endpgm
10661054
bb:
@@ -1579,19 +1567,17 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
15791567
; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
15801568
; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
15811569
; GFX12-NEXT: s_wait_loadcnt 0x0
1582-
; GFX12-NEXT: v_mov_b32_e32 v2, 15
15831570
; GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
1571+
; GFX12-NEXT: v_mov_b32_e32 v2, 15
15841572
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1585-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
1573+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
15861574
; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
15871575
; GFX12-NEXT: s_wait_kmcnt 0x0
15881576
; GFX12-NEXT: scratch_store_b32 v0, v2, off offset:16512 scope:SCOPE_SYS
15891577
; GFX12-NEXT: s_wait_storecnt 0x0
15901578
; GFX12-NEXT: s_lshl_b32 s0, s0, 7
15911579
; GFX12-NEXT: s_add_co_u32 s0, 0x4000, s0
1592-
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1593-
; GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
1594-
; GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
1580+
; GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
15951581
; GFX12-NEXT: s_wait_loadcnt 0x0
15961582
; GFX12-NEXT: s_endpgm
15971583
;
@@ -1692,19 +1678,17 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
16921678
; UNALIGNED_GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
16931679
; UNALIGNED_GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
16941680
; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0
1695-
; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
16961681
; UNALIGNED_GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
1682+
; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
16971683
; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1698-
; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
1684+
; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
16991685
; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
17001686
; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0
17011687
; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, off offset:16512 scope:SCOPE_SYS
17021688
; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
17031689
; UNALIGNED_GFX12-NEXT: s_lshl_b32 s0, s0, 7
17041690
; UNALIGNED_GFX12-NEXT: s_add_co_u32 s0, 0x4000, s0
1705-
; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1706-
; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
1707-
; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
1691+
; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
17081692
; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0
17091693
; UNALIGNED_GFX12-NEXT: s_endpgm
17101694
bb:
@@ -4060,9 +4044,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
40604044
; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
40614045
; GFX12: ; %bb.0: ; %bb
40624046
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4063-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4064-
; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4065-
; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:65512 scope:SCOPE_SYS
4047+
; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
40664048
; GFX12-NEXT: s_wait_storecnt 0x0
40674049
; GFX12-NEXT: s_endpgm
40684050
;
@@ -4113,9 +4095,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
41134095
; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
41144096
; UNALIGNED_GFX12: ; %bb.0: ; %bb
41154097
; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4116-
; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4117-
; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4118-
; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:65512 scope:SCOPE_SYS
4098+
; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
41194099
; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
41204100
; UNALIGNED_GFX12-NEXT: s_endpgm
41214101
bb:
@@ -4172,9 +4152,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
41724152
; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
41734153
; GFX12: ; %bb.0: ; %bb
41744154
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4175-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4176-
; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4177-
; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4155+
; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:-16 scope:SCOPE_SYS
41784156
; GFX12-NEXT: s_wait_storecnt 0x0
41794157
; GFX12-NEXT: s_endpgm
41804158
;
@@ -4223,9 +4201,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
42234201
; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
42244202
; UNALIGNED_GFX12: ; %bb.0: ; %bb
42254203
; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4226-
; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4227-
; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4228-
; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4204+
; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:-16 scope:SCOPE_SYS
42294205
; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
42304206
; UNALIGNED_GFX12-NEXT: s_endpgm
42314207
bb:

0 commit comments

Comments
 (0)