Skip to content

Commit 9091108

Browse files
authored
AMDGPU: Fold mov imm to copy to av_32 class (llvm#155428)
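Previously we special-cased folding an inline immediate into a copy to AGPR_32 and ignored AV_32. Instead, drop the special case and try folding through the generic compatible-mov search by adding V_ACCVGPR_WRITE_B32_e64 and AV_MOV_B32_IMM_PSEUDO to the list of candidate mov opcodes. Not sure why the true16 case regressed.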
1 parent 143f5e1 commit 9091108

File tree

7 files changed

+411
-134
lines changed

7 files changed

+411
-134
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1260,17 +1260,6 @@ void SIFoldOperandsImpl::foldOperand(
12601260
return;
12611261

12621262
const TargetRegisterClass *DestRC = TRI->getRegClassForReg(*MRI, DestReg);
1263-
if (!DestReg.isPhysical() && DestRC == &AMDGPU::AGPR_32RegClass) {
1264-
std::optional<int64_t> UseImmVal = OpToFold.getEffectiveImmVal();
1265-
if (UseImmVal && TII->isInlineConstant(
1266-
*UseImmVal, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
1267-
UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));
1268-
UseMI->getOperand(1).ChangeToImmediate(*UseImmVal);
1269-
CopiesToReplace.push_back(UseMI);
1270-
return;
1271-
}
1272-
}
1273-
12741263
// Allow immediates COPYd into sgpr_lo16 to be further folded while
12751264
// still being legal if not further folded
12761265
if (DestRC == &AMDGPU::SGPR_LO16RegClass) {
@@ -1283,7 +1272,8 @@ void SIFoldOperandsImpl::foldOperand(
12831272
// MOV. Find a compatible mov instruction with the value.
12841273
for (unsigned MovOp :
12851274
{AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
1286-
AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64}) {
1275+
AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64,
1276+
AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO}) {
12871277
const MCInstrDesc &MovDesc = TII->get(MovOp);
12881278
assert(MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1);
12891279

llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -166,14 +166,13 @@ define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(ptr addr
166166
; GFX942-ARCH-FLAT: ; %bb.0:
167167
; GFX942-ARCH-FLAT-NEXT: s_load_dword s2, s[4:5], 0x0
168168
; GFX942-ARCH-FLAT-NEXT: s_mov_b64 s[0:1], src_private_base
169-
; GFX942-ARCH-FLAT-NEXT: s_mov_b32 s0, 0
170-
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v2, s0
169+
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v2, 0
171170
; GFX942-ARCH-FLAT-NEXT: s_waitcnt lgkmcnt(0)
172171
; GFX942-ARCH-FLAT-NEXT: s_cmp_lg_u32 s2, -1
173-
; GFX942-ARCH-FLAT-NEXT: s_cselect_b32 s1, s1, 0
174-
; GFX942-ARCH-FLAT-NEXT: s_cselect_b32 s2, s2, 0
175-
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v0, s2
176-
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v1, s1
172+
; GFX942-ARCH-FLAT-NEXT: s_cselect_b32 s0, s1, 0
173+
; GFX942-ARCH-FLAT-NEXT: s_cselect_b32 s1, s2, 0
174+
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v0, s1
175+
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v1, s0
177176
; GFX942-ARCH-FLAT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1
178177
; GFX942-ARCH-FLAT-NEXT: s_waitcnt vmcnt(0)
179178
; GFX942-ARCH-FLAT-NEXT: s_endpgm

llvm/test/CodeGen/AMDGPU/flat-scratch.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -463,8 +463,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
463463
; GFX942-LABEL: store_load_sindex_kernel:
464464
; GFX942: ; %bb.0: ; %bb
465465
; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24
466-
; GFX942-NEXT: s_mov_b32 s1, 15
467-
; GFX942-NEXT: v_mov_b32_e32 v0, s1
466+
; GFX942-NEXT: v_mov_b32_e32 v0, 15
468467
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
469468
; GFX942-NEXT: s_lshl_b32 s1, s0, 2
470469
; GFX942-NEXT: s_and_b32 s0, s0, 15
@@ -611,9 +610,8 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
611610
;
612611
; GFX942-LABEL: store_load_sindex_foo:
613612
; GFX942: ; %bb.0: ; %bb
614-
; GFX942-NEXT: s_mov_b32 s2, 15
615613
; GFX942-NEXT: s_lshl_b32 s1, s0, 2
616-
; GFX942-NEXT: v_mov_b32_e32 v0, s2
614+
; GFX942-NEXT: v_mov_b32_e32 v0, 15
617615
; GFX942-NEXT: s_and_b32 s0, s0, 15
618616
; GFX942-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
619617
; GFX942-NEXT: s_waitcnt vmcnt(0)
@@ -1590,8 +1588,7 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
15901588
; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24
15911589
; GFX942-NEXT: scratch_load_dword v0, off, off sc0 sc1
15921590
; GFX942-NEXT: s_waitcnt vmcnt(0)
1593-
; GFX942-NEXT: s_mov_b32 s1, 15
1594-
; GFX942-NEXT: v_mov_b32_e32 v0, s1
1591+
; GFX942-NEXT: v_mov_b32_e32 v0, 15
15951592
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
15961593
; GFX942-NEXT: s_lshl_b32 s1, s0, 2
15971594
; GFX942-NEXT: s_and_b32 s0, s0, 15
@@ -1808,10 +1805,9 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) {
18081805
; GFX942-NEXT: scratch_load_dword v0, off, off sc0 sc1
18091806
; GFX942-NEXT: s_waitcnt vmcnt(0)
18101807
; GFX942-NEXT: s_lshl_b32 s1, s0, 2
1811-
; GFX942-NEXT: s_mov_b32 s2, 15
18121808
; GFX942-NEXT: s_and_b32 s0, s0, 15
18131809
; GFX942-NEXT: s_addk_i32 s1, 0x100
1814-
; GFX942-NEXT: v_mov_b32_e32 v0, s2
1810+
; GFX942-NEXT: v_mov_b32_e32 v0, 15
18151811
; GFX942-NEXT: s_lshl_b32 s0, s0, 2
18161812
; GFX942-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
18171813
; GFX942-NEXT: s_waitcnt vmcnt(0)
@@ -2888,8 +2884,7 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
28882884
; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24
28892885
; GFX942-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1
28902886
; GFX942-NEXT: s_waitcnt vmcnt(0)
2891-
; GFX942-NEXT: s_mov_b32 s1, 15
2892-
; GFX942-NEXT: v_mov_b32_e32 v0, s1
2887+
; GFX942-NEXT: v_mov_b32_e32 v0, 15
28932888
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
28942889
; GFX942-NEXT: s_lshl_b32 s1, s0, 2
28952890
; GFX942-NEXT: s_and_b32 s0, s0, 15
@@ -3106,10 +3101,9 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) {
31063101
; GFX942-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1
31073102
; GFX942-NEXT: s_waitcnt vmcnt(0)
31083103
; GFX942-NEXT: s_lshl_b32 s1, s0, 2
3109-
; GFX942-NEXT: s_mov_b32 s2, 15
31103104
; GFX942-NEXT: s_and_b32 s0, s0, 15
31113105
; GFX942-NEXT: s_addk_i32 s1, 0x4004
3112-
; GFX942-NEXT: v_mov_b32_e32 v0, s2
3106+
; GFX942-NEXT: v_mov_b32_e32 v0, 15
31133107
; GFX942-NEXT: s_lshl_b32 s0, s0, 2
31143108
; GFX942-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
31153109
; GFX942-NEXT: s_waitcnt vmcnt(0)

0 commit comments

Comments
 (0)