Skip to content

Commit 0e931ed

Browse files
committed
Address comments & update test after SGPR CSR change
1 parent 1ff7e72 commit 0e931ed

File tree

5 files changed: +16 additions, -31 deletions (lines changed)

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1723,8 +1723,8 @@ static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
17231723
if (!CanUseBlockOps(*CSIt))
17241724
continue;
17251725

1726-
// Find all the regs that will fit in a 32-bit block starting at the current
1727-
// reg and build the mask. It should have 1 for every register that's
1726+
// Find all the regs that will fit in a 32-bit mask starting at the current
1727+
// reg and build said mask. It should have 1 for every register that's
17281728
// included, with the current register as the least significant bit.
17291729
uint32_t Mask = 1;
17301730
CSEnd = std::remove_if(
@@ -1737,8 +1737,7 @@ static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
17371737
}
17381738
});
17391739

1740-
const TargetRegisterClass *BlockRegClass =
1741-
TII->getRegClassForBlockOp(TRI, MF);
1740+
const TargetRegisterClass *BlockRegClass = &AMDGPU::VReg_1024RegClass;
17421741
Register RegBlock =
17431742
MRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass);
17441743
if (!RegBlock) {

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -5831,16 +5831,6 @@ const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID,
58315831
IsAllocatable);
58325832
}
58335833

5834-
const TargetRegisterClass *
5835-
SIInstrInfo::getRegClassForBlockOp(const TargetRegisterInfo *TRI,
5836-
const MachineFunction &MF) const {
5837-
const MCInstrDesc &ScratchStoreBlockOp =
5838-
get(AMDGPU::SCRATCH_STORE_BLOCK_SADDR);
5839-
int VDataIdx = AMDGPU::getNamedOperandIdx(ScratchStoreBlockOp.getOpcode(),
5840-
AMDGPU::OpName::vdata);
5841-
return getRegClass(ScratchStoreBlockOp, VDataIdx, TRI, MF);
5842-
}
5843-
58445834
const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
58455835
unsigned OpNo) const {
58465836
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1460,10 +1460,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
14601460
const MachineFunction &MF)
14611461
const override;
14621462

1463-
const TargetRegisterClass *
1464-
getRegClassForBlockOp(const TargetRegisterInfo *TRI,
1465-
const MachineFunction &MF) const;
1466-
14671463
void fixImplicitOperands(MachineInstr &MI) const;
14681464

14691465
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,

llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -180,7 +180,7 @@ body: |
180180
bb.0:
181181
liveins: $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40
182182
; W32-LABEL: name: other_regs
183-
; W32: liveins: $sgpr42, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71
183+
; W32: liveins: $sgpr48, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71
184184
; W32-NEXT: {{ $}}
185185
; W32-NEXT: $sgpr0 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
186186
; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
@@ -189,10 +189,10 @@ body: |
189189
; W32-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0
190190
; W32-NEXT: $m0 = S_MOV_B32 9
191191
; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.4, align 4, addrspace 5)
192-
; W32-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr42, 0, $vgpr44
193-
; W32-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr42, implicit-def $m0, implicit-def $exec
192+
; W32-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr48, 0, $vgpr44
193+
; W32-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec
194194
; W32-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40
195-
; W32-NEXT: $sgpr42 = SI_RESTORE_S32_FROM_VGPR $vgpr44, 0
195+
; W32-NEXT: $sgpr48 = SI_RESTORE_S32_FROM_VGPR $vgpr44, 0
196196
; W32-NEXT: $m0 = S_MOV_B32 9
197197
; W32-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.4, align 4, addrspace 5)
198198
; W32-NEXT: $sgpr0 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
@@ -203,7 +203,7 @@ body: |
203203
; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
204204
;
205205
; W64-LABEL: name: other_regs
206-
; W64: liveins: $sgpr42, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71
206+
; W64: liveins: $sgpr48, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71
207207
; W64-NEXT: {{ $}}
208208
; W64-NEXT: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
209209
; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
@@ -212,10 +212,10 @@ body: |
212212
; W64-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
213213
; W64-NEXT: $m0 = S_MOV_B32 9
214214
; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.4, align 4, addrspace 5)
215-
; W64-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr42, 0, $vgpr44
216-
; W64-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr42, implicit-def $m0, implicit-def $exec
215+
; W64-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr48, 0, $vgpr44
216+
; W64-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec
217217
; W64-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40
218-
; W64-NEXT: $sgpr42 = SI_RESTORE_S32_FROM_VGPR $vgpr44, 0
218+
; W64-NEXT: $sgpr48 = SI_RESTORE_S32_FROM_VGPR $vgpr44, 0
219219
; W64-NEXT: $m0 = S_MOV_B32 9
220220
; W64-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.4, align 4, addrspace 5)
221221
; W64-NEXT: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
@@ -224,7 +224,7 @@ body: |
224224
; W64-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
225225
; W64-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
226226
; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
227-
S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr42, implicit-def $m0, implicit-def $exec
227+
S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec
228228
S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40
229229
230230
S_SETPC_B64_return $sgpr30_sgpr31

llvm/test/CodeGen/AMDGPU/spill-vgpr-block.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ define i32 @non_entry_func(i32 %x) {
1515
; CHECK-NEXT: s_wait_alu 0xfffe
1616
; CHECK-NEXT: s_mov_b32 exec_lo, s0
1717
; CHECK-NEXT: s_mov_b32 m0, 0x110003
18-
; CHECK-NEXT: v_writelane_b32 v2, s40, 0
18+
; CHECK-NEXT: v_writelane_b32 v2, s48, 0
1919
; CHECK-NEXT: ; transferring at most VGPR40 VGPR41 VGPR56 VGPR60 ; 128-byte Folded Spill
2020
; CHECK-NEXT: scratch_store_block off, v[40:71], s32 offset:4
2121
; CHECK-NEXT: s_mov_b32 m0, 1
@@ -32,7 +32,7 @@ define i32 @non_entry_func(i32 %x) {
3232
; CHECK-NEXT: ; transferring at most VGPR40 VGPR41 VGPR56 VGPR60 ; 128-byte Folded Reload
3333
; CHECK-NEXT: scratch_load_block v[40:71], off, s32 offset:4
3434
; CHECK-NEXT: v_mov_b32_e32 v0, v1
35-
; CHECK-NEXT: v_readlane_b32 s40, v2, 0
35+
; CHECK-NEXT: v_readlane_b32 s48, v2, 0
3636
; CHECK-NEXT: s_xor_saveexec_b32 s0, -1
3737
; CHECK-NEXT: scratch_load_b32 v2, off, s32 offset:100 ; 4-byte Folded Reload
3838
; CHECK-NEXT: s_wait_alu 0xfffe
@@ -41,7 +41,7 @@ define i32 @non_entry_func(i32 %x) {
4141
; CHECK-NEXT: s_setpc_b64 s[30:31]
4242
%local = alloca i32, i32 3, addrspace(5)
4343
store i32 %x, ptr addrspace(5) %local
44-
call void asm "s_nop", "~{v0},~{v8},~{v40},~{v41},~{v49},~{v52},~{v56},~{v60},~{v120},~{s0},~{s40}"()
44+
call void asm "s_nop", "~{v0},~{v8},~{v40},~{v41},~{v49},~{v52},~{v56},~{v60},~{v120},~{s0},~{s48}"()
4545
ret i32 %x
4646
}
4747

@@ -87,7 +87,7 @@ define amdgpu_kernel void @entry_func(i32 %x) {
8787
; DAGISEL-NEXT: v_mov_b32_e32 v0, s12
8888
; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
8989
; DAGISEL-NEXT: s_endpgm
90-
call void asm "s_nop", "~{v0},~{v8},~{v40},~{v41},~{v49},~{v52},~{v56},~{v60},~{v120},~{s0},~{s40}"()
90+
call void asm "s_nop", "~{v0},~{v8},~{v40},~{v41},~{v49},~{v52},~{v56},~{v60},~{v120},~{s0},~{s48}"()
9191
%res = call i32 @non_entry_func(i32 %x)
9292
ret void
9393
}

0 commit comments

Comments
 (0)