Skip to content

Commit 82046c7

Browse files
authored
[AMDGPU] Adjust hard clause rules for gfx1250 (llvm#152592)
Change from GFX12: Relax the S_CLAUSE rules to allow all non-flat memory types in the same clause, and all Flat types in the same clause. For VMEM/FLAT, the clause types now look like:
- Non-Flat (load, store, atomic): buffer, global, scratch, TDM, Async
- Flat: load, store, atomic
1 parent 49ccf46 commit 82046c7

File tree

5 files changed

+617
-7
lines changed

5 files changed

+617
-7
lines changed

llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ static cl::opt<unsigned>
5151
namespace {
5252

5353
enum HardClauseType {
54-
// For GFX10:
54+
// For GFX10 and GFX1250:
5555

5656
// Texture, buffer, global or scratch memory instructions.
5757
HARDCLAUSE_VMEM,
@@ -102,7 +102,8 @@ class SIInsertHardClauses {
102102

103103
HardClauseType getHardClauseType(const MachineInstr &MI) {
104104
if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
105-
if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
105+
if (ST->getGeneration() == AMDGPUSubtarget::GFX10 ||
106+
ST->hasGFX1250Insts()) {
106107
if ((SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) ||
107108
SIInstrInfo::isSegmentSpecificFLAT(MI)) {
108109
if (ST->hasNSAClauseBug()) {
@@ -115,7 +116,6 @@ class SIInsertHardClauses {
115116
if (SIInstrInfo::isFLAT(MI))
116117
return HARDCLAUSE_FLAT;
117118
} else {
118-
assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11);
119119
if (SIInstrInfo::isMIMG(MI)) {
120120
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
121121
const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =

llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
284284
; GFX1250-SDAG-NEXT: v_subrev_nc_u32_e32 v0, s1, v4
285285
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
286286
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
287+
; GFX1250-SDAG-NEXT: s_clause 0x1
287288
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
288289
; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
289290
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
@@ -329,6 +330,7 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
329330
; GFX1250-GISEL-NEXT: v_subrev_nc_u32_e32 v0, s1, v6
330331
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
331332
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v0, vcc_lo
333+
; GFX1250-GISEL-NEXT: s_clause 0x1
332334
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off
333335
; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[4:5], off scope:SCOPE_SE
334336
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
@@ -382,6 +384,7 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn_neg128(ptr inreg %sbase, i
382384
; GFX1250-SDAG-NEXT: v_subrev_nc_u32_e32 v0, s1, v4
383385
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
384386
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
387+
; GFX1250-SDAG-NEXT: s_clause 0x1
385388
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
386389
; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
387390
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
@@ -430,6 +433,7 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn_neg128(ptr inreg %sbase, i
430433
; GFX1250-GISEL-NEXT: v_subrev_nc_u32_e32 v0, s1, v6
431434
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
432435
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v0, vcc_lo
436+
; GFX1250-GISEL-NEXT: s_clause 0x1
433437
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off
434438
; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[4:5], off scope:SCOPE_SE
435439
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0

llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,8 +244,9 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
244244
; GCN-GISEL-NEXT: global_load_b128 v[60:63], v[0:1], off offset:16
245245
; GCN-GISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:240
246246
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
247-
; GCN-GISEL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:64 scope:SCOPE_SE ; 16-byte Folded Spill
248-
; GCN-GISEL-NEXT: scratch_load_b128 v[0:3], off, s32 offset:80 th:TH_LOAD_LU ; 16-byte Folded Reload
247+
; GCN-GISEL-NEXT: s_clause 0x1
248+
; GCN-GISEL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:64 scope:SCOPE_SE
249+
; GCN-GISEL-NEXT: scratch_load_b128 v[0:3], off, s32 offset:80 th:TH_LOAD_LU
249250
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
250251
; GCN-GISEL-NEXT: s_clause 0xe
251252
; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[0:3], off offset:32

0 commit comments

Comments
 (0)