Skip to content

Commit c6dbcb8

Browse files
kuhar, rkayaith
authored and committed
Revert "[AMDGPU] Reenable BackOffBarrier on GFX11/12 (llvm#155370)"
This reverts commit d6edc1a.
1 parent 7123463 commit c6dbcb8

File tree

8 files changed

+9
-229
lines changed

8 files changed

+9
-229
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1914,7 +1914,6 @@ def FeatureISAVersion10_3_Generic: FeatureSet<
19141914

19151915
def FeatureISAVersion11_Common : FeatureSet<
19161916
[FeatureGFX11,
1917-
FeatureBackOffBarrier,
19181917
FeatureLDSBankCount32,
19191918
FeatureDLInsts,
19201919
FeatureDot5Insts,
@@ -1998,7 +1997,6 @@ def FeatureISAVersion11_5_3 : FeatureSet<
19981997

19991998
def FeatureISAVersion12 : FeatureSet<
20001999
[FeatureGFX12,
2001-
FeatureBackOffBarrier,
20022000
FeatureAddressableLocalMemorySize65536,
20032001
FeatureLDSBankCount32,
20042002
FeatureDLInsts,

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -987,19 +987,13 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
987987
return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
988988
}
989989

990-
// Check to see if opcode is for a barrier start. Pre gfx12 this is just the
991-
// S_BARRIER, but after support for S_BARRIER_SIGNAL* / S_BARRIER_WAIT we want
992-
// to check for the barrier start (S_BARRIER_SIGNAL*)
993-
bool isBarrierStart(unsigned Opcode) const {
990+
bool isBarrier(unsigned Opcode) const {
994991
return Opcode == AMDGPU::S_BARRIER ||
995992
Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 ||
996993
Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 ||
997994
Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM ||
998-
Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
999-
}
1000-
1001-
bool isBarrier(unsigned Opcode) const {
1002-
return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
995+
Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM ||
996+
Opcode == AMDGPU::S_BARRIER_WAIT ||
1003997
Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
1004998
Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
1005999
Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||

llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp

Lines changed: 1 addition & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -370,12 +370,6 @@ class SICacheControl {
370370
bool IsCrossAddrSpaceOrdering,
371371
Position Pos) const = 0;
372372

373-
/// Inserts any necessary instructions before the barrier start instruction
374-
/// \p MI in order to support pairing of barriers and fences.
375-
virtual bool insertBarrierStart(MachineBasicBlock::iterator &MI) const {
376-
return false;
377-
};
378-
379373
/// Virtual destructor to allow derivations to be deleted.
380374
virtual ~SICacheControl() = default;
381375
};
@@ -564,8 +558,6 @@ class SIGfx10CacheControl : public SIGfx7CacheControl {
564558
SIAtomicScope Scope,
565559
SIAtomicAddrSpace AddrSpace,
566560
Position Pos) const override;
567-
568-
bool insertBarrierStart(MachineBasicBlock::iterator &MI) const override;
569561
};
570562

571563
class SIGfx11CacheControl : public SIGfx10CacheControl {
@@ -2192,21 +2184,6 @@ bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
21922184
return Changed;
21932185
}
21942186

2195-
bool SIGfx10CacheControl::insertBarrierStart(
2196-
MachineBasicBlock::iterator &MI) const {
2197-
// We need to wait on vm_vsrc so barriers can pair with fences in GFX10+ CU
2198-
// mode. This is because a CU mode release fence does not emit any wait, which
2199-
// is fine when only dealing with vmem, but isn't sufficient in the presence
2200-
// of barriers which do not go through vmem.
2201-
if (!ST.isCuModeEnabled())
2202-
return false;
2203-
2204-
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
2205-
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
2206-
.addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
2207-
return true;
2208-
}
2209-
22102187
bool SIGfx11CacheControl::enableLoadCacheBypass(
22112188
const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
22122189
SIAtomicAddrSpace AddrSpace) const {
@@ -2901,8 +2878,7 @@ bool SIMemoryLegalizer::run(MachineFunction &MF) {
29012878
bool Changed = false;
29022879

29032880
SIMemOpAccess MOA(MMI.getObjFileInfo<AMDGPUMachineModuleInfo>());
2904-
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2905-
CC = SICacheControl::create(ST);
2881+
CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());
29062882

29072883
for (auto &MBB : MF) {
29082884
for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
@@ -2922,11 +2898,6 @@ bool SIMemoryLegalizer::run(MachineFunction &MF) {
29222898
MI = II->getIterator();
29232899
}
29242900

2925-
if (ST.getInstrInfo()->isBarrierStart(MI->getOpcode())) {
2926-
Changed |= CC->insertBarrierStart(MI);
2927-
continue;
2928-
}
2929-
29302901
if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
29312902
continue;
29322903

llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -54,8 +54,9 @@ define void @back_off_barrier_no_fence(ptr %in, ptr %out) #0 {
5454
; GFX11-BACKOFF: ; %bb.0:
5555
; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5656
; GFX11-BACKOFF-NEXT: flat_load_b32 v0, v[0:1]
57-
; GFX11-BACKOFF-NEXT: s_barrier
5857
; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
58+
; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
59+
; GFX11-BACKOFF-NEXT: s_barrier
5960
; GFX11-BACKOFF-NEXT: flat_store_b32 v[2:3], v0
6061
; GFX11-BACKOFF-NEXT: s_waitcnt lgkmcnt(0)
6162
; GFX11-BACKOFF-NEXT: s_setpc_b64 s[30:31]

llvm/test/CodeGen/AMDGPU/lds-dma-workgroup-release.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -150,7 +150,6 @@ define amdgpu_kernel void @barrier_release(<4 x i32> inreg %rsrc,
150150
; GFX10CU-NEXT: buffer_load_dword v0, s[8:11], 0 offen lds
151151
; GFX10CU-NEXT: v_mov_b32_e32 v0, s13
152152
; GFX10CU-NEXT: s_waitcnt vmcnt(0)
153-
; GFX10CU-NEXT: s_waitcnt_depctr 0xffe3
154153
; GFX10CU-NEXT: s_barrier
155154
; GFX10CU-NEXT: ds_read_b32 v0, v0
156155
; GFX10CU-NEXT: s_waitcnt lgkmcnt(0)

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -213,9 +213,9 @@ define weak_odr amdgpu_kernel void @dpp_test1(ptr %arg) local_unnamed_addr {
213213
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
214214
; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0
215215
; GFX11-NEXT: ds_load_b32 v1, v0
216+
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
216217
; GFX11-NEXT: s_barrier
217218
; GFX11-NEXT: buffer_gl0_inv
218-
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
219219
; GFX11-NEXT: v_add_co_u32 v0, s0, s0, v0
220220
; GFX11-NEXT: v_add_nc_u32_e32 v1, v1, v1
221221
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)

llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers.ll

Lines changed: 0 additions & 183 deletions
This file was deleted.

llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ body: |
6262
; GFX11-NEXT: {{ $}}
6363
; GFX11-NEXT: S_WAITCNT 0
6464
; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
65-
; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 1
65+
; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
6666
; GFX11-NEXT: S_BARRIER
6767
; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
6868
; GFX11-NEXT: S_WAITCNT 7
@@ -176,7 +176,7 @@ body: |
176176
; GFX11-NEXT: {{ $}}
177177
; GFX11-NEXT: S_WAITCNT 0
178178
; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
179-
; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 1
179+
; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
180180
; GFX11-NEXT: S_BARRIER
181181
; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
182182
; GFX11-NEXT: S_WAITCNT 7

0 commit comments

Comments
 (0)