Skip to content

Commit 1f25c48

Browse files
authored
[AMDGPU] Mitigate DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 bug (#153872)
DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused (we already do not clause DS instructions) and needs waits before and after.
1 parent eecbaac commit 1f25c48

File tree

4 files changed

+41
-0
lines changed

4 files changed

+41
-0
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1202,6 +1202,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
12021202
fixRequiredExportPriority(MI);
12031203
if (ST.requiresWaitIdleBeforeGetReg())
12041204
fixGetRegWaitIdle(MI);
1205+
if (ST.hasDsAtomicAsyncBarrierArriveB64PipeBug())
1206+
fixDsAtomicAsyncBarrierArriveB64(MI);
12051207
}
12061208

12071209
static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
@@ -3451,3 +3453,18 @@ bool GCNHazardRecognizer::fixGetRegWaitIdle(MachineInstr *MI) {
34513453
.addImm(0);
34523454
return true;
34533455
}
3456+
3457+
// Hazard fix for DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64: brackets the instruction
// with a pair of S_WAITCNT_DEPCTR waits, one inserted immediately before MI
// and one immediately after it.
//
// Returns false (and changes nothing) when MI is any other opcode; returns
// true once both wait instructions have been inserted.
bool GCNHazardRecognizer::fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI) {
3458+
if (MI->getOpcode() != AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)
3459+
return false;
3460+
3461+
const SIInstrInfo *TII = ST.getInstrInfo();
3462+
// Wait placed in front of MI. Both waits use the same immediate, 0xFFE3
// (65507 in the MIR test output).
// NOTE(review): the per-field meaning of 0xFFE3 in the DEPCTR encoding is not
// visible here -- confirm against the ISA docs before changing it.
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
3463+
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
3464+
.addImm(0xFFE3);
3465+
// Matching wait placed right after MI: the insertion point is the iterator
// following MI in its parent block.
BuildMI(*MI->getParent(), std::next(MI->getIterator()), MI->getDebugLoc(),
3466+
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
3467+
.addImm(0xFFE3);
3468+
3469+
return true;
3470+
}

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
111111
bool fixVALUMaskWriteHazard(MachineInstr *MI);
112112
bool fixRequiredExportPriority(MachineInstr *MI);
113113
bool fixGetRegWaitIdle(MachineInstr *MI);
114+
bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
114115

115116
int checkMAIHazards(MachineInstr *MI);
116117
int checkMAIHazards908(MachineInstr *MI);

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1815,6 +1815,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
18151815
// to the same register.
18161816
return false;
18171817
}
1818+
1819+
// DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused with anything
1820+
// and must be surrounded by S_WAIT_ALU(0xFFE3).
1821+
// Subtarget predicate for the DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 hazard
// workaround: true exactly when the subtarget generation is GFX12.
// GCNHazardRecognizer::fixHazards checks this before calling
// fixDsAtomicAsyncBarrierArriveB64.
bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const {
1822+
return getGeneration() == GFX12;
1823+
}
18181824
};
18191825

18201826
class GCNUserSGPRUsageInfo {
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
3+
4+
---
5+
name: ds_atomic_async_barrier_arrive_b64
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
liveins: $vgpr0, $vgpr1
10+
; GCN-LABEL: name: ds_atomic_async_barrier_arrive_b64
11+
; GCN: liveins: $vgpr0, $vgpr1
12+
; GCN-NEXT: {{ $}}
13+
; GCN-NEXT: S_WAITCNT_DEPCTR 65507
14+
; GCN-NEXT: DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 $vgpr1, 0, 0, implicit-def $asynccnt, implicit $asynccnt, implicit $exec
15+
; GCN-NEXT: S_WAITCNT_DEPCTR 65507
16+
DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 $vgpr1, 0, 0, implicit-def $asynccnt, implicit $asynccnt, implicit $exec
17+
...

0 commit comments

Comments
 (0)