diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 1f291ce5c5342..5e297c7540c48 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1202,6 +1202,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { fixRequiredExportPriority(MI); if (ST.requiresWaitIdleBeforeGetReg()) fixGetRegWaitIdle(MI); + if (ST.hasDsAtomicAsyncBarrierArriveB64PipeBug()) + fixDsAtomicAsyncBarrierArriveB64(MI); } static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI, @@ -3451,3 +3453,18 @@ bool GCNHazardRecognizer::fixGetRegWaitIdle(MachineInstr *MI) { .addImm(0); return true; } + +bool GCNHazardRecognizer::fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI) { + if (MI->getOpcode() != AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64) + return false; + + const SIInstrInfo *TII = ST.getInstrInfo(); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(AMDGPU::S_WAITCNT_DEPCTR)) + .addImm(0xFFE3); /* wait placed before MI */ + BuildMI(*MI->getParent(), std::next(MI->getIterator()), MI->getDebugLoc(), + TII->get(AMDGPU::S_WAITCNT_DEPCTR)) + .addImm(0xFFE3); /* wait placed after MI */ + + return true; +} diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index a078f50219c3c..890d5cbd154d6 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -111,6 +111,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer { bool fixVALUMaskWriteHazard(MachineInstr *MI); bool fixRequiredExportPriority(MachineInstr *MI); bool fixGetRegWaitIdle(MachineInstr *MI); + bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI); int checkMAIHazards(MachineInstr *MI); int checkMAIHazards908(MachineInstr *MI); diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 92de024cc6fcc..436f5c0801fad 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ 
b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1815,6 +1815,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // to the same register. return false; } + + // DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused with anything + // and shall be surrounded by S_WAIT_ALU(0xFFE3). + bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const { + return getGeneration() == GFX12; + } }; class GCNUserSGPRUsageInfo { diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir new file mode 100644 index 0000000000000..f1dbabf1e1a83 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir @@ -0,0 +1,17 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s + +--- +name: ds_atomic_async_barrier_arrive_b64 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; GCN-LABEL: name: ds_atomic_async_barrier_arrive_b64 + ; GCN: liveins: $vgpr0, $vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_WAITCNT_DEPCTR 65507 + ; GCN-NEXT: DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 $vgpr1, 0, 0, implicit-def $asynccnt, implicit $asynccnt, implicit $exec + ; GCN-NEXT: S_WAITCNT_DEPCTR 65507 + DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 $vgpr1, 0, 0, implicit-def $asynccnt, implicit $asynccnt, implicit $exec +...