Skip to content

Commit b078b88

Browse files
authored
AMDGPU: Handle gfx950 change in mfma_f64_16x16x4 + valu hazard (#117262)
On gfx950, increase the wait states required between a DMFMA 16x16 VGPR write and a subsequent VALU read from 11 to 19.
1 parent: 33c2b20 · commit: b078b88

File tree

2 files changed: 28 additions and 10 deletions.

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2603,6 +2603,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
26032603
const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18;
26042604
const int DMFMA4x4WriteVgprVALUReadWaitStates = 6;
26052605
const int DMFMA16x16WriteVgprVALUReadWaitStates = 11;
2606+
const int GFX950_DMFMA16x16WriteVgprVALUReadWaitStates = 19;
26062607
const int DotWriteSameDotReadSrcAB = 3;
26072608
const int DotWriteDifferentVALURead = 3;
26082609
const int DMFMABetweenVALUWriteVMEMRead = 2;
@@ -2663,9 +2664,12 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
26632664
break;
26642665
case 8:
26652666
case 16:
2666-
NeedWaitStates = IsMemOrExport
2667-
? DMFMA16x16WriteVgprMemExpReadWaitStates
2668-
: DMFMA16x16WriteVgprVALUReadWaitStates;
2667+
NeedWaitStates =
2668+
IsMemOrExport
2669+
? DMFMA16x16WriteVgprMemExpReadWaitStates
2670+
: (ST.hasGFX950Insts()
2671+
? GFX950_DMFMA16x16WriteVgprVALUReadWaitStates
2672+
: DMFMA16x16WriteVgprVALUReadWaitStates);
26692673
break;
26702674
default:
26712675
llvm_unreachable("unexpected dgemm");

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
1+
# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX940 %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX950 %s
23

34
# GCN-LABEL: name: valu_write_vgpr_sgemm_mfma_read
45
# GCN: V_MOV_B32
@@ -803,8 +804,12 @@ body: |
803804
...
804805
# GCN-LABEL: name: dmfma16x16_write_vgpr_valu_read
805806
# GCN: V_MFMA
806-
# GCN-NEXT: S_NOP 7
807-
# GCN-NEXT: S_NOP 2
807+
# GFX940-NEXT: S_NOP 7
808+
# GFX940-NEXT: S_NOP 2
809+
810+
# GFX950-NEXT: S_NOP 7
811+
# GFX950-NEXT: S_NOP 7
812+
# GFX950-NEXT: S_NOP 2
808813
# GCN-NEXT: V_MOV_B32
809814
name: dmfma16x16_write_vgpr_valu_read
810815
body: |
@@ -867,8 +872,13 @@ body: |
867872
...
868873
# GCN-LABEL: name: dmfma16x16_write_vgpr_dot_read
869874
# GCN: V_MFMA
870-
# GCN-NEXT: S_NOP 7
871-
# GCN-NEXT: S_NOP 2
875+
# GFX940-NEXT: S_NOP 7
876+
# GFX940-NEXT: S_NOP 2
877+
878+
# GFX950-NEXT: S_NOP 7
879+
# GFX950-NEXT: S_NOP 7
880+
# GFX950-NEXT: S_NOP 2
881+
872882
# GCN-NEXT: V_DOT
873883
name: dmfma16x16_write_vgpr_dot_read
874884
body: |
@@ -1505,8 +1515,12 @@ body: |
15051515
...
15061516
# GCN-LABEL: name: dmfma16x16_write_agpr_valu_read
15071517
# GCN: V_MFMA
1508-
# GCN-NEXT: S_NOP 7
1509-
# GCN-NEXT: S_NOP 2
1518+
# GFX940-NEXT: S_NOP 7
1519+
# GFX940-NEXT: S_NOP 2
1520+
1521+
# GFX950-NEXT: S_NOP 7
1522+
# GFX950-NEXT: S_NOP 7
1523+
# GFX950-NEXT: S_NOP 2
15101524
# GCN-NEXT: V_ACCVGPR_READ_B32_e64
15111525
name: dmfma16x16_write_agpr_valu_read
15121526
body: |

0 commit comments

Comments
 (0)