Skip to content

Commit db08d78

Browse files
authored
AMDGPU: Handle v_mfma_f64_16x16x4_f64 srcc write VGPR hazard change for gfx950 (#117283)
On gfx950, the number of wait states required before a following SGEMM/DGEMM MFMA reads, as SrcC, a VGPR written by v_mfma_f64_16x16x4_f64 increases from 9 to 17.
1 parent 8cb6c99 commit db08d78

File tree

2 files changed

+38
-13
lines changed

2 files changed

+38
-13
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2302,6 +2302,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
23022302
const int SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates = 9;
23032303
const int SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates = 17;
23042304
const int DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 9;
2305+
const int GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 17;
23052306
const int DMFMA4x4WritesVGPROverlappedSrcCWaitStates = 4;
23062307
const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5;
23072308
const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11;
@@ -2359,7 +2360,10 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
23592360
case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
23602361
case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
23612362
if (!isXDL(ST, *MI))
2362-
NeedWaitStates = DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
2363+
NeedWaitStates =
2364+
ST.hasGFX950Insts()
2365+
? GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates
2366+
: DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
23632367
break;
23642368
case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
23652369
case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -298,8 +298,12 @@ body: |
298298
...
299299
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap
300300
# GCN: V_MFMA
301-
# GCN-NEXT: S_NOP 7
302-
# GCN-NEXT: S_NOP 0
301+
# GFX940-NEXT: S_NOP 7
302+
# GFX940-NEXT: S_NOP 0
303+
304+
# GFX950-NEXT: S_NOP 7
305+
# GFX950-NEXT: S_NOP 7
306+
# GFX950-NEXT: S_NOP 0
303307
# GCN-NEXT: V_MFMA
304308
name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap
305309
body: |
@@ -319,8 +323,12 @@ body: |
319323
...
320324
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap
321325
# GCN: V_MFMA
322-
# GCN-NEXT: S_NOP 7
323-
# GCN-NEXT: S_NOP 0
326+
# GFX940-NEXT: S_NOP 7
327+
# GFX940-NEXT: S_NOP 0
328+
329+
# GFX950-NEXT: S_NOP 7
330+
# GFX950-NEXT: S_NOP 7
331+
# GFX950-NEXT: S_NOP 0
324332
# GCN-NEXT: V_MFMA
325333
name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap
326334
body: |
@@ -549,8 +557,12 @@ body: |
549557
...
550558
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
551559
# GCN: V_MFMA
552-
# GCN-NEXT: S_NOP 7
553-
# GCN-NEXT: S_NOP 2
560+
# GFX940-NEXT: S_NOP 7
561+
# GFX940-NEXT: S_NOP 2
562+
563+
# GFX950-NEXT: S_NOP 7
564+
# GFX950-NEXT: S_NOP 7
565+
# GFX950-NEXT: S_NOP 0
554566
# GCN-NEXT: V_MFMA
555567
name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
556568
body: |
@@ -1333,8 +1345,12 @@ body: |
13331345
...
13341346
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_read_overlap
13351347
# GCN: V_MFMA
1336-
# GCN-NEXT: S_NOP 7
1337-
# GCN-NEXT: S_NOP 0
1348+
# GFX940-NEXT: S_NOP 7
1349+
# GFX940-NEXT: S_NOP 0
1350+
1351+
# GFX950-NEXT: S_NOP 7
1352+
# GFX950-NEXT: S_NOP 7
1353+
# GFX950-NEXT: S_NOP 0
13381354
# GCN-NEXT: V_MFMA
13391355
name: dgemm16x16_mfma_write_agpr_mfma_read_overlap
13401356
body: |
@@ -1354,8 +1370,13 @@ body: |
13541370
...
13551371
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap
13561372
# GCN: V_MFMA
1357-
# GCN-NEXT: S_NOP 7
1358-
# GCN-NEXT: S_NOP 0
1373+
# GFX940-NEXT: S_NOP 7
1374+
# GFX940-NEXT: S_NOP 0
1375+
1376+
# GFX950-NEXT: S_NOP 7
1377+
# GFX950-NEXT: S_NOP 7
1378+
# GFX950-NEXT: S_NOP 0
1379+
13591380
# GCN-NEXT: V_MFMA
13601381
name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap
13611382
body: |
@@ -2502,8 +2523,8 @@ body: |
25022523
...
25032524
# GCN-LABEL: name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc
25042525
# GCN: V_MFMA
2505-
# GFX940: S_NOP 4
2506-
# GFX950: S_NOP 5
2526+
# GFX940-NEXT: S_NOP 4
2527+
# GFX950-NEXT: S_NOP 5
25072528
# GCN-NEXT: V_SMFMAC_
25082529
name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc
25092530
body: |

0 commit comments

Comments
 (0)