Skip to content

Commit 85601fd

Browse files
authored
AMDGPU: Handle v_mfma_f64_16x16x4_f64 write VGPR read srca/srcb hazard change for gfx950 (#117284)
Increase the required wait states from 11 to 19. The index for smfmac counts the same as srcA/srcB.
1 parent db08d78 commit 85601fd

File tree

2 files changed

+50
-16
lines changed

2 files changed

+50
-16
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2309,6 +2309,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
23092309
const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19;
23102310
const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6;
23112311
const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11;
2312+
const int GFX950_DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 19;
23122313
const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4;
23132314
const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2;
23142315
const int MaxWaitStates = 19;
@@ -2414,7 +2415,10 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
24142415
case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
24152416
case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
24162417
case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
2417-
NeedWaitStates = DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates;
2418+
NeedWaitStates =
2419+
ST.hasGFX950Insts()
2420+
? GFX950_DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates
2421+
: DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates;
24182422
break;
24192423
case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
24202424
case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -536,8 +536,12 @@ body: |
536536
...
537537
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap
538538
# GCN: V_MFMA
539-
# GCN-NEXT: S_NOP 7
540-
# GCN-NEXT: S_NOP 2
539+
# GFX940-NEXT: S_NOP 7
540+
# GFX940-NEXT: S_NOP 2
541+
542+
# GFX950-NEXT: S_NOP 7
543+
# GFX950-NEXT: S_NOP 7
544+
# GFX950-NEXT: S_NOP 2
541545
# GCN-NEXT: V_MFMA
542546
name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap
543547
body: |
@@ -562,7 +566,7 @@ body: |
562566

563567
# GFX950-NEXT: S_NOP 7
564568
# GFX950-NEXT: S_NOP 7
565-
# GFX950-NEXT: S_NOP 0
569+
# GFX950-NEXT: S_NOP 2
566570
# GCN-NEXT: V_MFMA
567571
name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
568572
body: |
@@ -632,8 +636,12 @@ body: |
632636
...
633637
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap
634638
# GCN: V_MFMA
635-
# GCN-NEXT: S_NOP 7
636-
# GCN-NEXT: S_NOP 2
639+
# GFX940-NEXT: S_NOP 7
640+
# GFX940-NEXT: S_NOP 2
641+
642+
# GFX950-NEXT: S_NOP 7
643+
# GFX950-NEXT: S_NOP 7
644+
# GFX950-NEXT: S_NOP 2
637645
# GCN-NEXT: V_MFMA
638646
name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap
639647
body: |
@@ -643,8 +651,12 @@ body: |
643651
...
644652
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap
645653
# GCN: V_MFMA
646-
# GCN-NEXT: S_NOP 7
647-
# GCN-NEXT: S_NOP 2
654+
# GFX940-NEXT: S_NOP 7
655+
# GFX940-NEXT: S_NOP 2
656+
657+
# GFX950-NEXT: S_NOP 7
658+
# GFX950-NEXT: S_NOP 7
659+
# GFX950-NEXT: S_NOP 2
648660
# GCN-NEXT: V_SMFMAC
649661
name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap
650662
body: |
@@ -654,8 +666,13 @@ body: |
654666
...
655667
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap
656668
# GCN: V_MFMA
657-
# GCN-NEXT: S_NOP 7
658-
# GCN-NEXT: S_NOP 2
669+
# GFX940-NEXT: S_NOP 7
670+
# GFX940-NEXT: S_NOP 2
671+
672+
# GFX950-NEXT: S_NOP 7
673+
# GFX950-NEXT: S_NOP 7
674+
# GFX950-NEXT: S_NOP 2
675+
659676
# GCN-NEXT: V_SMFMAC
660677
name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap
661678
body: |
@@ -1452,8 +1469,12 @@ body: |
14521469
...
14531470
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
14541471
# GCN: V_MFMA
1455-
# GCN-NEXT: S_NOP 7
1456-
# GCN-NEXT: S_NOP 2
1472+
# GFX940-NEXT: S_NOP 7
1473+
# GFX940-NEXT: S_NOP 2
1474+
1475+
# GFX950-NEXT: S_NOP 7
1476+
# GFX950-NEXT: S_NOP 7
1477+
# GFX950-NEXT: S_NOP 2
14571478
# GCN-NEXT: V_MFMA
14581479
name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
14591480
body: |
@@ -1473,8 +1494,13 @@ body: |
14731494
...
14741495
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap
14751496
# GCN: V_MFMA
1476-
# GCN-NEXT: S_NOP 7
1477-
# GCN-NEXT: S_NOP 2
1497+
# GFX940-NEXT: S_NOP 7
1498+
# GFX940-NEXT: S_NOP 2
1499+
1500+
# GFX950-NEXT: S_NOP 7
1501+
# GFX950-NEXT: S_NOP 7
1502+
# GFX950-NEXT: S_NOP 2
1503+
14781504
# GCN-NEXT: V_MFMA
14791505
name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap
14801506
body: |
@@ -1504,8 +1530,12 @@ body: |
15041530
...
15051531
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap
15061532
# GCN: V_MFMA
1507-
# GCN-NEXT: S_NOP 7
1508-
# GCN-NEXT: S_NOP 2
1533+
# GFX940-NEXT: S_NOP 7
1534+
# GFX940-NEXT: S_NOP 2
1535+
1536+
# GFX950-NEXT: S_NOP 7
1537+
# GFX950-NEXT: S_NOP 7
1538+
# GFX950-NEXT: S_NOP 2
15091539
# GCN-NEXT: V_MFMA
15101540
name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap
15111541
body: |

0 commit comments

Comments
 (0)