Skip to content

Commit e524591

Browse files
VigneshwarJ authored and jrbyrnes committed
AMDGPU: Handle gfx950 XDL-write-VGPR-VALU-Mem-Exp wait state change (llvm#126727)
1 parent d51034c commit e524591

File tree

7 files changed

+34
-15177
lines changed

7 files changed

+34
-15177
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2595,12 +2595,14 @@ static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
25952595
return NumPasses + 3;
25962596
}
25972597

2598-
static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) {
2599-
// 2 pass -> 5
2600-
// 4 pass -> 7
2601-
// 8 pass -> 11
2602-
// 16 pass -> 19
2603-
return NumPasses + 3;
2598+
static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses,
2599+
bool IsGFX950) {
2600+
// xdl def cycles | gfx940 | gfx950
2601+
// 2 pass | 5 5
2602+
// 4 pass | 7 8
2603+
// 8 pass | 11 12
2604+
// 16 pass | 19 20
2605+
return NumPasses + 3 + (NumPasses != 2 && IsGFX950);
26042606
}
26052607

26062608
static int GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) {
@@ -2751,7 +2753,8 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
27512753
} else if (ST.hasGFX940Insts()) {
27522754
NeedWaitStates =
27532755
isXDL(ST, *MFMA)
2754-
? GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(NumPasses)
2756+
? GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(
2757+
NumPasses, ST.hasGFX950Insts())
27552758
: GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(
27562759
NumPasses);
27572760
} else {

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll

Lines changed: 0 additions & 446 deletions
This file was deleted.

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll

Lines changed: 0 additions & 1929 deletions
This file was deleted.

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll

Lines changed: 0 additions & 2338 deletions
This file was deleted.

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll

Lines changed: 0 additions & 6042 deletions
This file was deleted.

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll

Lines changed: 0 additions & 4403 deletions
This file was deleted.

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

Lines changed: 24 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -734,7 +734,8 @@ body: |
734734
...
735735
# GCN-LABEL: name: smfmac16x16_write_vgpr_flat_read
736736
# GCN: V_SMFMAC
737-
# GCN-NEXT: S_NOP 6
737+
# GFX940-NEXT: S_NOP 6
738+
# GFX950-NEXT: S_NOP 7
738739
# GCN-NEXT: FLAT_STORE_DWORD
739740
name: smfmac16x16_write_vgpr_flat_read
740741
body: |
@@ -745,7 +746,8 @@ body: |
745746
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_flat_read
746747
# GCN: V_MFMA
747748
# GCN-NEXT: S_NOP 7
748-
# GCN-NEXT: S_NOP 2
749+
# GFX940-NEXT: S_NOP 2
750+
# GFX950-NEXT: S_NOP 3
749751
# GCN-NEXT: FLAT_STORE_DWORD
750752
name: xdl_smfma16x16_write_vgpr_flat_read
751753
body: |
@@ -756,7 +758,8 @@ body: |
756758
# GCN-LABEL: name: smfmac32x32_write_vgpr_flat_read
757759
# GCN: V_SMFMAC
758760
# GCN-NEXT: S_NOP 7
759-
# GCN-NEXT: S_NOP 2
761+
# GFX940-NEXT: S_NOP 2
762+
# GFX950-NEXT: S_NOP 3
760763
# GCN-NEXT: FLAT_STORE_DWORD
761764
name: smfmac32x32_write_vgpr_flat_read
762765
body: |
@@ -768,7 +771,8 @@ body: |
768771
# GCN: V_MFMA
769772
# GCN-NEXT: S_NOP 7
770773
# GCN-NEXT: S_NOP 7
771-
# GCN-NEXT: S_NOP 2
774+
# GFX940-NEXT: S_NOP 2
775+
# GFX950-NEXT: S_NOP 3
772776
# GCN-NEXT: FLAT_STORE_DWORD
773777
name: xdl_smfma32x32_write_vgpr_flat_read
774778
body: |
@@ -823,7 +827,8 @@ body: |
823827
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_read
824828
# GCN: V_MFMA
825829
# GCN-NEXT: S_NOP 7
826-
# GCN-NEXT: S_NOP 2
830+
# GFX940-NEXT: S_NOP 2
831+
# GFX950-NEXT: S_NOP 3
827832
# GCN-NEXT: V_MOV_B32
828833
name: xdl_smfma16x16_write_vgpr_valu_read
829834
body: |
@@ -835,7 +840,8 @@ body: |
835840
# GCN: V_MFMA
836841
# GCN-NEXT: S_NOP 7
837842
# GCN-NEXT: S_NOP 7
838-
# GCN-NEXT: S_NOP 2
843+
# GFX940-NEXT: S_NOP 2
844+
# GFX950-NEXT: S_NOP 3
839845
# GCN-NEXT: V_MOV_B32
840846
name: xdl_smfma32x32_write_vgpr_valu_read
841847
body: |
@@ -881,7 +887,8 @@ body: |
881887
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_accv_read
882888
# GCN: V_MFMA
883889
# GCN-NEXT: S_NOP 7
884-
# GCN-NEXT: S_NOP 2
890+
# GFX940-NEXT: S_NOP 2
891+
# GFX950-NEXT: S_NOP 3
885892
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
886893
name: xdl_smfma16x16_write_vgpr_accv_read
887894
body: |
@@ -893,7 +900,8 @@ body: |
893900
# GCN: V_MFMA
894901
# GCN-NEXT: S_NOP 7
895902
# GCN-NEXT: S_NOP 7
896-
# GCN-NEXT: S_NOP 2
903+
# GFX940-NEXT: S_NOP 2
904+
# GFX950-NEXT: S_NOP 3
897905
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
898906
name: xdl_smfma32x32_write_vgpr_accv_read
899907
body: |
@@ -1028,7 +1036,8 @@ body: |
10281036
# GCN: V_MFMA
10291037
# GCN-NEXT: S_NOP 7
10301038
# GCN-NEXT: S_NOP 7
1031-
# GCN-NEXT: S_NOP 2
1039+
# GFX940-NEXT: S_NOP 2
1040+
# GFX950-NEXT: S_NOP 3
10321041
# GCN-NEXT: V_MOV_B32_sdwa
10331042
name: xdl_smfma32x32_write_vgpr_valu_sdwa_write
10341043
body: |
@@ -1762,7 +1771,8 @@ body: |
17621771
...
17631772
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_vm_read
17641773
# GCN: V_MFMA
1765-
# GCN-NEXT: S_NOP 6
1774+
# GFX940-NEXT: S_NOP 6
1775+
# GFX950-NEXT: S_NOP 7
17661776
# GCN-NEXT: BUFFER_STORE_DWORD
17671777
name: xdl_sgemm16X16X16_mfma_write_vgpr_vm_read
17681778
body: |
@@ -1772,7 +1782,8 @@ body: |
17721782
...
17731783
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_read
17741784
# GCN: V_MFMA
1775-
# GCN-NEXT: S_NOP 6
1785+
# GFX940-NEXT: S_NOP 6
1786+
# GFX950-NEXT: S_NOP 7
17761787
# GCN-NEXT: V_MOV_B32
17771788
name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_read
17781789
body: |
@@ -1782,7 +1793,8 @@ body: |
17821793
...
17831794
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_dot_read
17841795
# GCN: V_MFMA
1785-
# GCN-NEXT: S_NOP 6
1796+
# GFX940-NEXT: S_NOP 6
1797+
# GFX950-NEXT: S_NOP 7
17861798
# GCN-NEXT: V_DOT
17871799
name: xdl_sgemm16X16X16_mfma_write_vgpr_dot_read
17881800
body: |

0 commit comments

Comments
 (0)