Skip to content

Commit cad9de7

Browse files
committed
[AMDGPU] gfx940 MAI hazard recognizer
Differential Revision: https://reviews.llvm.org/D122263
1 parent a80bf18 commit cad9de7

File tree

6 files changed

+2214
-41
lines changed

6 files changed

+2214
-41
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 140 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,10 @@ static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) {
133133
Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
134134
return false;
135135

136-
return true;
136+
if (!ST.hasGFX940Insts())
137+
return true;
138+
139+
return AMDGPU::getMAIIsGFX940XDL(Opcode);
137140
}
138141

139142
static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
@@ -1494,6 +1497,13 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
14941497
for (const MachineOperand &Use : MI->explicit_uses()) {
14951498
const int LegacyVALUNotDotWritesVGPRWaitStates = 2;
14961499
const int SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates = 2;
1500+
const int GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates = 3;
1501+
const int GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates = 5;
1502+
const int GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates = 4;
1503+
const int GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates = 9;
1504+
const int GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates = 8;
1505+
const int GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates = 17;
1506+
const int GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates = 16;
14971507
const int SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates = 8;
14981508
const int SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates = 16;
14991509
const int SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates = 3;
@@ -1504,9 +1514,18 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
15041514
const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5;
15051515
const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11;
15061516
const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19;
1517+
const int GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates = 4;
1518+
const int GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates = 6;
1519+
const int GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates = 10;
1520+
const int GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates = 18;
1521+
const int GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates = 5;
1522+
const int GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates = 7;
1523+
const int GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates = 11;
1524+
const int GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates = 19;
15071525
const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6;
15081526
const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11;
15091527
const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4;
1528+
const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2;
15101529
const int MaxWaitStates = 19;
15111530

15121531
if (!Use.isReg())
@@ -1538,14 +1557,17 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
15381557
unsigned Opc1 = MI1->getOpcode();
15391558
int NeedWaitStates = 0;
15401559
if (OpNo == SrcCIdx) {
1541-
if (!isDGEMM(Opc) && isDGEMM(Opc1)) {
1560+
if (!isDGEMM(Opc) && (!ST.hasGFX940Insts() && isDGEMM(Opc1))) {
15421561
NeedWaitStates = 0;
15431562
} else if (FullReg) {
15441563
if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
15451564
Opc == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64) &&
15461565
(Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
15471566
Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64))
15481567
NeedWaitStates = DMFMA4x4WritesVGPRFullSrcCWaitStates;
1568+
else if (ST.hasGFX940Insts() &&
1569+
TSchedModel.computeInstrLatency(MI1) == 2)
1570+
NeedWaitStates = GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates;
15491571
} else {
15501572
switch (Opc1) {
15511573
case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
@@ -1561,22 +1583,42 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
15611583
NeedWaitStates = DMFMA4x4WritesVGPROverlappedSrcCWaitStates;
15621584
break;
15631585
default:
1586+
if (ST.hasGFX940Insts() && isXDL(ST, *MI) && !isXDL(ST, *MI1))
1587+
break;
15641588
switch (TSchedModel.computeInstrLatency(MI1)) {
15651589
case 2:
1566-
NeedWaitStates = isDGEMM(Opc)
1567-
? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates
1568-
: SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates;
1590+
NeedWaitStates = ST.hasGFX940Insts()
1591+
? isXDL(ST, *MI1)
1592+
? GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates
1593+
: SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates
1594+
: isDGEMM(Opc)
1595+
? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates
1596+
: SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates;
1597+
break;
1598+
case 4:
1599+
assert(ST.hasGFX940Insts());
1600+
NeedWaitStates = isXDL(ST, *MI1)
1601+
? GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates
1602+
: GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates;
15691603
break;
15701604
case 8:
1571-
NeedWaitStates = isDGEMM(Opc)
1572-
? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates
1573-
: SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates;
1605+
NeedWaitStates = ST.hasGFX940Insts()
1606+
? isXDL(ST, *MI1)
1607+
? GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates
1608+
: GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates
1609+
: isDGEMM(Opc)
1610+
? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates
1611+
: SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates;
15741612
break;
15751613
case 16: LLVM_FALLTHROUGH;
15761614
default:
1577-
NeedWaitStates = isDGEMM(Opc)
1578-
? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates
1579-
: SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates;
1615+
NeedWaitStates = ST.hasGFX940Insts()
1616+
? isXDL(ST, *MI1)
1617+
? GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates
1618+
: GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates
1619+
: isDGEMM(Opc)
1620+
? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates
1621+
: SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates;
15801622
}
15811623
}
15821624
}
@@ -1595,14 +1637,32 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
15951637
default:
15961638
switch (TSchedModel.computeInstrLatency(MI1)) {
15971639
case 2:
1598-
NeedWaitStates = SMFMA4x4WritesVGPROverlappedSrcABWaitStates;
1640+
NeedWaitStates = ST.hasGFX940Insts()
1641+
? isXDL(ST, *MI1)
1642+
? GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates
1643+
: GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates
1644+
: SMFMA4x4WritesVGPROverlappedSrcABWaitStates;
1645+
break;
1646+
case 4:
1647+
assert(ST.hasGFX940Insts());
1648+
NeedWaitStates = isXDL(ST, *MI1)
1649+
? GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates
1650+
: GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates;
15991651
break;
16001652
case 8:
1601-
NeedWaitStates = SMFMA16x16WritesVGPROverlappedSrcABWaitStates;
1653+
NeedWaitStates = ST.hasGFX940Insts()
1654+
? isXDL(ST, *MI1)
1655+
? GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates
1656+
: GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates
1657+
: SMFMA16x16WritesVGPROverlappedSrcABWaitStates;
16021658
break;
16031659
case 16: LLVM_FALLTHROUGH;
16041660
default:
1605-
NeedWaitStates = SMFMA32x32WritesVGPROverlappedSrcABWaitStates;
1661+
NeedWaitStates = ST.hasGFX940Insts()
1662+
? isXDL(ST, *MI1)
1663+
? GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates
1664+
: GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates
1665+
: SMFMA32x32WritesVGPROverlappedSrcABWaitStates;
16061666
}
16071667
}
16081668
}
@@ -1717,6 +1777,14 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
17171777
const int SMFMA4x4WriteVgprVALUMemExpReadWaitStates = 5;
17181778
const int SMFMA16x16WriteVgprVALUMemExpReadWaitStates = 11;
17191779
const int SMFMA32x32WriteVgprVALUMemExpReadWaitStates = 19;
1780+
const int GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates = 4;
1781+
const int GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates = 6;
1782+
const int GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates = 10;
1783+
const int GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates = 18;
1784+
const int GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates = 5;
1785+
const int GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates = 7;
1786+
const int GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates = 11;
1787+
const int GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates = 19;
17201788
const int DMFMA4x4WriteVgprMemExpReadWaitStates = 9;
17211789
const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18;
17221790
const int DMFMA4x4WriteVgprVALUReadWaitStates = 6;
@@ -1756,24 +1824,42 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
17561824
int NeedWaitStates = MaxWaitStates;
17571825
switch (HazardDefLatency) {
17581826
case 2:
1759-
NeedWaitStates = SMFMA4x4WriteVgprVALUMemExpReadWaitStates;
1827+
NeedWaitStates =
1828+
ST.hasGFX940Insts()
1829+
? isXDL(ST, *MFMA)
1830+
? GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates
1831+
: GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates
1832+
: SMFMA4x4WriteVgprVALUMemExpReadWaitStates;
17601833
break;
17611834
case 4:
17621835
assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts());
17631836
NeedWaitStates =
1764-
IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates
1765-
: DMFMA4x4WriteVgprVALUReadWaitStates;
1837+
isDGEMM(MFMA->getOpcode())
1838+
? IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates
1839+
: DMFMA4x4WriteVgprVALUReadWaitStates
1840+
: isXDL(ST, *MFMA)
1841+
? GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates
1842+
: GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates;
17661843
break;
17671844
case 8:
1768-
NeedWaitStates = SMFMA16x16WriteVgprVALUMemExpReadWaitStates;
1845+
NeedWaitStates =
1846+
ST.hasGFX940Insts()
1847+
? isXDL(ST, *MFMA)
1848+
? GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates
1849+
: GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates
1850+
: SMFMA16x16WriteVgprVALUMemExpReadWaitStates;
17691851
break;
17701852
case 16: LLVM_FALLTHROUGH;
17711853
default:
17721854
NeedWaitStates =
17731855
isDGEMM(MFMA->getOpcode())
17741856
? IsMemOrExport ? DMFMA16x16WriteVgprMemExpReadWaitStates
17751857
: DMFMA16x16WriteVgprVALUReadWaitStates
1776-
: SMFMA32x32WriteVgprVALUMemExpReadWaitStates;
1858+
: ST.hasGFX940Insts()
1859+
? isXDL(ST, *MFMA)
1860+
? GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates
1861+
: GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates
1862+
: SMFMA32x32WriteVgprVALUMemExpReadWaitStates;
17771863
break;
17781864
}
17791865

@@ -1803,7 +1889,16 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
18031889
const int SMFMA4x4WriteVgprVALUWawWaitStates = 5;
18041890
const int SMFMA16x16WriteVgprVALUWawWaitStates = 11;
18051891
const int SMFMA32x32WriteVgprVALUWawWaitStates = 19;
1892+
const int GFX940_SMFMA2PassWriteVgprVALUWawWaitStates = 4;
1893+
const int GFX940_SMFMA4PassWriteVgprVALUWawWaitStates = 6;
1894+
const int GFX940_SMFMA8PassWriteVgprVALUWawWaitStates = 10;
1895+
const int GFX940_SMFMA16PassWriteVgprVALUWawWaitStates = 18;
1896+
const int GFX940_XDL2PassWriteVgprVALUWawWaitStates = 5;
1897+
const int GFX940_XDL4PassWriteVgprVALUWawWaitStates = 7;
1898+
const int GFX940_XDL8PassWriteVgprVALUWawWaitStates = 11;
1899+
const int GFX940_XDL16PassWriteVgprVALUWawWaitStates = 19;
18061900
const int SMFMA4x4ReadVgprVALUWarWaitStates = 1;
1901+
const int GFX940_XDL4PassReadVgprVALUWarWaitStates = 3;
18071902
const int SMFMA16x16ReadVgprVALUWarWaitStates = 7;
18081903
const int SMFMA32x32ReadVgprVALUWarWaitStates = 15;
18091904
const int DMFMA4x4WriteVgprVALUWriteWaitStates = 6;
@@ -1828,19 +1923,35 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
18281923
int NeedWaitStates = MaxWaitStates;
18291924
switch (TSchedModel.computeInstrLatency(MFMA)) {
18301925
case 2:
1831-
NeedWaitStates = SMFMA4x4WriteVgprVALUWawWaitStates;
1926+
NeedWaitStates = ST.hasGFX940Insts()
1927+
? isXDL(ST, *MFMA)
1928+
? GFX940_XDL2PassWriteVgprVALUWawWaitStates
1929+
: GFX940_SMFMA2PassWriteVgprVALUWawWaitStates
1930+
: SMFMA4x4WriteVgprVALUWawWaitStates;
18321931
break;
18331932
case 4:
1834-
assert(isDGEMM(MFMA->getOpcode()));
1835-
NeedWaitStates = DMFMA4x4WriteVgprVALUWriteWaitStates;
1933+
assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts());
1934+
NeedWaitStates = isDGEMM(MFMA->getOpcode())
1935+
? DMFMA4x4WriteVgprVALUWriteWaitStates
1936+
: isXDL(ST, *MFMA)
1937+
? GFX940_XDL4PassWriteVgprVALUWawWaitStates
1938+
: GFX940_SMFMA4PassWriteVgprVALUWawWaitStates;
18361939
break;
18371940
case 8:
1838-
NeedWaitStates = SMFMA16x16WriteVgprVALUWawWaitStates;
1941+
NeedWaitStates = ST.hasGFX940Insts()
1942+
? isXDL(ST, *MFMA)
1943+
? GFX940_XDL8PassWriteVgprVALUWawWaitStates
1944+
: GFX940_SMFMA8PassWriteVgprVALUWawWaitStates
1945+
: SMFMA16x16WriteVgprVALUWawWaitStates;
18391946
break;
18401947
case 16: LLVM_FALLTHROUGH;
18411948
default:
18421949
NeedWaitStates = isDGEMM(MFMA->getOpcode())
18431950
? DMFMA16x16WriteVgprVALUWriteWaitStates
1951+
: ST.hasGFX940Insts()
1952+
? isXDL(ST, *MFMA)
1953+
? GFX940_XDL16PassWriteVgprVALUWawWaitStates
1954+
: GFX940_SMFMA16PassWriteVgprVALUWawWaitStates
18441955
: SMFMA32x32WriteVgprVALUWawWaitStates;
18451956
break;
18461957
}
@@ -1858,6 +1969,9 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
18581969
!MI.readsRegister(Reg, &TRI))
18591970
return false;
18601971

1972+
if (ST.hasGFX940Insts() && !isXDL(ST, MI))
1973+
return false;
1974+
18611975
const MachineOperand *SrcC =
18621976
TII.getNamedOperand(MI, AMDGPU::OpName::src2);
18631977
assert(SrcC);
@@ -1879,6 +1993,9 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
18791993
switch (HazardDefLatency) {
18801994
case 2: NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates;
18811995
break;
1996+
case 4: assert(ST.hasGFX940Insts());
1997+
NeedWaitStates = GFX940_XDL4PassReadVgprVALUWarWaitStates;
1998+
break;
18821999
case 8: NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates;
18832000
break;
18842001
case 16: LLVM_FALLTHROUGH;

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ unsigned getHostcallImplicitArgPosition() {
164164
#define GET_MIMGBiasMappingTable_IMPL
165165
#define GET_MIMGOffsetMappingTable_IMPL
166166
#define GET_MIMGG16MappingTable_IMPL
167+
#define GET_MAIInstInfoTable_IMPL
167168
#include "AMDGPUGenSearchableTables.inc"
168169

169170
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
@@ -342,6 +343,11 @@ bool getVOP3IsSingle(unsigned Opc) {
342343
return Info ? Info->IsSingle : false;
343344
}
344345

346+
/// Reports the is_gfx940_xdl flag recorded for opcode \p Opc.
///
/// \param Opc Opcode used as the lookup key for getMAIInstInfoHelper().
/// \returns The entry's is_gfx940_xdl value when the lookup yields an entry,
///          and false when the lookup returns null.
bool getMAIIsGFX940XDL(unsigned Opc) {
347+
const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
348+
// A null Info means the helper found no entry for Opc; report "not XDL".
return Info ? Info->is_gfx940_xdl : false;
349+
}
350+
345351
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
346352
// header files, so we need to wrap it in a function that takes unsigned
347353
// instead.

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,18 @@ struct GcnBufferFormatInfo {
6868
unsigned DataFormat;
6969
};
7070

71+
/// Per-opcode record pairing an opcode with its is_gfx940_xdl flag.
/// NOTE(review): presumably the row type of the generated MAIInstInfoTable
/// enabled by GET_MAIInstInfoTable_DECL; if so, field order and names must
/// match the generated initializers. Confirm against the .td definition.
struct MAIInstInfo {
72+
uint16_t Opcode; // Opcode this record describes.
73+
bool is_gfx940_xdl; // Flag returned to callers by getMAIIsGFX940XDL().
74+
};
75+
7176
#define GET_MIMGBaseOpcode_DECL
7277
#define GET_MIMGDim_DECL
7378
#define GET_MIMGEncoding_DECL
7479
#define GET_MIMGLZMapping_DECL
7580
#define GET_MIMGMIPMapping_DECL
7681
#define GET_MIMGBiASMapping_DECL
82+
#define GET_MAIInstInfoTable_DECL
7783
#include "AMDGPUGenSearchableTables.inc"
7884

7985
namespace IsaInfo {
@@ -444,6 +450,9 @@ bool getVOP2IsSingle(unsigned Opc);
444450
LLVM_READONLY
445451
bool getVOP3IsSingle(unsigned Opc);
446452

453+
LLVM_READONLY
454+
bool getMAIIsGFX940XDL(unsigned Opc);
455+
447456
LLVM_READONLY
448457
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
449458
uint8_t NumComponents,

0 commit comments

Comments
 (0)