@@ -133,7 +133,10 @@ static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) {
133
133
Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
134
134
return false ;
135
135
136
- return true ;
136
+ if (!ST.hasGFX940Insts ())
137
+ return true ;
138
+
139
+ return AMDGPU::getMAIIsGFX940XDL (Opcode);
137
140
}
138
141
139
142
static bool isSendMsgTraceDataOrGDS (const SIInstrInfo &TII,
@@ -1494,6 +1497,13 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
1494
1497
for (const MachineOperand &Use : MI->explicit_uses ()) {
1495
1498
const int LegacyVALUNotDotWritesVGPRWaitStates = 2 ;
1496
1499
const int SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates = 2 ;
1500
+ const int GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates = 3 ;
1501
+ const int GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates = 5 ;
1502
+ const int GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates = 4 ;
1503
+ const int GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates = 9 ;
1504
+ const int GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates = 8 ;
1505
+ const int GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates = 17 ;
1506
+ const int GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates = 16 ;
1497
1507
const int SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates = 8 ;
1498
1508
const int SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates = 16 ;
1499
1509
const int SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates = 3 ;
@@ -1504,9 +1514,18 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
1504
1514
const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5 ;
1505
1515
const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11 ;
1506
1516
const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19 ;
1517
+ const int GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates = 4 ;
1518
+ const int GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates = 6 ;
1519
+ const int GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates = 10 ;
1520
+ const int GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates = 18 ;
1521
+ const int GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates = 5 ;
1522
+ const int GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates = 7 ;
1523
+ const int GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates = 11 ;
1524
+ const int GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates = 19 ;
1507
1525
const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6 ;
1508
1526
const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11 ;
1509
1527
const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4 ;
1528
+ const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2 ;
1510
1529
const int MaxWaitStates = 19 ;
1511
1530
1512
1531
if (!Use.isReg ())
@@ -1538,14 +1557,17 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
1538
1557
unsigned Opc1 = MI1->getOpcode ();
1539
1558
int NeedWaitStates = 0 ;
1540
1559
if (OpNo == SrcCIdx) {
1541
- if (!isDGEMM (Opc) && isDGEMM (Opc1)) {
1560
+ if (!isDGEMM (Opc) && (!ST. hasGFX940Insts () && isDGEMM (Opc1) )) {
1542
1561
NeedWaitStates = 0 ;
1543
1562
} else if (FullReg) {
1544
1563
if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
1545
1564
Opc == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64) &&
1546
1565
(Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
1547
1566
Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64))
1548
1567
NeedWaitStates = DMFMA4x4WritesVGPRFullSrcCWaitStates;
1568
+ else if (ST.hasGFX940Insts () &&
1569
+ TSchedModel.computeInstrLatency (MI1) == 2 )
1570
+ NeedWaitStates = GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates;
1549
1571
} else {
1550
1572
switch (Opc1) {
1551
1573
case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
@@ -1561,22 +1583,42 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
1561
1583
NeedWaitStates = DMFMA4x4WritesVGPROverlappedSrcCWaitStates;
1562
1584
break ;
1563
1585
default :
1586
+ if (ST.hasGFX940Insts () && isXDL (ST, *MI) && !isXDL (ST, *MI1))
1587
+ break ;
1564
1588
switch (TSchedModel.computeInstrLatency (MI1)) {
1565
1589
case 2 :
1566
- NeedWaitStates = isDGEMM (Opc)
1567
- ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates
1568
- : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates;
1590
+ NeedWaitStates = ST.hasGFX940Insts ()
1591
+ ? isXDL (ST, *MI1)
1592
+ ? GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates
1593
+ : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates
1594
+ : isDGEMM (Opc)
1595
+ ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates
1596
+ : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates;
1597
+ break ;
1598
+ case 4 :
1599
+ assert (ST.hasGFX940Insts ());
1600
+ NeedWaitStates = isXDL (ST, *MI1)
1601
+ ? GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates
1602
+ : GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates;
1569
1603
break ;
1570
1604
case 8 :
1571
- NeedWaitStates = isDGEMM (Opc)
1572
- ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates
1573
- : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates;
1605
+ NeedWaitStates = ST.hasGFX940Insts ()
1606
+ ? isXDL (ST, *MI1)
1607
+ ? GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates
1608
+ : GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates
1609
+ : isDGEMM (Opc)
1610
+ ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates
1611
+ : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates;
1574
1612
break ;
1575
1613
case 16 : LLVM_FALLTHROUGH;
1576
1614
default :
1577
- NeedWaitStates = isDGEMM (Opc)
1578
- ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates
1579
- : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates;
1615
+ NeedWaitStates = ST.hasGFX940Insts ()
1616
+ ? isXDL (ST, *MI1)
1617
+ ? GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates
1618
+ : GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates
1619
+ : isDGEMM (Opc)
1620
+ ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates
1621
+ : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates;
1580
1622
}
1581
1623
}
1582
1624
}
@@ -1595,14 +1637,32 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
1595
1637
default :
1596
1638
switch (TSchedModel.computeInstrLatency (MI1)) {
1597
1639
case 2 :
1598
- NeedWaitStates = SMFMA4x4WritesVGPROverlappedSrcABWaitStates;
1640
+ NeedWaitStates = ST.hasGFX940Insts ()
1641
+ ? isXDL (ST, *MI1)
1642
+ ? GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates
1643
+ : GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates
1644
+ : SMFMA4x4WritesVGPROverlappedSrcABWaitStates;
1645
+ break ;
1646
+ case 4 :
1647
+ assert (ST.hasGFX940Insts ());
1648
+ NeedWaitStates = isXDL (ST, *MI1)
1649
+ ? GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates
1650
+ : GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates;
1599
1651
break ;
1600
1652
case 8 :
1601
- NeedWaitStates = SMFMA16x16WritesVGPROverlappedSrcABWaitStates;
1653
+ NeedWaitStates = ST.hasGFX940Insts ()
1654
+ ? isXDL (ST, *MI1)
1655
+ ? GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates
1656
+ : GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates
1657
+ : SMFMA16x16WritesVGPROverlappedSrcABWaitStates;
1602
1658
break ;
1603
1659
case 16 : LLVM_FALLTHROUGH;
1604
1660
default :
1605
- NeedWaitStates = SMFMA32x32WritesVGPROverlappedSrcABWaitStates;
1661
+ NeedWaitStates = ST.hasGFX940Insts ()
1662
+ ? isXDL (ST, *MI1)
1663
+ ? GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates
1664
+ : GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates
1665
+ : SMFMA32x32WritesVGPROverlappedSrcABWaitStates;
1606
1666
}
1607
1667
}
1608
1668
}
@@ -1717,6 +1777,14 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
1717
1777
const int SMFMA4x4WriteVgprVALUMemExpReadWaitStates = 5 ;
1718
1778
const int SMFMA16x16WriteVgprVALUMemExpReadWaitStates = 11 ;
1719
1779
const int SMFMA32x32WriteVgprVALUMemExpReadWaitStates = 19 ;
1780
+ const int GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates = 4 ;
1781
+ const int GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates = 6 ;
1782
+ const int GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates = 10 ;
1783
+ const int GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates = 18 ;
1784
+ const int GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates = 5 ;
1785
+ const int GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates = 7 ;
1786
+ const int GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates = 11 ;
1787
+ const int GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates = 19 ;
1720
1788
const int DMFMA4x4WriteVgprMemExpReadWaitStates = 9 ;
1721
1789
const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18 ;
1722
1790
const int DMFMA4x4WriteVgprVALUReadWaitStates = 6 ;
@@ -1756,24 +1824,42 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
1756
1824
int NeedWaitStates = MaxWaitStates;
1757
1825
switch (HazardDefLatency) {
1758
1826
case 2 :
1759
- NeedWaitStates = SMFMA4x4WriteVgprVALUMemExpReadWaitStates;
1827
+ NeedWaitStates =
1828
+ ST.hasGFX940Insts ()
1829
+ ? isXDL (ST, *MFMA)
1830
+ ? GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates
1831
+ : GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates
1832
+ : SMFMA4x4WriteVgprVALUMemExpReadWaitStates;
1760
1833
break ;
1761
1834
case 4 :
1762
1835
assert (isDGEMM (MFMA->getOpcode ()) || ST.hasGFX940Insts ());
1763
1836
NeedWaitStates =
1764
- IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates
1765
- : DMFMA4x4WriteVgprVALUReadWaitStates;
1837
+ isDGEMM (MFMA->getOpcode ())
1838
+ ? IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates
1839
+ : DMFMA4x4WriteVgprVALUReadWaitStates
1840
+ : isXDL (ST, *MFMA)
1841
+ ? GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates
1842
+ : GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates;
1766
1843
break ;
1767
1844
case 8 :
1768
- NeedWaitStates = SMFMA16x16WriteVgprVALUMemExpReadWaitStates;
1845
+ NeedWaitStates =
1846
+ ST.hasGFX940Insts ()
1847
+ ? isXDL (ST, *MFMA)
1848
+ ? GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates
1849
+ : GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates
1850
+ : SMFMA16x16WriteVgprVALUMemExpReadWaitStates;
1769
1851
break ;
1770
1852
case 16 : LLVM_FALLTHROUGH;
1771
1853
default :
1772
1854
NeedWaitStates =
1773
1855
isDGEMM (MFMA->getOpcode ())
1774
1856
? IsMemOrExport ? DMFMA16x16WriteVgprMemExpReadWaitStates
1775
1857
: DMFMA16x16WriteVgprVALUReadWaitStates
1776
- : SMFMA32x32WriteVgprVALUMemExpReadWaitStates;
1858
+ : ST.hasGFX940Insts ()
1859
+ ? isXDL (ST, *MFMA)
1860
+ ? GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates
1861
+ : GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates
1862
+ : SMFMA32x32WriteVgprVALUMemExpReadWaitStates;
1777
1863
break ;
1778
1864
}
1779
1865
@@ -1803,7 +1889,16 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
1803
1889
const int SMFMA4x4WriteVgprVALUWawWaitStates = 5 ;
1804
1890
const int SMFMA16x16WriteVgprVALUWawWaitStates = 11 ;
1805
1891
const int SMFMA32x32WriteVgprVALUWawWaitStates = 19 ;
1892
+ const int GFX940_SMFMA2PassWriteVgprVALUWawWaitStates = 4 ;
1893
+ const int GFX940_SMFMA4PassWriteVgprVALUWawWaitStates = 6 ;
1894
+ const int GFX940_SMFMA8PassWriteVgprVALUWawWaitStates = 10 ;
1895
+ const int GFX940_SMFMA16PassWriteVgprVALUWawWaitStates = 18 ;
1896
+ const int GFX940_XDL2PassWriteVgprVALUWawWaitStates = 5 ;
1897
+ const int GFX940_XDL4PassWriteVgprVALUWawWaitStates = 7 ;
1898
+ const int GFX940_XDL8PassWriteVgprVALUWawWaitStates = 11 ;
1899
+ const int GFX940_XDL16PassWriteVgprVALUWawWaitStates = 19 ;
1806
1900
const int SMFMA4x4ReadVgprVALUWarWaitStates = 1 ;
1901
+ const int GFX940_XDL4PassReadVgprVALUWarWaitStates = 3 ;
1807
1902
const int SMFMA16x16ReadVgprVALUWarWaitStates = 7 ;
1808
1903
const int SMFMA32x32ReadVgprVALUWarWaitStates = 15 ;
1809
1904
const int DMFMA4x4WriteVgprVALUWriteWaitStates = 6 ;
@@ -1828,19 +1923,35 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
1828
1923
int NeedWaitStates = MaxWaitStates;
1829
1924
switch (TSchedModel.computeInstrLatency (MFMA)) {
1830
1925
case 2 :
1831
- NeedWaitStates = SMFMA4x4WriteVgprVALUWawWaitStates;
1926
+ NeedWaitStates = ST.hasGFX940Insts ()
1927
+ ? isXDL (ST, *MFMA)
1928
+ ? GFX940_XDL2PassWriteVgprVALUWawWaitStates
1929
+ : GFX940_SMFMA2PassWriteVgprVALUWawWaitStates
1930
+ : SMFMA4x4WriteVgprVALUWawWaitStates;
1832
1931
break ;
1833
1932
case 4 :
1834
- assert (isDGEMM (MFMA->getOpcode ()));
1835
- NeedWaitStates = DMFMA4x4WriteVgprVALUWriteWaitStates;
1933
+ assert (isDGEMM (MFMA->getOpcode ()) || ST.hasGFX940Insts ());
1934
+ NeedWaitStates = isDGEMM (MFMA->getOpcode ())
1935
+ ? DMFMA4x4WriteVgprVALUWriteWaitStates
1936
+ : isXDL (ST, *MFMA)
1937
+ ? GFX940_XDL4PassWriteVgprVALUWawWaitStates
1938
+ : GFX940_SMFMA4PassWriteVgprVALUWawWaitStates;
1836
1939
break ;
1837
1940
case 8 :
1838
- NeedWaitStates = SMFMA16x16WriteVgprVALUWawWaitStates;
1941
+ NeedWaitStates = ST.hasGFX940Insts ()
1942
+ ? isXDL (ST, *MFMA)
1943
+ ? GFX940_XDL8PassWriteVgprVALUWawWaitStates
1944
+ : GFX940_SMFMA8PassWriteVgprVALUWawWaitStates
1945
+ : SMFMA16x16WriteVgprVALUWawWaitStates;
1839
1946
break ;
1840
1947
case 16 : LLVM_FALLTHROUGH;
1841
1948
default :
1842
1949
NeedWaitStates = isDGEMM (MFMA->getOpcode ())
1843
1950
? DMFMA16x16WriteVgprVALUWriteWaitStates
1951
+ : ST.hasGFX940Insts ()
1952
+ ? isXDL (ST, *MFMA)
1953
+ ? GFX940_XDL16PassWriteVgprVALUWawWaitStates
1954
+ : GFX940_SMFMA16PassWriteVgprVALUWawWaitStates
1844
1955
: SMFMA32x32WriteVgprVALUWawWaitStates;
1845
1956
break ;
1846
1957
}
@@ -1858,6 +1969,9 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
1858
1969
!MI.readsRegister (Reg, &TRI))
1859
1970
return false ;
1860
1971
1972
+ if (ST.hasGFX940Insts () && !isXDL (ST, MI))
1973
+ return false ;
1974
+
1861
1975
const MachineOperand *SrcC =
1862
1976
TII.getNamedOperand (MI, AMDGPU::OpName::src2);
1863
1977
assert (SrcC);
@@ -1879,6 +1993,9 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
1879
1993
switch (HazardDefLatency) {
1880
1994
case 2 : NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates;
1881
1995
break ;
1996
+ case 4 : assert (ST.hasGFX940Insts ());
1997
+ NeedWaitStates = GFX940_XDL4PassReadVgprVALUWarWaitStates;
1998
+ break ;
1882
1999
case 8 : NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates;
1883
2000
break ;
1884
2001
case 16 : LLVM_FALLTHROUGH;
0 commit comments