@@ -1378,13 +1378,19 @@ let SubtargetPredicate = HasVmemPrefInsts in {
1378
1378
}
1379
1379
1380
1380
//===----------------------------------------------------------------------===//
1381
- // Flat Patterns
1381
+ // Utilities
1382
1382
//===----------------------------------------------------------------------===//
1383
// Mem_wrap: `ret` is `op` unchanged when `true16` is 0, and `(EXTRACT_SUBREG op, lo16)` when `true16` is 1.
+ class Mem_wrap<dag op, bit true16> {
1384
+ dag ret = !if(true16, (EXTRACT_SUBREG op, lo16), op);
1385
+ }
1383
1386
1387
+ //===----------------------------------------------------------------------===//
1388
+ // Flat Patterns
1389
+ //===----------------------------------------------------------------------===//
1384
1390
// Patterns for global loads with no offset.
1385
// FlatLoadPat: GCNPat whose first dag is `node` applied to a FlatOffset ($vaddr, $offset) address and whose
// second dag is `(inst $vaddr, $offset)`. The updated version adds a `true16` bit and passes the second dag
// through Mem_wrap<..., true16>.ret.
- class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1391
+ class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16 > : GCNPat <
1386
1392
(vt (node (FlatOffset i64:$vaddr, i32:$offset))),
1387
- (inst $vaddr, $offset)
1393
+ Mem_wrap< (inst $vaddr, $offset), true16>.ret
1388
1394
>;
1389
1395
1390
1396
class FlatLoadPat_CPOL <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1462,14 +1468,14 @@ class GlobalLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, Valu
1462
1468
(inst $saddr, $voffset, $offset, $cpol)
1463
1469
>;
1464
1470
1465
// FlatLoadSignedPat: GCNPat whose first dag is `node` applied to a GlobalOffset (VReg_64 $vaddr, $offset)
// address and whose second dag is `(inst $vaddr, $offset)`, passed through Mem_wrap<..., true16>.ret in the
// updated version.
- class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1471
+ class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16 > : GCNPat <
1466
1472
(vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))),
1467
- (inst $vaddr, $offset)
1473
+ Mem_wrap< (inst $vaddr, $offset), true16>.ret
1468
1474
>;
1469
1475
1470
// FlatLoadSaddrPat: GCNPat whose first dag is `node` applied to a GlobalSAddr
// (SReg_64 $saddr, VGPR_32 $voffset, $offset, $cpol) address and whose second dag is
// `(inst $saddr, $voffset, $offset, $cpol)`, passed through Mem_wrap<..., true16>.ret in the updated version.
- class FlatLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1476
+ class FlatLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16 > : GCNPat <
1471
1477
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol))),
1472
- (inst $saddr, $voffset, $offset, $cpol)
1478
+ Mem_wrap< (inst $saddr, $voffset, $offset, $cpol), true16>.ret
1473
1479
>;
1474
1480
1475
1481
class FlatLoadSignedPat_M0 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1629,9 +1635,9 @@ multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt,
1629
1635
def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>;
1630
1636
}
1631
1637
1632
// ScratchLoadSignedPat: GCNPat whose first dag is `node` applied to a ScratchOffset (VGPR_32 $vaddr, $offset)
// address and whose second dag is `(inst $vaddr, $offset)`, passed through Mem_wrap<..., true16>.ret in the
// updated version.
- class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1638
+ class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16 > : GCNPat <
1633
1639
(vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))),
1634
- (inst $vaddr, $offset)
1640
+ Mem_wrap< (inst $vaddr, $offset), true16>.ret
1635
1641
>;
1636
1642
1637
1643
class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1649,9 +1655,9 @@ class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType
1649
1655
(inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)
1650
1656
>;
1651
1657
1652
// ScratchLoadSaddrPat: GCNPat whose first dag is `node` applied to a ScratchSAddr (SGPR_32 $saddr, $offset)
// address and whose second dag is `(inst $saddr, $offset)`, passed through Mem_wrap<..., true16>.ret in the
// updated version.
- class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1658
+ class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16 > : GCNPat <
1653
1659
(vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))),
1654
- (inst $saddr, $offset)
1660
+ Mem_wrap< (inst $saddr, $offset), true16>.ret
1655
1661
>;
1656
1662
1657
1663
class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1670,9 +1676,9 @@ class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
1670
1676
(inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
1671
1677
>;
1672
1678
1673
// ScratchLoadSVaddrPat: GCNPat whose first dag is `node` applied to a ScratchSVAddr
// (VGPR_32 $vaddr, SGPR_32 $saddr, $offset, $cpol) address and whose second dag is
// `(inst $vaddr, $saddr, $offset, $cpol)`, passed through Mem_wrap<..., true16>.ret in the updated version.
- class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1679
+ class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16 > : GCNPat <
1674
1680
(vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset, CPol:$cpol))),
1675
- (inst $vaddr, $saddr, $offset, $cpol)
1681
+ Mem_wrap< (inst $vaddr, $saddr, $offset, $cpol), true16>.ret
1676
1682
>;
1677
1683
1678
1684
class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
@@ -1721,14 +1727,14 @@ multiclass GlobalStoreLDSPats<FLAT_Pseudo inst, SDPatternOperator node> {
1721
1727
}
1722
1728
}
1723
1729
1724
- multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1725
- def : FlatLoadSignedPat <inst, node, vt> {
1730
+ multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit isTrue16 = 0 > {
1731
+ def : FlatLoadSignedPat <inst, node, vt, isTrue16 > {
1726
1732
let AddedComplexity = 10;
1727
1733
let SubtargetPredicate = inst.SubtargetPredicate;
1728
1734
let OtherPredicates = inst.OtherPredicates;
1729
1735
}
1730
1736
1731
- def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1737
+ def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt, isTrue16 > {
1732
1738
let AddedComplexity = 11;
1733
1739
let SubtargetPredicate = inst.SubtargetPredicate;
1734
1740
let OtherPredicates = inst.OtherPredicates;
@@ -1860,16 +1866,16 @@ multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt,
1860
1866
defm : GlobalFLATAtomicPats<inst, node, vt, data_vt, /* isIntr */ 1>;
1861
1867
}
1862
1868
1863
- multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1864
- def : ScratchLoadSignedPat <inst, node, vt> {
1869
+ multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit isTrue16 = 0 > {
1870
+ def : ScratchLoadSignedPat <inst, node, vt, isTrue16 > {
1865
1871
let AddedComplexity = 25;
1866
1872
}
1867
1873
1868
- def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1874
+ def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt, isTrue16 > {
1869
1875
let AddedComplexity = 26;
1870
1876
}
1871
1877
1872
- def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> {
1878
+ def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt, isTrue16 > {
1873
1879
let SubtargetPredicate = HasFlatScratchSVSMode;
1874
1880
let AddedComplexity = 27;
1875
1881
}
@@ -1937,12 +1943,12 @@ multiclass ScratchFLATLoadPats_D16_t16<string inst, SDPatternOperator node, Valu
1937
1943
}
1938
1944
}
1939
1945
1940
- multiclass FlatLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1941
- def : FlatLoadPat <inst, node, vt> {
1946
+ multiclass FlatLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit isTrue16 = 0 > {
1947
+ def : FlatLoadPat <inst, node, vt, isTrue16 > {
1942
1948
let OtherPredicates = [HasFlatAddressSpace];
1943
1949
}
1944
1950
1945
- def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1951
+ def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt, isTrue16 > {
1946
1952
let AddedComplexity = 9;
1947
1953
let SubtargetPredicate = HasFlatGVSMode;
1948
1954
}
@@ -2018,6 +2024,13 @@ let True16Predicate = p in {
2018
2024
}
2019
2025
2020
2026
// Flat true16 i16 store patterns. This predicate list has only D16PreservesUnusedBits and HasFlatAddressSpace;
// neither 16-bit D16 HW-bug predicate appears here.
let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts in {
2027
+ defm : FlatStorePats_t16 <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
2028
+ defm : FlatStorePats_t16 <FLAT_STORE_SHORT, store_flat, i16>;
2029
+ def : FlatStorePat <FLAT_STORE_BYTE_t16, atomic_store_8_flat, i16>;
2030
+ def : FlatStorePat <FLAT_STORE_SHORT_t16, atomic_store_16_flat, i16>;
2031
+ } // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts
2032
+
2033
// Flat true16 i16 load patterns using the *_D16_t16 pseudos; NotHas16bitD16HWBug is added to the predicate list.
+ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace, NotHas16bitD16HWBug], True16Predicate = UseRealTrue16Insts in {
2021
2034
defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, extloadi8_flat, i16>;
2022
2035
defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, zextloadi8_flat, i16>;
2023
2036
defm : FlatLoadPats_D16_t16<FLAT_LOAD_SBYTE_D16_t16, sextloadi8_flat, i16>;
@@ -2026,11 +2039,18 @@ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predi
2026
2039
defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, atomic_load_zext_8_flat, i16>;
2027
2040
defm : FlatLoadPats_D16_t16<FLAT_LOAD_SHORT_D16_t16, atomic_load_nonext_16_flat, i16>;
2028
2041
defm : FlatLoadPats_D16_t16<FLAT_LOAD_SBYTE_D16_t16, atomic_load_sext_8_flat, i16>;
2029
- defm : FlatStorePats_t16 <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
2030
- defm : FlatStorePats_t16 <FLAT_STORE_SHORT, store_flat, i16>;
2031
- def : FlatStorePat <FLAT_STORE_BYTE_t16, atomic_store_8_flat, i16>;
2032
- def : FlatStorePat <FLAT_STORE_SHORT_t16, atomic_store_16_flat, i16>;
2033
- } // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts
2042
+ } // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace, NotHas16bitD16HWBug], True16Predicate = UseRealTrue16Insts
2043
+
2044
// Flat true16 i16 load patterns under Has16bitD16HWBug: these use the non-D16 FLAT_LOAD_* pseudos and pass 1 as
// the trailing /*true16*/ argument of FlatLoadPats.
+ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace, Has16bitD16HWBug], True16Predicate = UseRealTrue16Insts in {
2045
+ defm : FlatLoadPats <FLAT_LOAD_UBYTE, extloadi8_flat, i16, /*true16*/1>;
2046
+ defm : FlatLoadPats <FLAT_LOAD_UBYTE, zextloadi8_flat, i16, /*true16*/1>;
2047
+ defm : FlatLoadPats <FLAT_LOAD_SBYTE, sextloadi8_flat, i16, /*true16*/1>;
2048
+ defm : FlatLoadPats <FLAT_LOAD_USHORT, load_flat, i16, /*true16*/1>;
2049
+ defm : FlatLoadPats <FLAT_LOAD_UBYTE, atomic_load_aext_8_flat, i16, /*true16*/1>;
2050
+ defm : FlatLoadPats <FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i16, /*true16*/1>;
2051
+ defm : FlatLoadPats <FLAT_LOAD_USHORT, atomic_load_nonext_16_flat, i16, /*true16*/1>;
2052
+ defm : FlatLoadPats <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i16, /*true16*/1>;
2053
+ } // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace, Has16bitD16HWBug], True16Predicate = UseRealTrue16Insts
2034
2054
2035
2055
defm : FlatLoadPats <FLAT_LOAD_DWORD, atomic_load_nonext_32_flat, i32>;
2036
2056
defm : FlatLoadPats <FLAT_LOAD_DWORDX2, atomic_load_nonext_64_flat, i64>;
@@ -2161,22 +2181,37 @@ defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_nonext_16_global, i16
2161
2181
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i16>;
2162
2182
}
2163
2183
2164
// Global true16 i16 store patterns. The replacement block keeps only the GlobalFLATStorePats_D16_t16 entries and
// adds HasFlatGlobalInsts to the predicate list; the GlobalFLATLoadPats_D16_t16 entries are removed from this block.
- let OtherPredicates = [D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in {
2165
- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", extloadi8_global, i16>;
2166
- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", zextloadi8_global, i16>;
2167
- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", sextloadi8_global, i16>;
2168
- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", load_global, i16>;
2169
- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_aext_8_global, i16>;
2170
- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_zext_8_global, i16>;
2171
- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", atomic_load_sext_8_global, i16>;
2172
- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_nonext_16_global, i16>;
2173
- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_zext_16_global, i16>;
2174
- defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", truncstorei8_global, i16>;
2175
- defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", store_global, i16>;
2176
- defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", atomic_store_8_global, i16>;
2177
- defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", atomic_store_16_global, i16>;
2184
+ let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in {
2185
+ defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", truncstorei8_global, i16>;
2186
+ defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", store_global, i16>;
2187
+ defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", atomic_store_8_global, i16>;
2188
+ defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", atomic_store_16_global, i16>;
2178
2189
} // end OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts
2179
2190
2191
// Global true16 i16 load patterns using the *_D16 pseudo names; NotHas16bitD16HWBug is in the predicate list.
+ let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits, NotHas16bitD16HWBug], True16Predicate = UseRealTrue16Insts in {
2192
+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", extloadi8_global, i16>;
2193
+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", zextloadi8_global, i16>;
2194
+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", sextloadi8_global, i16>;
2195
+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", load_global, i16>;
2196
+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_aext_8_global, i16>;
2197
+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_zext_8_global, i16>;
2198
+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", atomic_load_sext_8_global, i16>;
2199
+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_nonext_16_global, i16>;
2200
+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_zext_16_global, i16>;
2201
+ } // End let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits, NotHas16bitD16HWBug], True16Predicate = UseRealTrue16Insts
2202
+
2203
// Global true16 i16 load patterns under Has16bitD16HWBug: these use the non-D16 GLOBAL_LOAD_* pseudos and pass 1
// as the trailing /*true16*/ argument of GlobalFLATLoadPats.
+ let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits, Has16bitD16HWBug], True16Predicate = UseRealTrue16Insts in {
2204
+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16, /*true16*/1>;
2205
+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16, /*true16*/1>;
2206
+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16, /*true16*/1>;
2207
+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16, /*true16*/1>;
2208
+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_aext_8_global, i16, /*true16*/1>;
2209
+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_zext_8_global, i16, /*true16*/1>;
2210
+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, atomic_load_sext_8_global, i16, /*true16*/1>;
2211
+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_nonext_16_global, i16, /*true16*/1>;
2212
+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i16, /*true16*/1>;
2213
+ } // End let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits, Has16bitD16HWBug], True16Predicate = UseRealTrue16Insts
2214
+
2180
2215
foreach vt = Reg32Types.types in {
2181
2216
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>;
2182
2217
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>;
@@ -2386,12 +2421,20 @@ defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i16>;
2386
2421
}
2387
2422
2388
2423
// Scratch true16 i16 patterns. In the updated version the loads are split into two nested blocks keyed on
// NotHas16bitD16HWBug (D16_t16 pseudos) and Has16bitD16HWBug (non-D16 pseudos with /*true16*/1); the store
// patterns sit outside both nested blocks.
let True16Predicate = UseRealTrue16Insts in {
2389
- defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", extloadi8_private, i16>;
2390
- defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", zextloadi8_private, i16>;
2391
- defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SBYTE_D16", sextloadi8_private, i16>;
2392
- defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SHORT_D16", load_private, i16>;
2393
- defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_SHORT", store_private, i16>;
2394
- defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_BYTE", truncstorei8_private, i16>;
2424
+ let OtherPredicates = [NotHas16bitD16HWBug] in {
2425
+ defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", extloadi8_private, i16>;
2426
+ defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", zextloadi8_private, i16>;
2427
+ defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SBYTE_D16", sextloadi8_private, i16>;
2428
+ defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SHORT_D16", load_private, i16>;
2429
+ } // End let OtherPredicates = [NotHas16bitD16HWBug]
2430
+ let OtherPredicates = [Has16bitD16HWBug] in {
2431
+ defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16, /*true16*/1>;
2432
+ defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16, /*true16*/1>;
2433
+ defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16, /*true16*/1>;
2434
+ defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private, i16, /*true16*/1>;
2435
+ } // End let OtherPredicates = [Has16bitD16HWBug]
2436
+ defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_SHORT", store_private, i16>;
2437
+ defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_BYTE", truncstorei8_private, i16>;
2395
2438
} // End True16Predicate = UseRealTrue16Insts
2396
2439
2397
2440
foreach vt = Reg32Types.types in {
0 commit comments