@@ -1378,13 +1378,19 @@ let SubtargetPredicate = HasVmemPrefInsts in {
13781378}
13791379
13801380//===----------------------------------------------------------------------===//
1381- // Flat Patterns
1381+ // Utilities
13821382//===----------------------------------------------------------------------===//
// Mem_wrap: chooses the result dag for a load pattern. When `true16` is set,
// `ret` is `op` wrapped as (EXTRACT_SUBREG op, lo16); otherwise `ret` is `op`
// unchanged.
1383+ class Mem_wrap<dag op, bit true16> {
1384+ dag ret = !if(true16, (EXTRACT_SUBREG op, lo16), op);
1385+ }
13831386
1387+ //===----------------------------------------------------------------------===//
1388+ // Flat Patterns
1389+ //===----------------------------------------------------------------------===//
13841390// Patterns for global loads with no offset.
// FlatLoadPat: matches `node` over a FlatOffset (vaddr, offset) address and
// selects `inst`. This hunk adds a `true16` bit parameter and routes the result
// dag through Mem_wrap, so (EXTRACT_SUBREG ..., lo16) is applied when it is set.
1385- class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1391+ class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16 > : GCNPat <
13861392 (vt (node (FlatOffset i64:$vaddr, i32:$offset))),
1387- (inst $vaddr, $offset)
1393+ Mem_wrap< (inst $vaddr, $offset), true16>.ret
13881394>;
13891395
13901396class FlatLoadPat_CPOL <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1462,14 +1468,14 @@ class GlobalLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, Valu
14621468 (inst $saddr, $voffset, $offset, $cpol)
14631469>;
14641470
// FlatLoadSignedPat: matches `node` over a GlobalOffset address (64-bit VGPR
// vaddr plus i32 offset) and selects `inst`. This hunk adds the `true16` bit;
// the result dag now goes through Mem_wrap<..., true16>.ret.
1465- class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1471+ class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16 > : GCNPat <
14661472 (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))),
1467- (inst $vaddr, $offset)
1473+ Mem_wrap< (inst $vaddr, $offset), true16>.ret
14681474>;
14691475
// FlatLoadSaddrPat: matches `node` over a GlobalSAddr address (SReg_64 saddr,
// VGPR_32 voffset, i32 offset, cpol) and selects `inst` with those operands.
// This hunk adds the `true16` bit and wraps the result dag with Mem_wrap.
1470- class FlatLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1476+ class FlatLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16 > : GCNPat <
14711477 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol))),
1472- (inst $saddr, $voffset, $offset, $cpol)
1478+ Mem_wrap< (inst $saddr, $voffset, $offset, $cpol), true16>.ret
14731479>;
14741480
14751481class FlatLoadSignedPat_M0 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1629,9 +1635,9 @@ multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt,
16291635 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>;
16301636}
16311637
// ScratchLoadSignedPat: matches `node` over a ScratchOffset address (VGPR_32
// vaddr plus i32 offset) and selects `inst`. This hunk adds the `true16` bit
// and wraps the result dag with Mem_wrap.
1632- class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1638+ class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16 > : GCNPat <
16331639 (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))),
1634- (inst $vaddr, $offset)
1640+ Mem_wrap< (inst $vaddr, $offset), true16>.ret
16351641>;
16361642
16371643class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1649,9 +1655,9 @@ class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType
16491655 (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)
16501656>;
16511657
// ScratchLoadSaddrPat: matches `node` over a ScratchSAddr address (SGPR_32
// saddr plus i32 offset) and selects `inst`. This hunk adds the `true16` bit
// and wraps the result dag with Mem_wrap.
1652- class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1658+ class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16 > : GCNPat <
16531659 (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))),
1654- (inst $saddr, $offset)
1660+ Mem_wrap< (inst $saddr, $offset), true16>.ret
16551661>;
16561662
16571663class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1670,9 +1676,9 @@ class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
16701676 (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
16711677>;
16721678
// ScratchLoadSVaddrPat: matches `node` over a ScratchSVAddr address (VGPR_32
// vaddr, SGPR_32 saddr, i32 offset, cpol) and selects `inst` with those
// operands. This hunk adds the `true16` bit and wraps the result dag with
// Mem_wrap.
1673- class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1679+ class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16 > : GCNPat <
16741680 (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset, CPol:$cpol))),
1675- (inst $vaddr, $saddr, $offset, $cpol)
1681+ Mem_wrap< (inst $vaddr, $saddr, $offset, $cpol), true16>.ret
16761682>;
16771683
16781684class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
@@ -1721,14 +1727,14 @@ multiclass GlobalStoreLDSPats<FLAT_Pseudo inst, SDPatternOperator node> {
17211727 }
17221728}
17231729
1724- multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1725- def : FlatLoadSignedPat <inst, node, vt> {
1730+ multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit isTrue16 = 0 > {
1731+ def : FlatLoadSignedPat <inst, node, vt, isTrue16 > {
17261732 let AddedComplexity = 10;
17271733 let SubtargetPredicate = inst.SubtargetPredicate;
17281734 let OtherPredicates = inst.OtherPredicates;
17291735 }
17301736
1731- def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1737+ def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt, isTrue16 > {
17321738 let AddedComplexity = 11;
17331739 let SubtargetPredicate = inst.SubtargetPredicate;
17341740 let OtherPredicates = inst.OtherPredicates;
@@ -1860,16 +1866,16 @@ multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt,
18601866 defm : GlobalFLATAtomicPats<inst, node, vt, data_vt, /* isIntr */ 1>;
18611867}
18621868
1863- multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1864- def : ScratchLoadSignedPat <inst, node, vt> {
1869+ multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit isTrue16 = 0 > {
1870+ def : ScratchLoadSignedPat <inst, node, vt, isTrue16 > {
18651871 let AddedComplexity = 25;
18661872 }
18671873
1868- def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1874+ def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt, isTrue16 > {
18691875 let AddedComplexity = 26;
18701876 }
18711877
1872- def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> {
1878+ def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt, isTrue16 > {
18731879 let SubtargetPredicate = HasFlatScratchSVSMode;
18741880 let AddedComplexity = 27;
18751881 }
@@ -1937,12 +1943,12 @@ multiclass ScratchFLATLoadPats_D16_t16<string inst, SDPatternOperator node, Valu
19371943 }
19381944}
19391945
1940- multiclass FlatLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1941- def : FlatLoadPat <inst, node, vt> {
1946+ multiclass FlatLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit isTrue16 = 0 > {
1947+ def : FlatLoadPat <inst, node, vt, isTrue16 > {
19421948 let OtherPredicates = [HasFlatAddressSpace];
19431949 }
19441950
1945- def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1951+ def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt, isTrue16 > {
19461952 let AddedComplexity = 9;
19471953 let SubtargetPredicate = HasFlatGVSMode;
19481954 }
@@ -2018,6 +2024,13 @@ let True16Predicate = p in {
20182024}
20192025
// Flat i16 store patterns under UseRealTrue16Insts. This hunk moves the four
// store patterns to the front of the scope and closes it right after them, so
// the load patterns that used to share this scope can sit in separate scopes
// with their own predicate lists.
20202026let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts in {
2027+ defm : FlatStorePats_t16 <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
2028+ defm : FlatStorePats_t16 <FLAT_STORE_SHORT, store_flat, i16>;
2029+ def : FlatStorePat <FLAT_STORE_BYTE_t16, atomic_store_8_flat, i16>;
2030+ def : FlatStorePat <FLAT_STORE_SHORT_t16, atomic_store_16_flat, i16>;
2031+ } // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts
2032+
// Flat i16 D16_t16 load patterns under UseRealTrue16Insts, now additionally
// gated on NotHas16bitD16HWBug. The removed lines are the store patterns and
// the old closing brace that previously ended this scope. A hunk boundary falls
// inside this scope, so some of its lines are not shown here.
2033+ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace, NotHas16bitD16HWBug], True16Predicate = UseRealTrue16Insts in {
20212034 defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, extloadi8_flat, i16>;
20222035 defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, zextloadi8_flat, i16>;
20232036 defm : FlatLoadPats_D16_t16<FLAT_LOAD_SBYTE_D16_t16, sextloadi8_flat, i16>;
@@ -2026,11 +2039,18 @@ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predi
20262039 defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, atomic_load_zext_8_flat, i16>;
20272040 defm : FlatLoadPats_D16_t16<FLAT_LOAD_SHORT_D16_t16, atomic_load_nonext_16_flat, i16>;
20282041 defm : FlatLoadPats_D16_t16<FLAT_LOAD_SBYTE_D16_t16, atomic_load_sext_8_flat, i16>;
2029- defm : FlatStorePats_t16 <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
2030- defm : FlatStorePats_t16 <FLAT_STORE_SHORT, store_flat, i16>;
2031- def : FlatStorePat <FLAT_STORE_BYTE_t16, atomic_store_8_flat, i16>;
2032- def : FlatStorePat <FLAT_STORE_SHORT_t16, atomic_store_16_flat, i16>;
2033- } // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts
2042+ }
2043+
// Flat i16 load patterns for the Has16bitD16HWBug case: the non-D16
// FLAT_LOAD_* instructions are selected with isTrue16 = 1, so the pattern
// result is wrapped in (EXTRACT_SUBREG ..., lo16).
// NOTE(review): the FlatLoadPat def inside FlatLoadPats assigns
// OtherPredicates = [HasFlatAddressSpace] itself; confirm that this scope's
// predicate list (including Has16bitD16HWBug) still applies to that def.
2044+ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace, Has16bitD16HWBug], True16Predicate = UseRealTrue16Insts in {
2045+ defm : FlatLoadPats <FLAT_LOAD_UBYTE, extloadi8_flat, i16, /*true16*/1>;
2046+ defm : FlatLoadPats <FLAT_LOAD_UBYTE, zextloadi8_flat, i16, /*true16*/1>;
2047+ defm : FlatLoadPats <FLAT_LOAD_SBYTE, sextloadi8_flat, i16, /*true16*/1>;
2048+ defm : FlatLoadPats <FLAT_LOAD_USHORT, load_flat, i16, /*true16*/1>;
2049+ defm : FlatLoadPats <FLAT_LOAD_UBYTE, atomic_load_aext_8_flat, i16, /*true16*/1>;
2050+ defm : FlatLoadPats <FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i16, /*true16*/1>;
2051+ defm : FlatLoadPats <FLAT_LOAD_USHORT, atomic_load_nonext_16_flat, i16, /*true16*/1>;
2052+ defm : FlatLoadPats <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i16, /*true16*/1>;
2053+ }
20342054
20352055defm : FlatLoadPats <FLAT_LOAD_DWORD, atomic_load_nonext_32_flat, i32>;
20362056defm : FlatLoadPats <FLAT_LOAD_DWORDX2, atomic_load_nonext_64_flat, i64>;
@@ -2161,22 +2181,37 @@ defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_nonext_16_global, i16
21612181defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i16>;
21622182}
21632183
// Global i16 store patterns under UseRealTrue16Insts. This hunk removes the
// D16_t16 load patterns from this scope, keeps only the four store patterns,
// and adds HasFlatGlobalInsts to OtherPredicates, which brings the opening line
// in line with the pre-existing closing comment.
2164- let OtherPredicates = [D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in {
2165- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", extloadi8_global, i16>;
2166- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", zextloadi8_global, i16>;
2167- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", sextloadi8_global, i16>;
2168- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", load_global, i16>;
2169- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_aext_8_global, i16>;
2170- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_zext_8_global, i16>;
2171- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", atomic_load_sext_8_global, i16>;
2172- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_nonext_16_global, i16>;
2173- defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_zext_16_global, i16>;
2174- defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", truncstorei8_global, i16>;
2175- defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", store_global, i16>;
2176- defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", atomic_store_8_global, i16>;
2177- defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", atomic_store_16_global, i16>;
2184+ let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in {
2185+ defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", truncstorei8_global, i16>;
2186+ defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", store_global, i16>;
2187+ defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", atomic_store_8_global, i16>;
2188+ defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", atomic_store_16_global, i16>;
21782189} // end OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts
21792190
// Global i16 D16_t16 load patterns under UseRealTrue16Insts, gated on
// NotHas16bitD16HWBug. These are the same nine node/instruction pairs that the
// hunk removes from the store scope.
2191+ let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits, NotHas16bitD16HWBug], True16Predicate = UseRealTrue16Insts in {
2192+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", extloadi8_global, i16>;
2193+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", zextloadi8_global, i16>;
2194+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", sextloadi8_global, i16>;
2195+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", load_global, i16>;
2196+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_aext_8_global, i16>;
2197+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_zext_8_global, i16>;
2198+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", atomic_load_sext_8_global, i16>;
2199+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_nonext_16_global, i16>;
2200+ defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_zext_16_global, i16>;
2201+ }
2202+
// Global i16 load patterns for the Has16bitD16HWBug case: the non-D16
// GLOBAL_LOAD_* instructions are selected with isTrue16 = 1, so the pattern
// result is wrapped in (EXTRACT_SUBREG ..., lo16).
// NOTE(review): both defs inside GlobalFLATLoadPats assign
// OtherPredicates = inst.OtherPredicates; confirm that this scope's predicate
// list (including Has16bitD16HWBug) still applies to them.
2203+ let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits, Has16bitD16HWBug], True16Predicate = UseRealTrue16Insts in {
2204+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16, /*true16*/1>;
2205+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16, /*true16*/1>;
2206+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16, /*true16*/1>;
2207+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16, /*true16*/1>;
2208+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_aext_8_global, i16, /*true16*/1>;
2209+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_zext_8_global, i16, /*true16*/1>;
2210+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, atomic_load_sext_8_global, i16, /*true16*/1>;
2211+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_nonext_16_global, i16, /*true16*/1>;
2212+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i16, /*true16*/1>;
2213+ }
2214+
21802215foreach vt = Reg32Types.types in {
21812216defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>;
21822217defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>;
@@ -2386,12 +2421,20 @@ defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i16>;
23862421}
23872422
// Scratch i16 patterns under UseRealTrue16Insts. This hunk splits the loads
// into two nested predicate scopes: D16_t16 loads under NotHas16bitD16HWBug,
// and non-D16 SCRATCH_LOAD_* loads with isTrue16 = 1 under Has16bitD16HWBug.
// The two store patterns stay outside both nested scopes.
23882423let True16Predicate = UseRealTrue16Insts in {
2389- defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", extloadi8_private, i16>;
2390- defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", zextloadi8_private, i16>;
2391- defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SBYTE_D16", sextloadi8_private, i16>;
2392- defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SHORT_D16", load_private, i16>;
2393- defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_SHORT", store_private, i16>;
2394- defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_BYTE", truncstorei8_private, i16>;
2424+ let OtherPredicates = [NotHas16bitD16HWBug] in {
2425+ defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", extloadi8_private, i16>;
2426+ defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", zextloadi8_private, i16>;
2427+ defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SBYTE_D16", sextloadi8_private, i16>;
2428+ defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SHORT_D16", load_private, i16>;
2429+ }
2430+ let OtherPredicates = [Has16bitD16HWBug] in {
2431+ defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16, /*true16*/1>;
2432+ defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16, /*true16*/1>;
2433+ defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16, /*true16*/1>;
2434+ defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private, i16, /*true16*/1>;
2435+ }
2436+ defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_SHORT", store_private, i16>;
2437+ defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_BYTE", truncstorei8_private, i16>;
23952438} // End True16Predicate = UseRealTrue16Insts
23962439
23972440foreach vt = Reg32Types.types in {
0 commit comments