Skip to content

Commit 222a222

Browse files
committed
add d16-hw-bug flag
1 parent 693146d commit 222a222

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+18796
-29133
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,12 @@ def FeatureRealTrue16Insts : SubtargetFeature<"real-true16",
586586
"Use true 16-bit registers"
587587
>;
588588

589+
// Subtarget feature "d16-hw-bug": sets the Enable16bitD16HWBug subtarget
// field to "true". Per its description string, enabling it disables D16 for
// 16-bit data types in true16 mode.
def Feature16bitD16HWBug : SubtargetFeature<"d16-hw-bug",
590+
"Enable16bitD16HWBug",
591+
"true",
592+
"Disable D16 for 16 bit data type for true16 mode"
593+
>;
594+
589595
def FeatureBF16TransInsts : SubtargetFeature<"bf16-trans-insts",
590596
"HasBF16TransInsts",
591597
"true",
@@ -1934,7 +1940,9 @@ def FeatureISAVersion11_Common : FeatureSet<
19341940
FeaturePackedTID,
19351941
FeatureVcmpxPermlaneHazard,
19361942
FeatureMemoryAtomicFAddF32DenormalSupport,
1937-
FeatureRealTrue16Insts]>;
1943+
FeatureRealTrue16Insts,
1944+
Feature16bitD16HWBug,
1945+
]>;
19381946

19391947
// There are few workarounds that need to be
19401948
// added to all targets. This pessimizes codegen
@@ -2570,6 +2578,13 @@ def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() &&
25702578
// FIXME When we default to RealTrue16 instead of Fake, change the line as follows.
25712579
// AssemblerPredicate<(all_of FeatureTrue16BitInsts, (not FeatureRealTrue16Insts))>;
25722580

2581+
// Predicate for subtargets affected by the 16-bit D16 hardware bug: patterns
// guarded by it must not use D16 instructions for 16-bit data types.
// Codegen side: Subtarget->has16bitD16HWBug().
// Assembler side: requires FeatureTrue16BitInsts, FeatureRealTrue16Insts and
// Feature16bitD16HWBug together.
2582+
def Has16bitD16HWBug: Predicate<"Subtarget->has16bitD16HWBug()">,
2583+
AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts, Feature16bitD16HWBug)>;
2584+
// Counterpart of Has16bitD16HWBug within real-true16 mode: holds when real
// true16 instructions are in use and the D16 hardware bug is absent.
// The assembler predicate requires both true16 features and the absence of
// Feature16bitD16HWBug.
def NotHas16bitD16HWBug: Predicate<"Subtarget->useRealTrue16Insts() && "
2585+
"!Subtarget->has16bitD16HWBug()">,
2586+
AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts, (not Feature16bitD16HWBug))>;
2587+
25732588
def HasBF16TransInsts : Predicate<"Subtarget->hasBF16TransInsts()">,
25742589
AssemblerPredicate<(all_of FeatureBF16TransInsts)>;
25752590

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ bool AMDGPUSubtarget::useRealTrue16Insts() const {
3838
return hasTrue16BitInsts() && EnableRealTrue16Insts;
3939
}
4040

41+
// Reports whether the 16-bit D16 hardware-bug workaround applies: real true16
// instructions must be in use and the Enable16bitD16HWBug flag must be set.
// NOTE(review): useRealTrue16Insts() already checks hasTrue16BitInsts(), so
// the leading hasTrue16BitInsts() term looks redundant.
bool AMDGPUSubtarget::has16bitD16HWBug() const {
42+
return hasTrue16BitInsts() && useRealTrue16Insts() && Enable16bitD16HWBug;
43+
}
44+
4145
// Returns the maximum per-workgroup LDS allocation size (in bytes) that still
4246
// allows the given function to achieve an occupancy of NWaves waves per
4347
// SIMD / EU, taking into account only the function's *maximum* workgroup size.

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ class AMDGPUSubtarget {
5959
bool HasCvtPkF16F32Inst = false;
6060
bool HasF32ToF16BF16ConversionSRInsts = false;
6161
bool EnableRealTrue16Insts = false;
62+
bool Enable16bitD16HWBug = false;
6263
bool HasBF16TransInsts = false;
6364
bool HasBF16ConversionInsts = false;
6465
bool HasBF16PackedInsts = false;
@@ -224,6 +225,8 @@ class AMDGPUSubtarget {
224225
// supported and the support for fake True16 instructions is removed.
225226
bool useRealTrue16Insts() const;
226227

228+
// Returns true when real true16 instructions are in use and the
// Enable16bitD16HWBug flag is set (defined out of line in
// AMDGPUSubtarget.cpp).
bool has16bitD16HWBug() const;
229+
227230
bool hasBF16TransInsts() const { return HasBF16TransInsts; }
228231

229232
bool hasBF16ConversionInsts() const {

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 91 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1378,13 +1378,19 @@ let SubtargetPredicate = HasVmemPrefInsts in {
13781378
}
13791379

13801380
//===----------------------------------------------------------------------===//
1381-
// Flat Patterns
1381+
// Utilities
13821382
//===----------------------------------------------------------------------===//
1383+
// Helper that optionally narrows a pattern's output dag.
//   op     - the output dag (e.g. an instruction with its operands).
//   true16 - when set, `ret` is (EXTRACT_SUBREG op, lo16); otherwise `ret` is
//            `op` unchanged.
class Mem_wrap<dag op, bit true16> {
1384+
dag ret = !if(true16, (EXTRACT_SUBREG op, lo16), op);
1385+
}
13831386

1387+
//===----------------------------------------------------------------------===//
1388+
// Flat Patterns
1389+
//===----------------------------------------------------------------------===//
13841390
// Patterns for global loads with no offset.
1385-
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1391+
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16> : GCNPat <
13861392
(vt (node (FlatOffset i64:$vaddr, i32:$offset))),
1387-
(inst $vaddr, $offset)
1393+
Mem_wrap<(inst $vaddr, $offset), true16>.ret
13881394
>;
13891395

13901396
class FlatLoadPat_CPOL <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1462,14 +1468,14 @@ class GlobalLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, Valu
14621468
(inst $saddr, $voffset, $offset, $cpol)
14631469
>;
14641470

1465-
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1471+
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16> : GCNPat <
14661472
(vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))),
1467-
(inst $vaddr, $offset)
1473+
Mem_wrap<(inst $vaddr, $offset), true16>.ret
14681474
>;
14691475

1470-
class FlatLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1476+
class FlatLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16> : GCNPat <
14711477
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol))),
1472-
(inst $saddr, $voffset, $offset, $cpol)
1478+
Mem_wrap<(inst $saddr, $voffset, $offset, $cpol), true16>.ret
14731479
>;
14741480

14751481
class FlatLoadSignedPat_M0 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1629,9 +1635,9 @@ multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt,
16291635
def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>;
16301636
}
16311637

1632-
class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1638+
class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16> : GCNPat <
16331639
(vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))),
1634-
(inst $vaddr, $offset)
1640+
Mem_wrap<(inst $vaddr, $offset), true16>.ret
16351641
>;
16361642

16371643
class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1649,9 +1655,9 @@ class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType
16491655
(inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)
16501656
>;
16511657

1652-
class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1658+
class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16> : GCNPat <
16531659
(vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))),
1654-
(inst $saddr, $offset)
1660+
Mem_wrap<(inst $saddr, $offset), true16>.ret
16551661
>;
16561662

16571663
class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1670,9 +1676,9 @@ class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
16701676
(inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
16711677
>;
16721678

1673-
class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1679+
class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16> : GCNPat <
16741680
(vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset, CPol:$cpol))),
1675-
(inst $vaddr, $saddr, $offset, $cpol)
1681+
Mem_wrap<(inst $vaddr, $saddr, $offset, $cpol), true16>.ret
16761682
>;
16771683

16781684
class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
@@ -1721,14 +1727,14 @@ multiclass GlobalStoreLDSPats<FLAT_Pseudo inst, SDPatternOperator node> {
17211727
}
17221728
}
17231729

1724-
multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1725-
def : FlatLoadSignedPat <inst, node, vt> {
1730+
multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit isTrue16 = 0> {
1731+
def : FlatLoadSignedPat <inst, node, vt, isTrue16> {
17261732
let AddedComplexity = 10;
17271733
let SubtargetPredicate = inst.SubtargetPredicate;
17281734
let OtherPredicates = inst.OtherPredicates;
17291735
}
17301736

1731-
def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1737+
def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt, isTrue16> {
17321738
let AddedComplexity = 11;
17331739
let SubtargetPredicate = inst.SubtargetPredicate;
17341740
let OtherPredicates = inst.OtherPredicates;
@@ -1860,16 +1866,16 @@ multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt,
18601866
defm : GlobalFLATAtomicPats<inst, node, vt, data_vt, /* isIntr */ 1>;
18611867
}
18621868

1863-
multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1864-
def : ScratchLoadSignedPat <inst, node, vt> {
1869+
multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit isTrue16 = 0> {
1870+
def : ScratchLoadSignedPat <inst, node, vt, isTrue16> {
18651871
let AddedComplexity = 25;
18661872
}
18671873

1868-
def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1874+
def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt, isTrue16> {
18691875
let AddedComplexity = 26;
18701876
}
18711877

1872-
def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> {
1878+
def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt, isTrue16> {
18731879
let SubtargetPredicate = HasFlatScratchSVSMode;
18741880
let AddedComplexity = 27;
18751881
}
@@ -1937,12 +1943,12 @@ multiclass ScratchFLATLoadPats_D16_t16<string inst, SDPatternOperator node, Valu
19371943
}
19381944
}
19391945

1940-
multiclass FlatLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1941-
def : FlatLoadPat <inst, node, vt> {
1946+
multiclass FlatLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit isTrue16 = 0> {
1947+
def : FlatLoadPat <inst, node, vt, isTrue16> {
19421948
let OtherPredicates = [HasFlatAddressSpace];
19431949
}
19441950

1945-
def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1951+
def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt, isTrue16> {
19461952
let AddedComplexity = 9;
19471953
let SubtargetPredicate = HasFlatGVSMode;
19481954
}
@@ -2018,6 +2024,13 @@ let True16Predicate = p in {
20182024
}
20192025

20202026
// Flat i16 store patterns for real-true16 mode. These are gated only on
// D16PreservesUnusedBits and HasFlatAddressSpace; they do not depend on
// either of the D16 hardware-bug predicates.
let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts in {
2027+
defm : FlatStorePats_t16 <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
2028+
defm : FlatStorePats_t16 <FLAT_STORE_SHORT, store_flat, i16>;
2029+
def : FlatStorePat <FLAT_STORE_BYTE_t16, atomic_store_8_flat, i16>;
2030+
def : FlatStorePat <FLAT_STORE_SHORT_t16, atomic_store_16_flat, i16>;
2031+
} // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts
2032+
2033+
let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace, NotHas16bitD16HWBug], True16Predicate = UseRealTrue16Insts in {
20212034
defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, extloadi8_flat, i16>;
20222035
defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, zextloadi8_flat, i16>;
20232036
defm : FlatLoadPats_D16_t16<FLAT_LOAD_SBYTE_D16_t16, sextloadi8_flat, i16>;
@@ -2026,11 +2039,18 @@ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predi
20262039
defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, atomic_load_zext_8_flat, i16>;
20272040
defm : FlatLoadPats_D16_t16<FLAT_LOAD_SHORT_D16_t16, atomic_load_nonext_16_flat, i16>;
20282041
defm : FlatLoadPats_D16_t16<FLAT_LOAD_SBYTE_D16_t16, atomic_load_sext_8_flat, i16>;
2029-
defm : FlatStorePats_t16 <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
2030-
defm : FlatStorePats_t16 <FLAT_STORE_SHORT, store_flat, i16>;
2031-
def : FlatStorePat <FLAT_STORE_BYTE_t16, atomic_store_8_flat, i16>;
2032-
def : FlatStorePat <FLAT_STORE_SHORT_t16, atomic_store_16_flat, i16>;
2033-
} // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts
2042+
}
2043+
2044+
// Flat i16 load patterns for real-true16 subtargets that have the D16
// hardware bug: select the non-D16 FLAT_LOAD_{UBYTE,SBYTE,USHORT}
// instructions instead of the *_D16_t16 variants. Passing true16 = 1 makes
// the load pattern classes wrap the instruction with Mem_wrap, i.e. the
// result is taken from the lo16 subregister via EXTRACT_SUBREG.
let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace, Has16bitD16HWBug], True16Predicate = UseRealTrue16Insts in {
2045+
defm : FlatLoadPats <FLAT_LOAD_UBYTE, extloadi8_flat, i16, /*true16*/1>;
2046+
defm : FlatLoadPats <FLAT_LOAD_UBYTE, zextloadi8_flat, i16, /*true16*/1>;
2047+
defm : FlatLoadPats <FLAT_LOAD_SBYTE, sextloadi8_flat, i16, /*true16*/1>;
2048+
defm : FlatLoadPats <FLAT_LOAD_USHORT, load_flat, i16, /*true16*/1>;
2049+
defm : FlatLoadPats <FLAT_LOAD_UBYTE, atomic_load_aext_8_flat, i16, /*true16*/1>;
2050+
defm : FlatLoadPats <FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i16, /*true16*/1>;
2051+
defm : FlatLoadPats <FLAT_LOAD_USHORT, atomic_load_nonext_16_flat, i16, /*true16*/1>;
2052+
defm : FlatLoadPats <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i16, /*true16*/1>;
2053+
}
20342054

20352055
defm : FlatLoadPats <FLAT_LOAD_DWORD, atomic_load_nonext_32_flat, i32>;
20362056
defm : FlatLoadPats <FLAT_LOAD_DWORDX2, atomic_load_nonext_64_flat, i64>;
@@ -2161,22 +2181,37 @@ defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_nonext_16_global, i16
21612181
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i16>;
21622182
}
21632183

2164-
let OtherPredicates = [D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in {
2165-
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", extloadi8_global, i16>;
2166-
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", zextloadi8_global, i16>;
2167-
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", sextloadi8_global, i16>;
2168-
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", load_global, i16>;
2169-
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_aext_8_global, i16>;
2170-
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_zext_8_global, i16>;
2171-
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", atomic_load_sext_8_global, i16>;
2172-
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_nonext_16_global, i16>;
2173-
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_zext_16_global, i16>;
2174-
defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", truncstorei8_global, i16>;
2175-
defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", store_global, i16>;
2176-
defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", atomic_store_8_global, i16>;
2177-
defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", atomic_store_16_global, i16>;
2184+
// Global i16 store patterns for real-true16 mode. These are gated only on
// HasFlatGlobalInsts and D16PreservesUnusedBits; they do not depend on
// either of the D16 hardware-bug predicates.
let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in {
2185+
defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", truncstorei8_global, i16>;
2186+
defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", store_global, i16>;
2187+
defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", atomic_store_8_global, i16>;
2188+
defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", atomic_store_16_global, i16>;
21782189
} // end OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts
21792190

2191+
// Global i16 load patterns that select the GLOBAL_LOAD_*_D16 instructions.
// Only enabled in real-true16 mode when the D16 hardware bug is absent
// (NotHas16bitD16HWBug).
let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits, NotHas16bitD16HWBug], True16Predicate = UseRealTrue16Insts in {
2192+
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", extloadi8_global, i16>;
2193+
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", zextloadi8_global, i16>;
2194+
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", sextloadi8_global, i16>;
2195+
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", load_global, i16>;
2196+
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_aext_8_global, i16>;
2197+
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_zext_8_global, i16>;
2198+
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", atomic_load_sext_8_global, i16>;
2199+
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_nonext_16_global, i16>;
2200+
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_zext_16_global, i16>;
2201+
}
2202+
2203+
// Global i16 load patterns for real-true16 subtargets that have the D16
// hardware bug: select the non-D16 GLOBAL_LOAD_{UBYTE,SBYTE,USHORT}
// instructions. Passing true16 = 1 makes the load pattern classes wrap the
// instruction with Mem_wrap, i.e. the result is taken from the lo16
// subregister via EXTRACT_SUBREG.
let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits, Has16bitD16HWBug], True16Predicate = UseRealTrue16Insts in {
2204+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16, /*true16*/1>;
2205+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16, /*true16*/1>;
2206+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16, /*true16*/1>;
2207+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16, /*true16*/1>;
2208+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_aext_8_global, i16, /*true16*/1>;
2209+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_zext_8_global, i16, /*true16*/1>;
2210+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, atomic_load_sext_8_global, i16, /*true16*/1>;
2211+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_nonext_16_global, i16, /*true16*/1>;
2212+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i16, /*true16*/1>;
2213+
}
2214+
21802215
foreach vt = Reg32Types.types in {
21812216
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>;
21822217
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>;
@@ -2386,12 +2421,20 @@ defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i16>;
23862421
}
23872422

23882423
let True16Predicate = UseRealTrue16Insts in {
2389-
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", extloadi8_private, i16>;
2390-
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", zextloadi8_private, i16>;
2391-
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SBYTE_D16", sextloadi8_private, i16>;
2392-
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SHORT_D16", load_private, i16>;
2393-
defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_SHORT", store_private, i16>;
2394-
defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_BYTE", truncstorei8_private, i16>;
2424+
// Scratch i16 load patterns that select the SCRATCH_LOAD_*_D16 instructions;
// only enabled when the D16 hardware bug is absent (NotHas16bitD16HWBug).
let OtherPredicates = [NotHas16bitD16HWBug] in {
2425+
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", extloadi8_private, i16>;
2426+
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", zextloadi8_private, i16>;
2427+
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SBYTE_D16", sextloadi8_private, i16>;
2428+
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SHORT_D16", load_private, i16>;
2429+
}
2430+
// Scratch i16 load patterns for subtargets that have the D16 hardware bug:
// select the non-D16 SCRATCH_LOAD_{UBYTE,SBYTE,USHORT} instructions. Passing
// true16 = 1 makes the load pattern classes wrap the instruction with
// Mem_wrap, i.e. the result is taken from the lo16 subregister via
// EXTRACT_SUBREG.
let OtherPredicates = [Has16bitD16HWBug] in {
2431+
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16, /*true16*/1>;
2432+
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16, /*true16*/1>;
2433+
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16, /*true16*/1>;
2434+
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private, i16, /*true16*/1>;
2435+
}
2436+
defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_SHORT", store_private, i16>;
2437+
defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_BYTE", truncstorei8_private, i16>;
23952438
} // End True16Predicate = UseRealTrue16Insts
23962439

23972440
foreach vt = Reg32Types.types in {

0 commit comments

Comments
 (0)