Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,12 @@ def FeatureRealTrue16Insts : SubtargetFeature<"real-true16",
"Use true 16-bit registers"
>;

def Feature16bitD16HWBug : SubtargetFeature<"d16-hw-bug",
"Enable16bitD16HWBug",
"true",
"Disable D16 for 16 bit data type for true16 mode"
>;

def FeatureBF16TransInsts : SubtargetFeature<"bf16-trans-insts",
"HasBF16TransInsts",
"true",
Expand Down Expand Up @@ -1934,7 +1940,9 @@ def FeatureISAVersion11_Common : FeatureSet<
FeaturePackedTID,
FeatureVcmpxPermlaneHazard,
FeatureMemoryAtomicFAddF32DenormalSupport,
FeatureRealTrue16Insts]>;
FeatureRealTrue16Insts,
Feature16bitD16HWBug,
]>;

// There are few workarounds that need to be
// added to all targets. This pessimizes codegen
Expand Down Expand Up @@ -2570,6 +2578,13 @@ def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() &&
// FIXME When we default to RealTrue16 instead of Fake, change the line as follows.
// AssemblerPredicate<(all_of FeatureTrue16BitInsts, (not FeatureRealTrue16Insts))>;

// Do not use D16 inst for 16bit data type
def Has16bitD16HWBug: Predicate<"Subtarget->has16bitD16HWBug()">,
AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts, Feature16bitD16HWBug)>;
def NotHas16bitD16HWBug: Predicate<"Subtarget->useRealTrue16Insts() && "
"!Subtarget->has16bitD16HWBug()">,
AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts, (not Feature16bitD16HWBug))>;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would think FeatureRealTrueD16Insts would be inserted into the GFX11 feature list?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we want to turn off D16 in GFX11 true16 mode, then it should not be in the GFX11 feature list

Copy link
Contributor Author

@broxigarchen broxigarchen Sep 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think about it. We might need similar flag in ECC feature and thus using a D16HWBUG flag would be more beneficial here.

Changing to D16HWBUG flag and add it to gfx11 feature list

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, this approach looks pretty clean.

def HasBF16TransInsts : Predicate<"Subtarget->hasBF16TransInsts()">,
AssemblerPredicate<(all_of FeatureBF16TransInsts)>;

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ bool AMDGPUSubtarget::useRealTrue16Insts() const {
return hasTrue16BitInsts() && EnableRealTrue16Insts;
}

bool AMDGPUSubtarget::has16bitD16HWBug() const {
return hasTrue16BitInsts() && useRealTrue16Insts() && Enable16bitD16HWBug;
}

// Returns the maximum per-workgroup LDS allocation size (in bytes) that still
// allows the given function to achieve an occupancy of NWaves waves per
// SIMD / EU, taking into account only the function's *maximum* workgroup size.
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class AMDGPUSubtarget {
bool HasCvtPkF16F32Inst = false;
bool HasF32ToF16BF16ConversionSRInsts = false;
bool EnableRealTrue16Insts = false;
bool Enable16bitD16HWBug = false;
bool HasBF16TransInsts = false;
bool HasBF16ConversionInsts = false;
bool HasBF16PackedInsts = false;
Expand Down Expand Up @@ -224,6 +225,8 @@ class AMDGPUSubtarget {
// supported and the support for fake True16 instructions is removed.
bool useRealTrue16Insts() const;

bool has16bitD16HWBug() const;

bool hasBF16TransInsts() const { return HasBF16TransInsts; }

bool hasBF16ConversionInsts() const {
Expand Down
139 changes: 91 additions & 48 deletions llvm/lib/Target/AMDGPU/FLATInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1378,13 +1378,19 @@ let SubtargetPredicate = HasVmemPrefInsts in {
}

//===----------------------------------------------------------------------===//
// Flat Patterns
// Utilities
//===----------------------------------------------------------------------===//
class Mem_wrap<dag op, bit true16> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would probably call this class 'if_lo16' or 'extract_lo16'. The name Mem_wrap is too generic. This is a nice helper though, it simplifies this a lot.

dag ret = !if(true16, (EXTRACT_SUBREG op, lo16), op);
}

//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//
// Patterns for global loads with no offset.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16> : GCNPat <
(vt (node (FlatOffset i64:$vaddr, i32:$offset))),
(inst $vaddr, $offset)
Mem_wrap<(inst $vaddr, $offset), true16>.ret
>;

class FlatLoadPat_CPOL <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
Expand Down Expand Up @@ -1462,14 +1468,14 @@ class GlobalLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, Valu
(inst $saddr, $voffset, $offset, $cpol)
>;

class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16> : GCNPat <
(vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))),
(inst $vaddr, $offset)
Mem_wrap<(inst $vaddr, $offset), true16>.ret
>;

class FlatLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
class FlatLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16> : GCNPat <
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol))),
(inst $saddr, $voffset, $offset, $cpol)
Mem_wrap<(inst $saddr, $voffset, $offset, $cpol), true16>.ret
>;

class FlatLoadSignedPat_M0 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
Expand Down Expand Up @@ -1629,9 +1635,9 @@ multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt,
def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>;
}

class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16> : GCNPat <
(vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))),
(inst $vaddr, $offset)
Mem_wrap<(inst $vaddr, $offset), true16>.ret
>;

class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
Expand All @@ -1649,9 +1655,9 @@ class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType
(inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)
>;

class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16> : GCNPat <
(vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))),
(inst $saddr, $offset)
Mem_wrap<(inst $saddr, $offset), true16>.ret
>;

class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
Expand All @@ -1670,9 +1676,9 @@ class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
(inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;

class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit true16> : GCNPat <
(vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset, CPol:$cpol))),
(inst $vaddr, $saddr, $offset, $cpol)
Mem_wrap<(inst $vaddr, $saddr, $offset, $cpol), true16>.ret
>;

class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
Expand Down Expand Up @@ -1721,14 +1727,14 @@ multiclass GlobalStoreLDSPats<FLAT_Pseudo inst, SDPatternOperator node> {
}
}

multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
def : FlatLoadSignedPat <inst, node, vt> {
multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit isTrue16 = 0> {
def : FlatLoadSignedPat <inst, node, vt, isTrue16> {
let AddedComplexity = 10;
let SubtargetPredicate = inst.SubtargetPredicate;
let OtherPredicates = inst.OtherPredicates;
}

def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt, isTrue16> {
let AddedComplexity = 11;
let SubtargetPredicate = inst.SubtargetPredicate;
let OtherPredicates = inst.OtherPredicates;
Expand Down Expand Up @@ -1860,16 +1866,16 @@ multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt,
defm : GlobalFLATAtomicPats<inst, node, vt, data_vt, /* isIntr */ 1>;
}

multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
def : ScratchLoadSignedPat <inst, node, vt> {
multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit isTrue16 = 0> {
def : ScratchLoadSignedPat <inst, node, vt, isTrue16> {
let AddedComplexity = 25;
}

def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt, isTrue16> {
let AddedComplexity = 26;
}

def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> {
def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt, isTrue16> {
let SubtargetPredicate = HasFlatScratchSVSMode;
let AddedComplexity = 27;
}
Expand Down Expand Up @@ -1937,12 +1943,12 @@ multiclass ScratchFLATLoadPats_D16_t16<string inst, SDPatternOperator node, Valu
}
}

multiclass FlatLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
def : FlatLoadPat <inst, node, vt> {
multiclass FlatLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, bit isTrue16 = 0> {
def : FlatLoadPat <inst, node, vt, isTrue16> {
let OtherPredicates = [HasFlatAddressSpace];
}

def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt, isTrue16> {
let AddedComplexity = 9;
let SubtargetPredicate = HasFlatGVSMode;
}
Expand Down Expand Up @@ -2018,6 +2024,13 @@ let True16Predicate = p in {
}

let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts in {
defm : FlatStorePats_t16 <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
defm : FlatStorePats_t16 <FLAT_STORE_SHORT, store_flat, i16>;
def : FlatStorePat <FLAT_STORE_BYTE_t16, atomic_store_8_flat, i16>;
def : FlatStorePat <FLAT_STORE_SHORT_t16, atomic_store_16_flat, i16>;
} // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts

let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace, NotHas16bitD16HWBug], True16Predicate = UseRealTrue16Insts in {
defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, extloadi8_flat, i16>;
defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, zextloadi8_flat, i16>;
defm : FlatLoadPats_D16_t16<FLAT_LOAD_SBYTE_D16_t16, sextloadi8_flat, i16>;
Expand All @@ -2026,11 +2039,18 @@ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predi
defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, atomic_load_zext_8_flat, i16>;
defm : FlatLoadPats_D16_t16<FLAT_LOAD_SHORT_D16_t16, atomic_load_nonext_16_flat, i16>;
defm : FlatLoadPats_D16_t16<FLAT_LOAD_SBYTE_D16_t16, atomic_load_sext_8_flat, i16>;
defm : FlatStorePats_t16 <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
defm : FlatStorePats_t16 <FLAT_STORE_SHORT, store_flat, i16>;
def : FlatStorePat <FLAT_STORE_BYTE_t16, atomic_store_8_flat, i16>;
def : FlatStorePat <FLAT_STORE_SHORT_t16, atomic_store_16_flat, i16>;
} // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts
}

let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace, Has16bitD16HWBug], True16Predicate = UseRealTrue16Insts in {
defm : FlatLoadPats <FLAT_LOAD_UBYTE, extloadi8_flat, i16, /*true16*/1>;
defm : FlatLoadPats <FLAT_LOAD_UBYTE, zextloadi8_flat, i16, /*true16*/1>;
defm : FlatLoadPats <FLAT_LOAD_SBYTE, sextloadi8_flat, i16, /*true16*/1>;
defm : FlatLoadPats <FLAT_LOAD_USHORT, load_flat, i16, /*true16*/1>;
defm : FlatLoadPats <FLAT_LOAD_UBYTE, atomic_load_aext_8_flat, i16, /*true16*/1>;
defm : FlatLoadPats <FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i16, /*true16*/1>;
defm : FlatLoadPats <FLAT_LOAD_USHORT, atomic_load_nonext_16_flat, i16, /*true16*/1>;
defm : FlatLoadPats <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i16, /*true16*/1>;
}

defm : FlatLoadPats <FLAT_LOAD_DWORD, atomic_load_nonext_32_flat, i32>;
defm : FlatLoadPats <FLAT_LOAD_DWORDX2, atomic_load_nonext_64_flat, i64>;
Expand Down Expand Up @@ -2161,22 +2181,37 @@ defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_nonext_16_global, i16
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i16>;
}

let OtherPredicates = [D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in {
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", extloadi8_global, i16>;
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", zextloadi8_global, i16>;
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", sextloadi8_global, i16>;
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", load_global, i16>;
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_aext_8_global, i16>;
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_zext_8_global, i16>;
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", atomic_load_sext_8_global, i16>;
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_nonext_16_global, i16>;
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_zext_16_global, i16>;
defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", truncstorei8_global, i16>;
defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", store_global, i16>;
defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", atomic_store_8_global, i16>;
defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", atomic_store_16_global, i16>;
let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in {
defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", truncstorei8_global, i16>;
defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", store_global, i16>;
defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", atomic_store_8_global, i16>;
defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", atomic_store_16_global, i16>;
} // end OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts

let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits, NotHas16bitD16HWBug], True16Predicate = UseRealTrue16Insts in {
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", extloadi8_global, i16>;
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", zextloadi8_global, i16>;
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", sextloadi8_global, i16>;
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", load_global, i16>;
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_aext_8_global, i16>;
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_zext_8_global, i16>;
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", atomic_load_sext_8_global, i16>;
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_nonext_16_global, i16>;
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_zext_16_global, i16>;
}

let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits, Has16bitD16HWBug], True16Predicate = UseRealTrue16Insts in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16, /*true16*/1>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16, /*true16*/1>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16, /*true16*/1>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16, /*true16*/1>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_aext_8_global, i16, /*true16*/1>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_zext_8_global, i16, /*true16*/1>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, atomic_load_sext_8_global, i16, /*true16*/1>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_nonext_16_global, i16, /*true16*/1>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i16, /*true16*/1>;
}

foreach vt = Reg32Types.types in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>;
Expand Down Expand Up @@ -2386,12 +2421,20 @@ defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i16>;
}

let True16Predicate = UseRealTrue16Insts in {
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", extloadi8_private, i16>;
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", zextloadi8_private, i16>;
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SBYTE_D16", sextloadi8_private, i16>;
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SHORT_D16", load_private, i16>;
defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_SHORT", store_private, i16>;
defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_BYTE", truncstorei8_private, i16>;
let OtherPredicates = [NotHas16bitD16HWBug] in {
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", extloadi8_private, i16>;
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", zextloadi8_private, i16>;
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SBYTE_D16", sextloadi8_private, i16>;
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SHORT_D16", load_private, i16>;
}
let OtherPredicates = [Has16bitD16HWBug] in {
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16, /*true16*/1>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16, /*true16*/1>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16, /*true16*/1>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private, i16, /*true16*/1>;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could use a whitespace after this bracket.

defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_SHORT", store_private, i16>;
defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_BYTE", truncstorei8_private, i16>;
} // End True16Predicate = UseRealTrue16Insts

foreach vt = Reg32Types.types in {
Expand Down
Loading
Loading