Skip to content

Commit 23d3ee2

Browse files
committed
[AMDGPU] Use fake16 load/store with +real-true16 and sram-ecc
Fixes: SC1-6072
1 parent 03cb514 commit 23d3ee2

14 files changed

+3811
-1276
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2597,6 +2597,10 @@ def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() &&
25972597
// FIXME When we default to RealTrue16 instead of Fake, change the line as follows.
25982598
// AssemblerPredicate<(all_of FeatureTrue16BitInsts, (not FeatureRealTrue16Insts))>;
25992599

2600+
def UseLoadTrue16WithSramECC : True16PredicateClass<"Subtarget->useRealTrue16Insts() && "
2601+
"!Subtarget->d16PreservesUnusedBits()">,
2602+
AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureSRAMECC)>;
2603+
26002604
def HasD16Writes32BitVgpr: Predicate<"Subtarget->hasD16Writes32BitVgpr()">,
26012605
AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts, FeatureD16Writes32BitVgpr)>;
26022606
def NotHasD16Writes32BitVgpr: Predicate<"!Subtarget->hasD16Writes32BitVgpr()">,

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -951,6 +951,11 @@ class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat
951951
(inst $ptr, Offset:$offset, (i1 gds))
952952
>;
953953

954+
class DSReadPat_t16 <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat <
955+
(vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
956+
(EXTRACT_SUBREG (inst $ptr, Offset:$offset, (i1 gds)), lo16)
957+
>;
958+
954959
multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
955960

956961
let OtherPredicates = [LDSRequiresM0Init] in {
@@ -968,13 +973,14 @@ multiclass DSReadPat_t16<DS_Pseudo inst, ValueType vt, string frag> {
968973
def : DSReadPat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
969974
}
970975

971-
let OtherPredicates = [NotLDSRequiresM0Init] in {
972-
let True16Predicate = NotUseRealTrue16Insts in {
973-
def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
974-
}
975-
let True16Predicate = UseRealTrue16Insts in {
976-
def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>;
977-
}
976+
let OtherPredicates = [NotLDSRequiresM0Init], True16Predicate = NotUseRealTrue16Insts in {
977+
def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
978+
}
979+
let OtherPredicates = [NotLDSRequiresM0Init, D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in {
980+
def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>;
981+
}
982+
let OtherPredicates = [NotLDSRequiresM0Init], True16Predicate = UseLoadTrue16WithSramECC in {
983+
def : DSReadPat_t16<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
978984
}
979985
}
980986

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 123 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1383,6 +1383,11 @@ class FlatLoadPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType v
13831383
(inst $vaddr, $offset, (i32 0))
13841384
>;
13851385

1386+
class FlatLoadPat_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1387+
(vt (node (FlatOffset i64:$vaddr, i32:$offset))),
1388+
(EXTRACT_SUBREG (inst $vaddr, $offset), lo16)
1389+
>;
1390+
13861391
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
13871392
(node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in),
13881393
(inst $vaddr, $offset, 0, $in)
@@ -1393,6 +1398,11 @@ class FlatSignedLoadPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, Value
13931398
(inst $vaddr, $offset, (i32 0))
13941399
>;
13951400

1401+
class FlatSignedLoadPat_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1402+
(vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))),
1403+
(EXTRACT_SUBREG (inst $vaddr, $offset, (i32 0)), lo16)
1404+
>;
1405+
13961406
class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
13971407
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol), vt:$in)),
13981408
(inst $saddr, $voffset, $offset, $cpol, $in)
@@ -1408,6 +1418,11 @@ class FlatLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueT
14081418
(inst $saddr, $voffset, $offset, $cpol)
14091419
>;
14101420

1421+
class FlatLoadSaddrPat_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1422+
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol))),
1423+
(EXTRACT_SUBREG (inst $saddr, $voffset, $offset, $cpol), lo16)
1424+
>;
1425+
14111426
class FlatLoadLDSSignedPat_M0 <FLAT_Pseudo inst, SDPatternOperator node> : GCNPat <
14121427
(node (i64 VReg_64:$vaddr), (i32 VGPR_32:$dsaddr), (i32 timm:$offset), (i32 timm:$cpol), M0),
14131428
(inst $dsaddr, $vaddr, $offset, $cpol)
@@ -1443,6 +1458,11 @@ class GlobalLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, Valu
14431458
(inst $saddr, $voffset, $offset, $cpol)
14441459
>;
14451460

1461+
class GlobalLoadSaddrPat_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1462+
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol))),
1463+
(EXTRACT_SUBREG (inst $saddr, $voffset, $offset, $cpol), lo16)
1464+
>;
1465+
14461466
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
14471467
(vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))),
14481468
(inst $vaddr, $offset)
@@ -1625,6 +1645,11 @@ class ScratchLoadSignedPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, Va
16251645
(inst $vaddr, $offset, 0)
16261646
>;
16271647

1648+
class ScratchLoadSignedPat_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1649+
(vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))),
1650+
(EXTRACT_SUBREG (inst $vaddr, $offset), lo16)
1651+
>;
1652+
16281653
class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
16291654
(node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset)),
16301655
(inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)
@@ -1645,6 +1670,11 @@ class ScratchLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, Val
16451670
(inst $saddr, $offset, 0)
16461671
>;
16471672

1673+
class ScratchLoadSaddrPat_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1674+
(vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))),
1675+
(EXTRACT_SUBREG (inst $saddr, $offset), lo16)
1676+
>;
1677+
16481678
class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
16491679
ValueType vt> : GCNPat <
16501680
(node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset)),
@@ -1672,6 +1702,11 @@ class ScratchLoadSVaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, Va
16721702
(inst $vaddr, $saddr, $offset, $cpol)
16731703
>;
16741704

1705+
class ScratchLoadSVaddrPat_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1706+
(vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset, CPol:$cpol))),
1707+
(EXTRACT_SUBREG (inst $vaddr, $saddr, $offset, $cpol), lo16)
1708+
>;
1709+
16751710
multiclass GlobalLoadLDSPats_M0<FLAT_Pseudo inst, SDPatternOperator node> {
16761711
def : FlatLoadLDSSignedPat_M0 <inst, node> {
16771712
let AddedComplexity = 10;
@@ -1764,6 +1799,16 @@ multiclass GlobalFLATLoadPats_D16_t16<string inst, SDPatternOperator node, Value
17641799
}
17651800
}
17661801

1802+
multiclass GlobalFLATLoadPats_t16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1803+
def : FlatSignedLoadPat_t16<inst, node, vt> {
1804+
let AddedComplexity = 10;
1805+
}
1806+
1807+
def : GlobalLoadSaddrPat_t16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1808+
let AddedComplexity = 11;
1809+
}
1810+
}
1811+
17671812
multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
17681813
ValueType vt> {
17691814
def : FlatStoreSignedPat <inst, node, vt> {
@@ -1872,8 +1917,8 @@ multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
18721917
}
18731918
}
18741919

1875-
multiclass ScratchFLATStorePats_t16<string inst, SDPatternOperator node,
1876-
ValueType vt> {
1920+
multiclass ScratchFLATStorePats_D16_t16<string inst, SDPatternOperator node,
1921+
ValueType vt> {
18771922
def : ScratchStoreSignedPat <!cast<FLAT_Pseudo>(inst#"_t16"), node, vt> {
18781923
let AddedComplexity = 25;
18791924
}
@@ -1918,6 +1963,21 @@ multiclass ScratchFLATLoadPats_D16_t16<string inst, SDPatternOperator node, Valu
19181963
}
19191964
}
19201965

1966+
multiclass ScratchFLATLoadPats_t16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1967+
def : ScratchLoadSignedPat_t16 <inst, node, vt> {
1968+
let AddedComplexity = 25;
1969+
}
1970+
1971+
def : ScratchLoadSaddrPat_t16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1972+
let AddedComplexity = 26;
1973+
}
1974+
1975+
def : ScratchLoadSVaddrPat_t16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> {
1976+
let SubtargetPredicate = HasFlatScratchSVSMode;
1977+
let AddedComplexity = 27;
1978+
}
1979+
}
1980+
19211981
multiclass FlatLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
19221982
def : FlatLoadPat <inst, node, vt> {
19231983
let OtherPredicates = [HasFlatAddressSpace];
@@ -1947,6 +2007,17 @@ multiclass FlatLoadPats_D16_t16<FLAT_Pseudo inst, SDPatternOperator node, ValueT
19472007
}
19482008
}
19492009

2010+
multiclass FlatLoadPats_t16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
2011+
def : FlatLoadPat_t16 <inst, node, vt> {
2012+
let OtherPredicates = [HasFlatAddressSpace];
2013+
}
2014+
2015+
def : FlatLoadSaddrPat_t16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
2016+
let AddedComplexity = 9;
2017+
let SubtargetPredicate = HasFlatGVSMode;
2018+
}
2019+
}
2020+
19502021
multiclass FlatStorePats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
19512022
def : FlatStorePat <inst, node, vt> {
19522023
let OtherPredicates = [HasFlatAddressSpace];
@@ -1997,6 +2068,17 @@ let True16Predicate = NotUseRealTrue16Insts in {
19972068
defm : FlatStorePats <FLAT_STORE_SHORT, atomic_store_16_flat, i16>;
19982069
}
19992070

2071+
let True16Predicate = UseLoadTrue16WithSramECC in {
2072+
defm : FlatLoadPats_t16 <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
2073+
defm : FlatLoadPats_t16 <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
2074+
defm : FlatLoadPats_t16 <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
2075+
defm : FlatLoadPats_t16 <FLAT_LOAD_USHORT, load_flat, i16>;
2076+
defm : FlatLoadPats_t16 <FLAT_LOAD_UBYTE, atomic_load_aext_8_flat, i16>;
2077+
defm : FlatLoadPats_t16 <FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i16>;
2078+
defm : FlatLoadPats_t16 <FLAT_LOAD_USHORT, atomic_load_nonext_16_flat, i16>;
2079+
defm : FlatLoadPats_t16 <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i16>;
2080+
}
2081+
20002082
let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts in {
20012083
defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, extloadi8_flat, i16>;
20022084
defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, zextloadi8_flat, i16>;
@@ -2006,11 +2088,14 @@ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predi
20062088
defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, atomic_load_zext_8_flat, i16>;
20072089
defm : FlatLoadPats_D16_t16<FLAT_LOAD_SHORT_D16_t16, atomic_load_nonext_16_flat, i16>;
20082090
defm : FlatLoadPats_D16_t16<FLAT_LOAD_SBYTE_D16_t16, atomic_load_sext_8_flat, i16>;
2091+
} // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts
2092+
2093+
let OtherPredicates = [D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in {
20092094
defm : FlatStorePats_t16 <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
20102095
defm : FlatStorePats_t16 <FLAT_STORE_SHORT, store_flat, i16>;
20112096
defm : FlatStorePats_t16 <FLAT_STORE_BYTE, atomic_store_8_flat, i16>;
20122097
defm : FlatStorePats_t16 <FLAT_STORE_SHORT, atomic_store_16_flat, i16>;
2013-
} // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts
2098+
}
20142099

20152100
defm : FlatLoadPats <FLAT_LOAD_DWORD, atomic_load_nonext_32_flat, i32>;
20162101
defm : FlatLoadPats <FLAT_LOAD_DWORDX2, atomic_load_nonext_64_flat, i64>;
@@ -2140,6 +2225,20 @@ defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_nonext_16_global, i16
21402225
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i16>;
21412226
}
21422227

2228+
let True16Predicate = UseLoadTrue16WithSramECC in {
2229+
defm : GlobalFLATLoadPats_t16 <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
2230+
defm : GlobalFLATLoadPats_t16 <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
2231+
defm : GlobalFLATLoadPats_t16 <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
2232+
defm : GlobalFLATLoadPats_t16 <GLOBAL_LOAD_SSHORT, atomic_load_sext_16_global, i32>;
2233+
defm : GlobalFLATLoadPats_t16 <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i32>;
2234+
defm : GlobalFLATLoadPats_t16 <GLOBAL_LOAD_USHORT, load_global, i16>;
2235+
defm : GlobalFLATLoadPats_t16 <GLOBAL_LOAD_UBYTE, atomic_load_aext_8_global, i16>;
2236+
defm : GlobalFLATLoadPats_t16 <GLOBAL_LOAD_UBYTE, atomic_load_zext_8_global, i16>;
2237+
defm : GlobalFLATLoadPats_t16 <GLOBAL_LOAD_SBYTE, atomic_load_sext_8_global, i16>;
2238+
defm : GlobalFLATLoadPats_t16 <GLOBAL_LOAD_USHORT, atomic_load_nonext_16_global, i16>;
2239+
defm : GlobalFLATLoadPats_t16 <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i16>;
2240+
}
2241+
21432242
let OtherPredicates = [D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in {
21442243
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", extloadi8_global, i16>;
21452244
defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", zextloadi8_global, i16>;
@@ -2192,6 +2291,13 @@ defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>;
21922291
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i16>;
21932292
}
21942293

2294+
let OtherPredicates = [HasFlatGlobalInsts], True16Predicate = UseRealTrue16Insts in {
2295+
defm : GlobalFLATStorePats_D16_t16 <"GLOBAL_STORE_BYTE", truncstorei8_global, i16>;
2296+
defm : GlobalFLATStorePats_D16_t16 <"GLOBAL_STORE_SHORT", store_global, i16>;
2297+
defm : GlobalFLATStorePats_D16_t16 <"GLOBAL_STORE_BYTE", atomic_store_8_global, i16>;
2298+
defm : GlobalFLATStorePats_D16_t16 <"GLOBAL_STORE_SHORT", atomic_store_16_global, i16>;
2299+
}
2300+
21952301
let OtherPredicates = [HasD16LoadStore] in {
21962302
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
21972303
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>;
@@ -2362,14 +2468,24 @@ defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, store_private, i16>;
23622468
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i16>;
23632469
}
23642470

2365-
let True16Predicate = UseRealTrue16Insts in {
2471+
let True16Predicate = UseLoadTrue16WithSramECC in {
2472+
defm : ScratchFLATLoadPats_t16 <SCRATCH_LOAD_UBYTE, extloadi8_private, i16>;
2473+
defm : ScratchFLATLoadPats_t16 <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16>;
2474+
defm : ScratchFLATLoadPats_t16 <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16>;
2475+
defm : ScratchFLATLoadPats_t16 <SCRATCH_LOAD_USHORT, load_private, i16>;
2476+
}
2477+
2478+
let OtherPredicates = [D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in {
23662479
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", extloadi8_private, i16>;
23672480
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", zextloadi8_private, i16>;
23682481
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SBYTE_D16", sextloadi8_private, i16>;
23692482
defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SHORT_D16", load_private, i16>;
2370-
defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_SHORT", store_private, i16>;
2371-
defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_BYTE", truncstorei8_private, i16>;
2372-
} // End True16Predicate = UseRealTrue16Insts
2483+
} // End OtherPredicates = [D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts
2484+
2485+
let True16Predicate = UseRealTrue16Insts in {
2486+
defm : ScratchFLATStorePats_D16_t16 <"SCRATCH_STORE_SHORT", store_private, i16>;
2487+
defm : ScratchFLATStorePats_D16_t16 <"SCRATCH_STORE_BYTE", truncstorei8_private, i16>;
2488+
}
23732489

23742490
foreach vt = Reg32Types.types in {
23752491
defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORD, load_private, vt>;

0 commit comments

Comments
 (0)