@@ -11,7 +11,8 @@ let WantsRoot = true in {
11
11
// Address-selection ComplexPatterns. Each names the C++ selector function
// (third argument) and the number of sub-operands it produces (second
// argument) for the patterns that reference it.
def GlobalOffset : ComplexPattern<iPTR, 2, "SelectGlobalOffset", [], [], -10>;
12
12
def ScratchOffset : ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [], -10>;
13
13
// Changed in this diff: GlobalSAddr grows from 3 to 4 sub-operands. The
// patterns in this file that use it now bind the extra one as CPol:$cpol.
// GlobalSAddrGLC is new, also 4 operands, with its own selector; it is used
// by the "_SADDR_RTN" (returning atomic) patterns.
// NOTE(review): presumably the GLC selector forces a cache-policy bit for
// returning atomics -- confirm in SelectGlobalSAddrGLC.
14
- def GlobalSAddr : ComplexPattern<iPTR, 3, "SelectGlobalSAddr", [], [], -10>;
14
+ def GlobalSAddr : ComplexPattern<iPTR, 4, "SelectGlobalSAddr", [], [], -10>;
15
+ def GlobalSAddrGLC : ComplexPattern<iPTR, 4, "SelectGlobalSAddrGLC", [], [], -10>;
15
16
def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [], -10>;
16
17
def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [], -10>;
17
18
}
@@ -1252,13 +1253,13 @@ class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueTyp
1252
1253
>;
1253
1254
1254
1255
// Selection pattern for a load `node` that takes an extra vt:$in operand,
// addressed through GlobalSAddr (saddr + voffset + offset). $in is forwarded
// to `inst` as its last operand.
// Changed in this diff: the fourth instruction operand used to be the
// constant (i32 0); it is now the CPol:$cpol value matched by GlobalSAddr.
class FlatLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1255
- (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$in)),
1256
- (inst $saddr, $voffset, $offset, (i32 0) , $in)
1256
+ (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol ), vt:$in)),
1257
+ (inst $saddr, $voffset, $offset, $cpol , $in)
1257
1258
>;
1258
1259
1259
1260
// Same addressing form as FlatLoadSaddrPat_D16 but without the $in operand:
// matches `node` over a GlobalSAddr address and emits `inst` with
// saddr, voffset, offset and a cache-policy operand.
// Changed in this diff: the hard-coded (i32 0) cache-policy operand is
// replaced by the CPol:$cpol value matched by GlobalSAddr.
class FlatLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1260
- (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))),
1261
- (inst $saddr, $voffset, $offset, (i32 0) )
1261
+ (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol ))),
1262
+ (inst $saddr, $voffset, $offset, $cpol )
1262
1263
>;
1263
1264
1264
1265
class GlobalLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1272,26 +1273,26 @@ class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
1272
1273
>;
1273
1274
1274
1275
// Selection pattern for a plain load `node` of type `vt` addressed through
// GlobalSAddr; emits `inst` with saddr, voffset, offset and a cache-policy
// operand.
// Changed in this diff: the literal 0 previously passed as the fourth
// operand becomes the CPol:$cpol value matched by GlobalSAddr.
class FlatLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1275
- (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))),
1276
- (inst $saddr, $voffset, $offset, 0 )
1276
+ (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol ))),
1277
+ (inst $saddr, $voffset, $offset, $cpol )
1277
1278
>;
1278
1279
1279
1280
// Selection pattern for a store `node` of vt:$data to a GlobalSAddr address.
// The instruction operand order is voffset, data, saddr, offset; the data
// register class is chosen by getVregSrcForVT<vt>.
// Changed in this diff: the output previously stopped at $offset; it now
// also passes the CPol:$cpol value matched by GlobalSAddr as a trailing
// operand.
class FlatStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
1280
1281
ValueType vt> : GCNPat <
1281
- (node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset)),
1282
- (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
1282
+ (node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol )),
1283
+ (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset, $cpol )
1283
1284
>;
1284
1285
1285
// Selection pattern for an atomic `node` that returns a value of type `vt`
// and takes data_vt:$data, addressed in the saddr + voffset + offset form.
// Changed in this diff:
//   * the class is renamed from GlobalAtomicSaddrPat to FlatAtomicSaddrPat;
//   * a new `ComplexPattern pat` parameter replaces the fixed GlobalSAddr,
//     so each user picks the address selector (the visible users pass
//     GlobalSAddr or GlobalSAddrGLC);
//   * the CPol:$cpol value matched by `pat` is appended to the instruction
//     operands.
// Existing users of the old name must be updated together with this rename.
- class GlobalAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
1286
- ValueType vt, ValueType data_vt = vt> : GCNPat <
1287
- (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), data_vt:$data)),
1288
- (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset)
1286
+ class FlatAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ComplexPattern pat ,
1287
+ ValueType vt, ValueType data_vt = vt> : GCNPat <
1288
+ (vt (node (pat (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol ), data_vt:$data)),
1289
+ (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset, $cpol )
1289
1290
>;
1290
1291
1291
1292
// Selection pattern for an atomic `node` matched without a result type
// (the source pattern is not wrapped in `(vt ...)`), addressed through
// GlobalSAddr. Instruction operand order is voffset, data, saddr, offset.
// Changed in this diff: the CPol:$cpol value matched by GlobalSAddr is
// appended to the instruction operands.
class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
1292
1293
ValueType vt> : GCNPat <
1293
- (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$data),
1294
- (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
1294
+ (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol ), vt:$data),
1295
+ (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset, $cpol )
1295
1296
>;
1296
1297
1297
1298
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1320,6 +1321,12 @@ multiclass FlatAtomicNoRtnPatBase <string inst, string node, ValueType vt,
1320
1321
let AddedComplexity = 1 in
1321
1322
def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
1322
1323
(!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
1324
+
1325
+ def : FlatAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<SDPatternOperator>(node),
1326
+ GlobalSAddr, vt, data_vt> {
1327
+ let AddedComplexity = 9;
1328
+ let SubtargetPredicate = HasFlatGVSMode;
1329
+ }
1323
1330
}
1324
1331
1325
1332
multiclass FlatAtomicNoRtnPatWithAddrSpace<string inst, string node, string addrSpaceSuffix,
@@ -1338,6 +1345,11 @@ multiclass FlatAtomicRtnPatBase <string inst, string node, ValueType vt,
1338
1345
1339
1346
def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
1340
1347
(!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
1348
+
1349
+ def : FlatAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, GlobalSAddrGLC, vt, data_vt> {
1350
+ let AddedComplexity = 8;
1351
+ let SubtargetPredicate = HasFlatGVSMode;
1352
+ }
1341
1353
}
1342
1354
1343
1355
multiclass FlatAtomicRtnPatWithAddrSpace<string inst, string intr, string addrSpaceSuffix,
@@ -1507,7 +1519,8 @@ multiclass GlobalFLATAtomicPatsNoRtnBase<string inst, string node, ValueType vt,
1507
1519
def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), !cast<SDPatternOperator>(node), vt, data_vt>;
1508
1520
1509
1521
let AddedComplexity = 13 in
1510
- def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<SDPatternOperator>(node), vt, data_vt>;
1522
+ def : FlatAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<SDPatternOperator>(node),
1523
+ GlobalSAddr, vt, data_vt>;
1511
1524
}
1512
1525
1513
1526
multiclass GlobalFLATAtomicPatsRtnBase<string inst, string node, ValueType vt,
@@ -1518,7 +1531,7 @@ multiclass GlobalFLATAtomicPatsRtnBase<string inst, string node, ValueType vt,
1518
1531
def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>;
1519
1532
1520
1533
let AddedComplexity = 12 in
1521
- def : GlobalAtomicSaddrPat <!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, vt, data_vt>;
1534
+ def : FlatAtomicSaddrPat <!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, GlobalSAddrGLC , vt, data_vt>;
1522
1535
}
1523
1536
1524
1537
multiclass GlobalFLATAtomicPatsNoRtn<string inst, string node, ValueType vt,
@@ -1797,12 +1810,13 @@ defm : FlatAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_"#as, f64>;
1797
1810
defm : FlatStorePats <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
1798
1811
defm : FlatStorePats <FLAT_STORE_SHORT, store_flat, i16>;
1799
1812
1800
// FLAT_ATOMIC_COND_SUB_U32 patterns for the flat address space.
// Removed lines: both defm's sat inside `let SubtargetPredicate =
// isGFX12Plus in { ... }`, which was itself nested in the
// OtherPredicates = [HasFlatAddressSpace] scope; the no-return variant also
// set OtherPredicates = [HasAtomicCSubNoRtnInsts].
// Added lines: the HasFlatAddressSpace scope is closed first, and the GFX12
// requirement is expressed through OtherPredicates instead of
// SubtargetPredicate.
// NOTE(review): a nested `let OtherPredicates = [...]` replaces the outer
// list rather than extending it, so confirm that dropping
// HasFlatAddressSpace here (and for whatever follows the moved closing
// brace) is intended.
- let SubtargetPredicate = isGFX12Plus in {
1801
- defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32 >;
1813
+ } // End OtherPredicates = [HasFlatAddressSpace]
1802
1814
1803
- let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
1804
- defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
1805
- }
1815
+ let OtherPredicates = [isGFX12Plus] in
1816
+ defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
1817
+
1818
+ let OtherPredicates = [isGFX12Plus, HasAtomicCSubNoRtnInsts] in
1819
+ defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
1806
1820
1807
1821
let OtherPredicates = [HasD16LoadStore] in {
1808
1822
defm : FlatStorePats <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
@@ -1826,8 +1840,6 @@ defm : FlatLoadPats_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
1826
1840
defm : FlatLoadPats_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
1827
1841
}
1828
1842
1829
- } // End OtherPredicates = [HasFlatAddressSpace]
1830
-
1831
1843
let OtherPredicates = [HasFlatGlobalInsts] in {
1832
1844
1833
1845
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_aext_8_global, i32>;
0 commit comments