Skip to content

Commit cfa918b

Browse files
authored
[AMDGPU] Select flat GVS atomics on gfx1250 (#149554)
1 parent 13f7786 commit cfa918b

File tree

7 files changed

+6120
-26
lines changed

7 files changed

+6120
-26
lines changed

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,9 @@ def gi_global_offset :
137137
def gi_global_saddr :
138138
GIComplexOperandMatcher<s64, "selectGlobalSAddr">,
139139
GIComplexPatternEquiv<GlobalSAddr>;
140+
// GlobalISel equivalent of the GlobalSAddrGLC complex pattern: an s64 operand
// matched by the C++ selector function named "selectGlobalSAddrGLC".
def gi_global_saddr_glc :
141+
GIComplexOperandMatcher<s64, "selectGlobalSAddrGLC">,
142+
GIComplexPatternEquiv<GlobalSAddrGLC>;
140143

141144
def gi_mubuf_scratch_offset :
142145
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffset">,

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1968,6 +1968,29 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
19681968
return true;
19691969
}
19701970

1971+
// Overload of SelectGlobalSAddr that additionally fills in a CPol operand.
// Forwards to the five-argument SelectGlobalSAddr to compute SAddr, VOffset
// and Offset, and returns false when that match fails. On success CPol is set
// to an i32 target constant with value 0 and the function returns true.
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,
1972+
SDValue &SAddr, SDValue &VOffset,
1973+
SDValue &Offset,
1974+
SDValue &CPol) const {
1975+
if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset))
1976+
return false;
1977+
1978+
CPol = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1979+
return true;
1980+
}
1981+
1982+
// Same matching as the five-argument SelectGlobalSAddr, but also produces a
// CPol operand whose value is AMDGPU::CPol::GLC (as an i32 target constant).
// Returns false, leaving CPol unassigned, when the underlying address match
// fails.
bool AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC(SDNode *N, SDValue Addr,
1983+
SDValue &SAddr, SDValue &VOffset,
1984+
SDValue &Offset,
1985+
SDValue &CPol) const {
1986+
if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset))
1987+
return false;
1988+
1989+
unsigned CPolVal = AMDGPU::CPol::GLC;
1990+
CPol = CurDAG->getTargetConstant(CPolVal, SDLoc(), MVT::i32);
1991+
return true;
1992+
}
1993+
19711994
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
19721995
if (auto *FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
19731996
SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,12 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
163163
SDValue &Offset) const;
164164
bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
165165
SDValue &VOffset, SDValue &Offset) const;
166+
bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
167+
SDValue &VOffset, SDValue &Offset,
168+
SDValue &CPol) const;
169+
bool SelectGlobalSAddrGLC(SDNode *N, SDValue Addr, SDValue &SAddr,
170+
SDValue &VOffset, SDValue &Offset,
171+
SDValue &CPol) const;
166172
bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
167173
SDValue &Offset) const;
168174
bool checkFlatScratchSVSSwizzleBug(SDValue VAddr, SDValue SAddr,

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5485,7 +5485,8 @@ AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
54855485

54865486
// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
54875487
InstructionSelector::ComplexRendererFns
5488-
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
5488+
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root,
5489+
unsigned CPolBits) const {
54895490
Register Addr = Root.getReg();
54905491
Register PtrBase;
54915492
int64_t ConstOffset;
@@ -5529,6 +5530,7 @@ AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
55295530
MIB.addReg(HighBits);
55305531
}, // voffset
55315532
[=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); },
5533+
[=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); },
55325534
}};
55335535
}
55345536
}
@@ -5568,6 +5570,9 @@ AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
55685570
},
55695571
[=](MachineInstrBuilder &MIB) { // offset
55705572
MIB.addImm(ImmOffset);
5573+
},
5574+
[=](MachineInstrBuilder &MIB) { // cpol
5575+
MIB.addImm(CPolBits);
55715576
}}};
55725577
}
55735578
}
@@ -5591,10 +5596,21 @@ AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
55915596
return {{
55925597
[=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); }, // saddr
55935598
[=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); }, // voffset
5594-
[=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
5599+
[=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }, // offset
5600+
[=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); } // cpol
55955601
}};
55965602
}
55975603

5604+
// Single-argument entry point: delegates to the two-argument
// selectGlobalSAddr with CPolBits = 0.
InstructionSelector::ComplexRendererFns
5605+
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
5606+
return selectGlobalSAddr(Root, 0);
5607+
}
5608+
5609+
// Variant of selectGlobalSAddr that delegates to the two-argument overload
// with CPolBits = AMDGPU::CPol::GLC.
InstructionSelector::ComplexRendererFns
5610+
AMDGPUInstructionSelector::selectGlobalSAddrGLC(MachineOperand &Root) const {
5611+
return selectGlobalSAddr(Root, AMDGPU::CPol::GLC);
5612+
}
5613+
55985614
InstructionSelector::ComplexRendererFns
55995615
AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
56005616
Register Addr = Root.getReg();

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,8 +253,12 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
253253
InstructionSelector::ComplexRendererFns
254254
selectScratchOffset(MachineOperand &Root) const;
255255

256+
InstructionSelector::ComplexRendererFns
257+
selectGlobalSAddr(MachineOperand &Root, unsigned CPolBits) const;
256258
InstructionSelector::ComplexRendererFns
257259
selectGlobalSAddr(MachineOperand &Root) const;
260+
InstructionSelector::ComplexRendererFns
261+
selectGlobalSAddrGLC(MachineOperand &Root) const;
258262

259263
InstructionSelector::ComplexRendererFns
260264
selectScratchSAddr(MachineOperand &Root) const;

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 36 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ let WantsRoot = true in {
1111
def GlobalOffset : ComplexPattern<iPTR, 2, "SelectGlobalOffset", [], [], -10>;
1212
def ScratchOffset : ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [], -10>;
1313

14-
def GlobalSAddr : ComplexPattern<iPTR, 3, "SelectGlobalSAddr", [], [], -10>;
14+
def GlobalSAddr : ComplexPattern<iPTR, 4, "SelectGlobalSAddr", [], [], -10>;
15+
def GlobalSAddrGLC : ComplexPattern<iPTR, 4, "SelectGlobalSAddrGLC", [], [], -10>;
1516
def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [], -10>;
1617
def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [], -10>;
1718
}
@@ -1252,13 +1253,13 @@ class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueTyp
12521253
>;
12531254

12541255
class FlatLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1255-
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$in)),
1256-
(inst $saddr, $voffset, $offset, (i32 0), $in)
1256+
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol), vt:$in)),
1257+
(inst $saddr, $voffset, $offset, $cpol, $in)
12571258
>;
12581259

12591260
class FlatLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1260-
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))),
1261-
(inst $saddr, $voffset, $offset, (i32 0))
1261+
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol))),
1262+
(inst $saddr, $voffset, $offset, $cpol)
12621263
>;
12631264

12641265
class GlobalLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1272,26 +1273,26 @@ class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
12721273
>;
12731274

12741275
class FlatLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1275-
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))),
1276-
(inst $saddr, $voffset, $offset, 0)
1276+
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol))),
1277+
(inst $saddr, $voffset, $offset, $cpol)
12771278
>;
12781279

12791280
class FlatStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
12801281
ValueType vt> : GCNPat <
1281-
(node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset)),
1282-
(inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
1282+
(node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol)),
1283+
(inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset, $cpol)
12831284
>;
12841285

1285-
class GlobalAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
1286-
ValueType vt, ValueType data_vt = vt> : GCNPat <
1287-
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), data_vt:$data)),
1288-
(inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset)
1286+
class FlatAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ComplexPattern pat,
1287+
ValueType vt, ValueType data_vt = vt> : GCNPat <
1288+
(vt (node (pat (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol), data_vt:$data)),
1289+
(inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset, $cpol)
12891290
>;
12901291

12911292
class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
12921293
ValueType vt> : GCNPat <
1293-
(node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$data),
1294-
(inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
1294+
(node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol), vt:$data),
1295+
(inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset, $cpol)
12951296
>;
12961297

12971298
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1320,6 +1321,12 @@ multiclass FlatAtomicNoRtnPatBase <string inst, string node, ValueType vt,
13201321
let AddedComplexity = 1 in
13211322
def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
13221323
(!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
1324+
1325+
def : FlatAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<SDPatternOperator>(node),
1326+
GlobalSAddr, vt, data_vt> {
1327+
let AddedComplexity = 9;
1328+
let SubtargetPredicate = HasFlatGVSMode;
1329+
}
13231330
}
13241331

13251332
multiclass FlatAtomicNoRtnPatWithAddrSpace<string inst, string node, string addrSpaceSuffix,
@@ -1338,6 +1345,11 @@ multiclass FlatAtomicRtnPatBase <string inst, string node, ValueType vt,
13381345

13391346
def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
13401347
(!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
1348+
1349+
def : FlatAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, GlobalSAddrGLC, vt, data_vt> {
1350+
let AddedComplexity = 8;
1351+
let SubtargetPredicate = HasFlatGVSMode;
1352+
}
13411353
}
13421354

13431355
multiclass FlatAtomicRtnPatWithAddrSpace<string inst, string intr, string addrSpaceSuffix,
@@ -1507,7 +1519,8 @@ multiclass GlobalFLATAtomicPatsNoRtnBase<string inst, string node, ValueType vt,
15071519
def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), !cast<SDPatternOperator>(node), vt, data_vt>;
15081520

15091521
let AddedComplexity = 13 in
1510-
def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<SDPatternOperator>(node), vt, data_vt>;
1522+
def : FlatAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<SDPatternOperator>(node),
1523+
GlobalSAddr, vt, data_vt>;
15111524
}
15121525

15131526
multiclass GlobalFLATAtomicPatsRtnBase<string inst, string node, ValueType vt,
@@ -1518,7 +1531,7 @@ multiclass GlobalFLATAtomicPatsRtnBase<string inst, string node, ValueType vt,
15181531
def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>;
15191532

15201533
let AddedComplexity = 12 in
1521-
def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, vt, data_vt>;
1534+
def : FlatAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, GlobalSAddrGLC, vt, data_vt>;
15221535
}
15231536

15241537
multiclass GlobalFLATAtomicPatsNoRtn<string inst, string node, ValueType vt,
@@ -1797,12 +1810,13 @@ defm : FlatAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_"#as, f64>;
17971810
defm : FlatStorePats <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
17981811
defm : FlatStorePats <FLAT_STORE_SHORT, store_flat, i16>;
17991812

1800-
let SubtargetPredicate = isGFX12Plus in {
1801-
defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32 >;
1813+
} // End OtherPredicates = [HasFlatAddressSpace]
18021814

1803-
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
1804-
defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
1805-
}
1815+
let OtherPredicates = [isGFX12Plus] in
1816+
defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
1817+
1818+
let OtherPredicates = [isGFX12Plus, HasAtomicCSubNoRtnInsts] in
1819+
defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
18061820

18071821
let OtherPredicates = [HasD16LoadStore] in {
18081822
defm : FlatStorePats <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
@@ -1826,8 +1840,6 @@ defm : FlatLoadPats_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
18261840
defm : FlatLoadPats_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
18271841
}
18281842

1829-
} // End OtherPredicates = [HasFlatAddressSpace]
1830-
18311843
let OtherPredicates = [HasFlatGlobalInsts] in {
18321844

18331845
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_aext_8_global, i32>;

0 commit comments

Comments
 (0)