Skip to content

Commit 8606fd1

Browse files
rampitec authored and
mahesh-attarde committed
[AMDGPU] Select scale_offset with SMEM instructions (llvm#150078)
1 parent 58c6fba commit 8606fd1

File tree

6 files changed

+671
-73
lines changed

6 files changed

+671
-73
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 110 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1872,6 +1872,23 @@ static SDValue matchZExtFromI32(SDValue Op) {
18721872
return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
18731873
}
18741874

1875+
// If this matches *_extend i32:x, return x
1876+
// Otherwise if the value is I32 returns x.
1877+
static SDValue matchExtFromI32orI32(SDValue Op, bool IsSigned,
1878+
const SelectionDAG *DAG) {
1879+
if (Op.getValueType() == MVT::i32)
1880+
return Op;
1881+
1882+
if (Op.getOpcode() != (IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND) &&
1883+
Op.getOpcode() != ISD::ANY_EXTEND &&
1884+
!(DAG->SignBitIsZero(Op) &&
1885+
Op.getOpcode() == (IsSigned ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND)))
1886+
return SDValue();
1887+
1888+
SDValue ExtSrc = Op.getOperand(0);
1889+
return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
1890+
}
1891+
18751892
// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
18761893
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
18771894
SDValue Addr,
@@ -2159,17 +2176,59 @@ bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
21592176
return true;
21602177
}
21612178

2179+
// Given \p Offset and load node \p N check if an \p Offset is a multiple of
2180+
// the load byte size. If it is update \p Offset to a pre-scaled value and
2181+
// return true.
2182+
bool AMDGPUDAGToDAGISel::SelectScaleOffset(SDNode *N, SDValue &Offset,
2183+
bool IsSigned) const {
2184+
bool ScaleOffset = false;
2185+
if (!Subtarget->hasScaleOffset() || !Offset)
2186+
return false;
2187+
2188+
unsigned Size =
2189+
(unsigned)cast<MemSDNode>(N)->getMemoryVT().getFixedSizeInBits() / 8;
2190+
2191+
SDValue Off = Offset;
2192+
if (SDValue Ext = matchExtFromI32orI32(Offset, IsSigned, CurDAG))
2193+
Off = Ext;
2194+
2195+
if (isPowerOf2_32(Size) && Off.getOpcode() == ISD::SHL) {
2196+
if (auto *C = dyn_cast<ConstantSDNode>(Off.getOperand(1)))
2197+
ScaleOffset = C->getZExtValue() == Log2_32(Size);
2198+
} else if (Offset.getOpcode() == ISD::MUL ||
2199+
(IsSigned && Offset.getOpcode() == AMDGPUISD::MUL_I24) ||
2200+
Offset.getOpcode() == AMDGPUISD::MUL_U24 ||
2201+
(Offset.isMachineOpcode() &&
2202+
Offset.getMachineOpcode() ==
2203+
(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO
2204+
: AMDGPU::S_MUL_U64_U32_PSEUDO))) {
2205+
if (auto *C = dyn_cast<ConstantSDNode>(Offset.getOperand(1)))
2206+
ScaleOffset = C->getZExtValue() == Size;
2207+
}
2208+
2209+
if (ScaleOffset)
2210+
Offset = Off.getOperand(0);
2211+
2212+
return ScaleOffset;
2213+
}
2214+
21622215
// Match an immediate (if Offset is not null) or an SGPR (if SOffset is
21632216
// not null) offset. If Imm32Only is true, match only 32-bit immediate
21642217
// offsets available on CI.
2165-
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
2218+
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDNode *N, SDValue ByteOffsetNode,
21662219
SDValue *SOffset, SDValue *Offset,
21672220
bool Imm32Only, bool IsBuffer,
2168-
bool HasSOffset,
2169-
int64_t ImmOffset) const {
2221+
bool HasSOffset, int64_t ImmOffset,
2222+
bool *ScaleOffset) const {
21702223
assert((!SOffset || !Offset) &&
21712224
"Cannot match both soffset and offset at the same time!");
21722225

2226+
if (ScaleOffset) {
2227+
assert(N && SOffset);
2228+
2229+
*ScaleOffset = SelectScaleOffset(N, ByteOffsetNode, false /* IsSigned */);
2230+
}
2231+
21732232
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
21742233
if (!C) {
21752234
if (!SOffset)
@@ -2254,24 +2313,25 @@ SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
22542313
// Match a base and an immediate (if Offset is not null) or an SGPR (if
22552314
// SOffset is not null) or an immediate+SGPR offset. If Imm32Only is
22562315
// true, match only 32-bit immediate offsets available on CI.
2257-
bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
2258-
SDValue *SOffset, SDValue *Offset,
2259-
bool Imm32Only, bool IsBuffer,
2260-
bool HasSOffset,
2261-
int64_t ImmOffset) const {
2316+
bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDNode *N, SDValue Addr,
2317+
SDValue &SBase, SDValue *SOffset,
2318+
SDValue *Offset, bool Imm32Only,
2319+
bool IsBuffer, bool HasSOffset,
2320+
int64_t ImmOffset,
2321+
bool *ScaleOffset) const {
22622322
if (SOffset && Offset) {
22632323
assert(!Imm32Only && !IsBuffer);
22642324
SDValue B;
22652325

2266-
if (!SelectSMRDBaseOffset(Addr, B, nullptr, Offset, false, false, true))
2326+
if (!SelectSMRDBaseOffset(N, Addr, B, nullptr, Offset, false, false, true))
22672327
return false;
22682328

22692329
int64_t ImmOff = 0;
22702330
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(*Offset))
22712331
ImmOff = C->getSExtValue();
22722332

2273-
return SelectSMRDBaseOffset(B, SBase, SOffset, nullptr, false, false, true,
2274-
ImmOff);
2333+
return SelectSMRDBaseOffset(N, B, SBase, SOffset, nullptr, false, false,
2334+
true, ImmOff, ScaleOffset);
22752335
}
22762336

22772337
// A 32-bit (address + offset) should not cause unsigned 32-bit integer
@@ -2291,23 +2351,25 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
22912351
if (!N0 || !N1)
22922352
return false;
22932353

2294-
if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2295-
ImmOffset)) {
2354+
if (SelectSMRDOffset(N, N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2355+
ImmOffset, ScaleOffset)) {
22962356
SBase = N0;
22972357
return true;
22982358
}
2299-
if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2300-
ImmOffset)) {
2359+
if (SelectSMRDOffset(N, N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2360+
ImmOffset, ScaleOffset)) {
23012361
SBase = N1;
23022362
return true;
23032363
}
23042364
return false;
23052365
}
23062366

2307-
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
2367+
bool AMDGPUDAGToDAGISel::SelectSMRD(SDNode *N, SDValue Addr, SDValue &SBase,
23082368
SDValue *SOffset, SDValue *Offset,
2309-
bool Imm32Only) const {
2310-
if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
2369+
bool Imm32Only, bool *ScaleOffset) const {
2370+
if (SelectSMRDBaseOffset(N, Addr, SBase, SOffset, Offset, Imm32Only,
2371+
/* IsBuffer */ false, /* HasSOffset */ false,
2372+
/* ImmOffset */ 0, ScaleOffset)) {
23112373
SBase = Expand32BitAddress(SBase);
23122374
return true;
23132375
}
@@ -2323,36 +2385,51 @@ bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
23232385

23242386
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
23252387
SDValue &Offset) const {
2326-
return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset);
2388+
return SelectSMRD(/* N */ nullptr, Addr, SBase, /* SOffset */ nullptr,
2389+
&Offset);
23272390
}
23282391

23292392
bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
23302393
SDValue &Offset) const {
23312394
assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2332-
return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset,
2333-
/* Imm32Only */ true);
2395+
return SelectSMRD(/* N */ nullptr, Addr, SBase, /* SOffset */ nullptr,
2396+
&Offset, /* Imm32Only */ true);
23342397
}
23352398

2336-
bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
2337-
SDValue &SOffset) const {
2338-
return SelectSMRD(Addr, SBase, &SOffset, /* Offset */ nullptr);
2399+
// Match \p Addr as an SMRD base plus an SGPR offset (no immediate offset).
// Forwards to SelectSMRD with only \p SOffset requested. On success \p SBase
// and \p SOffset are filled in by SelectSMRD, \p CPol is set to an i32 target
// constant holding AMDGPU::CPol::SCAL if a scaled offset was selected and 0
// otherwise, and true is returned. Returns false, leaving \p CPol untouched,
// if SelectSMRD fails.
bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDNode *N, SDValue Addr, SDValue &SBase,
                                        SDValue &SOffset, SDValue &CPol) const {
  // Start from "not scaled" so that CPol below never depends on an
  // indeterminate value, even if a successful path through SelectSMRD does
  // not store to the out-parameter.
  bool ScaleOffset = false;
  if (!SelectSMRD(N, Addr, SBase, &SOffset, /* Offset */ nullptr,
                  /* Imm32Only */ false, &ScaleOffset))
    return false;

  CPol = CurDAG->getTargetConstant(ScaleOffset ? AMDGPU::CPol::SCAL : 0,
                                   SDLoc(N), MVT::i32);
  return true;
}
23402410

2341-
bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase,
2342-
SDValue &SOffset,
2343-
SDValue &Offset) const {
2344-
return SelectSMRD(Addr, SBase, &SOffset, &Offset);
2411+
// Match \p Addr as an SMRD base plus both an SGPR offset and an immediate
// offset. Forwards to SelectSMRD with \p SOffset and \p Offset requested. On
// success \p SBase, \p SOffset and \p Offset are filled in by SelectSMRD,
// \p CPol is set to an i32 target constant holding AMDGPU::CPol::SCAL if a
// scaled offset was selected and 0 otherwise, and true is returned. Returns
// false, leaving \p CPol untouched, if SelectSMRD fails.
bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDNode *N, SDValue Addr,
                                           SDValue &SBase, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &CPol) const {
  // Start from "not scaled" so that CPol below never depends on an
  // indeterminate value, even if a successful path through SelectSMRD does
  // not store to the out-parameter.
  bool ScaleOffset = false;
  if (!SelectSMRD(N, Addr, SBase, &SOffset, &Offset, /* Imm32Only */ false,
                  &ScaleOffset))
    return false;

  CPol = CurDAG->getTargetConstant(ScaleOffset ? AMDGPU::CPol::SCAL : 0,
                                   SDLoc(N), MVT::i32);
  return true;
}
23462423

23472424
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue N, SDValue &Offset) const {
2348-
return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
2425+
return SelectSMRDOffset(/* N */ nullptr, N, /* SOffset */ nullptr, &Offset,
23492426
/* Imm32Only */ false, /* IsBuffer */ true);
23502427
}
23512428

23522429
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
23532430
SDValue &Offset) const {
23542431
assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2355-
return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
2432+
return SelectSMRDOffset(/* N */ nullptr, N, /* SOffset */ nullptr, &Offset,
23562433
/* Imm32Only */ true, /* IsBuffer */ true);
23572434
}
23582435

@@ -2361,9 +2438,9 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
23612438
// Match the (soffset + offset) pair as a 32-bit register base and
23622439
// an immediate offset.
23632440
return N.getValueType() == MVT::i32 &&
2364-
SelectSMRDBaseOffset(N, /* SBase */ SOffset, /* SOffset*/ nullptr,
2365-
&Offset, /* Imm32Only */ false,
2366-
/* IsBuffer */ true);
2441+
SelectSMRDBaseOffset(/* N */ nullptr, N, /* SBase */ SOffset,
2442+
/* SOffset*/ nullptr, &Offset,
2443+
/* Imm32Only */ false, /* IsBuffer */ true);
23672444
}
23682445

23692446
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -176,22 +176,28 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
176176
bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
177177
SDValue &SAddr, SDValue &Offset) const;
178178

179-
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SOffset,
179+
bool SelectSMRDOffset(SDNode *N, SDValue ByteOffsetNode, SDValue *SOffset,
180180
SDValue *Offset, bool Imm32Only = false,
181181
bool IsBuffer = false, bool HasSOffset = false,
182-
int64_t ImmOffset = 0) const;
182+
int64_t ImmOffset = 0,
183+
bool *ScaleOffset = nullptr) const;
183184
SDValue Expand32BitAddress(SDValue Addr) const;
184-
bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset,
185-
SDValue *Offset, bool Imm32Only = false,
186-
bool IsBuffer = false, bool HasSOffset = false,
187-
int64_t ImmOffset = 0) const;
188-
bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset,
189-
SDValue *Offset, bool Imm32Only = false) const;
185+
bool SelectSMRDBaseOffset(SDNode *N, SDValue Addr, SDValue &SBase,
186+
SDValue *SOffset, SDValue *Offset,
187+
bool Imm32Only = false, bool IsBuffer = false,
188+
bool HasSOffset = false, int64_t ImmOffset = 0,
189+
bool *ScaleOffset = nullptr) const;
190+
bool SelectSMRD(SDNode *N, SDValue Addr, SDValue &SBase, SDValue *SOffset,
191+
SDValue *Offset, bool Imm32Only = false,
192+
bool *ScaleOffset = nullptr) const;
190193
bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
191194
bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
192-
bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &SOffset) const;
193-
bool SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, SDValue &SOffset,
194-
SDValue &Offset) const;
195+
bool SelectScaleOffset(SDNode *N, SDValue &Offset, bool IsSigned) const;
196+
bool SelectSMRDSgpr(SDNode *N, SDValue Addr, SDValue &SBase, SDValue &SOffset,
197+
SDValue &CPol) const;
198+
bool SelectSMRDSgprImm(SDNode *N, SDValue Addr, SDValue &SBase,
199+
SDValue &SOffset, SDValue &Offset,
200+
SDValue &CPol) const;
195201
bool SelectSMRDBufferImm(SDValue N, SDValue &Offset) const;
196202
bool SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const;
197203
bool SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,

0 commit comments

Comments
 (0)