@@ -1872,6 +1872,23 @@ static SDValue matchZExtFromI32(SDValue Op) {
18721872 return (ExtSrc.getValueType () == MVT::i32 ) ? ExtSrc : SDValue ();
18731873}
18741874
1875+ // If this matches *_extend i32:x, return x
1876+ // Otherwise if the value is I32 returns x.
1877+ static SDValue matchExtFromI32orI32 (SDValue Op, bool IsSigned,
1878+ const SelectionDAG *DAG) {
1879+ if (Op.getValueType () == MVT::i32 )
1880+ return Op;
1881+
1882+ if (Op.getOpcode () != (IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND) &&
1883+ Op.getOpcode () != ISD::ANY_EXTEND &&
1884+ !(DAG->SignBitIsZero (Op) &&
1885+ Op.getOpcode () == (IsSigned ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND)))
1886+ return SDValue ();
1887+
1888+ SDValue ExtSrc = Op.getOperand (0 );
1889+ return (ExtSrc.getValueType () == MVT::i32 ) ? ExtSrc : SDValue ();
1890+ }
1891+
18751892// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
18761893bool AMDGPUDAGToDAGISel::SelectGlobalSAddr (SDNode *N,
18771894 SDValue Addr,
@@ -2159,17 +2176,59 @@ bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
21592176 return true ;
21602177}
21612178
2179+ // Given \p Offset and load node \p N check if an \p Offset is a multiple of
2180+ // the load byte size. If it is update \p Offset to a pre-scaled value and
2181+ // return true.
2182+ bool AMDGPUDAGToDAGISel::SelectScaleOffset (SDNode *N, SDValue &Offset,
2183+ bool IsSigned) const {
2184+ bool ScaleOffset = false ;
2185+ if (!Subtarget->hasScaleOffset () || !Offset)
2186+ return false ;
2187+
2188+ unsigned Size =
2189+ (unsigned )cast<MemSDNode>(N)->getMemoryVT ().getFixedSizeInBits () / 8 ;
2190+
2191+ SDValue Off = Offset;
2192+ if (SDValue Ext = matchExtFromI32orI32 (Offset, IsSigned, CurDAG))
2193+ Off = Ext;
2194+
2195+ if (isPowerOf2_32 (Size) && Off.getOpcode () == ISD::SHL) {
2196+ if (auto *C = dyn_cast<ConstantSDNode>(Off.getOperand (1 )))
2197+ ScaleOffset = C->getZExtValue () == Log2_32 (Size);
2198+ } else if (Offset.getOpcode () == ISD::MUL ||
2199+ (IsSigned && Offset.getOpcode () == AMDGPUISD::MUL_I24) ||
2200+ Offset.getOpcode () == AMDGPUISD::MUL_U24 ||
2201+ (Offset.isMachineOpcode () &&
2202+ Offset.getMachineOpcode () ==
2203+ (IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO
2204+ : AMDGPU::S_MUL_U64_U32_PSEUDO))) {
2205+ if (auto *C = dyn_cast<ConstantSDNode>(Offset.getOperand (1 )))
2206+ ScaleOffset = C->getZExtValue () == Size;
2207+ }
2208+
2209+ if (ScaleOffset)
2210+ Offset = Off.getOperand (0 );
2211+
2212+ return ScaleOffset;
2213+ }
2214+
21622215// Match an immediate (if Offset is not null) or an SGPR (if SOffset is
21632216// not null) offset. If Imm32Only is true, match only 32-bit immediate
21642217// offsets available on CI.
2165- bool AMDGPUDAGToDAGISel::SelectSMRDOffset (SDValue ByteOffsetNode,
2218+ bool AMDGPUDAGToDAGISel::SelectSMRDOffset (SDNode *N, SDValue ByteOffsetNode,
21662219 SDValue *SOffset, SDValue *Offset,
21672220 bool Imm32Only, bool IsBuffer,
2168- bool HasSOffset,
2169- int64_t ImmOffset ) const {
2221+ bool HasSOffset, int64_t ImmOffset,
2222+ bool *ScaleOffset ) const {
21702223 assert ((!SOffset || !Offset) &&
21712224 " Cannot match both soffset and offset at the same time!" );
21722225
2226+ if (ScaleOffset) {
2227+ assert (N && SOffset);
2228+
2229+ *ScaleOffset = SelectScaleOffset (N, ByteOffsetNode, false /* IsSigned */ );
2230+ }
2231+
21732232 ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
21742233 if (!C) {
21752234 if (!SOffset)
@@ -2254,24 +2313,25 @@ SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
22542313// Match a base and an immediate (if Offset is not null) or an SGPR (if
22552314// SOffset is not null) or an immediate+SGPR offset. If Imm32Only is
22562315// true, match only 32-bit immediate offsets available on CI.
2257- bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset (SDValue Addr, SDValue &SBase,
2258- SDValue *SOffset, SDValue *Offset,
2259- bool Imm32Only, bool IsBuffer,
2260- bool HasSOffset,
2261- int64_t ImmOffset) const {
2316+ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset (SDNode *N, SDValue Addr,
2317+ SDValue &SBase, SDValue *SOffset,
2318+ SDValue *Offset, bool Imm32Only,
2319+ bool IsBuffer, bool HasSOffset,
2320+ int64_t ImmOffset,
2321+ bool *ScaleOffset) const {
22622322 if (SOffset && Offset) {
22632323 assert (!Imm32Only && !IsBuffer);
22642324 SDValue B;
22652325
2266- if (!SelectSMRDBaseOffset (Addr, B, nullptr , Offset, false , false , true ))
2326+ if (!SelectSMRDBaseOffset (N, Addr, B, nullptr , Offset, false , false , true ))
22672327 return false ;
22682328
22692329 int64_t ImmOff = 0 ;
22702330 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(*Offset))
22712331 ImmOff = C->getSExtValue ();
22722332
2273- return SelectSMRDBaseOffset (B, SBase, SOffset, nullptr , false , false , true ,
2274- ImmOff);
2333+ return SelectSMRDBaseOffset (N, B, SBase, SOffset, nullptr , false , false ,
2334+ true , ImmOff, ScaleOffset );
22752335 }
22762336
22772337 // A 32-bit (address + offset) should not cause unsigned 32-bit integer
@@ -2291,23 +2351,25 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
22912351 if (!N0 || !N1)
22922352 return false ;
22932353
2294- if (SelectSMRDOffset (N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2295- ImmOffset)) {
2354+ if (SelectSMRDOffset (N, N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2355+ ImmOffset, ScaleOffset )) {
22962356 SBase = N0;
22972357 return true ;
22982358 }
2299- if (SelectSMRDOffset (N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2300- ImmOffset)) {
2359+ if (SelectSMRDOffset (N, N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2360+ ImmOffset, ScaleOffset )) {
23012361 SBase = N1;
23022362 return true ;
23032363 }
23042364 return false ;
23052365}
23062366
2307- bool AMDGPUDAGToDAGISel::SelectSMRD (SDValue Addr, SDValue &SBase,
2367+ bool AMDGPUDAGToDAGISel::SelectSMRD (SDNode *N, SDValue Addr, SDValue &SBase,
23082368 SDValue *SOffset, SDValue *Offset,
2309- bool Imm32Only) const {
2310- if (SelectSMRDBaseOffset (Addr, SBase, SOffset, Offset, Imm32Only)) {
2369+ bool Imm32Only, bool *ScaleOffset) const {
2370+ if (SelectSMRDBaseOffset (N, Addr, SBase, SOffset, Offset, Imm32Only,
2371+ /* IsBuffer */ false , /* HasSOffset */ false ,
2372+ /* ImmOffset */ 0 , ScaleOffset)) {
23112373 SBase = Expand32BitAddress (SBase);
23122374 return true ;
23132375 }
@@ -2323,36 +2385,51 @@ bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
23232385
23242386bool AMDGPUDAGToDAGISel::SelectSMRDImm (SDValue Addr, SDValue &SBase,
23252387 SDValue &Offset) const {
2326- return SelectSMRD (Addr, SBase, /* SOffset */ nullptr , &Offset);
2388+ return SelectSMRD (/* N */ nullptr , Addr, SBase, /* SOffset */ nullptr ,
2389+ &Offset);
23272390}
23282391
23292392bool AMDGPUDAGToDAGISel::SelectSMRDImm32 (SDValue Addr, SDValue &SBase,
23302393 SDValue &Offset) const {
23312394 assert (Subtarget->getGeneration () == AMDGPUSubtarget::SEA_ISLANDS);
2332- return SelectSMRD (Addr, SBase, /* SOffset */ nullptr , &Offset ,
2333- /* Imm32Only */ true );
2395+ return SelectSMRD (/* N */ nullptr , Addr, SBase, /* SOffset */ nullptr ,
2396+ &Offset, /* Imm32Only */ true );
23342397}
23352398
2336- bool AMDGPUDAGToDAGISel::SelectSMRDSgpr (SDValue Addr, SDValue &SBase,
2337- SDValue &SOffset) const {
2338- return SelectSMRD (Addr, SBase, &SOffset, /* Offset */ nullptr );
2399+ bool AMDGPUDAGToDAGISel::SelectSMRDSgpr (SDNode *N, SDValue Addr, SDValue &SBase,
2400+ SDValue &SOffset, SDValue &CPol) const {
2401+ bool ScaleOffset;
2402+ if (!SelectSMRD (N, Addr, SBase, &SOffset, /* Offset */ nullptr ,
2403+ /* Imm32Only */ false , &ScaleOffset))
2404+ return false ;
2405+
2406+ CPol = CurDAG->getTargetConstant (ScaleOffset ? AMDGPU::CPol::SCAL : 0 ,
2407+ SDLoc (N), MVT::i32 );
2408+ return true ;
23392409}
23402410
2341- bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm (SDValue Addr, SDValue &SBase,
2342- SDValue &SOffset,
2343- SDValue &Offset) const {
2344- return SelectSMRD (Addr, SBase, &SOffset, &Offset);
2411+ bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm (SDNode *N, SDValue Addr,
2412+ SDValue &SBase, SDValue &SOffset,
2413+ SDValue &Offset,
2414+ SDValue &CPol) const {
2415+ bool ScaleOffset;
2416+ if (!SelectSMRD (N, Addr, SBase, &SOffset, &Offset, false , &ScaleOffset))
2417+ return false ;
2418+
2419+ CPol = CurDAG->getTargetConstant (ScaleOffset ? AMDGPU::CPol::SCAL : 0 ,
2420+ SDLoc (N), MVT::i32 );
2421+ return true ;
23452422}
23462423
23472424bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm (SDValue N, SDValue &Offset) const {
2348- return SelectSMRDOffset (N, /* SOffset */ nullptr , &Offset,
2425+ return SelectSMRDOffset (/* N */ nullptr , N, /* SOffset */ nullptr , &Offset,
23492426 /* Imm32Only */ false , /* IsBuffer */ true );
23502427}
23512428
23522429bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32 (SDValue N,
23532430 SDValue &Offset) const {
23542431 assert (Subtarget->getGeneration () == AMDGPUSubtarget::SEA_ISLANDS);
2355- return SelectSMRDOffset (N, /* SOffset */ nullptr , &Offset,
2432+ return SelectSMRDOffset (/* N */ nullptr , N, /* SOffset */ nullptr , &Offset,
23562433 /* Imm32Only */ true , /* IsBuffer */ true );
23572434}
23582435
@@ -2361,9 +2438,9 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
23612438 // Match the (soffset + offset) pair as a 32-bit register base and
23622439 // an immediate offset.
23632440 return N.getValueType () == MVT::i32 &&
2364- SelectSMRDBaseOffset (N, /* SBase */ SOffset, /* SOffset */ nullptr ,
2365- &Offset, /* Imm32Only */ false ,
2366- /* IsBuffer */ true );
2441+ SelectSMRDBaseOffset (/* N */ nullptr , N, /* SBase */ SOffset ,
2442+ /* SOffset */ nullptr , &Offset ,
2443+ /* Imm32Only */ false , /* IsBuffer */ true );
23672444}
23682445
23692446bool AMDGPUDAGToDAGISel::SelectMOVRELOffset (SDValue Index,
0 commit comments