@@ -1863,15 +1863,6 @@ bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
18631863 SIInstrFlags::FlatScratch);
18641864}
18651865
1866- // If this matches zero_extend i32:x, return x
1867- static SDValue matchZExtFromI32 (SDValue Op) {
1868- if (Op.getOpcode () != ISD::ZERO_EXTEND)
1869- return SDValue ();
1870-
1871- SDValue ExtSrc = Op.getOperand (0 );
1872- return (ExtSrc.getValueType () == MVT::i32 ) ? ExtSrc : SDValue ();
1873- }
1874-
18751866// If this matches *_extend i32:x, return x
18761867// Otherwise if the value is I32 returns x.
18771868static SDValue matchExtFromI32orI32 (SDValue Op, bool IsSigned,
@@ -1890,12 +1881,13 @@ static SDValue matchExtFromI32orI32(SDValue Op, bool IsSigned,
18901881}
18911882
18921883// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
1893- bool AMDGPUDAGToDAGISel::SelectGlobalSAddr (SDNode *N,
1894- SDValue Addr,
1895- SDValue &SAddr,
1896- SDValue &VOffset ,
1897- SDValue &Offset ) const {
1884+ // or (64-bit SGPR base) + (sext vgpr offset) + sext(imm offset)
1885+ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr (SDNode *N, SDValue Addr,
1886+ SDValue &SAddr, SDValue &VOffset,
1887+ SDValue &Offset, bool &ScaleOffset ,
1888+ bool NeedIOffset ) const {
18981889 int64_t ImmOffset = 0 ;
1890+ ScaleOffset = false ;
18991891
19001892 // Match the immediate offset first, which canonically is moved as low as
19011893 // possible.
@@ -1905,7 +1897,8 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
19051897 int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue ();
19061898 const SIInstrInfo *TII = Subtarget->getInstrInfo ();
19071899
1908- if (TII->isLegalFLATOffset (COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
1900+ if (NeedIOffset &&
1901+ TII->isLegalFLATOffset (COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
19091902 SIInstrFlags::FlatGlobal)) {
19101903 Addr = LHS;
19111904 ImmOffset = COffsetVal;
@@ -1915,11 +1908,14 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
19151908 // saddr + large_offset -> saddr +
19161909 // (voffset = large_offset & ~MaxOffset) +
19171910 // (large_offset & MaxOffset);
1918- int64_t SplitImmOffset, RemainderOffset;
1919- std::tie (SplitImmOffset, RemainderOffset) = TII->splitFlatOffset (
1920- COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
1911+ int64_t SplitImmOffset = 0 , RemainderOffset = COffsetVal;
1912+ if (NeedIOffset) {
1913+ std::tie (SplitImmOffset, RemainderOffset) = TII->splitFlatOffset (
1914+ COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
1915+ }
19211916
1922- if (isUInt<32 >(RemainderOffset)) {
1917+ if (Subtarget->hasSignedGVSOffset () ? isInt<32 >(RemainderOffset)
1918+ : isUInt<32 >(RemainderOffset)) {
19231919 SDNode *VMov = CurDAG->getMachineNode (
19241920 AMDGPU::V_MOV_B32_e32, SL, MVT::i32 ,
19251921 CurDAG->getTargetConstant (RemainderOffset, SDLoc (), MVT::i32 ));
@@ -1946,21 +1942,26 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
19461942 // Match the variable offset.
19471943 if (Addr.getOpcode () == ISD::ADD) {
19481944 LHS = Addr.getOperand (0 );
1949- RHS = Addr.getOperand (1 );
19501945
19511946 if (!LHS->isDivergent ()) {
1952- // add (i64 sgpr), (zero_extend (i32 vgpr))
1953- if (SDValue ZextRHS = matchZExtFromI32 (RHS)) {
1947+ // add (i64 sgpr), (*_extend (i32 vgpr))
1948+ RHS = Addr.getOperand (1 );
1949+ ScaleOffset = SelectScaleOffset (N, RHS, Subtarget->hasSignedGVSOffset ());
1950+ if (SDValue ExtRHS = matchExtFromI32orI32 (
1951+ RHS, Subtarget->hasSignedGVSOffset (), CurDAG)) {
19541952 SAddr = LHS;
1955- VOffset = ZextRHS ;
1953+ VOffset = ExtRHS ;
19561954 }
19571955 }
19581956
1957+ RHS = Addr.getOperand (1 );
19591958 if (!SAddr && !RHS->isDivergent ()) {
1960- // add (zero_extend (i32 vgpr)), (i64 sgpr)
1961- if (SDValue ZextLHS = matchZExtFromI32 (LHS)) {
1959+ // add (*_extend (i32 vgpr)), (i64 sgpr)
1960+ ScaleOffset = SelectScaleOffset (N, LHS, Subtarget->hasSignedGVSOffset ());
1961+ if (SDValue ExtLHS = matchExtFromI32orI32 (
1962+ LHS, Subtarget->hasSignedGVSOffset (), CurDAG)) {
19621963 SAddr = RHS;
1963- VOffset = ZextLHS ;
1964+ VOffset = ExtLHS ;
19641965 }
19651966 }
19661967
@@ -1970,6 +1971,27 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
19701971 }
19711972 }
19721973
1974+ if (Subtarget->hasScaleOffset () &&
1975+ (Addr.getOpcode () == (Subtarget->hasSignedGVSOffset ()
1976+ ? AMDGPUISD::MAD_I64_I32
1977+ : AMDGPUISD::MAD_U64_U32) ||
1978+ (Addr.getOpcode () == AMDGPUISD::MAD_U64_U32 &&
1979+ CurDAG->SignBitIsZero (Addr.getOperand (0 )))) &&
1980+ Addr.getOperand (0 )->isDivergent () &&
1981+ isa<ConstantSDNode>(Addr.getOperand (1 )) &&
1982+ !Addr.getOperand (2 )->isDivergent ()) {
1983+ // mad_{i64_i32,u64_u32} (i32 vgpr), (i32 c), (i64 sgpr)
1984+ unsigned Size =
1985+ (unsigned )cast<MemSDNode>(N)->getMemoryVT ().getFixedSizeInBits () / 8 ;
1986+ ScaleOffset = Addr.getConstantOperandVal (1 ) == Size;
1987+ if (ScaleOffset) {
1988+ SAddr = Addr.getOperand (2 );
1989+ VOffset = Addr.getOperand (0 );
1990+ Offset = CurDAG->getTargetConstant (ImmOffset, SDLoc (), MVT::i32 );
1991+ return true ;
1992+ }
1993+ }
1994+
19731995 if (Addr->isDivergent () || Addr.getOpcode () == ISD::UNDEF ||
19741996 isa<ConstantSDNode>(Addr))
19751997 return false ;
@@ -1989,21 +2011,24 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,
19892011 SDValue &SAddr, SDValue &VOffset,
19902012 SDValue &Offset,
19912013 SDValue &CPol) const {
1992- if (!SelectGlobalSAddr (N, Addr, SAddr, VOffset, Offset))
2014+ bool ScaleOffset;
2015+ if (!SelectGlobalSAddr (N, Addr, SAddr, VOffset, Offset, ScaleOffset))
19932016 return false ;
19942017
1995- CPol = CurDAG->getTargetConstant (0 , SDLoc (), MVT::i32 );
2018+ CPol = CurDAG->getTargetConstant (ScaleOffset ? AMDGPU::CPol::SCAL : 0 ,
2019+ SDLoc (), MVT::i32 );
19962020 return true ;
19972021}
19982022
19992023bool AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC (SDNode *N, SDValue Addr,
20002024 SDValue &SAddr, SDValue &VOffset,
20012025 SDValue &Offset,
20022026 SDValue &CPol) const {
2003- if (!SelectGlobalSAddr (N, Addr, SAddr, VOffset, Offset))
2027+ bool ScaleOffset;
2028+ if (!SelectGlobalSAddr (N, Addr, SAddr, VOffset, Offset, ScaleOffset))
20042029 return false ;
20052030
2006- unsigned CPolVal = AMDGPU::CPol::GLC;
2031+ unsigned CPolVal = (ScaleOffset ? AMDGPU::CPol::SCAL : 0 ) | AMDGPU::CPol::GLC;
20072032 CPol = CurDAG->getTargetConstant (CPolVal, SDLoc (), MVT::i32 );
20082033 return true ;
20092034}
0 commit comments