@@ -1863,15 +1863,6 @@ bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
18631863                              SIInstrFlags::FlatScratch);
18641864}
18651865
1866- //  If this matches zero_extend i32:x, return x; otherwise return an empty SDValue.
1867- static  SDValue matchZExtFromI32 (SDValue Op) {
1868-   if  (Op.getOpcode () != ISD::ZERO_EXTEND)
1869-     return  SDValue ();
1870- 
1871-   SDValue ExtSrc = Op.getOperand (0 );
1872-   return  (ExtSrc.getValueType () == MVT::i32 ) ? ExtSrc : SDValue ();
1873- }
1874- 
18751866//  If this matches *_extend i32:x, return x
18761867//  Otherwise, if the value is already an i32, return it.
18771868static  SDValue matchExtFromI32orI32 (SDValue Op, bool  IsSigned,
@@ -1890,12 +1881,13 @@ static SDValue matchExtFromI32orI32(SDValue Op, bool IsSigned,
18901881}
18911882
18921883//  Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
1893- bool   AMDGPUDAGToDAGISel::SelectGlobalSAddr (SDNode *N, 
1894-                                             SDValue Addr,
1895-                                            SDValue &SAddr,
1896-                                            SDValue &VOffset ,
1897-                                            SDValue &Offset ) const  {
1884+ //  or (64-bit SGPR base) + (sext vgpr offset) + sext(imm offset) 
1885+ bool   AMDGPUDAGToDAGISel::SelectGlobalSAddr (SDNode *N,  SDValue Addr,
1886+                                            SDValue &SAddr, SDValue &VOffset, 
1887+                                            SDValue &Offset,  bool  &ScaleOffset ,
1888+                                            bool  NeedIOffset ) const  {
18981889  int64_t  ImmOffset = 0 ;
1890+   ScaleOffset = false ;
18991891
19001892  //  Match the immediate offset first, which canonically is moved as low as
19011893  //  possible.
@@ -1905,7 +1897,8 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
19051897    int64_t  COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue ();
19061898    const  SIInstrInfo *TII = Subtarget->getInstrInfo ();
19071899
1908-     if  (TII->isLegalFLATOffset (COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
1900+     if  (NeedIOffset &&
1901+         TII->isLegalFLATOffset (COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
19091902                               SIInstrFlags::FlatGlobal)) {
19101903      Addr = LHS;
19111904      ImmOffset = COffsetVal;
@@ -1915,11 +1908,14 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
19151908        //  saddr + large_offset -> saddr +
19161909        //                          (voffset = large_offset & ~MaxOffset) +
19171910        //                          (large_offset & MaxOffset);
1918-         int64_t  SplitImmOffset, RemainderOffset;
1919-         std::tie (SplitImmOffset, RemainderOffset) = TII->splitFlatOffset (
1920-             COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
1911+         int64_t  SplitImmOffset = 0 , RemainderOffset = COffsetVal;
1912+         if  (NeedIOffset) {
1913+           std::tie (SplitImmOffset, RemainderOffset) = TII->splitFlatOffset (
1914+               COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
1915+         }
19211916
1922-         if  (isUInt<32 >(RemainderOffset)) {
1917+         if  (Subtarget->hasSignedGVSOffset () ? isInt<32 >(RemainderOffset)
1918+                                             : isUInt<32 >(RemainderOffset)) {
19231919          SDNode *VMov = CurDAG->getMachineNode (
19241920              AMDGPU::V_MOV_B32_e32, SL, MVT::i32 ,
19251921              CurDAG->getTargetConstant (RemainderOffset, SDLoc (), MVT::i32 ));
@@ -1946,21 +1942,26 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
19461942  //  Match the variable offset.
19471943  if  (Addr.getOpcode () == ISD::ADD) {
19481944    LHS = Addr.getOperand (0 );
1949-     RHS = Addr.getOperand (1 );
19501945
19511946    if  (!LHS->isDivergent ()) {
1952-       //  add (i64 sgpr), (zero_extend (i32 vgpr))
1953-       if  (SDValue ZextRHS = matchZExtFromI32 (RHS)) {
1947+       //  add (i64 sgpr), (*_extend (i32 vgpr))
1948+       RHS = Addr.getOperand (1 );
1949+       ScaleOffset = SelectScaleOffset (N, RHS, Subtarget->hasSignedGVSOffset ());
1950+       if  (SDValue ExtRHS = matchExtFromI32orI32 (
1951+               RHS, Subtarget->hasSignedGVSOffset (), CurDAG)) {
19541952        SAddr = LHS;
1955-         VOffset = ZextRHS ;
1953+         VOffset = ExtRHS ;
19561954      }
19571955    }
19581956
1957+     RHS = Addr.getOperand (1 );
19591958    if  (!SAddr && !RHS->isDivergent ()) {
1960-       //  add (zero_extend (i32 vgpr)), (i64 sgpr)
1961-       if  (SDValue ZextLHS = matchZExtFromI32 (LHS)) {
1959+       //  add (*_extend (i32 vgpr)), (i64 sgpr)
1960+       ScaleOffset = SelectScaleOffset (N, LHS, Subtarget->hasSignedGVSOffset ());
1961+       if  (SDValue ExtLHS = matchExtFromI32orI32 (
1962+               LHS, Subtarget->hasSignedGVSOffset (), CurDAG)) {
19621963        SAddr = RHS;
1963-         VOffset = ZextLHS ;
1964+         VOffset = ExtLHS ;
19641965      }
19651966    }
19661967
@@ -1970,6 +1971,27 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
19701971    }
19711972  }
19721973
1974+   if  (Subtarget->hasScaleOffset () &&
1975+       (Addr.getOpcode () == (Subtarget->hasSignedGVSOffset ()
1976+                                 ? AMDGPUISD::MAD_I64_I32
1977+                                 : AMDGPUISD::MAD_U64_U32) ||
1978+        (Addr.getOpcode () == AMDGPUISD::MAD_U64_U32 &&
1979+         CurDAG->SignBitIsZero (Addr.getOperand (0 )))) &&
1980+       Addr.getOperand (0 )->isDivergent () &&
1981+       isa<ConstantSDNode>(Addr.getOperand (1 )) &&
1982+       !Addr.getOperand (2 )->isDivergent ()) {
1983+     //  mad_u64_u32 (i32 vgpr), (i32 c), (i64 sgpr)
1984+     unsigned  Size =
1985+         (unsigned )cast<MemSDNode>(N)->getMemoryVT ().getFixedSizeInBits () / 8 ;
1986+     ScaleOffset = Addr.getConstantOperandVal (1 ) == Size;
1987+     if  (ScaleOffset) {
1988+       SAddr = Addr.getOperand (2 );
1989+       VOffset = Addr.getOperand (0 );
1990+       Offset = CurDAG->getTargetConstant (ImmOffset, SDLoc (), MVT::i32 );
1991+       return  true ;
1992+     }
1993+   }
1994+ 
19731995  if  (Addr->isDivergent () || Addr.getOpcode () == ISD::UNDEF ||
19741996      isa<ConstantSDNode>(Addr))
19751997    return  false ;
@@ -1989,21 +2011,24 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,
19892011                                           SDValue &SAddr, SDValue &VOffset,
19902012                                           SDValue &Offset,
19912013                                           SDValue &CPol) const  {
1992-   if  (!SelectGlobalSAddr (N, Addr, SAddr, VOffset, Offset))
2014+   bool  ScaleOffset;
2015+   if  (!SelectGlobalSAddr (N, Addr, SAddr, VOffset, Offset, ScaleOffset))
19932016    return  false ;
19942017
1995-   CPol = CurDAG->getTargetConstant (0 , SDLoc (), MVT::i32 );
2018+   CPol = CurDAG->getTargetConstant (ScaleOffset ? AMDGPU::CPol::SCAL : 0 ,
2019+                                    SDLoc (), MVT::i32 );
19962020  return  true ;
19972021}
19982022
19992023bool  AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC (SDNode *N, SDValue Addr,
20002024                                              SDValue &SAddr, SDValue &VOffset,
20012025                                              SDValue &Offset,
20022026                                              SDValue &CPol) const  {
2003-   if  (!SelectGlobalSAddr (N, Addr, SAddr, VOffset, Offset))
2027+   bool  ScaleOffset;
2028+   if  (!SelectGlobalSAddr (N, Addr, SAddr, VOffset, Offset, ScaleOffset))
20042029    return  false ;
20052030
2006-   unsigned  CPolVal = AMDGPU::CPol::GLC;
2031+   unsigned  CPolVal = (ScaleOffset ? AMDGPU::CPol::SCAL :  0 ) |  AMDGPU::CPol::GLC;
20072032  CPol = CurDAG->getTargetConstant (CPolVal, SDLoc (), MVT::i32 );
20082033  return  true ;
20092034}
0 commit comments