AmrDeveloper
diff --git a/‎llvm/lib/Target/X86/X86ISelLowering.cpp‎
Lines changed: 2 additions & 112 deletions b/‎llvm/lib/Target/X86/X86ISelLowering.cpp‎
Lines changed: 2 additions & 112 deletions
@@ -53344,80 +53344,6 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-// Look for a RMW operation that only touches one bit of a larger than legal
-// type and fold it to a BTC/BTR/BTS pattern acting on a single i32 sub value.
-static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
-                              SelectionDAG &DAG,
-                              const X86Subtarget &Subtarget) {
-  using namespace SDPatternMatch;
-
-  // Only handle normal stores and its chain was a matching normal load.
-  auto *Ld = dyn_cast<LoadSDNode>(St->getChain());
-  if (!ISD::isNormalStore(St) || !St->isSimple() || !Ld ||
-      !ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
-      Ld->getBasePtr() != St->getBasePtr() ||
-      Ld->getOffset() != St->getOffset())
-    return SDValue();
-
-  SDValue LoadVal(Ld, 0);
-  SDValue StoredVal = St->getValue();
-  EVT VT = StoredVal.getValueType();
-
-  // Only narrow larger than legal scalar integers.
-  if (!VT.isScalarInteger() ||
-      VT.getSizeInBits() <= (Subtarget.is64Bit() ? 64 : 32))
-    return SDValue();
-
-  // BTR: X & ~(1 << ShAmt)
-  // BTS: X | (1 << ShAmt)
-  // BTC: X ^ (1 << ShAmt)
-  SDValue ShAmt;
-  if (!StoredVal.hasOneUse() ||
-      !(sd_match(StoredVal, m_And(m_Specific(LoadVal),
-                                  m_Not(m_Shl(m_One(), m_Value(ShAmt))))) ||
-        sd_match(StoredVal,
-                 m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
-        sd_match(StoredVal,
-                 m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt))))))
-    return SDValue();
-
-  // Ensure the shift amount is in bounds.
-  KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
-  if (KnownAmt.getMaxValue().uge(VT.getSizeInBits()))
-    return SDValue();
-
-  // Split the shift into an alignment shift that moves the active i32 block to
-  // the bottom bits for truncation and a modulo shift that can act on the i32.
-  EVT AmtVT = ShAmt.getValueType();
-  SDValue AlignAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
-                                 DAG.getSignedConstant(-32LL, DL, AmtVT));
-  SDValue ModuloAmt =
-      DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT));
-
-  // Compute the byte offset for the i32 block that is changed by the RMW.
-  // combineTruncate will adjust the load for us in a similar way.
-  EVT PtrVT = St->getBasePtr().getValueType();
-  SDValue PtrBitOfs = DAG.getZExtOrTrunc(AlignAmt, DL, PtrVT);
-  SDValue PtrByteOfs = DAG.getNode(ISD::SRL, DL, PtrVT, PtrBitOfs,
-                                   DAG.getShiftAmountConstant(3, PtrVT, DL));
-  SDValue NewPtr = DAG.getMemBasePlusOffset(St->getBasePtr(), PtrByteOfs, DL,
-                                            SDNodeFlags::NoUnsignedWrap);
-
-  // Reconstruct the BTC/BTR/BTS pattern for the i32 block and store.
-  SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt);
-  X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
-
-  SDValue Mask =
-      DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
-                  DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
-  if (StoredVal.getOpcode() == ISD::AND)
-    Mask = DAG.getNOT(DL, Mask, MVT::i32);
-
-  SDValue Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
-  return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(),
-                      Align(), St->getMemOperand()->getFlags());
-}
-
 static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
@@ -53644,9 +53570,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget))
-    return R;
-
   // Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC)
   //         store(cmov(x, load(p), CC), p) to cstore(x, p, InvertCC)
   if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
@@ -54579,9 +54502,8 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
   // truncation, see if we can convert the shift into a pointer offset instead.
   // Limit this to normal (non-ext) scalar integer loads.
   if (SrcVT.isScalarInteger() && Src.getOpcode() == ISD::SRL &&
-      Src.hasOneUse() && ISD::isNormalLoad(Src.getOperand(0).getNode()) &&
-      (Src.getOperand(0).hasOneUse() ||
-       !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, SrcVT))) {
+      Src.hasOneUse() && Src.getOperand(0).hasOneUse() &&
+      ISD::isNormalLoad(Src.getOperand(0).getNode())) {
     auto *Ld = cast<LoadSDNode>(Src.getOperand(0));
     if (Ld->isSimple() && VT.isByteSized() &&
         isPowerOf2_64(VT.getSizeInBits())) {
@@ -56381,7 +56303,6 @@ static SDValue combineAVX512SetCCToKMOV(EVT VT, SDValue Op0, ISD::CondCode CC,
 static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
-  using namespace SDPatternMatch;
   const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
   const SDValue LHS = N->getOperand(0);
   const SDValue RHS = N->getOperand(1);
@@ -56440,37 +56361,6 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
       if (SDValue AndN = MatchAndCmpEq(RHS, LHS))
         return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
 
-      // If we're performing a bit test on a larger than legal type, attempt
-      // to (aligned) shift down the value to the bottom 32-bits and then
-      // perform the bittest on the i32 value.
-      // ICMP_ZERO(AND(X,SHL(1,IDX)))
-      // --> ICMP_ZERO(AND(TRUNC(SRL(X,AND(IDX,-32))),SHL(1,AND(IDX,31))))
-      if (isNullConstant(RHS) &&
-          OpVT.getScalarSizeInBits() > (Subtarget.is64Bit() ? 64 : 32)) {
-        SDValue X, ShAmt;
-        if (sd_match(LHS, m_OneUse(m_And(m_Value(X),
-                                         m_Shl(m_One(), m_Value(ShAmt)))))) {
-          // Only attempt this if the shift amount is known to be in bounds.
-          KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
-          if (KnownAmt.getMaxValue().ult(OpVT.getScalarSizeInBits())) {
-            EVT AmtVT = ShAmt.getValueType();
-            SDValue AlignAmt =
-                DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
-                            DAG.getSignedConstant(-32LL, DL, AmtVT));
-            SDValue ModuloAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
-                                            DAG.getConstant(31, DL, AmtVT));
-            SDValue Mask = DAG.getNode(
-                ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
-                DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
-            X = DAG.getNode(ISD::SRL, DL, OpVT, X, AlignAmt);
-            X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
-            X = DAG.getNode(ISD::AND, DL, MVT::i32, X, Mask);
-            return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, MVT::i32),
-                                CC);
-          }
-        }
-      }
-
       // cmpeq(trunc(x),C) --> cmpeq(x,C)
       // cmpne(trunc(x),C) --> cmpne(x,C)
       // iff x upper bits are zero.