@@ -53345,8 +53345,7 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
5334553345}
5334653346
5334753347// Look for a RMW operation that only touches one bit of a larger than legal
53348- // type and fold it to a BTC/BTR/BTS or bit insertion pattern acting on a single
53349- // i32 sub value.
53348+ // type and fold it to a BTC/BTR/BTS pattern acting on a single i32 sub value.
5335053349static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
5335153350 SelectionDAG &DAG,
5335253351 const X86Subtarget &Subtarget) {
@@ -53372,42 +53371,28 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
5337253371 // BTR: X & ~(1 << ShAmt)
5337353372 // BTS: X | (1 << ShAmt)
5337453373 // BTC: X ^ (1 << ShAmt)
53375- //
53376- // BitInsert: (X & ~(1 << ShAmt)) | (InsertBit << ShAmt)
53377- SDValue InsertBit, ShAmt;
53374+ SDValue ShAmt;
5337853375 if (!StoredVal.hasOneUse() ||
5337953376 !(sd_match(StoredVal, m_And(m_Specific(LoadVal),
5338053377 m_Not(m_Shl(m_One(), m_Value(ShAmt))))) ||
5338153378 sd_match(StoredVal,
5338253379 m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
5338353380 sd_match(StoredVal,
53384- m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
53385- sd_match(StoredVal,
53386- m_Or(m_And(m_Specific(LoadVal),
53387- m_Not(m_Shl(m_One(), m_Value(ShAmt)))),
53388- m_Shl(m_Value(InsertBit), m_Deferred(ShAmt))))))
53381+ m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt))))))
5338953382 return SDValue();
5339053383
5339153384 // Ensure the shift amount is in bounds.
5339253385 KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
5339353386 if (KnownAmt.getMaxValue().uge(VT.getSizeInBits()))
5339453387 return SDValue();
5339553388
53396- // If we're inserting a bit then it must be the LSB.
53397- if (InsertBit) {
53398- KnownBits KnownInsert = DAG.computeKnownBits(InsertBit);
53399- if (KnownInsert.countMinLeadingZeros() < (VT.getSizeInBits() - 1))
53400- return SDValue();
53401- }
53402-
5340353389 // Split the shift into an alignment shift that moves the active i32 block to
5340453390 // the bottom bits for truncation and a modulo shift that can act on the i32.
5340553391 EVT AmtVT = ShAmt.getValueType();
5340653392 SDValue AlignAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
5340753393 DAG.getSignedConstant(-32LL, DL, AmtVT));
5340853394 SDValue ModuloAmt =
5340953395 DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT));
53410- ModuloAmt = DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8);
5341153396
5341253397 // Compute the byte offset for the i32 block that is changed by the RMW.
5341353398 // combineTruncate will adjust the load for us in a similar way.
@@ -53422,23 +53407,13 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
5342253407 SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt);
5342353408 X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
5342453409
53425- SDValue Mask = DAG.getNode(ISD::SHL, DL, MVT::i32,
53426- DAG.getConstant(1, DL, MVT::i32), ModuloAmt);
53427-
53428- SDValue Res;
53429- if (InsertBit) {
53430- SDValue BitMask =
53431- DAG.getNode(ISD::SHL, DL, MVT::i32,
53432- DAG.getZExtOrTrunc(InsertBit, DL, MVT::i32), ModuloAmt);
53433- Res =
53434- DAG.getNode(ISD::AND, DL, MVT::i32, X, DAG.getNOT(DL, Mask, MVT::i32));
53435- Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, BitMask);
53436- } else {
53437- if (StoredVal.getOpcode() == ISD::AND)
53438- Mask = DAG.getNOT(DL, Mask, MVT::i32);
53439- Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
53440- }
53410+ SDValue Mask =
53411+ DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
53412+ DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
53413+ if (StoredVal.getOpcode() == ISD::AND)
53414+ Mask = DAG.getNOT(DL, Mask, MVT::i32);
5344153415
53416+ SDValue Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
5344253417 return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(),
5344353418 Align(), St->getMemOperand()->getFlags());
5344453419}
0 commit comments