
Commit 8484584

Revert "[X86] Narrow BT/BTC/BTR/BTS compare + RMW patterns on very large integers (llvm#165540)" (llvm#165979)
This reverts commit a55a720, which breaks i386 on the bot and Rust; see llvm#165540.
1 parent 37d6320 commit 8484584
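
For context, the reverted combine narrowed single-bit read-modify-write updates and bit tests on wider-than-legal integer types so that only one 32-bit word is touched. A minimal sketch of the kind of source pattern involved (hypothetical C++ example using the GCC/Clang __int128 extension; not taken from the commit or its tests):

// Hypothetical example: a single-bit RMW update on an integer wider than the
// largest legal x86 scalar type. The reverted combine rewrote the store so only
// the affected 32-bit word is loaded, updated (BTS-style), and stored back.
unsigned __int128 Flags;

void setFlag(unsigned Idx) {
  Flags |= (unsigned __int128)1 << Idx;
}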

File tree

2 files changed (+6333 -1106 lines)

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 112 deletions
@@ -53344,80 +53344,6 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-// Look for a RMW operation that only touches one bit of a larger than legal
-// type and fold it to a BTC/BTR/BTS pattern acting on a single i32 sub value.
-static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
-                              SelectionDAG &DAG,
-                              const X86Subtarget &Subtarget) {
-  using namespace SDPatternMatch;
-
-  // Only handle normal stores and its chain was a matching normal load.
-  auto *Ld = dyn_cast<LoadSDNode>(St->getChain());
-  if (!ISD::isNormalStore(St) || !St->isSimple() || !Ld ||
-      !ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
-      Ld->getBasePtr() != St->getBasePtr() ||
-      Ld->getOffset() != St->getOffset())
-    return SDValue();
-
-  SDValue LoadVal(Ld, 0);
-  SDValue StoredVal = St->getValue();
-  EVT VT = StoredVal.getValueType();
-
-  // Only narrow larger than legal scalar integers.
-  if (!VT.isScalarInteger() ||
-      VT.getSizeInBits() <= (Subtarget.is64Bit() ? 64 : 32))
-    return SDValue();
-
-  // BTR: X & ~(1 << ShAmt)
-  // BTS: X | (1 << ShAmt)
-  // BTC: X ^ (1 << ShAmt)
-  SDValue ShAmt;
-  if (!StoredVal.hasOneUse() ||
-      !(sd_match(StoredVal, m_And(m_Specific(LoadVal),
-                                  m_Not(m_Shl(m_One(), m_Value(ShAmt))))) ||
-        sd_match(StoredVal,
-                 m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
-        sd_match(StoredVal,
-                 m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt))))))
-    return SDValue();
-
-  // Ensure the shift amount is in bounds.
-  KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
-  if (KnownAmt.getMaxValue().uge(VT.getSizeInBits()))
-    return SDValue();
-
-  // Split the shift into an alignment shift that moves the active i32 block to
-  // the bottom bits for truncation and a modulo shift that can act on the i32.
-  EVT AmtVT = ShAmt.getValueType();
-  SDValue AlignAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
-                                 DAG.getSignedConstant(-32LL, DL, AmtVT));
-  SDValue ModuloAmt =
-      DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT));
-
-  // Compute the byte offset for the i32 block that is changed by the RMW.
-  // combineTruncate will adjust the load for us in a similar way.
-  EVT PtrVT = St->getBasePtr().getValueType();
-  SDValue PtrBitOfs = DAG.getZExtOrTrunc(AlignAmt, DL, PtrVT);
-  SDValue PtrByteOfs = DAG.getNode(ISD::SRL, DL, PtrVT, PtrBitOfs,
-                                   DAG.getShiftAmountConstant(3, PtrVT, DL));
-  SDValue NewPtr = DAG.getMemBasePlusOffset(St->getBasePtr(), PtrByteOfs, DL,
-                                            SDNodeFlags::NoUnsignedWrap);
-
-  // Reconstruct the BTC/BTR/BTS pattern for the i32 block and store.
-  SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt);
-  X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
-
-  SDValue Mask =
-      DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
-                  DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
-  if (StoredVal.getOpcode() == ISD::AND)
-    Mask = DAG.getNOT(DL, Mask, MVT::i32);
-
-  SDValue Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
-  return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(),
-                      Align(), St->getMemOperand()->getFlags());
-}
-
 static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
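
The removed narrowBitOpRMW helper built SelectionDAG nodes for the rewrite shown above. As a rough scalar model of what those nodes compute (illustrative sketch only; little-endian word order assumed, and the function and parameter names are hypothetical):

#include <cstdint>

// Illustrative scalar model of the removed narrowBitOpRMW rewrite for
// store(p, op(load(p), 1 << ShAmt)) on a wide little-endian integer.
void narrowedBitRMW(uint32_t *Words, unsigned ShAmt, char Op) {
  unsigned AlignAmt  = ShAmt & ~31u;          // bit offset of the affected i32 block
  unsigned ModuloAmt = ShAmt & 31u;           // bit position within that block
  uint32_t *NewPtr   = Words + AlignAmt / 32; // i.e. byte offset AlignAmt / 8
  uint32_t Mask      = 1u << ModuloAmt;
  switch (Op) {
  case '&': *NewPtr &= ~Mask; break; // BTR: X & ~(1 << ShAmt)
  case '|': *NewPtr |=  Mask; break; // BTS: X |  (1 << ShAmt)
  case '^': *NewPtr ^=  Mask; break; // BTC: X ^  (1 << ShAmt)
  }
}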
@@ -53644,9 +53570,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget))
-    return R;
-
   // Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC)
   //         store(cmov(x, load(p), CC), p) to cstore(x, p, InvertCC)
   if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
@@ -54579,9 +54502,8 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
   // truncation, see if we can convert the shift into a pointer offset instead.
   // Limit this to normal (non-ext) scalar integer loads.
   if (SrcVT.isScalarInteger() && Src.getOpcode() == ISD::SRL &&
-      Src.hasOneUse() && ISD::isNormalLoad(Src.getOperand(0).getNode()) &&
-      (Src.getOperand(0).hasOneUse() ||
-       !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, SrcVT))) {
+      Src.hasOneUse() && Src.getOperand(0).hasOneUse() &&
+      ISD::isNormalLoad(Src.getOperand(0).getNode())) {
     auto *Ld = cast<LoadSDNode>(Src.getOperand(0));
     if (Ld->isSimple() && VT.isByteSized() &&
         isPowerOf2_64(VT.getSizeInBits())) {
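
The condition restored here guards combineTruncate's fold of a truncated, right-shifted load into a narrow load at a pointer offset. A rough scalar model of that fold (illustrative sketch assuming a byte-aligned, in-bounds shift amount and little-endian layout; names are hypothetical):

#include <cstdint>
#include <cstring>

// Illustrative model: (uint32_t)(wide_load(P) >> Amt) becomes a plain 4-byte
// load at P + Amt / 8 when Amt is a multiple of 8 (little-endian).
uint32_t truncateOfShiftedLoad(const unsigned char *P, unsigned Amt) {
  uint32_t V;
  std::memcpy(&V, P + Amt / 8, sizeof(V));
  return V;
}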
@@ -56381,7 +56303,6 @@ static SDValue combineAVX512SetCCToKMOV(EVT VT, SDValue Op0, ISD::CondCode CC,
 static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
-  using namespace SDPatternMatch;
   const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
   const SDValue LHS = N->getOperand(0);
   const SDValue RHS = N->getOperand(1);
@@ -56440,37 +56361,6 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
     if (SDValue AndN = MatchAndCmpEq(RHS, LHS))
       return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
 
-    // If we're performing a bit test on a larger than legal type, attempt
-    // to (aligned) shift down the value to the bottom 32-bits and then
-    // perform the bittest on the i32 value.
-    // ICMP_ZERO(AND(X,SHL(1,IDX)))
-    // --> ICMP_ZERO(AND(TRUNC(SRL(X,AND(IDX,-32))),SHL(1,AND(IDX,31))))
-    if (isNullConstant(RHS) &&
-        OpVT.getScalarSizeInBits() > (Subtarget.is64Bit() ? 64 : 32)) {
-      SDValue X, ShAmt;
-      if (sd_match(LHS, m_OneUse(m_And(m_Value(X),
-                                       m_Shl(m_One(), m_Value(ShAmt)))))) {
-        // Only attempt this if the shift amount is known to be in bounds.
-        KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
-        if (KnownAmt.getMaxValue().ult(OpVT.getScalarSizeInBits())) {
-          EVT AmtVT = ShAmt.getValueType();
-          SDValue AlignAmt =
-              DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
-                          DAG.getSignedConstant(-32LL, DL, AmtVT));
-          SDValue ModuloAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
-                                          DAG.getConstant(31, DL, AmtVT));
-          SDValue Mask = DAG.getNode(
-              ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
-              DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
-          X = DAG.getNode(ISD::SRL, DL, OpVT, X, AlignAmt);
-          X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
-          X = DAG.getNode(ISD::AND, DL, MVT::i32, X, Mask);
-          return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, MVT::i32),
-                              CC);
-        }
-      }
-    }
-
     // cmpeq(trunc(x),C) --> cmpeq(x,C)
     // cmpne(trunc(x),C) --> cmpne(x,C)
     // iff x upper bits are zero.
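
The removed setcc combine is the bit-test counterpart of the store rewrite: the ICMP_ZERO comment above describes the DAG transform, and a scalar model of the same idea looks roughly like this (illustrative sketch; little-endian word order assumed, names are hypothetical):

#include <cstdint>

// Illustrative model of the removed wide-integer bit-test narrowing:
// instead of materializing the full wide value, probe the one i32 word
// that holds bit Idx.
bool testBit(const uint32_t *Words, unsigned Idx) {
  uint32_t Word = Words[(Idx & ~31u) / 32]; // TRUNC(SRL(X, AND(IDX, -32)))
  uint32_t Mask = 1u << (Idx & 31u);        // SHL(1, AND(IDX, 31))
  return (Word & Mask) != 0;                // inverse of ICMP_ZERO(AND(...))
}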
