Skip to content

Commit 30b6b95

Browse files
committed
[DAGCombine] Remove OneUse restriction when folding (shl (add x, c1), c2) and (shl (sext (add x, c1)), c2)
1 parent ff521bb commit 30b6b95

File tree

13 files changed

+150
-49
lines changed

13 files changed

+150
-49
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10070,7 +10070,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
1007010070
// Variant of version done on multiply, except mul by a power of 2 is turned
1007110071
// into a shift.
1007210072
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10073-
N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
10073+
TLI.isDesirableToCommuteWithShift(N, Level)) {
1007410074
SDValue N01 = N0.getOperand(1);
1007510075
if (SDValue Shl1 =
1007610076
DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
@@ -10089,8 +10089,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
1008910089
// TODO: Should we limit this with isLegalAddImmediate?
1009010090
if (N0.getOpcode() == ISD::SIGN_EXTEND &&
1009110091
N0.getOperand(0).getOpcode() == ISD::ADD &&
10092-
N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
10093-
N0.getOperand(0)->hasOneUse() &&
10092+
N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
1009410093
TLI.isDesirableToCommuteWithShift(N, Level)) {
1009510094
SDValue Add = N0.getOperand(0);
1009610095
SDLoc DL(N0);

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17518,6 +17518,11 @@ AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
1751817518
SDValue ShiftLHS = N->getOperand(0);
1751917519
EVT VT = N->getValueType(0);
1752017520

17521+
if ((ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
17522+
!(ShiftLHS->hasOneUse() && ShiftLHS.getOperand(0)->hasOneUse())) ||
17523+
!ShiftLHS->hasOneUse())
17524+
return false;
17525+
1752117526
// If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not
1752217527
// combine it with shift 'N' to let it be lowered to UBFX except:
1752317528
// ((x >> C) & mask) << C.

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,6 +1036,13 @@ bool AMDGPUTargetLowering::isDesirableToCommuteWithShift(
10361036
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
10371037
N->getOpcode() == ISD::SRL) &&
10381038
"Expected shift op");
1039+
1040+
SDValue ShiftLHS = N->getOperand(0);
1041+
if ((ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
1042+
!(ShiftLHS->hasOneUse() && ShiftLHS.getOperand(0)->hasOneUse())) ||
1043+
!ShiftLHS->hasOneUse())
1044+
return false;
1045+
10391046
// Always commute pre-type legalization and right shifts.
10401047
// We're looking for shl(or(x,y),z) patterns.
10411048
if (Level < CombineLevel::AfterLegalizeTypes ||

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13803,6 +13803,12 @@ ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
1380313803
N->getOpcode() == ISD::SRL) &&
1380413804
"Expected shift op");
1380513805

13806+
SDValue ShiftLHS = N->getOperand(0);
13807+
if ((ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
13808+
!(ShiftLHS->hasOneUse() && ShiftLHS.getOperand(0)->hasOneUse())) ||
13809+
!ShiftLHS->hasOneUse())
13810+
return false;
13811+
1380613812
if (Level == BeforeLegalizeTypes)
1380713813
return true;
1380813814

llvm/lib/Target/Hexagon/HexagonISelLowering.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2156,6 +2156,21 @@ bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
21562156
return X.getValueType().isScalarInteger(); // 'tstbit'
21572157
}
21582158

2159+
bool HexagonTargetLowering::isDesirableToCommuteWithShift(
2160+
const SDNode *N, CombineLevel Level) const {
2161+
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
2162+
N->getOpcode() == ISD::SRL) &&
2163+
"Expected shift op");
2164+
2165+
SDValue ShiftLHS = N->getOperand(0);
2166+
if ((ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
2167+
!(ShiftLHS->hasOneUse() && ShiftLHS.getOperand(0)->hasOneUse())) ||
2168+
!ShiftLHS->hasOneUse())
2169+
return false;
2170+
2171+
return true;
2172+
}
2173+
21592174
bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
21602175
return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
21612176
}

llvm/lib/Target/Hexagon/HexagonISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,9 @@ class HexagonTargetLowering : public TargetLowering {
155155

156156
bool hasBitTest(SDValue X, SDValue Y) const override;
157157

158+
bool isDesirableToCommuteWithShift(const SDNode *N,
159+
CombineLevel Level) const override;
160+
158161
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
159162

160163
/// Return true if an FMA operation is faster than a pair of mul and add

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18877,3 +18877,18 @@ Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
1887718877
return Builder.CreateOr(
1887818878
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
1887918879
}
18880+
18881+
bool PPCTargetLowering::isDesirableToCommuteWithShift(
18882+
const SDNode *N, CombineLevel Level) const {
18883+
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
18884+
N->getOpcode() == ISD::SRL) &&
18885+
"Expected shift op");
18886+
18887+
SDValue ShiftLHS = N->getOperand(0);
18888+
if ((ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
18889+
!(ShiftLHS->hasOneUse() && ShiftLHS.getOperand(0)->hasOneUse())) ||
18890+
!ShiftLHS->hasOneUse())
18891+
return false;
18892+
18893+
return true;
18894+
}

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1489,6 +1489,9 @@ namespace llvm {
14891489
/// through to determine the optimal load/store instruction format.
14901490
unsigned computeMOFlags(const SDNode *Parent, SDValue N,
14911491
SelectionDAG &DAG) const;
1492+
1493+
bool isDesirableToCommuteWithShift(const SDNode *N,
1494+
CombineLevel Level) const override;
14921495
}; // end class PPCTargetLowering
14931496

14941497
namespace PPC {

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17504,8 +17504,46 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
1750417504
// (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
1750517505
SDValue N0 = N->getOperand(0);
1750617506
EVT Ty = N0.getValueType();
17507+
17508+
// LD/ST will optimize constant Offset extraction, so when AddNode is used by
17509+
// LD/ST, it can still complete the folding optimization operation performed
17510+
// above.
17511+
auto isLDST = [&]() {
17512+
bool canOptAwlays = false;
17513+
if (N0->getOpcode() == ISD::ADD && !N0->hasOneUse()) {
17514+
for (SDNode *Use : N0->uses()) {
17515+
// This use is the one we're on right now. Skip it
17516+
if (Use == N || Use->getOpcode() == ISD::SELECT)
17517+
continue;
17518+
if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use)) {
17519+
canOptAwlays = false;
17520+
break;
17521+
}
17522+
canOptAwlays = true;
17523+
}
17524+
}
17525+
17526+
if (N0->getOpcode() == ISD::SIGN_EXTEND &&
17527+
!N0->getOperand(0)->hasOneUse()) {
17528+
for (SDNode *Use : N0->getOperand(0)->uses()) {
17529+
// This use is the one we're on right now. Skip it
17530+
if (Use == N0.getNode() || Use->getOpcode() == ISD::SELECT)
17531+
continue;
17532+
if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use)) {
17533+
canOptAwlays = false;
17534+
break;
17535+
}
17536+
canOptAwlays = true;
17537+
}
17538+
}
17539+
return canOptAwlays;
17540+
};
17541+
1750717542
if (Ty.isScalarInteger() &&
1750817543
(N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
17544+
if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse()) {
17545+
return isLDST();
17546+
}
1750917547
auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
1751017548
auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
1751117549
if (C1 && C2) {
@@ -17540,6 +17578,16 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
1754017578
return false;
1754117579
}
1754217580
}
17581+
17582+
if ((N0->getOpcode() == ISD::ADD || N0->getOpcode() == ISD::OR) &&
17583+
!N0->hasOneUse())
17584+
return false;
17585+
17586+
if (N0->getOpcode() == ISD::SIGN_EXTEND &&
17587+
N0->getOperand(0)->getOpcode() == ISD::ADD &&
17588+
!(N0->hasOneUse() && N0->getOperand(0)->hasOneUse()))
17589+
return isLDST();
17590+
1754317591
return true;
1754417592
}
1754517593

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59541,3 +59541,18 @@ Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
5954159541
return Align(1ULL << ExperimentalPrefInnermostLoopAlignment);
5954259542
return TargetLowering::getPrefLoopAlignment();
5954359543
}
59544+
59545+
bool X86TargetLowering::isDesirableToCommuteWithShift(
59546+
const SDNode *N, CombineLevel Level) const {
59547+
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
59548+
N->getOpcode() == ISD::SRL) &&
59549+
"Expected shift op");
59550+
59551+
SDValue ShiftLHS = N->getOperand(0);
59552+
if ((ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
59553+
!(ShiftLHS->hasOneUse() && ShiftLHS.getOperand(0)->hasOneUse())) ||
59554+
!ShiftLHS->hasOneUse())
59555+
return false;
59556+
59557+
return true;
59558+
}

0 commit comments

Comments
 (0)