@@ -118,6 +118,7 @@ using namespace llvm;
118118#define DEBUG_TYPE "arm-isel"
119119
120120STATISTIC(NumTailCalls, "Number of tail calls");
121+ STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
121122STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
122123STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
123124STATISTIC(NumConstpoolPromoted,
@@ -128,6 +129,12 @@ ARMInterworking("arm-interworking", cl::Hidden,
128129 cl::desc("Enable / disable ARM interworking (for debugging only)"),
129130 cl::init(true));
130131
// Command-line switch to globally enable/disable the shrinking of logical
// (AND/OR/XOR) immediates performed in targetShrinkDemandedConstant.
// Defaults to on; -arm-enable-logical-imm=false disables it for debugging.
static cl::opt<bool>
    EnableOptimizeLogicalImm("arm-enable-logical-imm", cl::Hidden,
                             cl::desc("Enable ARM logical imm instruction "
                                      "optimization"),
                             cl::init(true));
131138static cl::opt<bool> EnableConstpoolPromotion(
132139 "arm-promote-constant", cl::Hidden,
133140 cl::desc("Enable / disable promotion of unnamed_addr constants into "
@@ -20138,6 +20145,109 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2013820145 }
2013920146}
2014020147
20148+ static bool isLegalLogicalImmediate(unsigned Imm, const ARMSubtarget *Subtarget) {
20149+ // Handle special cases first
20150+ if (!Subtarget->isThumb())
20151+ return ARM_AM::getSOImmVal(Imm) != -1;
20152+ if (Subtarget->isThumb2())
20153+ return ARM_AM::getT2SOImmVal(Imm) != -1;
20154+ // Thumb1 only has 8-bit unsigned immediate.
20155+ return Imm <= 255;
20156+ }
20157+
/// Try to replace the constant RHS of a logical node (AND / OR / XOR) with a
/// cheaper-to-encode immediate, exploiting the fact that only the bits in
/// \p Demanded are observed by users of \p Op. Non-demanded bits may be freely
/// set to 0 or 1 to reach a legally encodable immediate (or the inverse of
/// one, using BIC/ORN forms).
///
/// \param Op        The logical SDNode with a constant right-hand operand.
/// \param Imm       The current immediate value of \p Op's RHS.
/// \param Demanded  Mask of result bits actually demanded by users.
/// \param TLO       Combiner helper used to build and commit the replacement.
/// \param NewOpc    Machine opcode for the plain (non-inverted) immediate form.
/// \param Subtarget Used to decide immediate-encoding legality.
/// \returns true iff \p Op was replaced via TLO.CombineTo.
static bool optimizeLogicalImm(SDValue Op, unsigned Imm, const APInt &Demanded,
                               TargetLowering::TargetLoweringOpt &TLO,
                               unsigned NewOpc, const ARMSubtarget *Subtarget) {
  unsigned OldImm = Imm, NewImm;

  // Bail out if the immediate is already all zeros, all ones, or directly
  // encodable -- there is nothing to gain by rewriting it.
  if (Imm == 0 || Imm == ~0U || isLegalLogicalImmediate(Imm, Subtarget))
    return false;

  // Also bail out if the inverted immediate is encodable and the opcode has an
  // inverted-immediate form: BIC (AND) for all modes, ORN (OR) on Thumb2.
  if ((Op.getOpcode() == ISD::AND || (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) && isLegalLogicalImmediate(~Imm, Subtarget))
    return false;

  unsigned DemandedBits = Demanded.getZExtValue();

  // Clear bits that are not demanded.
  Imm &= DemandedBits;

  // Try to extend the immediate to a legal ARM rotating immediate
  // by filling in non-demanded bits. ARM supports:
  // - An 8-bit value rotated by an even number of bits (0, 2, 4, 6, ..., 30)
  // - Any 8-bit immediate (Thumb2 also supports 16-bit splat patterns)
  unsigned NonDemandedBits = ~DemandedBits;

  // First candidate: fill all non-demanded bits with 0. (Imm was already
  // masked above, so this is just Imm; kept for symmetry with the fill-with-1
  // case below.)
  NewImm = Imm & DemandedBits;
  if (isLegalLogicalImmediate(NewImm, Subtarget) ||
      ((Op.getOpcode() == ISD::AND ||
        (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) &&
       isLegalLogicalImmediate(~NewImm, Subtarget))) {
    ++NumOptimizedImms;
  } else {
    // Second candidate: fill all non-demanded bits with 1.
    NewImm = Imm | NonDemandedBits;
    if (isLegalLogicalImmediate(NewImm, Subtarget) ||
        ((Op.getOpcode() == ISD::AND ||
          (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) &&
         isLegalLogicalImmediate(~NewImm, Subtarget))) {
      ++NumOptimizedImms;
    } else {
      // Neither fill direction yields an encodable (or invertible-encodable)
      // immediate; leave the node alone.
      return false;
    }
  }

  // OldImm is only read by the asserts below; silence -Wunused in NDEBUG.
  (void)OldImm;
  assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
         "demanded bits should never be altered");
  // If NewImm equaled OldImm, the legality checks at the top of the function
  // would already have returned false, so the candidate must differ.
  assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");

  // Create the new constant immediate node.
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue New;

  // If the new constant immediate is all-zeros or all-ones, let the target
  // independent DAG combine optimize this node.
  if (NewImm == 0 || NewImm == ~0U) {
    New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
                          TLO.DAG.getConstant(NewImm, DL, VT));
    // Otherwise, create a machine node so that target independent DAG combine
    // doesn't undo this optimization.
  } else {
    // Plain immediate form: use the caller-provided opcode directly.
    if (isLegalLogicalImmediate(NewImm, Subtarget)) {
      SDValue EncConst = TLO.DAG.getTargetConstant(NewImm, DL, VT);
      New = SDValue(
          TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst),
          0);
    } else if ((Op.getOpcode() == ISD::AND ||
                (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) &&
               isLegalLogicalImmediate(~NewImm, Subtarget)) {

      if (Op.getOpcode() == ISD::OR) {
        // ORN (Thumb2 only; the condition above guarantees isThumb2 here).
        NewOpc = ARM::t2ORNri;
      } else {
        // AND -> BIC
        // NOTE(review): ARM::tBIC is a register-register Thumb1 encoding;
        // building a machine node with an immediate operand here looks
        // suspect -- confirm Thumb1 is meant to reach this path.
        NewOpc = Subtarget->isThumb()
                     ? Subtarget->isThumb2() ? ARM::t2BICri : ARM::tBIC
                     : ARM::BICri;
      }
      SDValue EncConst = TLO.DAG.getTargetConstant(~NewImm, DL, VT);
      New = SDValue(
          TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst),
          0);
    } else {
      return false;
    }
  }

  // Commit the replacement to the combiner.
  return TLO.CombineTo(Op, New);
}
20250+
bool ARMTargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    TargetLoweringOpt &TLO) const {
  // Only run once operations are legal, to avoid dealing with illegal types
  // or blocking earlier target-independent combines.
  if (!TLO.LegalOps)
    return false;

  // Honor the global off-switch (-arm-enable-logical-imm=false).
  if (!EnableOptimizeLogicalImm)
    return false;

  EVT VT = Op.getValueType();
  // Scalar i32 only; vector logical immediates are not handled here.
  if (VT.isVector())
    return false;

  assert(VT == MVT::i32 && "Unexpected integer type");

  // Exit early if we demand all bits.
  if (DemandedBits.popcount() == 32)
    return false;

  // Make sure the RHS really is a constant.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;

  unsigned Mask = C->getZExtValue();

  // If thumb, check for uxth and uxtb masks.
  if (Subtarget->isThumb1Only() && Op.getOpcode() == ISD::AND) {
    unsigned Demanded = DemandedBits.getZExtValue();
    unsigned ShrunkMask = Mask & Demanded;
    unsigned ExpandedMask = Mask | ~Demanded;

    // If the mask is all zeros, let the target-independent code replace the
    // result with zero.
    if (ShrunkMask == 0)
      return false;

    // If the mask is all ones, erase the AND. (Currently, the
    // target-independent code won't do this, so we have to do it explicitly to
    // avoid an infinite loop in obscure cases.)
    if (ExpandedMask == ~0U)
      return TLO.CombineTo(Op, Op.getOperand(0));
    // A candidate mask is usable iff it keeps every demanded set bit and sets
    // no bit known to be clear.
    auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
      return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
    };
    // Replace the AND's constant with NewMask (no-op when already equal).
    auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
      if (NewMask == Mask)
        return true;
      SDLoc DL(Op);
      SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    };

    // Prefer the uxtb (0xFF) mask, then the uxth (0xFFFF) mask.
    if (IsLegalMask(0xFF))
      return UseMask(0xFF);
    if (IsLegalMask(0xFFFF))
      return UseMask(0xFFFF);
  }

  // Generic path: pick the machine opcode for the plain immediate form of the
  // logical op; optimizeLogicalImm may instead choose an inverted (BIC/ORN)
  // encoding.
  unsigned NewOpc;
  switch (Op.getOpcode()) {
  default:
    return false;
  case ISD::AND:
    // NOTE(review): ARM::tAND (and tORR/tEOR below) are register-register
    // Thumb1 encodings; whether an immediate machine node with these opcodes
    // is valid for Thumb1 should be confirmed.
    NewOpc = Subtarget->isThumb()
                 ? Subtarget->isThumb2() ? ARM::t2ANDri : ARM::tAND
                 : ARM::ANDri;
    break;
  case ISD::OR:
    NewOpc = Subtarget->isThumb()
                 ? Subtarget->isThumb2() ? ARM::t2ORRri : ARM::tORR
                 : ARM::ORRri;
    break;
  case ISD::XOR:
    NewOpc = Subtarget->isThumb()
                 ? Subtarget->isThumb2() ? ARM::t2EORri : ARM::tEOR
                 : ARM::EORri;
    break;
  }

  return optimizeLogicalImm(Op, Mask, DemandedBits, TLO, NewOpc, Subtarget);
}
2022220336
2022320337bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
0 commit comments