@@ -118,6 +118,7 @@ using namespace llvm;
 #define DEBUG_TYPE "arm-isel"
 
 STATISTIC(NumTailCalls, "Number of tail calls");
+STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
 STATISTIC(NumConstpoolPromoted,
@@ -128,6 +129,12 @@ ARMInterworking("arm-interworking", cl::Hidden,
   cl::desc("Enable / disable ARM interworking (for debugging only)"),
   cl::init(true));
 
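+// Allows the logical-immediate optimization in targetShrinkDemandedConstant
+// below to be disabled for debugging (e.g. "llc -arm-enable-logical-imm=false",
+// or via "-mllvm" when driving clang).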
+static cl::opt<bool>
+EnableOptimizeLogicalImm("arm-enable-logical-imm", cl::Hidden,
+                         cl::desc("Enable ARM logical imm instruction "
+                                  "optimization"),
+                         cl::init(true));
+
 static cl::opt<bool> EnableConstpoolPromotion(
     "arm-promote-constant", cl::Hidden,
     cl::desc("Enable / disable promotion of unnamed_addr constants into "
@@ -20138,6 +20145,112 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
   }
 }
 
+static bool isLegalLogicalImmediate(unsigned Imm,
+                                    const ARMSubtarget *Subtarget) {
+  // ARM mode: an 8-bit value rotated right by an even amount.
+  if (!Subtarget->isThumb())
+    return ARM_AM::getSOImmVal(Imm) != -1;
+  // Thumb2: a rotated 8-bit value or one of the splat patterns.
+  if (Subtarget->isThumb2())
+    return ARM_AM::getT2SOImmVal(Imm) != -1;
+  // Thumb1 only has an 8-bit unsigned immediate.
+  return Imm <= 255;
+}
+
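+// Try to replace a logical immediate that the target cannot encode directly
+// with an equivalent one that it can, by exploiting the bits of the result
+// that the user does not demand.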
+static bool optimizeLogicalImm(SDValue Op, unsigned Imm, const APInt &Demanded,
+                               TargetLowering::TargetLoweringOpt &TLO,
+                               unsigned NewOpc, const ARMSubtarget *Subtarget) {
+  unsigned OldImm = Imm, NewImm;
+
+  // Return if the immediate is already all zeros, all ones, or a legal
+  // modified immediate.
+  if (Imm == 0 || Imm == ~0U || isLegalLogicalImmediate(Imm, Subtarget))
+    return false;
+
+  // If the inverted immediate is already legal, the operation can be selected
+  // as BIC (for AND) or, on Thumb2, ORN (for OR); leave it alone.
+  if ((Op.getOpcode() == ISD::AND ||
+       (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) &&
+      isLegalLogicalImmediate(~Imm, Subtarget))
+    return false;
+
+  unsigned DemandedBits = Demanded.getZExtValue();
+
+  // Clear bits that are not demanded.
+  Imm &= DemandedBits;
+
+  // Try to make the immediate legal by filling in the bits that are not
+  // demanded. ARM mode accepts an 8-bit value rotated right by an even
+  // amount; Thumb2 accepts an 8-bit value rotated right by any amount as
+  // well as the splat patterns 0x00XY00XY, 0xXY00XY00 and 0xXYXYXYXY.
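+  // For example, 0x00FFF000 is not encodable as a modified immediate, but if
+  // only the bits in 0x000FF000 are demanded, clearing the rest yields
+  // 0x000FF000 (0xFF rotated right by 20), which is.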
+  unsigned NonDemandedBits = ~DemandedBits;
+
+  // Try filling the non-demanded bits with zeros.
+  NewImm = Imm & DemandedBits;
+  if (isLegalLogicalImmediate(NewImm, Subtarget) ||
+      ((Op.getOpcode() == ISD::AND ||
+        (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) &&
+       isLegalLogicalImmediate(~NewImm, Subtarget))) {
+    ++NumOptimizedImms;
+  } else {
+    // Try filling the non-demanded bits with ones.
+    NewImm = Imm | NonDemandedBits;
+    if (isLegalLogicalImmediate(NewImm, Subtarget) ||
+        ((Op.getOpcode() == ISD::AND ||
+          (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) &&
+         isLegalLogicalImmediate(~NewImm, Subtarget))) {
+      ++NumOptimizedImms;
+    } else {
+      return false;
+    }
+  }
+
+  (void)OldImm;
+  assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
+         "demanded bits should never be altered");
+  assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
+
+  // Create the new constant immediate node.
+  EVT VT = Op.getValueType();
+  SDLoc DL(Op);
+  SDValue New;
+
+  // If the new constant immediate is all-zeros or all-ones, let the target
+  // independent DAG combine optimize this node.
+  if (NewImm == 0 || NewImm == ~0U) {
+    New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
+                          TLO.DAG.getConstant(NewImm, DL, VT));
+  } else {
+    // Otherwise, create a machine node so that target independent DAG combine
+    // doesn't undo this optimization.
+    // If the new immediate is itself encodable, keep the requested opcode.
+    if (isLegalLogicalImmediate(NewImm, Subtarget)) {
+      SDValue EncConst = TLO.DAG.getTargetConstant(NewImm, DL, VT);
+      New = SDValue(
+          TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst),
+          0);
+    } else if ((Op.getOpcode() == ISD::AND ||
+                (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) &&
+               isLegalLogicalImmediate(~NewImm, Subtarget)) {
+      // Only the inverted immediate is encodable: switch to BIC or ORN.
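+      // e.g. an AND whose new immediate is 0xFFFFFF00 cannot be encoded
+      // directly, but its inverse 0xFF can, so it is emitted as BIC with #0xFF.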
+      if (Op.getOpcode() == ISD::OR) {
+        // ORN
+        NewOpc = ARM::t2ORNri;
+      } else {
+        // AND -> BIC
+        NewOpc = Subtarget->isThumb()
+                     ? Subtarget->isThumb2() ? ARM::t2BICri : ARM::tBIC
+                     : ARM::BICri;
+      }
+      SDValue EncConst = TLO.DAG.getTargetConstant(~NewImm, DL, VT);
+      New = SDValue(
+          TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst),
+          0);
+    } else {
+      return false;
+    }
+  }
+
+  return TLO.CombineTo(Op, New);
+}
+
 bool ARMTargetLowering::targetShrinkDemandedConstant(
     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
     TargetLoweringOpt &TLO) const {
@@ -20146,78 +20259,82 @@ bool ARMTargetLowering::targetShrinkDemandedConstant(
   if (!TLO.LegalOps)
     return false;
 
-  // Only optimize AND for now.
-  if (Op.getOpcode() != ISD::AND)
+  if (!EnableOptimizeLogicalImm)
     return false;
 
   EVT VT = Op.getValueType();
-
-  // Ignore vectors.
   if (VT.isVector())
     return false;
 
   assert(VT == MVT::i32 && "Unexpected integer type");
 
+  // Exit early if we demand all bits.
+  if (DemandedBits.popcount() == 32)
+    return false;
+
   // Make sure the RHS really is a constant.
   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
   if (!C)
     return false;
 
   unsigned Mask = C->getZExtValue();
 
-  unsigned Demanded = DemandedBits.getZExtValue();
-  unsigned ShrunkMask = Mask & Demanded;
-  unsigned ExpandedMask = Mask | ~Demanded;
-
-  // If the mask is all zeros, let the target-independent code replace the
-  // result with zero.
-  if (ShrunkMask == 0)
-    return false;
-
-  // If the mask is all ones, erase the AND. (Currently, the target-independent
-  // code won't do this, so we have to do it explicitly to avoid an infinite
-  // loop in obscure cases.)
-  if (ExpandedMask == ~0U)
-    return TLO.CombineTo(Op, Op.getOperand(0));
-
-  auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
-    return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
-  };
-  auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
-    if (NewMask == Mask)
-      return true;
-    SDLoc DL(Op);
-    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
-    SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
-    return TLO.CombineTo(Op, NewOp);
-  };
-
-  // Prefer uxtb mask.
-  if (IsLegalMask(0xFF))
-    return UseMask(0xFF);
+  // If Thumb1, check for the uxtb and uxth masks.
+  if (Subtarget->isThumb1Only() && Op.getOpcode() == ISD::AND) {
+    unsigned Demanded = DemandedBits.getZExtValue();
+    unsigned ShrunkMask = Mask & Demanded;
+    unsigned ExpandedMask = Mask | ~Demanded;
 
-  // Prefer uxth mask.
-  if (IsLegalMask(0xFFFF))
-    return UseMask(0xFFFF);
+    // If the mask is all zeros, let the target-independent code replace the
+    // result with zero.
+    if (ShrunkMask == 0)
+      return false;
 
-  // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
-  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
-  if (ShrunkMask < 256)
-    return UseMask(ShrunkMask);
+    // If the mask is all ones, erase the AND. (Currently, the
+    // target-independent code won't do this, so we have to do it explicitly to
+    // avoid an infinite loop in obscure cases.)
+    if (ExpandedMask == ~0U)
+      return TLO.CombineTo(Op, Op.getOperand(0));
+
+    auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
+      return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
+    };
+    auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
+      if (NewMask == Mask)
+        return true;
+      SDLoc DL(Op);
+      SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
+      SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+      return TLO.CombineTo(Op, NewOp);
+    };
 
-  // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
-  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
-  if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
-    return UseMask(ExpandedMask);
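+    // e.g. an AND with 0x1FF where only the low 8 bits of the result are
+    // demanded shrinks to an AND with 0xFF, which can be selected as uxtb.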
+    if (IsLegalMask(0xFF))
+      return UseMask(0xFF);
+    if (IsLegalMask(0xFFFF))
+      return UseMask(0xFFFF);
+  }
 
-  // Potential improvements:
-  //
-  // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
-  // We could try to prefer Thumb1 immediates which can be lowered to a
-  // two-instruction sequence.
-  // We could try to recognize more legal ARM/Thumb2 immediates here.
+  unsigned NewOpc;
+  switch (Op.getOpcode()) {
+  default:
+    return false;
+  case ISD::AND:
+    NewOpc = Subtarget->isThumb()
+                 ? Subtarget->isThumb2() ? ARM::t2ANDri : ARM::tAND
+                 : ARM::ANDri;
+    break;
+  case ISD::OR:
+    NewOpc = Subtarget->isThumb()
+                 ? Subtarget->isThumb2() ? ARM::t2ORRri : ARM::tORR
+                 : ARM::ORRri;
+    break;
+  case ISD::XOR:
+    NewOpc = Subtarget->isThumb()
+                 ? Subtarget->isThumb2() ? ARM::t2EORri : ARM::tEOR
+                 : ARM::EORri;
+    break;
+  }
 
-  return false;
+  return optimizeLogicalImm(Op, Mask, DemandedBits, TLO, NewOpc, Subtarget);
 }
 
 bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(