@@ -118,6 +118,7 @@ using namespace llvm;
 #define DEBUG_TYPE "arm-isel"
 
 STATISTIC(NumTailCalls, "Number of tail calls");
+STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
 STATISTIC(NumConstpoolPromoted,
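Note: `STATISTIC` (from `llvm/ADT/Statistic.h`) registers a named counter under the current `DEBUG_TYPE`, and the counters are printed when the compiler runs with `-stats`. A minimal sketch of the pattern this hunk relies on; the helper function here is illustrative, not part of the patch:

```cpp
#include "llvm/ADT/Statistic.h"

#define DEBUG_TYPE "arm-isel"

// Registers a counter under the "arm-isel" debug type; `llc -stats`
// reports it as:
//   <N> arm-isel - Number of times immediates were optimized
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");

// Hypothetical helper showing the increment idiom used throughout the patch.
static void noteOptimizedImm() { ++NumOptimizedImms; }
```

Counters like this are typically compiled out of release builds unless statistics are force-enabled at configure time.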
@@ -20105,6 +20106,16 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
   }
 }
 
+static bool isLegalLogicalImmediate(unsigned Imm,
+                                    const ARMSubtarget *Subtarget) {
+  if (!Subtarget->isThumb())
+    return ARM_AM::getSOImmVal(Imm) != -1;
+  if (Subtarget->isThumb2())
+    return ARM_AM::getT2SOImmVal(Imm) != -1;
+  // Thumb1 has only an 8-bit unsigned immediate.
+  return Imm <= 255;
+}
+
 bool ARMTargetLowering::targetShrinkDemandedConstant(
     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
     TargetLoweringOpt &TLO) const {
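Note: `ARM_AM::getSOImmVal` returns the encoding of an A32 "modified immediate" (an 8-bit value rotated right by an even amount), or -1 if the value is not encodable, and `getT2SOImmVal` does the same for the richer Thumb2 scheme. A self-contained sketch of the A32 rule, for illustration only (the real helper also computes the actual encoding):

```cpp
#include <cstdint>

// True iff Imm can be written as ROR(imm8, 2*n): an 8-bit value rotated
// right by an even amount. This is the A32 data-processing immediate rule
// that getSOImmVal(Imm) != -1 tests.
static bool isA32ModifiedImm(uint32_t Imm) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    // Rotating Imm *left* by Rot undoes a right-rotation by Rot.
    uint32_t Base = (Imm << Rot) | (Imm >> ((32 - Rot) & 31));
    if (Base <= 0xFF)
      return true;
  }
  return false;
}
```

For example, 0xFF000000 is encodable (0xFF rotated right by 8), but 0x1FE is not, because the rotation it would need is odd; shrinking it to 0xFE when the top bit is not demanded makes it legal, which is exactly the situation the function below exploits.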
@@ -20113,10 +20124,6 @@ bool ARMTargetLowering::targetShrinkDemandedConstant(
   if (!TLO.LegalOps)
     return false;
 
-  // Only optimize AND for now.
-  if (Op.getOpcode() != ISD::AND)
-    return false;
-
   EVT VT = Op.getValueType();
 
   // Ignore vectors.
@@ -20125,28 +20132,28 @@
 
   assert(VT == MVT::i32 && "Unexpected integer type");
 
+  // Exit early if we demand all bits.
+  if (DemandedBits.popcount() == 32)
+    return false;
+
+  // Only optimize AND for now.
+  if (Op.getOpcode() != ISD::AND)
+    return false;
+
   // Make sure the RHS really is a constant.
   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
   if (!C)
     return false;
 
   unsigned Mask = C->getZExtValue();
 
+  if (Mask == 0 || Mask == ~0U)
+    return false;
+
   unsigned Demanded = DemandedBits.getZExtValue();
   unsigned ShrunkMask = Mask & Demanded;
   unsigned ExpandedMask = Mask | ~Demanded;
 
-  // If the mask is all zeros, let the target-independent code replace the
-  // result with zero.
-  if (ShrunkMask == 0)
-    return false;
-
-  // If the mask is all ones, erase the AND. (Currently, the target-independent
-  // code won't do this, so we have to do it explicitly to avoid an infinite
-  // loop in obscure cases.)
-  if (ExpandedMask == ~0U)
-    return TLO.CombineTo(Op, Op.getOperand(0));
-
   auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
     return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
   };
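Note: `ShrunkMask` and `ExpandedMask` bound the set of constants interchangeable with `Mask` on the demanded bits: for any `M` with `ShrunkMask` ⊆ `M` ⊆ `ExpandedMask`, `M & Demanded` equals `Mask & Demanded`, so `x & M` and `x & Mask` agree on every demanded bit. A small worked example of that invariant, with values chosen for illustration:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint32_t Mask = 0x000001FE;     // not an A32 modified immediate (odd rotation)
  uint32_t Demanded = 0x000000FF; // the user only consumes the low byte

  uint32_t ShrunkMask = Mask & Demanded;    // 0x000000FE: undemanded bits cleared
  uint32_t ExpandedMask = Mask | ~Demanded; // 0xFFFFFFFE: undemanded bits set

  // Candidate constant: keeps every ShrunkMask bit, adds no bit outside
  // ExpandedMask (the same test as the IsLegalMask lambda above).
  uint32_t M = 0xFE; // a legal ARM immediate
  assert((ShrunkMask & M) == ShrunkMask && (~ExpandedMask & M) == 0);

  // On the demanded bits, (x & Mask) and (x & M) agree for any x.
  for (uint32_t x : {0u, 0xFFFFFFFFu, 0x12345678u})
    assert(((x & Mask) & Demanded) == ((x & M) & Demanded));
}
```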
@@ -20159,30 +20166,61 @@
     return TLO.CombineTo(Op, NewOp);
   };
 
-  // Prefer uxtb mask.
-  if (IsLegalMask(0xFF))
-    return UseMask(0xFF);
+  // If the mask is all zeros, let the target-independent code replace the
+  // result with zero.
+  if (ShrunkMask == 0) {
+    ++NumOptimizedImms;
+    return UseMask(ShrunkMask);
+  }
 
-  // Prefer uxth mask.
-  if (IsLegalMask(0xFFFF))
-    return UseMask(0xFFFF);
+  // If the mask is all ones, erase the AND. (Currently, the target-independent
+  // code won't do this, so we have to do it explicitly to avoid an infinite
+  // loop in obscure cases.)
+  if (ExpandedMask == ~0U) {
+    ++NumOptimizedImms;
+    return UseMask(ExpandedMask);
+  }
 
-  // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
-  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
-  if (ShrunkMask < 256)
+  // On Thumb1 (v6 and later), prefer the uxtb and uxth masks first.
+  if (Subtarget->isThumb1Only() && Subtarget->hasV6Ops()) {
+    if (IsLegalMask(0xFF)) {
+      ++NumOptimizedImms;
+      return UseMask(0xFF);
+    }
+
+    if (IsLegalMask(0xFFFF)) {
+      ++NumOptimizedImms;
+      return UseMask(0xFFFF);
+    }
+  }
+
+  // Don't optimize if the mask is already a legal immediate.
+  if (isLegalLogicalImmediate(Mask, Subtarget))
+    return false;
+
+  if (isLegalLogicalImmediate(ShrunkMask, Subtarget)) {
+    ++NumOptimizedImms;
     return UseMask(ShrunkMask);
+  }
 
-  // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
-  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
-  if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
+  // FIXME: The v6 check is here because this interferes with some ubfx
+  // optimizations.
+  if (!Subtarget->hasV6Ops() &&
+      isLegalLogicalImmediate(~ExpandedMask, Subtarget)) {
+    ++NumOptimizedImms;
     return UseMask(ExpandedMask);
+  }
+
+  if ((~ExpandedMask) < 256) {
+    ++NumOptimizedImms;
+    return UseMask(ExpandedMask);
+  }
 
   // Potential improvements:
   //
   // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
   // We could try to prefer Thumb1 immediates which can be lowered to a
   // two-instruction sequence.
-  // We could try to recognize more legal ARM/Thumb2 immediates here.
 
   return false;
 }
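Note: putting the pieces together, the new decision ladder lets generic code handle the degenerate masks, refuses to disturb constants that are already legal, and only then tries the shrunk constant before the expanded one. Below is a hedged, A32-only mirror of that ladder; it omits the Thumb1 uxtb/uxth preference and the v6 ubfx carve-out, reuses the rotation check sketched earlier, and uses hypothetical names:

```cpp
#include <cstdint>
#include <optional>

// A32 modified-immediate test, as sketched after the first hunk.
static bool isA32ModifiedImm(uint32_t Imm) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    uint32_t Base = (Imm << Rot) | (Imm >> ((32 - Rot) & 31));
    if (Base <= 0xFF)
      return true;
  }
  return false;
}

// Returns a replacement AND mask equivalent to Mask on the demanded bits,
// or nullopt if the original should be kept. Simplified from the patch.
static std::optional<uint32_t> shrinkAndMask(uint32_t Mask, uint32_t Demanded) {
  if (Demanded == ~0u || Mask == 0 || Mask == ~0u)
    return std::nullopt;                    // all bits demanded, or generic folds apply
  uint32_t Shrunk = Mask & Demanded;        // lower bound on a usable mask
  uint32_t Expanded = Mask | ~Demanded;     // upper bound on a usable mask
  if (Shrunk == 0)
    return 0;                               // AND x, 0 folds to zero downstream
  if (Expanded == ~0u)
    return ~0u;                             // AND x, -1 is erased downstream
  if (isA32ModifiedImm(Mask))
    return std::nullopt;                    // already legal: don't disturb other folds
  if (isA32ModifiedImm(Shrunk))
    return Shrunk;                          // clearing undemanded bits made it legal
  if (isA32ModifiedImm(~Expanded))
    return Expanded;                        // legal as a BIC-style inverted immediate
  return std::nullopt;
}
```

For instance, `shrinkAndMask(0x1FE, 0xFF)` yields 0xFE, replacing a constant that would need a separate materialization with one that fits a single AND.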