
Commit 1ffacd7

Update ARMISelLowering.cpp

1 parent 6cb942c

5 files changed (+341 / -127 lines)


llvm/lib/Target/ARM/ARMISelLowering.cpp
Lines changed: 169 additions & 52 deletions
@@ -118,6 +118,7 @@ using namespace llvm;
 #define DEBUG_TYPE "arm-isel"
 
 STATISTIC(NumTailCalls, "Number of tail calls");
+STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
 STATISTIC(NumConstpoolPromoted,
@@ -128,6 +129,12 @@ ARMInterworking("arm-interworking", cl::Hidden,
                 cl::desc("Enable / disable ARM interworking (for debugging only)"),
                 cl::init(true));
 
+static cl::opt<bool>
+    EnableOptimizeLogicalImm("arm-enable-logical-imm", cl::Hidden,
+                             cl::desc("Enable ARM logical imm instruction "
+                                      "optimization"),
+                             cl::init(true));
+
 static cl::opt<bool> EnableConstpoolPromotion(
     "arm-promote-constant", cl::Hidden,
     cl::desc("Enable / disable promotion of unnamed_addr constants into "
@@ -20138,6 +20145,112 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
   }
 }
 
+static bool isLegalLogicalImmediate(unsigned Imm,
+                                    const ARMSubtarget *Subtarget) {
+  // Handle the special cases first.
+  if (!Subtarget->isThumb())
+    return ARM_AM::getSOImmVal(Imm) != -1;
+  if (Subtarget->isThumb2())
+    return ARM_AM::getT2SOImmVal(Imm) != -1;
+  // Thumb1 only has an 8-bit unsigned immediate.
+  return Imm <= 255;
+}
+
+static bool optimizeLogicalImm(SDValue Op, unsigned Imm, const APInt &Demanded,
+                               TargetLowering::TargetLoweringOpt &TLO,
+                               unsigned NewOpc, const ARMSubtarget *Subtarget) {
+  unsigned OldImm = Imm, NewImm;
+
+  // Return if the immediate is already all zeros, all ones, or already a
+  // legal logical immediate.
+  if (Imm == 0 || Imm == ~0U || isLegalLogicalImmediate(Imm, Subtarget))
+    return false;
+
+  // bic/orn
+  if ((Op.getOpcode() == ISD::AND ||
+       (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) &&
+      isLegalLogicalImmediate(~Imm, Subtarget))
+    return false;
+
+  unsigned DemandedBits = Demanded.getZExtValue();
+
+  // Clear bits that are not demanded.
+  Imm &= DemandedBits;
+
+  // Try to extend the immediate to a legal ARM rotating immediate
+  // by filling in non-demanded bits. ARM supports:
+  // - An 8-bit value rotated right by an even number of bits (0, 2, ..., 30)
+  // - Any 8-bit immediate (Thumb2 also supports 16-bit splat patterns)
+  unsigned NonDemandedBits = ~DemandedBits;
+
+  // Try filling the non-demanded bits with 0.
+  NewImm = Imm & DemandedBits;
+  if (isLegalLogicalImmediate(NewImm, Subtarget) ||
+      ((Op.getOpcode() == ISD::AND ||
+        (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) &&
+       isLegalLogicalImmediate(~NewImm, Subtarget))) {
+    ++NumOptimizedImms;
+  } else {
+    // Try filling the non-demanded bits with 1.
+    NewImm = Imm | NonDemandedBits;
+    if (isLegalLogicalImmediate(NewImm, Subtarget) ||
+        ((Op.getOpcode() == ISD::AND ||
+          (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) &&
+         isLegalLogicalImmediate(~NewImm, Subtarget))) {
+      ++NumOptimizedImms;
+    } else {
+      return false;
+    }
+  }
+
+  (void)OldImm;
+  assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
+         "demanded bits should never be altered");
+  assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
+
+  // Create the new constant immediate node.
+  EVT VT = Op.getValueType();
+  SDLoc DL(Op);
+  SDValue New;
+
+  // If the new constant immediate is all-zeros or all-ones, let the target
+  // independent DAG combine optimize this node.
+  if (NewImm == 0 || NewImm == ~0U) {
+    New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
+                          TLO.DAG.getConstant(NewImm, DL, VT));
+    // Otherwise, create a machine node so that target independent DAG combine
+    // doesn't undo this optimization.
+  } else {
+    if (isLegalLogicalImmediate(NewImm, Subtarget)) {
+      SDValue EncConst = TLO.DAG.getTargetConstant(NewImm, DL, VT);
+      New = SDValue(
+          TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst),
+          0);
+    } else if ((Op.getOpcode() == ISD::AND ||
+                (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) &&
+               isLegalLogicalImmediate(~NewImm, Subtarget)) {
+      if (Op.getOpcode() == ISD::OR) {
+        // ORN
+        NewOpc = ARM::t2ORNri;
+      } else {
+        // AND -> BIC
+        NewOpc = Subtarget->isThumb()
+                     ? Subtarget->isThumb2() ? ARM::t2BICri : ARM::tBIC
+                     : ARM::BICri;
+      }
+      SDValue EncConst = TLO.DAG.getTargetConstant(~NewImm, DL, VT);
+      New = SDValue(
+          TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst),
+          0);
+    } else {
+      return false;
+    }
+  }
+
+  return TLO.CombineTo(Op, New);
+}
+
 bool ARMTargetLowering::targetShrinkDemandedConstant(
     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
     TargetLoweringOpt &TLO) const {
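For readers unfamiliar with `ARM_AM::getSOImmVal`: in ARM mode a legal logical immediate is an 8-bit value rotated right by an even amount. A minimal standalone sketch of that legality test, for illustration only (the real helper also computes the encoding, not just a yes/no):

    #include <cstdint>

    // Illustrative only: true if Imm is an 8-bit value rotated right by an
    // even amount (0, 2, ..., 30), i.e. an ARM-mode modified immediate.
    // Mirrors the spirit of ARM_AM::getSOImmVal(Imm) != -1.
    static bool isARMSOImm(uint32_t Imm) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        // Rotating Imm left by Rot undoes a rotate-right-by-Rot encoding.
        uint32_t Unrotated = (Imm << Rot) | (Imm >> ((32 - Rot) & 31));
        if (Unrotated <= 0xFF)
          return true;
      }
      return false;
    }

For example, `isARMSOImm(0xFF000000)` holds (0xFF rotated right by 8), while `isARMSOImm(0x101)` does not; the latter is exactly the kind of constant `optimizeLogicalImm` tries to nudge into an encodable form.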
@@ -20146,78 +20259,82 @@ bool ARMTargetLowering::targetShrinkDemandedConstant(
   if (!TLO.LegalOps)
     return false;
 
-  // Only optimize AND for now.
-  if (Op.getOpcode() != ISD::AND)
+  if (!EnableOptimizeLogicalImm)
     return false;
 
   EVT VT = Op.getValueType();
-
-  // Ignore vectors.
   if (VT.isVector())
     return false;
 
   assert(VT == MVT::i32 && "Unexpected integer type");
 
+  // Exit early if we demand all bits.
+  if (DemandedBits.popcount() == 32)
+    return false;
+
   // Make sure the RHS really is a constant.
   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
   if (!C)
     return false;
 
   unsigned Mask = C->getZExtValue();
 
-  unsigned Demanded = DemandedBits.getZExtValue();
-  unsigned ShrunkMask = Mask & Demanded;
-  unsigned ExpandedMask = Mask | ~Demanded;
-
-  // If the mask is all zeros, let the target-independent code replace the
-  // result with zero.
-  if (ShrunkMask == 0)
-    return false;
-
-  // If the mask is all ones, erase the AND. (Currently, the target-independent
-  // code won't do this, so we have to do it explicitly to avoid an infinite
-  // loop in obscure cases.)
-  if (ExpandedMask == ~0U)
-    return TLO.CombineTo(Op, Op.getOperand(0));
-
-  auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
-    return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
-  };
-  auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
-    if (NewMask == Mask)
-      return true;
-    SDLoc DL(Op);
-    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
-    SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
-    return TLO.CombineTo(Op, NewOp);
-  };
-
-  // Prefer uxtb mask.
-  if (IsLegalMask(0xFF))
-    return UseMask(0xFF);
+  // If Thumb1, check for uxth and uxtb masks.
+  if (Subtarget->isThumb1Only() && Op.getOpcode() == ISD::AND) {
+    unsigned Demanded = DemandedBits.getZExtValue();
+    unsigned ShrunkMask = Mask & Demanded;
+    unsigned ExpandedMask = Mask | ~Demanded;
 
-  // Prefer uxth mask.
-  if (IsLegalMask(0xFFFF))
-    return UseMask(0xFFFF);
+    // If the mask is all zeros, let the target-independent code replace the
+    // result with zero.
+    if (ShrunkMask == 0)
+      return false;
 
-  // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
-  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
-  if (ShrunkMask < 256)
-    return UseMask(ShrunkMask);
+    // If the mask is all ones, erase the AND. (Currently, the
+    // target-independent code won't do this, so we have to do it explicitly to
+    // avoid an infinite loop in obscure cases.)
+    if (ExpandedMask == ~0U)
+      return TLO.CombineTo(Op, Op.getOperand(0));
+    auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
+      return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
+    };
+    auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
+      if (NewMask == Mask)
+        return true;
+      SDLoc DL(Op);
+      SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
+      SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+      return TLO.CombineTo(Op, NewOp);
+    };
 
-  // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
-  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
-  if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
-    return UseMask(ExpandedMask);
+    if (IsLegalMask(0xFF))
+      return UseMask(0xFF);
+    if (IsLegalMask(0xFFFF))
+      return UseMask(0xFFFF);
+  }
 
-  // Potential improvements:
-  //
-  // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
-  // We could try to prefer Thumb1 immediates which can be lowered to a
-  // two-instruction sequence.
-  // We could try to recognize more legal ARM/Thumb2 immediates here.
+  unsigned NewOpc;
+  switch (Op.getOpcode()) {
+  default:
+    return false;
+  case ISD::AND:
+    NewOpc = Subtarget->isThumb()
+                 ? Subtarget->isThumb2() ? ARM::t2ANDri : ARM::tAND
+                 : ARM::ANDri;
+    break;
+  case ISD::OR:
+    NewOpc = Subtarget->isThumb()
+                 ? Subtarget->isThumb2() ? ARM::t2ORRri : ARM::tORR
+                 : ARM::ORRri;
+    break;
+  case ISD::XOR:
+    NewOpc = Subtarget->isThumb()
+                 ? Subtarget->isThumb2() ? ARM::t2EORri : ARM::tEOR
+                 : ARM::EORri;
+    break;
+  }
 
-  return false;
+  return optimizeLogicalImm(Op, Mask, DemandedBits, TLO, NewOpc, Subtarget);
 }
 
 bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
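The shape of the new flow, stripped of DAG plumbing, is easiest to see as plain integer arithmetic; a minimal sketch, assuming a predicate like the `isARMSOImm` above (the function name and signature are illustrative, not LLVM API):

    #include <cstdint>

    // Given a logical-op immediate and the set of result bits actually
    // demanded, try the two fills optimizeLogicalImm tries: non-demanded
    // bits forced to 0, then forced to 1. Returns the original immediate
    // when neither fill becomes encodable.
    static uint32_t shrinkLogicalImm(uint32_t Imm, uint32_t Demanded,
                                     bool (*IsLegal)(uint32_t)) {
      uint32_t FillZero = Imm & Demanded;   // non-demanded bits -> 0
      uint32_t FillOne  = Imm | ~Demanded;  // non-demanded bits -> 1
      if (IsLegal(FillZero))
        return FillZero;
      if (IsLegal(FillOne))
        return FillOne;
      return Imm;
    }

Both candidates agree with `Imm` on every demanded bit, which is what the `(OldImm ^ NewImm) & Demanded` assertion in the real code enforces.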

llvm/test/CodeGen/ARM/funnel-shift-rot.ll
Lines changed: 2 additions & 3 deletions
@@ -19,7 +19,7 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 define i8 @rotl_i8_const_shift(i8 %x) {
 ; CHECK-LABEL: rotl_i8_const_shift:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    uxtb r1, r0
+; CHECK-NEXT:    and r1, r0, #224
 ; CHECK-NEXT:    lsl r0, r0, #3
 ; CHECK-NEXT:    orr r0, r0, r1, lsr #5
 ; CHECK-NEXT:    bx lr
@@ -161,8 +161,7 @@ define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) {
 define i8 @rotr_i8_const_shift(i8 %x) {
 ; CHECK-LABEL: rotr_i8_const_shift:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    uxtb r1, r0
-; CHECK-NEXT:    lsr r1, r1, #3
+; CHECK-NEXT:    ubfx r1, r0, #3, #5
 ; CHECK-NEXT:    orr r0, r1, r0, lsl #5
 ; CHECK-NEXT:    bx lr
   %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
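The first check-line change above is a direct consequence of demanded-bits reasoning: the masked value only feeds `lsr #5`, so of the `uxtb` mask 0xFF only bits 7:5 (0xE0 = 224) matter. A quick exhaustive check of the equivalence (illustrative C++):

    #include <cassert>
    #include <cstdint>

    int main() {
      // rotl_i8_const_shift computes (x << 3) | (zext(x) >> 5) on i8.
      // After the >> 5, only bits 7:5 of the mask survive, so masking
      // with 0xE0 is interchangeable with masking with 0xFF here.
      for (uint32_t X = 0; X <= 0xFF; ++X)
        assert(((X & 0xFF) >> 5) == ((X & 0xE0) >> 5));
      return 0;
    }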

llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
Lines changed: 36 additions & 20 deletions
@@ -21,9 +21,9 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
 ; ARM-LABEL: scalar_i8_signbit_eq:
 ; ARM:       @ %bb.0:
 ; ARM-NEXT:    uxtb r1, r1
-; ARM-NEXT:    lsl r0, r0, r1
+; ARM-NEXT:    mov r2, #128
+; ARM-NEXT:    and r0, r2, r0, lsl r1
 ; ARM-NEXT:    mov r1, #1
-; ARM-NEXT:    uxtb r0, r0
 ; ARM-NEXT:    eor r0, r1, r0, lsr #7
 ; ARM-NEXT:    bx lr
 ;
@@ -42,7 +42,7 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
 ; THUMB78-NEXT:    uxtb r1, r1
 ; THUMB78-NEXT:    lsls r0, r1
 ; THUMB78-NEXT:    movs r1, #1
-; THUMB78-NEXT:    uxtb r0, r0
+; THUMB78-NEXT:    and r0, r0, #128
 ; THUMB78-NEXT:    eor.w r0, r1, r0, lsr #7
 ; THUMB78-NEXT:    bx lr
   %t0 = lshr i8 128, %y
@@ -122,9 +122,9 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ; ARM-LABEL: scalar_i16_signbit_eq:
 ; ARM:       @ %bb.0:
 ; ARM-NEXT:    uxth r1, r1
-; ARM-NEXT:    lsl r0, r0, r1
+; ARM-NEXT:    mov r2, #32768
+; ARM-NEXT:    and r0, r2, r0, lsl r1
 ; ARM-NEXT:    mov r1, #1
-; ARM-NEXT:    uxth r0, r0
 ; ARM-NEXT:    eor r0, r1, r0, lsr #15
 ; ARM-NEXT:    bx lr
 ;
@@ -144,7 +144,7 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ; THUMB78-NEXT:    uxth r1, r1
 ; THUMB78-NEXT:    lsls r0, r1
 ; THUMB78-NEXT:    movs r1, #1
-; THUMB78-NEXT:    uxth r0, r0
+; THUMB78-NEXT:    and r0, r0, #32768
 ; THUMB78-NEXT:    eor.w r0, r1, r0, lsr #15
 ; THUMB78-NEXT:    bx lr
   %t0 = lshr i16 32768, %y
@@ -862,21 +862,35 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
 ;------------------------------------------------------------------------------;
 
 define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
-; ARM-LABEL: scalar_i8_signbit_ne:
-; ARM:       @ %bb.0:
-; ARM-NEXT:    uxtb r1, r1
-; ARM-NEXT:    lsl r0, r0, r1
-; ARM-NEXT:    uxtb r0, r0
-; ARM-NEXT:    lsr r0, r0, #7
-; ARM-NEXT:    bx lr
+; ARM6-LABEL: scalar_i8_signbit_ne:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    uxtb r1, r1
+; ARM6-NEXT:    mov r2, #128
+; ARM6-NEXT:    and r0, r2, r0, lsl r1
+; ARM6-NEXT:    lsr r0, r0, #7
+; ARM6-NEXT:    bx lr
 ;
-; THUMB-LABEL: scalar_i8_signbit_ne:
-; THUMB:       @ %bb.0:
-; THUMB-NEXT:    uxtb r1, r1
-; THUMB-NEXT:    lsls r0, r1
-; THUMB-NEXT:    uxtb r0, r0
-; THUMB-NEXT:    lsrs r0, r0, #7
-; THUMB-NEXT:    bx lr
+; ARM78-LABEL: scalar_i8_signbit_ne:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    uxtb r1, r1
+; ARM78-NEXT:    lsl r0, r0, r1
+; ARM78-NEXT:    ubfx r0, r0, #7, #1
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i8_signbit_ne:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxtb r1, r1
+; THUMB6-NEXT:    lsls r0, r1
+; THUMB6-NEXT:    uxtb r0, r0
+; THUMB6-NEXT:    lsrs r0, r0, #7
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i8_signbit_ne:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxtb r1, r1
+; THUMB78-NEXT:    lsls r0, r1
+; THUMB78-NEXT:    ubfx r0, r0, #7, #1
+; THUMB78-NEXT:    bx lr
   %t0 = lshr i8 128, %y
   %t1 = and i8 %t0, %x
   %res = icmp ne i8 %t1, 0 ; we are perfectly happy with 'ne' predicate
@@ -1051,3 +1065,5 @@ define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
   %res = icmp eq i8 %t1, 1 ; should be comparing with 0
   ret i1 %res
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; THUMB: {{.*}}
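Several of the updated checks use `ubfx` (unsigned bit-field extract) where the old output paired `uxtb` with a shift; its semantics reduce to a shift-and-mask, sketched here for reference (helper name illustrative, valid for widths 1 to 31 in this sketch):

    #include <cstdint>

    // ubfx rd, rn, #lsb, #width  ==  (rn >> lsb) & ((1 << width) - 1)
    static uint32_t ubfx(uint32_t Rn, unsigned Lsb, unsigned Width) {
      return (Rn >> Lsb) & ((1u << Width) - 1u);
    }

So `ubfx r0, r0, #7, #1` in scalar_i8_signbit_ne yields bit 7 of r0, matching the old `uxtb r0, r0` + `lsrs r0, r0, #7` pair on byte-sized inputs.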
