Skip to content

Commit 4fd01b4

Browse files
committed
[ARM] Only change mask if demanded bits says we can optimize
1 parent f767f23 commit 4fd01b4

File tree

7 files changed

+182
-80
lines changed

7 files changed

+182
-80
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20138,6 +20138,17 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2013820138
}
2013920139
}
2014020140

20141+
static bool isLegalLogicalImmediate(unsigned Imm,
20142+
const ARMSubtarget *Subtarget) {
20143+
// Handle special cases first
20144+
if (!Subtarget->isThumb())
20145+
return ARM_AM::getSOImmVal(Imm) != -1;
20146+
if (Subtarget->isThumb2())
20147+
return ARM_AM::getT2SOImmVal(Imm) != -1;
20148+
// Thumb1 only has 8-bit unsigned immediate.
20149+
return Imm <= 255;
20150+
}
20151+
2014120152
bool ARMTargetLowering::targetShrinkDemandedConstant(
2014220153
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2014320154
TargetLoweringOpt &TLO) const {
@@ -20192,30 +20203,30 @@ bool ARMTargetLowering::targetShrinkDemandedConstant(
2019220203
return TLO.CombineTo(Op, NewOp);
2019320204
};
2019420205

20195-
// Prefer uxtb mask.
20196-
if (IsLegalMask(0xFF))
20197-
return UseMask(0xFF);
20206+
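// On Thumb1 with v6 ops, check the uxtb mask (0xFF) and the 0xFF00 mask first. NOTE(review): the uxth mask is 0xFFFF (as in the removed code), not 0xFF00 - confirm which is intended.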
20207+
if (Subtarget->isThumb1Only() && Subtarget->hasV6Ops()) {
20208+
if (IsLegalMask(0xFF))
20209+
return UseMask(0xFF);
20210+
20211+
if (IsLegalMask(0xFF00))
20212+
return UseMask(0xFF00);
20213+
}
2019820214

20199-
// Prefer uxth mask.
20200-
if (IsLegalMask(0xFFFF))
20201-
return UseMask(0xFFFF);
20215+
// Don't optimize if it is legal already.
20216+
if (isLegalLogicalImmediate(Mask, Subtarget))
20217+
return false;
2020220218

20203-
// [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
20204-
// FIXME: Prefer a contiguous sequence of bits for other optimizations.
20205-
if (ShrunkMask < 256)
20219+
if (isLegalLogicalImmediate(ShrunkMask, Subtarget))
2020620220
return UseMask(ShrunkMask);
2020720221

20208-
// [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
20209-
// FIXME: Prefer a contiguous sequence of bits for other optimizations.
20210-
if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
20222+
if (isLegalLogicalImmediate(~ExpandedMask, Subtarget))
2021120223
return UseMask(ExpandedMask);
2021220224

2021320225
// Potential improvements:
2021420226
//
2021520227
// We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
2021620228
// We could try to prefer Thumb1 immediates which can be lowered to a
2021720229
// two-instruction sequence.
20218-
// We could try to recognize more legal ARM/Thumb2 immediates here.
2021920230

2022020231
return false;
2022120232
}

llvm/test/CodeGen/ARM/funnel-shift-rot.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
1919
define i8 @rotl_i8_const_shift(i8 %x) {
2020
; CHECK-LABEL: rotl_i8_const_shift:
2121
; CHECK: @ %bb.0:
22-
; CHECK-NEXT: uxtb r1, r0
22+
; CHECK-NEXT: and r1, r0, #224
2323
; CHECK-NEXT: lsl r0, r0, #3
2424
; CHECK-NEXT: orr r0, r0, r1, lsr #5
2525
; CHECK-NEXT: bx lr
@@ -161,8 +161,7 @@ define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) {
161161
define i8 @rotr_i8_const_shift(i8 %x) {
162162
; CHECK-LABEL: rotr_i8_const_shift:
163163
; CHECK: @ %bb.0:
164-
; CHECK-NEXT: uxtb r1, r0
165-
; CHECK-NEXT: lsr r1, r1, #3
164+
; CHECK-NEXT: ubfx r1, r0, #3, #5
166165
; CHECK-NEXT: orr r0, r1, r0, lsl #5
167166
; CHECK-NEXT: bx lr
168167
%f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)

llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll

Lines changed: 36 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
2121
; ARM-LABEL: scalar_i8_signbit_eq:
2222
; ARM: @ %bb.0:
2323
; ARM-NEXT: uxtb r1, r1
24-
; ARM-NEXT: lsl r0, r0, r1
24+
; ARM-NEXT: mov r2, #128
25+
; ARM-NEXT: and r0, r2, r0, lsl r1
2526
; ARM-NEXT: mov r1, #1
26-
; ARM-NEXT: uxtb r0, r0
2727
; ARM-NEXT: eor r0, r1, r0, lsr #7
2828
; ARM-NEXT: bx lr
2929
;
@@ -42,7 +42,7 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
4242
; THUMB78-NEXT: uxtb r1, r1
4343
; THUMB78-NEXT: lsls r0, r1
4444
; THUMB78-NEXT: movs r1, #1
45-
; THUMB78-NEXT: uxtb r0, r0
45+
; THUMB78-NEXT: and r0, r0, #128
4646
; THUMB78-NEXT: eor.w r0, r1, r0, lsr #7
4747
; THUMB78-NEXT: bx lr
4848
%t0 = lshr i8 128, %y
@@ -122,9 +122,9 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
122122
; ARM-LABEL: scalar_i16_signbit_eq:
123123
; ARM: @ %bb.0:
124124
; ARM-NEXT: uxth r1, r1
125-
; ARM-NEXT: lsl r0, r0, r1
125+
; ARM-NEXT: mov r2, #32768
126+
; ARM-NEXT: and r0, r2, r0, lsl r1
126127
; ARM-NEXT: mov r1, #1
127-
; ARM-NEXT: uxth r0, r0
128128
; ARM-NEXT: eor r0, r1, r0, lsr #15
129129
; ARM-NEXT: bx lr
130130
;
@@ -144,7 +144,7 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
144144
; THUMB78-NEXT: uxth r1, r1
145145
; THUMB78-NEXT: lsls r0, r1
146146
; THUMB78-NEXT: movs r1, #1
147-
; THUMB78-NEXT: uxth r0, r0
147+
; THUMB78-NEXT: and r0, r0, #32768
148148
; THUMB78-NEXT: eor.w r0, r1, r0, lsr #15
149149
; THUMB78-NEXT: bx lr
150150
%t0 = lshr i16 32768, %y
@@ -862,21 +862,35 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
862862
;------------------------------------------------------------------------------;
863863

864864
define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
865-
; ARM-LABEL: scalar_i8_signbit_ne:
866-
; ARM: @ %bb.0:
867-
; ARM-NEXT: uxtb r1, r1
868-
; ARM-NEXT: lsl r0, r0, r1
869-
; ARM-NEXT: uxtb r0, r0
870-
; ARM-NEXT: lsr r0, r0, #7
871-
; ARM-NEXT: bx lr
865+
; ARM6-LABEL: scalar_i8_signbit_ne:
866+
; ARM6: @ %bb.0:
867+
; ARM6-NEXT: uxtb r1, r1
868+
; ARM6-NEXT: mov r2, #128
869+
; ARM6-NEXT: and r0, r2, r0, lsl r1
870+
; ARM6-NEXT: lsr r0, r0, #7
871+
; ARM6-NEXT: bx lr
872872
;
873-
; THUMB-LABEL: scalar_i8_signbit_ne:
874-
; THUMB: @ %bb.0:
875-
; THUMB-NEXT: uxtb r1, r1
876-
; THUMB-NEXT: lsls r0, r1
877-
; THUMB-NEXT: uxtb r0, r0
878-
; THUMB-NEXT: lsrs r0, r0, #7
879-
; THUMB-NEXT: bx lr
873+
; ARM78-LABEL: scalar_i8_signbit_ne:
874+
; ARM78: @ %bb.0:
875+
; ARM78-NEXT: uxtb r1, r1
876+
; ARM78-NEXT: lsl r0, r0, r1
877+
; ARM78-NEXT: ubfx r0, r0, #7, #1
878+
; ARM78-NEXT: bx lr
879+
;
880+
; THUMB6-LABEL: scalar_i8_signbit_ne:
881+
; THUMB6: @ %bb.0:
882+
; THUMB6-NEXT: uxtb r1, r1
883+
; THUMB6-NEXT: lsls r0, r1
884+
; THUMB6-NEXT: uxtb r0, r0
885+
; THUMB6-NEXT: lsrs r0, r0, #7
886+
; THUMB6-NEXT: bx lr
887+
;
888+
; THUMB78-LABEL: scalar_i8_signbit_ne:
889+
; THUMB78: @ %bb.0:
890+
; THUMB78-NEXT: uxtb r1, r1
891+
; THUMB78-NEXT: lsls r0, r1
892+
; THUMB78-NEXT: ubfx r0, r0, #7, #1
893+
; THUMB78-NEXT: bx lr
880894
%t0 = lshr i8 128, %y
881895
%t1 = and i8 %t0, %x
882896
%res = icmp ne i8 %t1, 0 ; we are perfectly happy with 'ne' predicate
@@ -1051,3 +1065,5 @@ define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
10511065
%res = icmp eq i8 %t1, 1 ; should be comparing with 0
10521066
ret i1 %res
10531067
}
1068+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1069+
; THUMB: {{.*}}

llvm/test/CodeGen/ARM/sdiv-pow2-thumb-size.ll

Lines changed: 117 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
12
; RUN: llc -mtriple=thumbv8 %s -o - | FileCheck %s --check-prefixes=CHECK,T2
23
; RUN: llc -mtriple=thumbv8m.main %s -o - | FileCheck %s --check-prefixes=CHECK,T2
34
; RUN: llc -mtriple=thumbv8m.base %s -o - | FileCheck %s --check-prefixes=CHECK,T1
@@ -13,11 +14,23 @@
1314

1415
; Test sdiv i16
1516
define dso_local signext i16 @f0(i16 signext %F) local_unnamed_addr #0 {
16-
; CHECK-LABEL: f0
17-
; CHECK: movs r1, #2
18-
; CHECK-NEXT: sdiv r0, r0, r1
19-
; CHECK-NEXT: sxth r0, r0
20-
; CHECK-NEXT: bx lr
17+
; CHECK-LABEL: f0:
18+
; CHECK: @ %bb.0: @ %entry
19+
; CHECK-NEXT: movs r1, #2
20+
; CHECK-NEXT: sdiv r0, r0, r1
21+
; CHECK-NEXT: sxth r0, r0
22+
; CHECK-NEXT: bx lr
23+
;
24+
; V6M-LABEL: f0:
25+
; V6M: @ %bb.0: @ %entry
26+
; V6M-NEXT: movs r1, #255
27+
; V6M-NEXT: lsls r1, r1, #8
28+
; V6M-NEXT: ands r1, r0
29+
; V6M-NEXT: lsrs r1, r1, #15
30+
; V6M-NEXT: adds r0, r0, r1
31+
; V6M-NEXT: sxth r0, r0
32+
; V6M-NEXT: asrs r0, r0, #1
33+
; V6M-NEXT: bx lr
2134

2235
entry:
2336
%0 = sdiv i16 %F, 2
@@ -26,10 +39,19 @@ entry:
2639

2740
; Same as above, but now with i32
2841
define dso_local i32 @f1(i32 %F) local_unnamed_addr #0 {
29-
; CHECK-LABEL: f1
30-
; CHECK: movs r1, #4
31-
; CHECK-NEXT: sdiv r0, r0, r1
32-
; CHECK-NEXT: bx lr
42+
; CHECK-LABEL: f1:
43+
; CHECK: @ %bb.0: @ %entry
44+
; CHECK-NEXT: movs r1, #4
45+
; CHECK-NEXT: sdiv r0, r0, r1
46+
; CHECK-NEXT: bx lr
47+
;
48+
; V6M-LABEL: f1:
49+
; V6M: @ %bb.0: @ %entry
50+
; V6M-NEXT: asrs r1, r0, #31
51+
; V6M-NEXT: lsrs r1, r1, #30
52+
; V6M-NEXT: adds r0, r0, r1
53+
; V6M-NEXT: asrs r0, r0, #2
54+
; V6M-NEXT: bx lr
3355

3456
entry:
3557
%div = sdiv i32 %F, 4
@@ -38,10 +60,18 @@ entry:
3860

3961
; The immediate is not a power of 2, so we expect a sdiv.
4062
define dso_local i32 @f2(i32 %F) local_unnamed_addr #0 {
41-
; CHECK-LABEL: f2
42-
; CHECK: movs r1, #5
43-
; CHECK-NEXT: sdiv r0, r0, r1
44-
; CHECK-NEXT: bx lr
63+
; CHECK-LABEL: f2:
64+
; CHECK: @ %bb.0: @ %entry
65+
; CHECK-NEXT: movs r1, #5
66+
; CHECK-NEXT: sdiv r0, r0, r1
67+
; CHECK-NEXT: bx lr
68+
;
69+
; V6M-LABEL: f2:
70+
; V6M: @ %bb.0: @ %entry
71+
; V6M-NEXT: push {r7, lr}
72+
; V6M-NEXT: movs r1, #5
73+
; V6M-NEXT: bl __divsi3
74+
; V6M-NEXT: pop {r7, pc}
4575

4676
entry:
4777
%div = sdiv i32 %F, 5
@@ -51,8 +81,28 @@ entry:
5181
; Try a larger power of 2 immediate: immediates larger than
5282
; 128 don't give any code size savings.
5383
define dso_local i32 @f3(i32 %F) local_unnamed_addr #0 {
54-
; CHECK-LABEL: f3
55-
; CHECK-NOT: sdiv
84+
; T2-LABEL: f3:
85+
; T2: @ %bb.0: @ %entry
86+
; T2-NEXT: asrs r1, r0, #31
87+
; T2-NEXT: add.w r0, r0, r1, lsr #24
88+
; T2-NEXT: asrs r0, r0, #8
89+
; T2-NEXT: bx lr
90+
;
91+
; T1-LABEL: f3:
92+
; T1: @ %bb.0: @ %entry
93+
; T1-NEXT: asrs r1, r0, #31
94+
; T1-NEXT: lsrs r1, r1, #24
95+
; T1-NEXT: adds r0, r0, r1
96+
; T1-NEXT: asrs r0, r0, #8
97+
; T1-NEXT: bx lr
98+
;
99+
; V6M-LABEL: f3:
100+
; V6M: @ %bb.0: @ %entry
101+
; V6M-NEXT: asrs r1, r0, #31
102+
; V6M-NEXT: lsrs r1, r1, #24
103+
; V6M-NEXT: adds r0, r0, r1
104+
; V6M-NEXT: asrs r0, r0, #8
105+
; V6M-NEXT: bx lr
56106
entry:
57107
%div = sdiv i32 %F, 256
58108
ret i32 %div
@@ -65,39 +115,65 @@ attributes #0 = { minsize norecurse nounwind optsize readnone }
65115
; the sdiv to sdiv, but to the faster instruction sequence.
66116

67117
define dso_local signext i16 @f4(i16 signext %F) {
68-
; T2-LABEL: f4
69-
; T2: uxth r1, r0
70-
; T2-NEXT: add.w r0, r0, r1, lsr #15
71-
; T2-NEXT: sxth r0, r0
72-
; T2-NEXT: asrs r0, r0, #1
73-
; T2-NEXT: bx lr
74-
75-
; T1-LABEL: f4
76-
; T1: uxth r1, r0
77-
; T1-NEXT: lsrs r1, r1, #15
78-
; T1-NEXT: adds r0, r0, r1
79-
; T1-NEXT: sxth r0, r0
80-
; T1-NEXT: asrs r0, r0, #1
81-
; T1-NEXT: bx lr
118+
; T2-LABEL: f4:
119+
; T2: @ %bb.0: @ %entry
120+
; T2-NEXT: and r1, r0, #32768
121+
; T2-NEXT: add.w r0, r0, r1, lsr #15
122+
; T2-NEXT: sxth r0, r0
123+
; T2-NEXT: asrs r0, r0, #1
124+
; T2-NEXT: bx lr
125+
;
126+
; T1-LABEL: f4:
127+
; T1: @ %bb.0: @ %entry
128+
; T1-NEXT: movw r1, #65280
129+
; T1-NEXT: ands r1, r0
130+
; T1-NEXT: lsrs r1, r1, #15
131+
; T1-NEXT: adds r0, r0, r1
132+
; T1-NEXT: sxth r0, r0
133+
; T1-NEXT: asrs r0, r0, #1
134+
; T1-NEXT: bx lr
135+
;
136+
; V6M-LABEL: f4:
137+
; V6M: @ %bb.0: @ %entry
138+
; V6M-NEXT: movs r1, #255
139+
; V6M-NEXT: lsls r1, r1, #8
140+
; V6M-NEXT: ands r1, r0
141+
; V6M-NEXT: lsrs r1, r1, #15
142+
; V6M-NEXT: adds r0, r0, r1
143+
; V6M-NEXT: sxth r0, r0
144+
; V6M-NEXT: asrs r0, r0, #1
145+
; V6M-NEXT: bx lr
146+
82147

83148
entry:
84149
%0 = sdiv i16 %F, 2
85150
ret i16 %0
86151
}
87152

88153
define dso_local i32 @f5(i32 %F) {
89-
; T2-LABEL: f5
90-
; T2: asrs r1, r0, #31
91-
; T2-NEXT: add.w r0, r0, r1, lsr #30
92-
; T2-NEXT: asrs r0, r0, #2
93-
; T2-NEXT: bx lr
94-
95-
; T1-LABEL: f5
96-
; T1: asrs r1, r0, #31
97-
; T1-NEXT: lsrs r1, r1, #30
98-
; T1-NEXT: adds r0, r0, r1
99-
; T1-NEXT: asrs r0, r0, #2
100-
; T1-NEXT: bx lr
154+
; T2-LABEL: f5:
155+
; T2: @ %bb.0: @ %entry
156+
; T2-NEXT: asrs r1, r0, #31
157+
; T2-NEXT: add.w r0, r0, r1, lsr #30
158+
; T2-NEXT: asrs r0, r0, #2
159+
; T2-NEXT: bx lr
160+
;
161+
; T1-LABEL: f5:
162+
; T1: @ %bb.0: @ %entry
163+
; T1-NEXT: asrs r1, r0, #31
164+
; T1-NEXT: lsrs r1, r1, #30
165+
; T1-NEXT: adds r0, r0, r1
166+
; T1-NEXT: asrs r0, r0, #2
167+
; T1-NEXT: bx lr
168+
;
169+
; V6M-LABEL: f5:
170+
; V6M: @ %bb.0: @ %entry
171+
; V6M-NEXT: asrs r1, r0, #31
172+
; V6M-NEXT: lsrs r1, r1, #30
173+
; V6M-NEXT: adds r0, r0, r1
174+
; V6M-NEXT: asrs r0, r0, #2
175+
; V6M-NEXT: bx lr
176+
101177

102178
entry:
103179
%div = sdiv i32 %F, 4

llvm/test/CodeGen/ARM/va_arg.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ define double @test2(i32 %a, ptr %b, ...) nounwind optsize {
3535
; CHECK-NEXT: add r0, sp, #4
3636
; CHECK-NEXT: stmib sp, {r2, r3}
3737
; CHECK-NEXT: add r0, r0, #11
38-
; CHECK-NEXT: bic r0, r0, #3
38+
; CHECK-NEXT: bic r0, r0, #7
3939
; CHECK-NEXT: str r2, [r1]
4040
; CHECK-NEXT: add r1, r0, #8
4141
; CHECK-NEXT: str r1, [sp]

0 commit comments

Comments
 (0)