Skip to content

Commit fa1037b

Browse files
committed
[ARM] Make isCheapToSpeculateCtlz and isCheapToSpeculateCttz return true when hasV5TOps and not Thumb1-only
This is so that ctlz/cttz are not expanded with unneeded checks for a zero input. Also fix the logic error in ARMLegalizerInfo so that G_CTLZ is NOT legal on Thumb1 in GlobalISel. Finally, remove the README entry regarding this issue.
1 parent ba7e9ad commit fa1037b

File tree

5 files changed

+38
-61
lines changed

5 files changed

+38
-61
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21394,11 +21394,11 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
2139421394
}
2139521395

2139621396
bool ARMTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
21397-
return Subtarget->hasV6T2Ops();
21397+
return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only();
2139821398
}
2139921399

2140021400
bool ARMTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
21401-
return Subtarget->hasV6T2Ops();
21401+
return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only();
2140221402
}
2140321403

2140421404
bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial(

llvm/lib/Target/ARM/ARMLegalizerInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) : ST(ST) {
206206

207207
getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64});
208208

209-
if (ST.hasV5TOps()) {
209+
if (ST.hasV5TOps() && !ST.isThumb1Only()) {
210210
getActionDefinitionsBuilder(G_CTLZ)
211211
.legalFor({s32, s32})
212212
.clampScalar(1, s32, s32)

llvm/lib/Target/ARM/README.txt

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -697,22 +697,6 @@ target-neutral one.
697697

698698
//===---------------------------------------------------------------------===//
699699

700-
Optimize unnecessary checks for zero with __builtin_clz/ctz. Those builtins
701-
are specified to be undefined at zero, so portable code must check for zero
702-
and handle it as a special case. That is unnecessary on ARM where those
703-
operations are implemented in a way that is well-defined for zero. For
704-
example:
705-
706-
int f(int x) { return x ? __builtin_clz(x) : sizeof(int)*8; }
707-
708-
should just be implemented with a CLZ instruction. Since there are other
709-
targets, e.g., PPC, that share this behavior, it would be best to implement
710-
this in a target-independent way: we should probably fold that (when using
711-
"undefined at zero" semantics) to set the "defined at zero" bit and have
712-
the code generator expand out the right code.
713-
714-
//===---------------------------------------------------------------------===//
715-
716700
Clean up the test/MC/ARM files to have more robust register choices.
717701

718702
R0 should not be used as a register operand in the assembler tests as it's then

llvm/test/CodeGen/ARM/clz.ll

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,26 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc -mtriple=arm-eabi -mattr=+v5t %s -o - | FileCheck %s -check-prefixes=CHECK,INLINE
23
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefixes=CHECK,LIBCALL
34

45
declare i32 @llvm.ctlz.i32(i32, i1)
56

6-
define i32 @test(i32 %x) {
7-
; CHECK-LABEL: test
8-
; INLINE: clz r0, r0
9-
; LIBCALL: b __clzsi2
7+
define i32 @undef_zero(i32 %x) {
8+
; INLINE-LABEL: undef_zero:
9+
; INLINE: @ %bb.0:
10+
; INLINE-NEXT: clz r0, r0
11+
; INLINE-NEXT: bx lr
12+
;
13+
; LIBCALL-LABEL: undef_zero:
14+
; LIBCALL: @ %bb.0:
15+
; LIBCALL-NEXT: b __clzsi2
1016
%tmp.1 = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
1117
ret i32 %tmp.1
1218
}
1319

1420
define i32 @no_undef_zero(i32 %x) {
1521
; INLINE-LABEL: no_undef_zero:
1622
; INLINE: @ %bb.0:
17-
; INLINE-NEXT: cmp r0, #0
18-
; INLINE-NEXT: clzne r0, r0
19-
; INLINE-NEXT: moveq r0, #32
23+
; INLINE-NEXT: clz r0, r0
2024
; INLINE-NEXT: bx lr
2125
;
2226
; LIBCALL-LABEL: no_undef_zero:

llvm/test/CodeGen/ARM/cttz.ll

Lines changed: 24 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,11 @@ declare i64 @llvm.cttz.i64(i64, i1)
1717
define i8 @test_i8(i8 %a) {
1818
; CHECK-5-LABEL: test_i8:
1919
; CHECK-5: @ %bb.0:
20-
; CHECK-5-NEXT: tst r0, #255
21-
; CHECK-5-NEXT: moveq r0, #8
22-
; CHECK-5-NEXT: subne r1, r0, #1
23-
; CHECK-5-NEXT: bicne r0, r1, r0
24-
; CHECK-5-NEXT: clzne r0, r0
25-
; CHECK-5-NEXT: rsbne r0, r0, #32
20+
; CHECK-5-NEXT: orr r0, r0, #256
21+
; CHECK-5-NEXT: sub r1, r0, #1
22+
; CHECK-5-NEXT: bic r0, r1, r0
23+
; CHECK-5-NEXT: clz r0, r0
24+
; CHECK-5-NEXT: rsb r0, r0, #32
2625
; CHECK-5-NEXT: bx lr
2726
;
2827
; CHECK-LABEL: test_i8:
@@ -94,14 +93,11 @@ define i8 @test_i8(i8 %a) {
9493
define i16 @test_i16(i16 %a) {
9594
; CHECK-5-LABEL: test_i16:
9695
; CHECK-5: @ %bb.0:
97-
; CHECK-5-NEXT: mov r1, #255
98-
; CHECK-5-NEXT: orr r1, r1, #65280
99-
; CHECK-5-NEXT: tst r0, r1
100-
; CHECK-5-NEXT: moveq r0, #16
101-
; CHECK-5-NEXT: subne r1, r0, #1
102-
; CHECK-5-NEXT: bicne r0, r1, r0
103-
; CHECK-5-NEXT: clzne r0, r0
104-
; CHECK-5-NEXT: rsbne r0, r0, #32
96+
; CHECK-5-NEXT: orr r0, r0, #65536
97+
; CHECK-5-NEXT: sub r1, r0, #1
98+
; CHECK-5-NEXT: bic r0, r1, r0
99+
; CHECK-5-NEXT: clz r0, r0
100+
; CHECK-5-NEXT: rsb r0, r0, #32
105101
; CHECK-5-NEXT: bx lr
106102
;
107103
; CHECK-LABEL: test_i16:
@@ -173,12 +169,10 @@ define i16 @test_i16(i16 %a) {
173169
define i32 @test_i32(i32 %a) {
174170
; CHECK-5-LABEL: test_i32:
175171
; CHECK-5: @ %bb.0:
176-
; CHECK-5-NEXT: cmp r0, #0
177-
; CHECK-5-NEXT: moveq r0, #32
178-
; CHECK-5-NEXT: subne r1, r0, #1
179-
; CHECK-5-NEXT: bicne r0, r1, r0
180-
; CHECK-5-NEXT: clzne r0, r0
181-
; CHECK-5-NEXT: rsbne r0, r0, #32
172+
; CHECK-5-NEXT: sub r1, r0, #1
173+
; CHECK-5-NEXT: bic r0, r1, r0
174+
; CHECK-5-NEXT: clz r0, r0
175+
; CHECK-5-NEXT: rsb r0, r0, #32
182176
; CHECK-5-NEXT: bx lr
183177
;
184178
; CHECK-LABEL: test_i32:
@@ -242,22 +236,17 @@ define i32 @test_i32(i32 %a) {
242236
define i64 @test_i64(i64 %a) {
243237
; CHECK-5-LABEL: test_i64:
244238
; CHECK-5: @ %bb.0:
245-
; CHECK-5-NEXT: mov r2, r1
246-
; CHECK-5-NEXT: orrs r1, r0, r1
247-
; CHECK-5-NEXT: mov r1, #0
248-
; CHECK-5-NEXT: moveq r0, #64
249-
; CHECK-5-NEXT: bxeq lr
250-
; CHECK-5-NEXT: .LBB3_1: @ %cond.false
251-
; CHECK-5-NEXT: sub r3, r0, #1
252-
; CHECK-5-NEXT: bic r3, r3, r0
253-
; CHECK-5-NEXT: clz r12, r3
254-
; CHECK-5-NEXT: sub r3, r2, #1
255-
; CHECK-5-NEXT: bic r2, r3, r2
256-
; CHECK-5-NEXT: mov r3, r0
239+
; CHECK-5-NEXT: sub r3, r1, #1
240+
; CHECK-5-NEXT: sub r2, r0, #1
241+
; CHECK-5-NEXT: bic r1, r3, r1
242+
; CHECK-5-NEXT: bic r2, r2, r0
243+
; CHECK-5-NEXT: clz r1, r1
257244
; CHECK-5-NEXT: clz r2, r2
258-
; CHECK-5-NEXT: cmp r3, #0
259-
; CHECK-5-NEXT: rsb r0, r2, #64
260-
; CHECK-5-NEXT: rsbne r0, r12, #32
245+
; CHECK-5-NEXT: rsb r1, r1, #64
246+
; CHECK-5-NEXT: cmp r0, #0
247+
; CHECK-5-NEXT: rsbne r1, r2, #32
248+
; CHECK-5-NEXT: mov r0, r1
249+
; CHECK-5-NEXT: mov r1, #0
261250
; CHECK-5-NEXT: bx lr
262251
;
263252
; CHECK-LABEL: test_i64:

0 commit comments

Comments
 (0)