Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2552,9 +2552,9 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
(IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
return false;

// Only handle legal scalar cases. Anything else requires too much work.
// Only handle scalar cases. Anything else requires too much work.
unsigned SizeInBits = Ty->getScalarSizeInBits();
if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
if (Ty->isVectorTy())
return false;

// Bail if the value is never zero.
Expand Down
140 changes: 76 additions & 64 deletions llvm/test/CodeGen/ARM/cttz.ll
Original file line number Diff line number Diff line change
Expand Up @@ -221,87 +221,99 @@ define i64 @test_i64(i64 %a) {
;
; CHECK-6M-LABEL: test_i64:
; CHECK-6M: @ %bb.0:
; CHECK-6M-NEXT: .save {r4, r5, r7, lr}
; CHECK-6M-NEXT: push {r4, r5, r7, lr}
; CHECK-6M-NEXT: .save {r4, r5, r6, lr}
; CHECK-6M-NEXT: push {r4, r5, r6, lr}
; CHECK-6M-NEXT: mov r3, r1
; CHECK-6M-NEXT: mov r2, r0
; CHECK-6M-NEXT: ldr r5, .LCPI3_0
; CHECK-6M-NEXT: adr r3, .LCPI3_1
; CHECK-6M-NEXT: movs r1, #0
; CHECK-6M-NEXT: orrs r0, r3
; CHECK-6M-NEXT: beq .LBB3_6
; CHECK-6M-NEXT: @ %bb.1: @ %cond.false
; CHECK-6M-NEXT: ldr r6, .LCPI3_0
; CHECK-6M-NEXT: adr r4, .LCPI3_1
; CHECK-6M-NEXT: movs r0, #32
; CHECK-6M-NEXT: cmp r1, #0
; CHECK-6M-NEXT: mov r4, r0
; CHECK-6M-NEXT: beq .LBB3_2
; CHECK-6M-NEXT: @ %bb.1:
; CHECK-6M-NEXT: rsbs r4, r1, #0
; CHECK-6M-NEXT: ands r4, r1
; CHECK-6M-NEXT: muls r4, r5, r4
; CHECK-6M-NEXT: lsrs r1, r4, #27
; CHECK-6M-NEXT: ldrb r4, [r3, r1]
; CHECK-6M-NEXT: .LBB3_2:
; CHECK-6M-NEXT: adds r4, #32
; CHECK-6M-NEXT: rsbs r1, r2, #0
; CHECK-6M-NEXT: ands r1, r2
; CHECK-6M-NEXT: muls r5, r1, r5
; CHECK-6M-NEXT: lsrs r1, r5, #27
; CHECK-6M-NEXT: cmp r3, #0
; CHECK-6M-NEXT: mov r5, r0
; CHECK-6M-NEXT: beq .LBB3_3
; CHECK-6M-NEXT: @ %bb.2: @ %cond.false
; CHECK-6M-NEXT: rsbs r5, r3, #0
; CHECK-6M-NEXT: ands r5, r3
; CHECK-6M-NEXT: muls r5, r6, r5
; CHECK-6M-NEXT: lsrs r3, r5, #27
; CHECK-6M-NEXT: ldrb r5, [r4, r3]
; CHECK-6M-NEXT: .LBB3_3: @ %cond.false
; CHECK-6M-NEXT: adds r5, #32
; CHECK-6M-NEXT: rsbs r3, r2, #0
; CHECK-6M-NEXT: ands r3, r2
; CHECK-6M-NEXT: muls r6, r3, r6
; CHECK-6M-NEXT: lsrs r3, r6, #27
; CHECK-6M-NEXT: cmp r2, #0
; CHECK-6M-NEXT: bne .LBB3_5
; CHECK-6M-NEXT: @ %bb.3:
; CHECK-6M-NEXT: beq .LBB3_6
; CHECK-6M-NEXT: .LBB3_4:
; CHECK-6M-NEXT: movs r1, #0
; CHECK-6M-NEXT: pop {r4, r5, r7, pc}
; CHECK-6M-NEXT: .LBB3_5:
; CHECK-6M-NEXT: ldrb r0, [r3, r1]
; CHECK-6M-NEXT: bne .LBB3_4
; CHECK-6M-NEXT: bne .LBB3_7
; CHECK-6M-NEXT: @ %bb.4: @ %cond.false
; CHECK-6M-NEXT: beq .LBB3_8
; CHECK-6M-NEXT: .LBB3_5: @ %cond.end
; CHECK-6M-NEXT: pop {r4, r5, r6, pc}
; CHECK-6M-NEXT: .LBB3_6:
; CHECK-6M-NEXT: mov r0, r4
; CHECK-6M-NEXT: movs r1, #0
; CHECK-6M-NEXT: pop {r4, r5, r7, pc}
; CHECK-6M-NEXT: movs r0, #64
; CHECK-6M-NEXT: pop {r4, r5, r6, pc}
; CHECK-6M-NEXT: .LBB3_7: @ %cond.false
; CHECK-6M-NEXT: ldrb r0, [r4, r3]
; CHECK-6M-NEXT: bne .LBB3_5
; CHECK-6M-NEXT: .LBB3_8: @ %cond.false
; CHECK-6M-NEXT: mov r0, r5
; CHECK-6M-NEXT: pop {r4, r5, r6, pc}
; CHECK-6M-NEXT: .p2align 2
; CHECK-6M-NEXT: @ %bb.7:
; CHECK-6M-NEXT: @ %bb.9:
; CHECK-6M-NEXT: .LCPI3_0:
; CHECK-6M-NEXT: .long 125613361 @ 0x77cb531
; CHECK-6M-NEXT: .LCPI3_1:
; CHECK-6M-NEXT: .ascii "\000\001\034\002\035\016\030\003\036\026\024\017\031\021\004\b\037\033\r\027\025\023\020\007\032\f\022\006\013\005\n\t"
;
; CHECK-8MBASE-LABEL: test_i64:
; CHECK-8MBASE: @ %bb.0:
; CHECK-8MBASE-NEXT: .save {r4, r5, r7, lr}
; CHECK-8MBASE-NEXT: push {r4, r5, r7, lr}
; CHECK-8MBASE-NEXT: .save {r4, r5, r6, lr}
; CHECK-8MBASE-NEXT: push {r4, r5, r6, lr}
; CHECK-8MBASE-NEXT: mov r3, r1
; CHECK-8MBASE-NEXT: mov r2, r0
; CHECK-8MBASE-NEXT: movw r5, #46385
; CHECK-8MBASE-NEXT: movt r5, #1916
; CHECK-8MBASE-NEXT: adr r3, .LCPI3_0
; CHECK-8MBASE-NEXT: movs r1, #0
; CHECK-8MBASE-NEXT: orrs r0, r3
; CHECK-8MBASE-NEXT: beq .LBB3_6
; CHECK-8MBASE-NEXT: @ %bb.1: @ %cond.false
; CHECK-8MBASE-NEXT: movw r6, #46385
; CHECK-8MBASE-NEXT: movt r6, #1916
; CHECK-8MBASE-NEXT: adr r4, .LCPI3_0
; CHECK-8MBASE-NEXT: movs r0, #32
; CHECK-8MBASE-NEXT: mov r4, r0
; CHECK-8MBASE-NEXT: cbz r1, .LBB3_2
; CHECK-8MBASE-NEXT: @ %bb.1:
; CHECK-8MBASE-NEXT: rsbs r4, r1, #0
; CHECK-8MBASE-NEXT: ands r4, r1
; CHECK-8MBASE-NEXT: muls r4, r5, r4
; CHECK-8MBASE-NEXT: lsrs r1, r4, #27
; CHECK-8MBASE-NEXT: ldrb r4, [r3, r1]
; CHECK-8MBASE-NEXT: .LBB3_2:
; CHECK-8MBASE-NEXT: adds r4, #32
; CHECK-8MBASE-NEXT: rsbs r1, r2, #0
; CHECK-8MBASE-NEXT: ands r1, r2
; CHECK-8MBASE-NEXT: muls r5, r1, r5
; CHECK-8MBASE-NEXT: lsrs r1, r5, #27
; CHECK-8MBASE-NEXT: mov r5, r0
; CHECK-8MBASE-NEXT: cbz r3, .LBB3_3
; CHECK-8MBASE-NEXT: @ %bb.2: @ %cond.false
; CHECK-8MBASE-NEXT: rsbs r5, r3, #0
; CHECK-8MBASE-NEXT: ands r5, r3
; CHECK-8MBASE-NEXT: muls r5, r6, r5
; CHECK-8MBASE-NEXT: lsrs r3, r5, #27
; CHECK-8MBASE-NEXT: ldrb r5, [r4, r3]
; CHECK-8MBASE-NEXT: .LBB3_3: @ %cond.false
; CHECK-8MBASE-NEXT: adds r5, #32
; CHECK-8MBASE-NEXT: rsbs r3, r2, #0
; CHECK-8MBASE-NEXT: ands r3, r2
; CHECK-8MBASE-NEXT: muls r6, r3, r6
; CHECK-8MBASE-NEXT: lsrs r3, r6, #27
; CHECK-8MBASE-NEXT: cmp r2, #0
; CHECK-8MBASE-NEXT: bne .LBB3_5
; CHECK-8MBASE-NEXT: @ %bb.3:
; CHECK-8MBASE-NEXT: beq .LBB3_6
; CHECK-8MBASE-NEXT: .LBB3_4:
; CHECK-8MBASE-NEXT: movs r1, #0
; CHECK-8MBASE-NEXT: pop {r4, r5, r7, pc}
; CHECK-8MBASE-NEXT: .LBB3_5:
; CHECK-8MBASE-NEXT: ldrb r0, [r3, r1]
; CHECK-8MBASE-NEXT: bne .LBB3_4
; CHECK-8MBASE-NEXT: bne .LBB3_7
; CHECK-8MBASE-NEXT: @ %bb.4: @ %cond.false
; CHECK-8MBASE-NEXT: beq .LBB3_8
Comment on lines +301 to +303
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW
There are some strange branches here and below (on the left, too).
X86 eliminates them in X86TargetLowering::EmitLoweredSelect. AFAICT ARM's equivalent doesn't do this optimization.
There is also a PPCBranchCoalescing pass that could help here. It is more or less generic, but currently lives under the PowerPC directory.

; CHECK-8MBASE-NEXT: .LBB3_5: @ %cond.end
; CHECK-8MBASE-NEXT: pop {r4, r5, r6, pc}
; CHECK-8MBASE-NEXT: .LBB3_6:
; CHECK-8MBASE-NEXT: mov r0, r4
; CHECK-8MBASE-NEXT: movs r1, #0
; CHECK-8MBASE-NEXT: pop {r4, r5, r7, pc}
; CHECK-8MBASE-NEXT: movs r0, #64
; CHECK-8MBASE-NEXT: pop {r4, r5, r6, pc}
; CHECK-8MBASE-NEXT: .LBB3_7: @ %cond.false
; CHECK-8MBASE-NEXT: ldrb r0, [r4, r3]
; CHECK-8MBASE-NEXT: bne .LBB3_5
; CHECK-8MBASE-NEXT: .LBB3_8: @ %cond.false
; CHECK-8MBASE-NEXT: mov r0, r5
; CHECK-8MBASE-NEXT: pop {r4, r5, r6, pc}
; CHECK-8MBASE-NEXT: .p2align 2
; CHECK-8MBASE-NEXT: @ %bb.7:
; CHECK-8MBASE-NEXT: @ %bb.9:
; CHECK-8MBASE-NEXT: .LCPI3_0:
; CHECK-8MBASE-NEXT: .ascii "\000\001\034\002\035\016\030\003\036\026\024\017\031\021\004\b\037\033\r\027\025\023\020\007\032\f\022\006\013\005\n\t"
%tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false)
Expand Down
26 changes: 20 additions & 6 deletions llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,18 @@ declare i64 @llvm.ctlz.i64(i64, i1)
define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-LABEL: ctlz_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: or a2, a0, a1
; RV32I-NEXT: beqz a2, .LBB1_3
; RV32I-NEXT: # %bb.1: # %cond.false
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: lui a3, 209715
; RV32I-NEXT: lui a6, 61681
; RV32I-NEXT: addi a5, a2, 1365
; RV32I-NEXT: addi a4, a3, 819
; RV32I-NEXT: addi a3, a6, -241
; RV32I-NEXT: li a2, 32
; RV32I-NEXT: beqz a1, .LBB1_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: beqz a1, .LBB1_4
; RV32I-NEXT: # %bb.2: # %cond.false
; RV32I-NEXT: srli a0, a1, 1
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
Expand Down Expand Up @@ -99,7 +102,11 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB1_2:
; RV32I-NEXT: .LBB1_3:
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: li a0, 64
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB1_4:
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
Expand Down Expand Up @@ -195,14 +202,17 @@ declare i64 @llvm.cttz.i64(i64, i1)
define i64 @cttz_i64(i64 %a) nounwind {
; RV32I-LABEL: cttz_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: or a2, a0, a1
; RV32I-NEXT: beqz a2, .LBB3_3
; RV32I-NEXT: # %bb.1: # %cond.false
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: lui a3, 209715
; RV32I-NEXT: lui a5, 61681
; RV32I-NEXT: addi a4, a2, 1365
; RV32I-NEXT: addi a3, a3, 819
; RV32I-NEXT: addi a2, a5, -241
; RV32I-NEXT: beqz a0, .LBB3_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: beqz a0, .LBB3_4
; RV32I-NEXT: # %bb.2: # %cond.false
; RV32I-NEXT: not a1, a0
; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: and a0, a1, a0
Expand All @@ -223,7 +233,11 @@ define i64 @cttz_i64(i64 %a) nounwind {
; RV32I-NEXT: srli a0, a0, 24
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB3_2:
; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: li a0, 64
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB3_4:
; RV32I-NEXT: not a0, a1
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: and a0, a0, a1
Expand Down
Loading
Loading