diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index e8dc7752b23c0..f9dcb472ed1d2 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2552,9 +2552,9 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty))) return false; - // Only handle legal scalar cases. Anything else requires too much work. + // Only handle scalar cases. Anything else requires too much work. unsigned SizeInBits = Ty->getScalarSizeInBits(); - if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits()) + if (Ty->isVectorTy()) return false; // Bail if the value is never zero. diff --git a/llvm/test/CodeGen/ARM/cttz.ll b/llvm/test/CodeGen/ARM/cttz.ll index 76adc61c5971f..1146ad64ee709 100644 --- a/llvm/test/CodeGen/ARM/cttz.ll +++ b/llvm/test/CodeGen/ARM/cttz.ll @@ -221,43 +221,49 @@ define i64 @test_i64(i64 %a) { ; ; CHECK-6M-LABEL: test_i64: ; CHECK-6M: @ %bb.0: -; CHECK-6M-NEXT: .save {r4, r5, r7, lr} -; CHECK-6M-NEXT: push {r4, r5, r7, lr} +; CHECK-6M-NEXT: .save {r4, r5, r6, lr} +; CHECK-6M-NEXT: push {r4, r5, r6, lr} +; CHECK-6M-NEXT: mov r3, r1 ; CHECK-6M-NEXT: mov r2, r0 -; CHECK-6M-NEXT: ldr r5, .LCPI3_0 -; CHECK-6M-NEXT: adr r3, .LCPI3_1 +; CHECK-6M-NEXT: movs r1, #0 +; CHECK-6M-NEXT: orrs r0, r3 +; CHECK-6M-NEXT: beq .LBB3_6 +; CHECK-6M-NEXT: @ %bb.1: @ %cond.false +; CHECK-6M-NEXT: ldr r6, .LCPI3_0 +; CHECK-6M-NEXT: adr r4, .LCPI3_1 ; CHECK-6M-NEXT: movs r0, #32 -; CHECK-6M-NEXT: cmp r1, #0 -; CHECK-6M-NEXT: mov r4, r0 -; CHECK-6M-NEXT: beq .LBB3_2 -; CHECK-6M-NEXT: @ %bb.1: -; CHECK-6M-NEXT: rsbs r4, r1, #0 -; CHECK-6M-NEXT: ands r4, r1 -; CHECK-6M-NEXT: muls r4, r5, r4 -; CHECK-6M-NEXT: lsrs r1, r4, #27 -; CHECK-6M-NEXT: ldrb r4, [r3, r1] -; CHECK-6M-NEXT: .LBB3_2: -; CHECK-6M-NEXT: adds r4, #32 -; CHECK-6M-NEXT: rsbs r1, r2, #0 -; CHECK-6M-NEXT: ands r1, r2 -; CHECK-6M-NEXT: muls r5, r1, r5 -; CHECK-6M-NEXT: lsrs r1, r5, #27 +; CHECK-6M-NEXT: cmp r3, #0 +; CHECK-6M-NEXT: mov r5, r0 +; CHECK-6M-NEXT: beq .LBB3_3 +; CHECK-6M-NEXT: @ %bb.2: @ %cond.false +; CHECK-6M-NEXT: rsbs r5, r3, #0 +; CHECK-6M-NEXT: ands r5, r3 +; CHECK-6M-NEXT: muls r5, r6, r5 +; CHECK-6M-NEXT: lsrs r3, r5, #27 +; CHECK-6M-NEXT: ldrb r5, [r4, r3] +; CHECK-6M-NEXT: .LBB3_3: @ %cond.false +; CHECK-6M-NEXT: adds r5, #32 +; CHECK-6M-NEXT: rsbs r3, r2, #0 +; CHECK-6M-NEXT: ands r3, r2 +; CHECK-6M-NEXT: muls r6, r3, r6 +; CHECK-6M-NEXT: lsrs r3, r6, #27 ; CHECK-6M-NEXT: cmp r2, #0 -; CHECK-6M-NEXT: bne .LBB3_5 -; CHECK-6M-NEXT: @ %bb.3: -; CHECK-6M-NEXT: beq .LBB3_6 -; CHECK-6M-NEXT: .LBB3_4: -; CHECK-6M-NEXT: movs r1, #0 -; CHECK-6M-NEXT: pop {r4, r5, r7, pc} -; CHECK-6M-NEXT: .LBB3_5: -; CHECK-6M-NEXT: ldrb r0, [r3, r1] -; CHECK-6M-NEXT: bne .LBB3_4 +; CHECK-6M-NEXT: bne .LBB3_7 +; CHECK-6M-NEXT: @ %bb.4: @ %cond.false +; CHECK-6M-NEXT: beq .LBB3_8 +; CHECK-6M-NEXT: .LBB3_5: @ %cond.end +; CHECK-6M-NEXT: pop {r4, r5, r6, pc} ; CHECK-6M-NEXT: .LBB3_6: -; CHECK-6M-NEXT: mov r0, r4 -; CHECK-6M-NEXT: movs r1, #0 -; CHECK-6M-NEXT: pop {r4, r5, r7, pc} +; CHECK-6M-NEXT: movs r0, #64 +; CHECK-6M-NEXT: pop {r4, r5, r6, pc} +; CHECK-6M-NEXT: .LBB3_7: @ %cond.false +; CHECK-6M-NEXT: ldrb r0, [r4, r3] +; CHECK-6M-NEXT: bne .LBB3_5 +; CHECK-6M-NEXT: .LBB3_8: @ %cond.false +; CHECK-6M-NEXT: mov r0, r5 +; CHECK-6M-NEXT: pop {r4, r5, r6, pc} ; CHECK-6M-NEXT: .p2align 2 -; CHECK-6M-NEXT: @ %bb.7: +; CHECK-6M-NEXT: @ %bb.9: ; CHECK-6M-NEXT: .LCPI3_0: ; CHECK-6M-NEXT: .long 125613361 @ 
0x77cb531 ; CHECK-6M-NEXT: .LCPI3_1: @@ -265,43 +271,49 @@ define i64 @test_i64(i64 %a) { ; ; CHECK-8MBASE-LABEL: test_i64: ; CHECK-8MBASE: @ %bb.0: -; CHECK-8MBASE-NEXT: .save {r4, r5, r7, lr} -; CHECK-8MBASE-NEXT: push {r4, r5, r7, lr} +; CHECK-8MBASE-NEXT: .save {r4, r5, r6, lr} +; CHECK-8MBASE-NEXT: push {r4, r5, r6, lr} +; CHECK-8MBASE-NEXT: mov r3, r1 ; CHECK-8MBASE-NEXT: mov r2, r0 -; CHECK-8MBASE-NEXT: movw r5, #46385 -; CHECK-8MBASE-NEXT: movt r5, #1916 -; CHECK-8MBASE-NEXT: adr r3, .LCPI3_0 +; CHECK-8MBASE-NEXT: movs r1, #0 +; CHECK-8MBASE-NEXT: orrs r0, r3 +; CHECK-8MBASE-NEXT: beq .LBB3_6 +; CHECK-8MBASE-NEXT: @ %bb.1: @ %cond.false +; CHECK-8MBASE-NEXT: movw r6, #46385 +; CHECK-8MBASE-NEXT: movt r6, #1916 +; CHECK-8MBASE-NEXT: adr r4, .LCPI3_0 ; CHECK-8MBASE-NEXT: movs r0, #32 -; CHECK-8MBASE-NEXT: mov r4, r0 -; CHECK-8MBASE-NEXT: cbz r1, .LBB3_2 -; CHECK-8MBASE-NEXT: @ %bb.1: -; CHECK-8MBASE-NEXT: rsbs r4, r1, #0 -; CHECK-8MBASE-NEXT: ands r4, r1 -; CHECK-8MBASE-NEXT: muls r4, r5, r4 -; CHECK-8MBASE-NEXT: lsrs r1, r4, #27 -; CHECK-8MBASE-NEXT: ldrb r4, [r3, r1] -; CHECK-8MBASE-NEXT: .LBB3_2: -; CHECK-8MBASE-NEXT: adds r4, #32 -; CHECK-8MBASE-NEXT: rsbs r1, r2, #0 -; CHECK-8MBASE-NEXT: ands r1, r2 -; CHECK-8MBASE-NEXT: muls r5, r1, r5 -; CHECK-8MBASE-NEXT: lsrs r1, r5, #27 +; CHECK-8MBASE-NEXT: mov r5, r0 +; CHECK-8MBASE-NEXT: cbz r3, .LBB3_3 +; CHECK-8MBASE-NEXT: @ %bb.2: @ %cond.false +; CHECK-8MBASE-NEXT: rsbs r5, r3, #0 +; CHECK-8MBASE-NEXT: ands r5, r3 +; CHECK-8MBASE-NEXT: muls r5, r6, r5 +; CHECK-8MBASE-NEXT: lsrs r3, r5, #27 +; CHECK-8MBASE-NEXT: ldrb r5, [r4, r3] +; CHECK-8MBASE-NEXT: .LBB3_3: @ %cond.false +; CHECK-8MBASE-NEXT: adds r5, #32 +; CHECK-8MBASE-NEXT: rsbs r3, r2, #0 +; CHECK-8MBASE-NEXT: ands r3, r2 +; CHECK-8MBASE-NEXT: muls r6, r3, r6 +; CHECK-8MBASE-NEXT: lsrs r3, r6, #27 ; CHECK-8MBASE-NEXT: cmp r2, #0 -; CHECK-8MBASE-NEXT: bne .LBB3_5 -; CHECK-8MBASE-NEXT: @ %bb.3: -; CHECK-8MBASE-NEXT: beq .LBB3_6 -; CHECK-8MBASE-NEXT: .LBB3_4: -; CHECK-8MBASE-NEXT: movs r1, #0 -; CHECK-8MBASE-NEXT: pop {r4, r5, r7, pc} -; CHECK-8MBASE-NEXT: .LBB3_5: -; CHECK-8MBASE-NEXT: ldrb r0, [r3, r1] -; CHECK-8MBASE-NEXT: bne .LBB3_4 +; CHECK-8MBASE-NEXT: bne .LBB3_7 +; CHECK-8MBASE-NEXT: @ %bb.4: @ %cond.false +; CHECK-8MBASE-NEXT: beq .LBB3_8 +; CHECK-8MBASE-NEXT: .LBB3_5: @ %cond.end +; CHECK-8MBASE-NEXT: pop {r4, r5, r6, pc} ; CHECK-8MBASE-NEXT: .LBB3_6: -; CHECK-8MBASE-NEXT: mov r0, r4 -; CHECK-8MBASE-NEXT: movs r1, #0 -; CHECK-8MBASE-NEXT: pop {r4, r5, r7, pc} +; CHECK-8MBASE-NEXT: movs r0, #64 +; CHECK-8MBASE-NEXT: pop {r4, r5, r6, pc} +; CHECK-8MBASE-NEXT: .LBB3_7: @ %cond.false +; CHECK-8MBASE-NEXT: ldrb r0, [r4, r3] +; CHECK-8MBASE-NEXT: bne .LBB3_5 +; CHECK-8MBASE-NEXT: .LBB3_8: @ %cond.false +; CHECK-8MBASE-NEXT: mov r0, r5 +; CHECK-8MBASE-NEXT: pop {r4, r5, r6, pc} ; CHECK-8MBASE-NEXT: .p2align 2 -; CHECK-8MBASE-NEXT: @ %bb.7: +; CHECK-8MBASE-NEXT: @ %bb.9: ; CHECK-8MBASE-NEXT: .LCPI3_0: ; CHECK-8MBASE-NEXT: .ascii "\000\001\034\002\035\016\030\003\036\026\024\017\031\021\004\b\037\033\r\027\025\023\020\007\032\f\022\006\013\005\n\t" %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll index f9af74d6ec323..0632caecf8907 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll @@ -62,6 +62,9 @@ declare i64 @llvm.ctlz.i64(i64, i1) define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-LABEL: ctlz_i64: ; RV32I: # %bb.0: 
+; RV32I-NEXT: or a2, a0, a1 +; RV32I-NEXT: beqz a2, .LBB1_3 +; RV32I-NEXT: # %bb.1: # %cond.false ; RV32I-NEXT: lui a2, 349525 ; RV32I-NEXT: lui a3, 209715 ; RV32I-NEXT: lui a6, 61681 @@ -69,8 +72,8 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: addi a4, a3, 819 ; RV32I-NEXT: addi a3, a6, -241 ; RV32I-NEXT: li a2, 32 -; RV32I-NEXT: beqz a1, .LBB1_2 -; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: beqz a1, .LBB1_4 +; RV32I-NEXT: # %bb.2: # %cond.false ; RV32I-NEXT: srli a0, a1, 1 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 @@ -99,7 +102,11 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB1_2: +; RV32I-NEXT: .LBB1_3: +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: li a0, 64 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB1_4: ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 @@ -195,14 +202,17 @@ declare i64 @llvm.cttz.i64(i64, i1) define i64 @cttz_i64(i64 %a) nounwind { ; RV32I-LABEL: cttz_i64: ; RV32I: # %bb.0: +; RV32I-NEXT: or a2, a0, a1 +; RV32I-NEXT: beqz a2, .LBB3_3 +; RV32I-NEXT: # %bb.1: # %cond.false ; RV32I-NEXT: lui a2, 349525 ; RV32I-NEXT: lui a3, 209715 ; RV32I-NEXT: lui a5, 61681 ; RV32I-NEXT: addi a4, a2, 1365 ; RV32I-NEXT: addi a3, a3, 819 ; RV32I-NEXT: addi a2, a5, -241 -; RV32I-NEXT: beqz a0, .LBB3_2 -; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: beqz a0, .LBB3_4 +; RV32I-NEXT: # %bb.2: # %cond.false ; RV32I-NEXT: not a1, a0 ; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: and a0, a1, a0 @@ -223,7 +233,11 @@ define i64 @cttz_i64(i64 %a) nounwind { ; RV32I-NEXT: srli a0, a0, 24 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT: .LBB3_3: +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: li a0, 64 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB3_4: ; RV32I-NEXT: not a0, a1 ; RV32I-NEXT: addi a1, a1, -1 ; RV32I-NEXT: and a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll index a46168f114bb9..3a7d31253b05d 100644 --- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll @@ -374,39 +374,42 @@ define i64 @test_cttz_i64(i64 %a) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: and a0, s0, a0 -; RV32I-NEXT: lui a1, 30667 -; RV32I-NEXT: addi s3, a1, 1329 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: beqz a1, .LBB3_3 +; RV32I-NEXT: # %bb.1: # %cond.false +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: lui a2, 30667 +; RV32I-NEXT: addi s2, a2, 1329 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lui s4, %hi(.LCPI3_0) -; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0) -; RV32I-NEXT: neg a0, s2 -; RV32I-NEXT: and a0, s2, a0 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: lui s3, %hi(.LCPI3_0) +; RV32I-NEXT: addi s3, s3, %lo(.LCPI3_0) +; RV32I-NEXT: neg a0, s0 +; RV32I-NEXT: and a0, s0, a0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: bnez s2, .LBB3_3 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a0, 32 -; RV32I-NEXT: beqz s0, .LBB3_4 -; RV32I-NEXT: .LBB3_2: -; RV32I-NEXT: srli s1, s1, 27 -; RV32I-NEXT: add s1, s4, s1 -; RV32I-NEXT: lbu a0, 0(s1) -; RV32I-NEXT: j .LBB3_5 -; RV32I-NEXT: .LBB3_3: +; RV32I-NEXT: bnez s4, 
.LBB3_4 +; RV32I-NEXT: # %bb.2: # %cond.false ; RV32I-NEXT: srli a0, a0, 27 -; RV32I-NEXT: add a0, s4, a0 +; RV32I-NEXT: add a0, s3, a0 ; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: bnez s0, .LBB3_2 -; RV32I-NEXT: .LBB3_4: ; RV32I-NEXT: addi a0, a0, 32 -; RV32I-NEXT: .LBB3_5: +; RV32I-NEXT: j .LBB3_5 +; RV32I-NEXT: .LBB3_3: +; RV32I-NEXT: li a0, 64 +; RV32I-NEXT: j .LBB3_6 +; RV32I-NEXT: .LBB3_4: +; RV32I-NEXT: srli s1, s1, 27 +; RV32I-NEXT: add s1, s3, s1 +; RV32I-NEXT: lbu a0, 0(s1) +; RV32I-NEXT: .LBB3_5: # %cond.false ; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: .LBB3_6: # %cond.end ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -441,33 +444,35 @@ define i64 @test_cttz_i64(i64 %a) nounwind { ; ; RV32M-LABEL: test_cttz_i64: ; RV32M: # %bb.0: +; RV32M-NEXT: or a2, a0, a1 +; RV32M-NEXT: beqz a2, .LBB3_3 +; RV32M-NEXT: # %bb.1: # %cond.false ; RV32M-NEXT: lui a2, 30667 ; RV32M-NEXT: addi a3, a2, 1329 ; RV32M-NEXT: lui a2, %hi(.LCPI3_0) ; RV32M-NEXT: addi a2, a2, %lo(.LCPI3_0) -; RV32M-NEXT: bnez a1, .LBB3_3 -; RV32M-NEXT: # %bb.1: -; RV32M-NEXT: li a1, 32 -; RV32M-NEXT: beqz a0, .LBB3_4 -; RV32M-NEXT: .LBB3_2: -; RV32M-NEXT: neg a1, a0 -; RV32M-NEXT: and a0, a0, a1 +; RV32M-NEXT: bnez a0, .LBB3_4 +; RV32M-NEXT: # %bb.2: # %cond.false +; RV32M-NEXT: neg a0, a1 +; RV32M-NEXT: and a0, a1, a0 ; RV32M-NEXT: mul a0, a0, a3 ; RV32M-NEXT: srli a0, a0, 27 ; RV32M-NEXT: add a0, a2, a0 ; RV32M-NEXT: lbu a0, 0(a0) +; RV32M-NEXT: addi a0, a0, 32 ; RV32M-NEXT: li a1, 0 ; RV32M-NEXT: ret ; RV32M-NEXT: .LBB3_3: -; RV32M-NEXT: neg a4, a1 -; RV32M-NEXT: and a1, a1, a4 -; RV32M-NEXT: mul a1, a1, a3 -; RV32M-NEXT: srli a1, a1, 27 -; RV32M-NEXT: add a1, a2, a1 -; RV32M-NEXT: lbu a1, 0(a1) -; RV32M-NEXT: bnez a0, .LBB3_2 +; RV32M-NEXT: li a1, 0 +; RV32M-NEXT: li a0, 64 +; RV32M-NEXT: ret ; RV32M-NEXT: .LBB3_4: -; RV32M-NEXT: addi a0, a1, 32 +; RV32M-NEXT: neg a1, a0 +; RV32M-NEXT: and a0, a0, a1 +; RV32M-NEXT: mul a0, a0, a3 +; RV32M-NEXT: srli a0, a0, 27 +; RV32M-NEXT: add a0, a2, a0 +; RV32M-NEXT: lbu a0, 0(a0) ; RV32M-NEXT: li a1, 0 ; RV32M-NEXT: ret ; @@ -510,21 +515,28 @@ define i64 @test_cttz_i64(i64 %a) nounwind { ; ; RV32XTHEADBB-LABEL: test_cttz_i64: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: bnez a0, .LBB3_2 -; RV32XTHEADBB-NEXT: # %bb.1: +; RV32XTHEADBB-NEXT: or a2, a0, a1 +; RV32XTHEADBB-NEXT: beqz a2, .LBB3_3 +; RV32XTHEADBB-NEXT: # %bb.1: # %cond.false +; RV32XTHEADBB-NEXT: bnez a0, .LBB3_4 +; RV32XTHEADBB-NEXT: # %bb.2: # %cond.false ; RV32XTHEADBB-NEXT: addi a0, a1, -1 ; RV32XTHEADBB-NEXT: not a1, a1 ; RV32XTHEADBB-NEXT: and a0, a1, a0 ; RV32XTHEADBB-NEXT: th.ff1 a0, a0 ; RV32XTHEADBB-NEXT: li a1, 64 -; RV32XTHEADBB-NEXT: j .LBB3_3 -; RV32XTHEADBB-NEXT: .LBB3_2: +; RV32XTHEADBB-NEXT: j .LBB3_5 +; RV32XTHEADBB-NEXT: .LBB3_3: +; RV32XTHEADBB-NEXT: li a1, 0 +; RV32XTHEADBB-NEXT: li a0, 64 +; RV32XTHEADBB-NEXT: ret +; RV32XTHEADBB-NEXT: .LBB3_4: ; RV32XTHEADBB-NEXT: addi a1, a0, -1 ; RV32XTHEADBB-NEXT: not a0, a0 ; RV32XTHEADBB-NEXT: and a0, a0, a1 ; RV32XTHEADBB-NEXT: th.ff1 a0, a0 ; RV32XTHEADBB-NEXT: li a1, 32 -; RV32XTHEADBB-NEXT: .LBB3_3: +; RV32XTHEADBB-NEXT: .LBB3_5: # %cond.false ; RV32XTHEADBB-NEXT: sub a0, a1, a0 ; RV32XTHEADBB-NEXT: li a1, 0 ; RV32XTHEADBB-NEXT: ret @@ -1348,14 +1360,17 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { define i64 @test_ctlz_i64(i64 %a) nounwind { ; RV32I-LABEL: test_ctlz_i64: ; RV32I: # %bb.0: +; RV32I-NEXT: or a2, a0, a1 +; RV32I-NEXT: beqz a2, .LBB11_3 
+; RV32I-NEXT: # %bb.1: # %cond.false ; RV32I-NEXT: lui a2, 349525 ; RV32I-NEXT: lui a3, 209715 ; RV32I-NEXT: lui a5, 61681 ; RV32I-NEXT: addi a4, a2, 1365 ; RV32I-NEXT: addi a3, a3, 819 ; RV32I-NEXT: addi a2, a5, -241 -; RV32I-NEXT: bnez a1, .LBB11_2 -; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: bnez a1, .LBB11_4 +; RV32I-NEXT: # %bb.2: # %cond.false ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 @@ -1385,7 +1400,11 @@ define i64 @test_ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: addi a0, a0, 32 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB11_2: +; RV32I-NEXT: .LBB11_3: +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: li a0, 64 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB11_4: ; RV32I-NEXT: srli a0, a1, 1 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 @@ -1468,6 +1487,9 @@ define i64 @test_ctlz_i64(i64 %a) nounwind { ; ; RV32M-LABEL: test_ctlz_i64: ; RV32M: # %bb.0: +; RV32M-NEXT: or a2, a0, a1 +; RV32M-NEXT: beqz a2, .LBB11_3 +; RV32M-NEXT: # %bb.1: # %cond.false ; RV32M-NEXT: lui a2, 349525 ; RV32M-NEXT: lui a3, 209715 ; RV32M-NEXT: lui a6, 61681 @@ -1476,8 +1498,8 @@ define i64 @test_ctlz_i64(i64 %a) nounwind { ; RV32M-NEXT: addi a4, a3, 819 ; RV32M-NEXT: addi a3, a6, -241 ; RV32M-NEXT: addi a2, a7, 257 -; RV32M-NEXT: bnez a1, .LBB11_2 -; RV32M-NEXT: # %bb.1: +; RV32M-NEXT: bnez a1, .LBB11_4 +; RV32M-NEXT: # %bb.2: # %cond.false ; RV32M-NEXT: srli a1, a0, 1 ; RV32M-NEXT: or a0, a0, a1 ; RV32M-NEXT: srli a1, a0, 2 @@ -1504,7 +1526,11 @@ define i64 @test_ctlz_i64(i64 %a) nounwind { ; RV32M-NEXT: addi a0, a0, 32 ; RV32M-NEXT: li a1, 0 ; RV32M-NEXT: ret -; RV32M-NEXT: .LBB11_2: +; RV32M-NEXT: .LBB11_3: +; RV32M-NEXT: li a1, 0 +; RV32M-NEXT: li a0, 64 +; RV32M-NEXT: ret +; RV32M-NEXT: .LBB11_4: ; RV32M-NEXT: srli a0, a1, 1 ; RV32M-NEXT: or a0, a1, a0 ; RV32M-NEXT: srli a1, a0, 2 diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll index 04a2f67c4942b..723437a610ff8 100644 --- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll @@ -61,14 +61,17 @@ declare i64 @llvm.ctlz.i64(i64, i1) define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-LABEL: ctlz_i64: ; RV32I: # %bb.0: +; RV32I-NEXT: or a2, a0, a1 +; RV32I-NEXT: beqz a2, .LBB1_3 +; RV32I-NEXT: # %bb.1: # %cond.false ; RV32I-NEXT: lui a2, 349525 ; RV32I-NEXT: lui a3, 209715 ; RV32I-NEXT: lui a5, 61681 ; RV32I-NEXT: addi a4, a2, 1365 ; RV32I-NEXT: addi a3, a3, 819 ; RV32I-NEXT: addi a2, a5, -241 -; RV32I-NEXT: bnez a1, .LBB1_2 -; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: bnez a1, .LBB1_4 +; RV32I-NEXT: # %bb.2: # %cond.false ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 @@ -98,7 +101,11 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: addi a0, a0, 32 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB1_2: +; RV32I-NEXT: .LBB1_3: +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: li a0, 64 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB1_4: ; RV32I-NEXT: srli a0, a1, 1 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 @@ -200,39 +207,42 @@ define i64 @cttz_i64(i64 %a) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: and a0, s0, a0 -; RV32I-NEXT: lui a1, 30667 -; RV32I-NEXT: addi s3, a1, 1329 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: beqz a1, .LBB3_3 
+; RV32I-NEXT: # %bb.1: # %cond.false +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: lui a2, 30667 +; RV32I-NEXT: addi s2, a2, 1329 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lui s4, %hi(.LCPI3_0) -; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0) -; RV32I-NEXT: neg a0, s2 -; RV32I-NEXT: and a0, s2, a0 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: lui s3, %hi(.LCPI3_0) +; RV32I-NEXT: addi s3, s3, %lo(.LCPI3_0) +; RV32I-NEXT: neg a0, s0 +; RV32I-NEXT: and a0, s0, a0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: bnez s2, .LBB3_3 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a0, 32 -; RV32I-NEXT: beqz s0, .LBB3_4 -; RV32I-NEXT: .LBB3_2: -; RV32I-NEXT: srli s1, s1, 27 -; RV32I-NEXT: add s1, s4, s1 -; RV32I-NEXT: lbu a0, 0(s1) -; RV32I-NEXT: j .LBB3_5 -; RV32I-NEXT: .LBB3_3: +; RV32I-NEXT: bnez s4, .LBB3_4 +; RV32I-NEXT: # %bb.2: # %cond.false ; RV32I-NEXT: srli a0, a0, 27 -; RV32I-NEXT: add a0, s4, a0 +; RV32I-NEXT: add a0, s3, a0 ; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: bnez s0, .LBB3_2 -; RV32I-NEXT: .LBB3_4: ; RV32I-NEXT: addi a0, a0, 32 -; RV32I-NEXT: .LBB3_5: +; RV32I-NEXT: j .LBB3_5 +; RV32I-NEXT: .LBB3_3: +; RV32I-NEXT: li a0, 64 +; RV32I-NEXT: j .LBB3_6 +; RV32I-NEXT: .LBB3_4: +; RV32I-NEXT: srli s1, s1, 27 +; RV32I-NEXT: add s1, s3, s1 +; RV32I-NEXT: lbu a0, 0(s1) +; RV32I-NEXT: .LBB3_5: # %cond.false ; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: .LBB3_6: # %cond.end ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -244,21 +254,28 @@ define i64 @cttz_i64(i64 %a) nounwind { ; ; RV32XTHEADBB-LABEL: cttz_i64: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: bnez a0, .LBB3_2 -; RV32XTHEADBB-NEXT: # %bb.1: +; RV32XTHEADBB-NEXT: or a2, a0, a1 +; RV32XTHEADBB-NEXT: beqz a2, .LBB3_3 +; RV32XTHEADBB-NEXT: # %bb.1: # %cond.false +; RV32XTHEADBB-NEXT: bnez a0, .LBB3_4 +; RV32XTHEADBB-NEXT: # %bb.2: # %cond.false ; RV32XTHEADBB-NEXT: addi a0, a1, -1 ; RV32XTHEADBB-NEXT: not a1, a1 ; RV32XTHEADBB-NEXT: and a0, a1, a0 ; RV32XTHEADBB-NEXT: th.ff1 a0, a0 ; RV32XTHEADBB-NEXT: li a1, 64 -; RV32XTHEADBB-NEXT: j .LBB3_3 -; RV32XTHEADBB-NEXT: .LBB3_2: +; RV32XTHEADBB-NEXT: j .LBB3_5 +; RV32XTHEADBB-NEXT: .LBB3_3: +; RV32XTHEADBB-NEXT: li a1, 0 +; RV32XTHEADBB-NEXT: li a0, 64 +; RV32XTHEADBB-NEXT: ret +; RV32XTHEADBB-NEXT: .LBB3_4: ; RV32XTHEADBB-NEXT: addi a1, a0, -1 ; RV32XTHEADBB-NEXT: not a0, a0 ; RV32XTHEADBB-NEXT: and a0, a0, a1 ; RV32XTHEADBB-NEXT: th.ff1 a0, a0 ; RV32XTHEADBB-NEXT: li a1, 32 -; RV32XTHEADBB-NEXT: .LBB3_3: +; RV32XTHEADBB-NEXT: .LBB3_5: # %cond.false ; RV32XTHEADBB-NEXT: sub a0, a1, a0 ; RV32XTHEADBB-NEXT: li a1, 0 ; RV32XTHEADBB-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll index 98c86da41afa1..0f2284637ca6a 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -61,14 +61,17 @@ declare i64 @llvm.ctlz.i64(i64, i1) define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-LABEL: ctlz_i64: ; RV32I: # %bb.0: +; RV32I-NEXT: or a2, a0, a1 +; RV32I-NEXT: beqz a2, .LBB1_3 +; RV32I-NEXT: # %bb.1: # %cond.false ; RV32I-NEXT: lui a2, 349525 ; RV32I-NEXT: lui a3, 209715 ; RV32I-NEXT: lui a5, 61681 ; RV32I-NEXT: addi a4, a2, 1365 ; RV32I-NEXT: addi a3, a3, 819 ; RV32I-NEXT: addi a2, a5, -241 -; RV32I-NEXT: bnez a1, .LBB1_2 -; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: bnez a1, .LBB1_4 +; RV32I-NEXT: # 
%bb.2: # %cond.false ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 @@ -98,7 +101,11 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: addi a0, a0, 32 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB1_2: +; RV32I-NEXT: .LBB1_3: +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: li a0, 64 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB1_4: ; RV32I-NEXT: srli a0, a1, 1 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 @@ -190,39 +197,42 @@ define i64 @cttz_i64(i64 %a) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: and a0, s0, a0 -; RV32I-NEXT: lui a1, 30667 -; RV32I-NEXT: addi s3, a1, 1329 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: beqz a1, .LBB3_3 +; RV32I-NEXT: # %bb.1: # %cond.false +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: lui a2, 30667 +; RV32I-NEXT: addi s2, a2, 1329 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lui s4, %hi(.LCPI3_0) -; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0) -; RV32I-NEXT: neg a0, s2 -; RV32I-NEXT: and a0, s2, a0 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: lui s3, %hi(.LCPI3_0) +; RV32I-NEXT: addi s3, s3, %lo(.LCPI3_0) +; RV32I-NEXT: neg a0, s0 +; RV32I-NEXT: and a0, s0, a0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: bnez s2, .LBB3_3 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a0, 32 -; RV32I-NEXT: beqz s0, .LBB3_4 -; RV32I-NEXT: .LBB3_2: -; RV32I-NEXT: srli s1, s1, 27 -; RV32I-NEXT: add s1, s4, s1 -; RV32I-NEXT: lbu a0, 0(s1) -; RV32I-NEXT: j .LBB3_5 -; RV32I-NEXT: .LBB3_3: +; RV32I-NEXT: bnez s4, .LBB3_4 +; RV32I-NEXT: # %bb.2: # %cond.false ; RV32I-NEXT: srli a0, a0, 27 -; RV32I-NEXT: add a0, s4, a0 +; RV32I-NEXT: add a0, s3, a0 ; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: bnez s0, .LBB3_2 -; RV32I-NEXT: .LBB3_4: ; RV32I-NEXT: addi a0, a0, 32 -; RV32I-NEXT: .LBB3_5: +; RV32I-NEXT: j .LBB3_5 +; RV32I-NEXT: .LBB3_3: +; RV32I-NEXT: li a0, 64 +; RV32I-NEXT: j .LBB3_6 +; RV32I-NEXT: .LBB3_4: +; RV32I-NEXT: srli s1, s1, 27 +; RV32I-NEXT: add s1, s3, s1 +; RV32I-NEXT: lbu a0, 0(s1) +; RV32I-NEXT: .LBB3_5: # %cond.false ; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: .LBB3_6: # %cond.end ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/SPARC/ctlz.ll b/llvm/test/CodeGen/SPARC/ctlz.ll index 72505f221469e..75930190f5166 100644 --- a/llvm/test/CodeGen/SPARC/ctlz.ll +++ b/llvm/test/CodeGen/SPARC/ctlz.ll @@ -156,96 +156,54 @@ define i64 @i64_nopoison(i64 %x) nounwind { ; SPARC-LABEL: i64_nopoison: ; SPARC: ! %bb.0: ; SPARC-NEXT: save %sp, -96, %sp +; SPARC-NEXT: or %i1, %i0, %i2 +; SPARC-NEXT: cmp %i2, 0 +; SPARC-NEXT: be .LBB2_4 +; SPARC-NEXT: nop +; SPARC-NEXT: ! %bb.1: ! %cond.false +; SPARC-NEXT: call __clzsi2 +; SPARC-NEXT: mov %i1, %o0 +; SPARC-NEXT: mov %o0, %i2 ; SPARC-NEXT: call __clzsi2 ; SPARC-NEXT: mov %i0, %o0 ; SPARC-NEXT: cmp %i0, 0 -; SPARC-NEXT: bne .LBB2_2 -; SPARC-NEXT: nop -; SPARC-NEXT: ! 
%bb.1: -; SPARC-NEXT: srl %i1, 1, %i0 -; SPARC-NEXT: or %i1, %i0, %i0 -; SPARC-NEXT: srl %i0, 2, %i1 -; SPARC-NEXT: or %i0, %i1, %i0 -; SPARC-NEXT: srl %i0, 4, %i1 -; SPARC-NEXT: or %i0, %i1, %i0 -; SPARC-NEXT: srl %i0, 8, %i1 -; SPARC-NEXT: or %i0, %i1, %i0 -; SPARC-NEXT: srl %i0, 16, %i1 -; SPARC-NEXT: or %i0, %i1, %i0 -; SPARC-NEXT: xor %i0, -1, %i0 -; SPARC-NEXT: srl %i0, 1, %i1 -; SPARC-NEXT: sethi 1398101, %i2 -; SPARC-NEXT: or %i2, 341, %i2 -; SPARC-NEXT: and %i1, %i2, %i1 -; SPARC-NEXT: sub %i0, %i1, %i0 -; SPARC-NEXT: sethi 838860, %i1 -; SPARC-NEXT: or %i1, 819, %i1 -; SPARC-NEXT: and %i0, %i1, %i2 -; SPARC-NEXT: srl %i0, 2, %i0 -; SPARC-NEXT: and %i0, %i1, %i0 -; SPARC-NEXT: add %i2, %i0, %i0 -; SPARC-NEXT: srl %i0, 4, %i1 -; SPARC-NEXT: add %i0, %i1, %i0 -; SPARC-NEXT: sethi 246723, %i1 -; SPARC-NEXT: or %i1, 783, %i1 -; SPARC-NEXT: and %i0, %i1, %i0 -; SPARC-NEXT: sll %i0, 8, %i1 -; SPARC-NEXT: add %i0, %i1, %i0 -; SPARC-NEXT: sll %i0, 16, %i1 -; SPARC-NEXT: add %i0, %i1, %i0 -; SPARC-NEXT: srl %i0, 24, %i0 -; SPARC-NEXT: add %i0, 32, %o0 -; SPARC-NEXT: .LBB2_2: +; SPARC-NEXT: bne .LBB2_3 +; SPARC-NEXT: mov %o0, %i1 +; SPARC-NEXT: ! %bb.2: ! %cond.false +; SPARC-NEXT: add %i2, 32, %i1 +; SPARC-NEXT: .LBB2_3: ! %cond.false +; SPARC-NEXT: ret +; SPARC-NEXT: restore %g0, %g0, %o0 +; SPARC-NEXT: .LBB2_4: ; SPARC-NEXT: mov %g0, %i0 ; SPARC-NEXT: ret -; SPARC-NEXT: restore %g0, %o0, %o1 +; SPARC-NEXT: restore %g0, 64, %o1 ; ; SPARC-POPC-LABEL: i64_nopoison: ; SPARC-POPC: ! %bb.0: ; SPARC-POPC-NEXT: save %sp, -96, %sp +; SPARC-POPC-NEXT: or %i1, %i0, %i2 +; SPARC-POPC-NEXT: cmp %i2, 0 +; SPARC-POPC-NEXT: be .LBB2_4 +; SPARC-POPC-NEXT: nop +; SPARC-POPC-NEXT: ! %bb.1: ! %cond.false +; SPARC-POPC-NEXT: call __clzsi2 +; SPARC-POPC-NEXT: mov %i1, %o0 +; SPARC-POPC-NEXT: mov %o0, %i2 ; SPARC-POPC-NEXT: call __clzsi2 ; SPARC-POPC-NEXT: mov %i0, %o0 ; SPARC-POPC-NEXT: cmp %i0, 0 -; SPARC-POPC-NEXT: bne .LBB2_2 -; SPARC-POPC-NEXT: nop -; SPARC-POPC-NEXT: ! %bb.1: -; SPARC-POPC-NEXT: srl %i1, 1, %i0 -; SPARC-POPC-NEXT: or %i1, %i0, %i0 -; SPARC-POPC-NEXT: srl %i0, 2, %i1 -; SPARC-POPC-NEXT: or %i0, %i1, %i0 -; SPARC-POPC-NEXT: srl %i0, 4, %i1 -; SPARC-POPC-NEXT: or %i0, %i1, %i0 -; SPARC-POPC-NEXT: srl %i0, 8, %i1 -; SPARC-POPC-NEXT: or %i0, %i1, %i0 -; SPARC-POPC-NEXT: srl %i0, 16, %i1 -; SPARC-POPC-NEXT: or %i0, %i1, %i0 -; SPARC-POPC-NEXT: xor %i0, -1, %i0 -; SPARC-POPC-NEXT: srl %i0, 1, %i1 -; SPARC-POPC-NEXT: sethi 1398101, %i2 -; SPARC-POPC-NEXT: or %i2, 341, %i2 -; SPARC-POPC-NEXT: and %i1, %i2, %i1 -; SPARC-POPC-NEXT: sub %i0, %i1, %i0 -; SPARC-POPC-NEXT: sethi 838860, %i1 -; SPARC-POPC-NEXT: or %i1, 819, %i1 -; SPARC-POPC-NEXT: and %i0, %i1, %i2 -; SPARC-POPC-NEXT: srl %i0, 2, %i0 -; SPARC-POPC-NEXT: and %i0, %i1, %i0 -; SPARC-POPC-NEXT: add %i2, %i0, %i0 -; SPARC-POPC-NEXT: srl %i0, 4, %i1 -; SPARC-POPC-NEXT: add %i0, %i1, %i0 -; SPARC-POPC-NEXT: sethi 246723, %i1 -; SPARC-POPC-NEXT: or %i1, 783, %i1 -; SPARC-POPC-NEXT: and %i0, %i1, %i0 -; SPARC-POPC-NEXT: sll %i0, 8, %i1 -; SPARC-POPC-NEXT: add %i0, %i1, %i0 -; SPARC-POPC-NEXT: sll %i0, 16, %i1 -; SPARC-POPC-NEXT: add %i0, %i1, %i0 -; SPARC-POPC-NEXT: srl %i0, 24, %i0 -; SPARC-POPC-NEXT: add %i0, 32, %o0 -; SPARC-POPC-NEXT: .LBB2_2: +; SPARC-POPC-NEXT: bne .LBB2_3 +; SPARC-POPC-NEXT: mov %o0, %i1 +; SPARC-POPC-NEXT: ! %bb.2: ! %cond.false +; SPARC-POPC-NEXT: add %i2, 32, %i1 +; SPARC-POPC-NEXT: .LBB2_3: ! 
%cond.false +; SPARC-POPC-NEXT: ret +; SPARC-POPC-NEXT: restore %g0, %g0, %o0 +; SPARC-POPC-NEXT: .LBB2_4: ; SPARC-POPC-NEXT: mov %g0, %i0 ; SPARC-POPC-NEXT: ret -; SPARC-POPC-NEXT: restore %g0, %o0, %o1 +; SPARC-POPC-NEXT: restore %g0, 64, %o1 ; ; SPARC-VIS3-LABEL: i64_nopoison: ; SPARC-VIS3: ! %bb.0: diff --git a/llvm/test/CodeGen/SPARC/cttz.ll b/llvm/test/CodeGen/SPARC/cttz.ll index 05c47b868c830..edabd7d560eda 100644 --- a/llvm/test/CodeGen/SPARC/cttz.ll +++ b/llvm/test/CodeGen/SPARC/cttz.ll @@ -184,21 +184,31 @@ define i32 @i32_poison(i32 %x) nounwind { define i64 @i64_nopoison(i64 %x) nounwind { ; SPARC-LABEL: i64_nopoison: ; SPARC: ! %bb.0: +; SPARC-NEXT: or %o1, %o0, %o2 +; SPARC-NEXT: cmp %o2, 0 +; SPARC-NEXT: be .LBB2_3 +; SPARC-NEXT: nop +; SPARC-NEXT: ! %bb.1: ! %cond.false ; SPARC-NEXT: sethi 122669, %o2 ; SPARC-NEXT: or %o2, 305, %o2 ; SPARC-NEXT: sethi %hi(.LCPI2_0), %o3 -; SPARC-NEXT: cmp %o0, 0 -; SPARC-NEXT: be .LBB2_3 +; SPARC-NEXT: cmp %o1, 0 +; SPARC-NEXT: bne .LBB2_4 ; SPARC-NEXT: add %o3, %lo(.LCPI2_0), %o3 -; SPARC-NEXT: ! %bb.1: -; SPARC-NEXT: sub %g0, %o0, %o4 -; SPARC-NEXT: and %o0, %o4, %o0 +; SPARC-NEXT: ! %bb.2: ! %cond.false +; SPARC-NEXT: sub %g0, %o0, %o1 +; SPARC-NEXT: and %o0, %o1, %o0 ; SPARC-NEXT: smul %o0, %o2, %o0 ; SPARC-NEXT: srl %o0, 27, %o0 -; SPARC-NEXT: cmp %o1, 0 -; SPARC-NEXT: be .LBB2_4 ; SPARC-NEXT: ldub [%o3+%o0], %o0 -; SPARC-NEXT: .LBB2_2: +; SPARC-NEXT: add %o0, 32, %o1 +; SPARC-NEXT: retl +; SPARC-NEXT: mov %g0, %o0 +; SPARC-NEXT: .LBB2_3: +; SPARC-NEXT: mov %g0, %o0 +; SPARC-NEXT: retl +; SPARC-NEXT: mov 64, %o1 +; SPARC-NEXT: .LBB2_4: ; SPARC-NEXT: sub %g0, %o1, %o0 ; SPARC-NEXT: and %o1, %o0, %o0 ; SPARC-NEXT: smul %o0, %o2, %o0 @@ -206,33 +216,34 @@ define i64 @i64_nopoison(i64 %x) nounwind { ; SPARC-NEXT: ldub [%o3+%o0], %o1 ; SPARC-NEXT: retl ; SPARC-NEXT: mov %g0, %o0 -; SPARC-NEXT: .LBB2_3: -; SPARC-NEXT: mov 32, %o0 -; SPARC-NEXT: cmp %o1, 0 -; SPARC-NEXT: bne .LBB2_2 -; SPARC-NEXT: nop -; SPARC-NEXT: .LBB2_4: -; SPARC-NEXT: add %o0, 32, %o1 -; SPARC-NEXT: retl -; SPARC-NEXT: mov %g0, %o0 ; ; SPARC-POPC-LABEL: i64_nopoison: ; SPARC-POPC: ! %bb.0: +; SPARC-POPC-NEXT: or %o1, %o0, %o2 +; SPARC-POPC-NEXT: cmp %o2, 0 +; SPARC-POPC-NEXT: be .LBB2_3 +; SPARC-POPC-NEXT: nop +; SPARC-POPC-NEXT: ! %bb.1: ! %cond.false ; SPARC-POPC-NEXT: sethi 122669, %o2 ; SPARC-POPC-NEXT: or %o2, 305, %o2 ; SPARC-POPC-NEXT: sethi %hi(.LCPI2_0), %o3 -; SPARC-POPC-NEXT: cmp %o0, 0 -; SPARC-POPC-NEXT: be .LBB2_3 +; SPARC-POPC-NEXT: cmp %o1, 0 +; SPARC-POPC-NEXT: bne .LBB2_4 ; SPARC-POPC-NEXT: add %o3, %lo(.LCPI2_0), %o3 -; SPARC-POPC-NEXT: ! %bb.1: -; SPARC-POPC-NEXT: sub %g0, %o0, %o4 -; SPARC-POPC-NEXT: and %o0, %o4, %o0 +; SPARC-POPC-NEXT: ! %bb.2: ! 
%cond.false +; SPARC-POPC-NEXT: sub %g0, %o0, %o1 +; SPARC-POPC-NEXT: and %o0, %o1, %o0 ; SPARC-POPC-NEXT: smul %o0, %o2, %o0 ; SPARC-POPC-NEXT: srl %o0, 27, %o0 -; SPARC-POPC-NEXT: cmp %o1, 0 -; SPARC-POPC-NEXT: be .LBB2_4 ; SPARC-POPC-NEXT: ldub [%o3+%o0], %o0 -; SPARC-POPC-NEXT: .LBB2_2: +; SPARC-POPC-NEXT: add %o0, 32, %o1 +; SPARC-POPC-NEXT: retl +; SPARC-POPC-NEXT: mov %g0, %o0 +; SPARC-POPC-NEXT: .LBB2_3: +; SPARC-POPC-NEXT: mov %g0, %o0 +; SPARC-POPC-NEXT: retl +; SPARC-POPC-NEXT: mov 64, %o1 +; SPARC-POPC-NEXT: .LBB2_4: ; SPARC-POPC-NEXT: sub %g0, %o1, %o0 ; SPARC-POPC-NEXT: and %o1, %o0, %o0 ; SPARC-POPC-NEXT: smul %o0, %o2, %o0 @@ -240,15 +251,6 @@ define i64 @i64_nopoison(i64 %x) nounwind { ; SPARC-POPC-NEXT: ldub [%o3+%o0], %o1 ; SPARC-POPC-NEXT: retl ; SPARC-POPC-NEXT: mov %g0, %o0 -; SPARC-POPC-NEXT: .LBB2_3: -; SPARC-POPC-NEXT: mov 32, %o0 -; SPARC-POPC-NEXT: cmp %o1, 0 -; SPARC-POPC-NEXT: bne .LBB2_2 -; SPARC-POPC-NEXT: nop -; SPARC-POPC-NEXT: .LBB2_4: -; SPARC-POPC-NEXT: add %o0, 32, %o1 -; SPARC-POPC-NEXT: retl -; SPARC-POPC-NEXT: mov %g0, %o0 ; ; SPARC-VIS3-LABEL: i64_nopoison: ; SPARC-VIS3: ! %bb.0: diff --git a/llvm/test/CodeGen/X86/ctlo.ll b/llvm/test/CodeGen/X86/ctlo.ll index fecb62fbc5aea..752f6659948e6 100644 --- a/llvm/test/CodeGen/X86/ctlo.ll +++ b/llvm/test/CodeGen/X86/ctlo.ll @@ -285,30 +285,35 @@ define i32 @ctlo_i32_undef(i32 %x) { ret i32 %tmp2 } -define i64 @ctlo_i64(i64 %x) { +define i64 @ctlo_i64(i64 %x) nounwind { ; X86-NOCMOV-LABEL: ctlo_i64: ; X86-NOCMOV: # %bb.0: +; X86-NOCMOV-NEXT: pushl %esi ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOCMOV-NEXT: notl %ecx ; X86-NOCMOV-NEXT: notl %eax -; X86-NOCMOV-NEXT: bsrl %eax, %edx -; X86-NOCMOV-NEXT: movl $63, %eax -; X86-NOCMOV-NEXT: je .LBB6_2 -; X86-NOCMOV-NEXT: # %bb.1: -; X86-NOCMOV-NEXT: movl %edx, %eax -; X86-NOCMOV-NEXT: .LBB6_2: +; X86-NOCMOV-NEXT: xorl %edx, %edx +; X86-NOCMOV-NEXT: movl %eax, %esi +; X86-NOCMOV-NEXT: orl %ecx, %esi +; X86-NOCMOV-NEXT: je .LBB6_1 +; X86-NOCMOV-NEXT: # %bb.2: # %cond.false ; X86-NOCMOV-NEXT: testl %ecx, %ecx ; X86-NOCMOV-NEXT: jne .LBB6_3 -; X86-NOCMOV-NEXT: # %bb.4: +; X86-NOCMOV-NEXT: # %bb.4: # %cond.false +; X86-NOCMOV-NEXT: bsrl %eax, %eax ; X86-NOCMOV-NEXT: xorl $31, %eax -; X86-NOCMOV-NEXT: addl $32, %eax -; X86-NOCMOV-NEXT: xorl %edx, %edx +; X86-NOCMOV-NEXT: orl $32, %eax +; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: retl +; X86-NOCMOV-NEXT: .LBB6_1: +; X86-NOCMOV-NEXT: movl $64, %eax +; X86-NOCMOV-NEXT: popl %esi ; X86-NOCMOV-NEXT: retl ; X86-NOCMOV-NEXT: .LBB6_3: ; X86-NOCMOV-NEXT: bsrl %ecx, %eax ; X86-NOCMOV-NEXT: xorl $31, %eax -; X86-NOCMOV-NEXT: xorl %edx, %edx +; X86-NOCMOV-NEXT: popl %esi ; X86-NOCMOV-NEXT: retl ; ; X86-CMOV-LABEL: ctlo_i64: diff --git a/llvm/test/CodeGen/X86/ctlz.ll b/llvm/test/CodeGen/X86/ctlz.ll index 0eabfeae853f7..1267fe9033454 100644 --- a/llvm/test/CodeGen/X86/ctlz.ll +++ b/llvm/test/CodeGen/X86/ctlz.ll @@ -399,27 +399,33 @@ define i32 @ctlz_i32_zero_test(i32 %n) { } ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow. 
-define i64 @ctlz_i64_zero_test(i64 %n) { +define i64 @ctlz_i64_zero_test(i64 %n) nounwind { ; X86-NOCMOV-LABEL: ctlz_i64_zero_test: ; X86-NOCMOV: # %bb.0: +; X86-NOCMOV-NEXT: pushl %esi +; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOCMOV-NEXT: bsrl {{[0-9]+}}(%esp), %edx -; X86-NOCMOV-NEXT: movl $63, %eax -; X86-NOCMOV-NEXT: je .LBB7_2 -; X86-NOCMOV-NEXT: # %bb.1: -; X86-NOCMOV-NEXT: movl %edx, %eax -; X86-NOCMOV-NEXT: .LBB7_2: -; X86-NOCMOV-NEXT: testl %ecx, %ecx +; X86-NOCMOV-NEXT: xorl %edx, %edx +; X86-NOCMOV-NEXT: movl %ecx, %esi +; X86-NOCMOV-NEXT: orl %eax, %esi +; X86-NOCMOV-NEXT: je .LBB7_1 +; X86-NOCMOV-NEXT: # %bb.2: # %cond.false +; X86-NOCMOV-NEXT: testl %eax, %eax ; X86-NOCMOV-NEXT: jne .LBB7_3 -; X86-NOCMOV-NEXT: # %bb.4: +; X86-NOCMOV-NEXT: # %bb.4: # %cond.false +; X86-NOCMOV-NEXT: bsrl %ecx, %eax ; X86-NOCMOV-NEXT: xorl $31, %eax -; X86-NOCMOV-NEXT: addl $32, %eax -; X86-NOCMOV-NEXT: xorl %edx, %edx +; X86-NOCMOV-NEXT: orl $32, %eax +; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: retl +; X86-NOCMOV-NEXT: .LBB7_1: +; X86-NOCMOV-NEXT: movl $64, %eax +; X86-NOCMOV-NEXT: popl %esi ; X86-NOCMOV-NEXT: retl ; X86-NOCMOV-NEXT: .LBB7_3: -; X86-NOCMOV-NEXT: bsrl %ecx, %eax +; X86-NOCMOV-NEXT: bsrl %eax, %eax ; X86-NOCMOV-NEXT: xorl $31, %eax -; X86-NOCMOV-NEXT: xorl %edx, %edx +; X86-NOCMOV-NEXT: popl %esi ; X86-NOCMOV-NEXT: retl ; ; X86-CMOV-LABEL: ctlz_i64_zero_test: diff --git a/llvm/test/CodeGen/X86/cttz.ll b/llvm/test/CodeGen/X86/cttz.ll index db949827af007..c95b7bd7f131a 100644 --- a/llvm/test/CodeGen/X86/cttz.ll +++ b/llvm/test/CodeGen/X86/cttz.ll @@ -352,26 +352,31 @@ define i32 @cttz_i32_zero_test(i32 %n) { } ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow. 
-define i64 @cttz_i64_zero_test(i64 %n) { +define i64 @cttz_i64_zero_test(i64 %n) nounwind { ; X86-NOCMOV-LABEL: cttz_i64_zero_test: ; X86-NOCMOV: # %bb.0: +; X86-NOCMOV-NEXT: pushl %esi +; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOCMOV-NOT: rep -; X86-NOCMOV-NEXT: bsfl {{[0-9]+}}(%esp), %edx -; X86-NOCMOV-NEXT: movl $32, %eax -; X86-NOCMOV-NEXT: je .LBB7_2 -; X86-NOCMOV-NEXT: # %bb.1: -; X86-NOCMOV-NEXT: movl %edx, %eax -; X86-NOCMOV-NEXT: .LBB7_2: +; X86-NOCMOV-NEXT: xorl %edx, %edx +; X86-NOCMOV-NEXT: movl %ecx, %esi +; X86-NOCMOV-NEXT: orl %eax, %esi +; X86-NOCMOV-NEXT: je .LBB7_1 +; X86-NOCMOV-NEXT: # %bb.2: # %cond.false ; X86-NOCMOV-NEXT: testl %ecx, %ecx ; X86-NOCMOV-NEXT: jne .LBB7_3 -; X86-NOCMOV-NEXT: # %bb.4: +; X86-NOCMOV-NEXT: # %bb.4: # %cond.false +; X86-NOCMOV-NEXT: rep bsfl %eax, %eax ; X86-NOCMOV-NEXT: addl $32, %eax -; X86-NOCMOV-NEXT: xorl %edx, %edx +; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: retl +; X86-NOCMOV-NEXT: .LBB7_1: +; X86-NOCMOV-NEXT: movl $64, %eax +; X86-NOCMOV-NEXT: popl %esi ; X86-NOCMOV-NEXT: retl ; X86-NOCMOV-NEXT: .LBB7_3: ; X86-NOCMOV-NEXT: rep bsfl %ecx, %eax -; X86-NOCMOV-NEXT: xorl %edx, %edx +; X86-NOCMOV-NEXT: popl %esi ; X86-NOCMOV-NEXT: retl ; ; X86-CMOV-LABEL: cttz_i64_zero_test: diff --git a/llvm/test/CodeGen/X86/lzcnt-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-cmp.ll deleted file mode 100644 index 4f65739cc70dd..0000000000000 --- a/llvm/test/CodeGen/X86/lzcnt-cmp.ll +++ /dev/null @@ -1,188 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86,X86-BSR -; RUN: llc < %s -mtriple=i686-- -mattr=+lzcnt,+cmov | FileCheck %s --check-prefixes=X86,X86-LZCNT -; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64,X64-BSR -; RUN: llc < %s -mtriple=x86_64-- -mattr=+lzcnt | FileCheck %s --check-prefixes=X64,X64-LZCNT - -define i1 @lshr_ctlz_cmpeq_one_i64(i64 %in) nounwind { -; X86-LABEL: lshr_ctlz_cmpeq_one_i64: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: sete %al -; X86-NEXT: retl -; -; X64-LABEL: lshr_ctlz_cmpeq_one_i64: -; X64: # %bb.0: -; X64-NEXT: testq %rdi, %rdi -; X64-NEXT: sete %al -; X64-NEXT: retq - %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 0) - %lshr = lshr i64 %ctlz, 6 - %icmp = icmp eq i64 %lshr, 1 - ret i1 %icmp -} - -define i1 @lshr_ctlz_undef_cmpeq_one_i64(i64 %in) nounwind { -; X86-BSR-LABEL: lshr_ctlz_undef_cmpeq_one_i64: -; X86-BSR: # %bb.0: -; X86-BSR-NEXT: xorl %eax, %eax -; X86-BSR-NEXT: retl -; -; X86-LZCNT-LABEL: lshr_ctlz_undef_cmpeq_one_i64: -; X86-LZCNT: # %bb.0: -; X86-LZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %ecx -; X86-LZCNT-NEXT: addl $32, %ecx -; X86-LZCNT-NEXT: xorl %eax, %eax -; X86-LZCNT-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X86-LZCNT-NEXT: cmovel %ecx, %eax -; X86-LZCNT-NEXT: shrl $6, %eax -; X86-LZCNT-NEXT: # kill: def $al killed $al killed $eax -; X86-LZCNT-NEXT: retl -; -; X64-BSR-LABEL: lshr_ctlz_undef_cmpeq_one_i64: -; X64-BSR: # %bb.0: -; X64-BSR-NEXT: xorl %eax, %eax -; X64-BSR-NEXT: retq -; -; X64-LZCNT-LABEL: lshr_ctlz_undef_cmpeq_one_i64: -; X64-LZCNT: # %bb.0: -; X64-LZCNT-NEXT: lzcntq %rdi, %rax -; X64-LZCNT-NEXT: shrl $6, %eax -; X64-LZCNT-NEXT: # kill: def $al killed $al killed $rax -; X64-LZCNT-NEXT: retq - %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1) - %lshr = lshr i64 %ctlz, 6 - %icmp = icmp eq i64 %lshr, 1 - ret i1 %icmp -} - -define i1 
@lshr_ctlz_cmpne_zero_i64(i64 %in) nounwind { -; X86-LABEL: lshr_ctlz_cmpne_zero_i64: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: sete %al -; X86-NEXT: retl -; -; X64-LABEL: lshr_ctlz_cmpne_zero_i64: -; X64: # %bb.0: -; X64-NEXT: testq %rdi, %rdi -; X64-NEXT: sete %al -; X64-NEXT: retq - %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 0) - %lshr = lshr i64 %ctlz, 6 - %icmp = icmp ne i64 %lshr, 0 - ret i1 %icmp -} - -define i1 @lshr_ctlz_undef_cmpne_zero_i64(i64 %in) nounwind { -; X86-BSR-LABEL: lshr_ctlz_undef_cmpne_zero_i64: -; X86-BSR: # %bb.0: -; X86-BSR-NEXT: xorl %eax, %eax -; X86-BSR-NEXT: retl -; -; X86-LZCNT-LABEL: lshr_ctlz_undef_cmpne_zero_i64: -; X86-LZCNT: # %bb.0: -; X86-LZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %ecx -; X86-LZCNT-NEXT: addl $32, %ecx -; X86-LZCNT-NEXT: xorl %eax, %eax -; X86-LZCNT-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X86-LZCNT-NEXT: cmovel %ecx, %eax -; X86-LZCNT-NEXT: shrl $6, %eax -; X86-LZCNT-NEXT: # kill: def $al killed $al killed $eax -; X86-LZCNT-NEXT: retl -; -; X64-BSR-LABEL: lshr_ctlz_undef_cmpne_zero_i64: -; X64-BSR: # %bb.0: -; X64-BSR-NEXT: xorl %eax, %eax -; X64-BSR-NEXT: retq -; -; X64-LZCNT-LABEL: lshr_ctlz_undef_cmpne_zero_i64: -; X64-LZCNT: # %bb.0: -; X64-LZCNT-NEXT: lzcntq %rdi, %rax -; X64-LZCNT-NEXT: shrl $6, %eax -; X64-LZCNT-NEXT: # kill: def $al killed $al killed $rax -; X64-LZCNT-NEXT: retq - %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1) - %lshr = lshr i64 %ctlz, 6 - %icmp = icmp ne i64 %lshr, 0 - ret i1 %icmp -} - -define <2 x i64> @lshr_ctlz_cmpeq_zero_v2i64(<2 x i64> %in) nounwind { -; X86-LABEL: lshr_ctlz_cmpeq_zero_v2i64: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %edx -; X86-NEXT: setne %cl -; X86-NEXT: negl %ecx -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: orl {{[0-9]+}}(%esp), %esi -; X86-NEXT: setne %dl -; X86-NEXT: negl %edx -; X86-NEXT: movl %edx, 12(%eax) -; X86-NEXT: movl %edx, 8(%eax) -; X86-NEXT: movl %ecx, 4(%eax) -; X86-NEXT: movl %ecx, (%eax) -; X86-NEXT: popl %esi -; X86-NEXT: retl $4 -; -; X64-LABEL: lshr_ctlz_cmpeq_zero_v2i64: -; X64: # %bb.0: -; X64-NEXT: pxor %xmm1, %xmm1 -; X64-NEXT: pcmpeqd %xmm1, %xmm0 -; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X64-NEXT: pand %xmm1, %xmm0 -; X64-NEXT: pcmpeqd %xmm1, %xmm1 -; X64-NEXT: pxor %xmm1, %xmm0 -; X64-NEXT: retq - %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %in, i1 0) - %lshr = lshr <2 x i64> %ctlz, - %icmp = icmp eq <2 x i64> %lshr, zeroinitializer - %sext = sext <2 x i1> %icmp to <2 x i64> - ret <2 x i64> %sext -} - -define <2 x i64> @lshr_ctlz_cmpne_zero_v2i64(<2 x i64> %in) nounwind { -; X86-LABEL: lshr_ctlz_cmpne_zero_v2i64: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %edx -; X86-NEXT: sete %cl -; X86-NEXT: negl %ecx -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: orl {{[0-9]+}}(%esp), %esi -; X86-NEXT: sete %dl -; X86-NEXT: negl %edx -; X86-NEXT: movl %edx, 12(%eax) -; X86-NEXT: movl %edx, 8(%eax) -; X86-NEXT: movl %ecx, 4(%eax) -; X86-NEXT: movl %ecx, (%eax) -; X86-NEXT: popl %esi -; X86-NEXT: retl $4 -; -; X64-LABEL: lshr_ctlz_cmpne_zero_v2i64: -; X64: # %bb.0: -; X64-NEXT: pxor %xmm1, %xmm1 -; X64-NEXT: pcmpeqd %xmm1, %xmm0 -; 
X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X64-NEXT: pand %xmm1, %xmm0 -; X64-NEXT: retq - %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %in, i1 0) - %lshr = lshr <2 x i64> %ctlz, - %icmp = icmp ne <2 x i64> %lshr, zeroinitializer - %sext = sext <2 x i1> %icmp to <2 x i64> - ret <2 x i64> %sext -} - -declare i64 @llvm.ctlz.i64(i64, i1) -declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
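; For reference, the %cond.false/%cond.end block labels and the constant 64 that the
; updated checks above look for come from CodeGenPrepare's despeculateCountZeros, which
; after this change also fires for scalar types wider than the largest legal integer
; (e.g. i64 on these 32-bit targets). A minimal sketch of that despeculated IR shape for
; the cttz case follows; the function and value names are illustrative, not taken
; verbatim from the pass output.

define i64 @cttz_i64_despeculated(i64 %a) {
entry:
  ; Branch around the intrinsic so a count instruction that is slow or undefined
  ; on a zero input never sees one.
  %cmpz = icmp eq i64 %a, 0
  br i1 %cmpz, label %cond.end, label %cond.false

cond.false:
  ; The zero case is handled explicitly, so the is_zero_poison flag can be true here.
  %ctz = call i64 @llvm.cttz.i64(i64 %a, i1 true)
  br label %cond.end

cond.end:
  ; Yield the bit width (64) for a zero input, otherwise the computed count.
  %res = phi i64 [ 64, %entry ], [ %ctz, %cond.false ]
  ret i64 %res
}

declare i64 @llvm.cttz.i64(i64, i1)

; The new blocks that materialize 64 in the RISC-V, SPARC, and X86 checks above
; (e.g. "li a0, 64", "mov 64, %o1", "movl $64, %eax") are the lowering of the
; phi's zero-input arm in this sketch.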