diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 56194e2614af2..585365faf6d48 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1429,8 +1429,8 @@ bool TargetLowering::SimplifyDemandedBits(
       // Do not increment Depth here; that can cause an infinite loop.
       KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
       // If the LHS already has zeros where RHSC does, this 'and' is dead.
-      if ((LHSKnown.Zero & DemandedBits) ==
-          (~RHSC->getAPIntValue() & DemandedBits))
+
+      if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSC->getAPIntValue()))
         return TLO.CombineTo(Op, Op0);
 
       // If any of the set bits in the RHS are known zero on the LHS, shrink
diff --git a/llvm/test/CodeGen/AMDGPU/fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
index 7a81af5243ee0..054712c1ec139 100644
--- a/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
@@ -668,7 +668,7 @@ define amdgpu_kernel void @v_extract_fabs_no_fold_v2f16(ptr addrspace(1) %in) #0
 ; CI-NEXT: flat_load_dword v0, v[0:1]
 ; CI-NEXT: s_waitcnt vmcnt(0)
 ; CI-NEXT: v_bfe_u32 v1, v0, 16, 15
-; CI-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; CI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
 ; CI-NEXT: flat_store_short v[0:1], v0
 ; CI-NEXT: s_waitcnt vmcnt(0)
 ; CI-NEXT: flat_store_short v[0:1], v1
diff --git a/llvm/test/CodeGen/ARM/popcnt.ll b/llvm/test/CodeGen/ARM/popcnt.ll
index edcae5e141e73..e048c815c0861 100644
--- a/llvm/test/CodeGen/ARM/popcnt.ll
+++ b/llvm/test/CodeGen/ARM/popcnt.ll
@@ -311,10 +311,12 @@ define i32 @ctpop16(i16 %x) nounwind readnone {
 ; CHECK-NEXT: and r2, r0, r1
 ; CHECK-NEXT: and r0, r1, r0, lsr #2
 ; CHECK-NEXT: add r0, r2, r0
+; CHECK-NEXT: mov r1, #15
+; CHECK-NEXT: orr r1, r1, #3840
 ; CHECK-NEXT: add r0, r0, r0, lsr #4
-; CHECK-NEXT: and r1, r0, #3840
-; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: add r0, r0, r1, lsr #8
+; CHECK-NEXT: and r0, r0, r1
+; CHECK-NEXT: add r0, r0, r0, lsr #8
+; CHECK-NEXT: and r0, r0, #255
 ; CHECK-NEXT: mov pc, lr
 %count = tail call i16 @llvm.ctpop.i16(i16 %x)
 %conv = zext i16 %count to i32
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index 161ed573c81f0..c2a5ee625ba80 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -201,9 +201,10 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
 ; LA32-NEXT: add.w $a0, $a2, $a0
 ; LA32-NEXT: srli.w $a1, $a0, 4
 ; LA32-NEXT: add.w $a0, $a0, $a1
-; LA32-NEXT: bstrpick.w $a1, $a0, 11, 8
-; LA32-NEXT: andi $a0, $a0, 15
+; LA32-NEXT: andi $a0, $a0, 3855
+; LA32-NEXT: srli.w $a1, $a0, 8
 ; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: andi $a0, $a0, 31
 ; LA32-NEXT: ret
 ;
 ; LA64-LABEL: test_ctpop_i16:
diff --git a/llvm/test/CodeGen/PowerPC/popcnt-zext.ll b/llvm/test/CodeGen/PowerPC/popcnt-zext.ll
index d79edb5fc5d37..af60b43b1892d 100644
--- a/llvm/test/CodeGen/PowerPC/popcnt-zext.ll
+++ b/llvm/test/CodeGen/PowerPC/popcnt-zext.ll
@@ -24,8 +24,9 @@ define i16 @zpop_i8_i16(i8 %x) {
 ; SLOW-NEXT: srwi 4, 3, 4
 ; SLOW-NEXT: add 3, 3, 4
 ; SLOW-NEXT: rlwinm 4, 3, 24, 28, 31
-; SLOW-NEXT: clrlwi 3, 3, 28
+; SLOW-NEXT: andi. 3, 3, 3855
 ; SLOW-NEXT: add 3, 3, 4
+; SLOW-NEXT: clrlwi 3, 3, 27
 ; SLOW-NEXT: blr
 %z = zext i8 %x to i16
 %pop = tail call i16 @llvm.ctpop.i16(i16 %z)
@@ -173,9 +174,9 @@ define i32 @popz_i16_32(i16 %x) {
 ; SLOW-NEXT: srwi 4, 3, 4
 ; SLOW-NEXT: add 3, 3, 4
 ; SLOW-NEXT: rlwinm 4, 3, 24, 28, 31
-; SLOW-NEXT: clrlwi 3, 3, 28
+; SLOW-NEXT: andi. 3, 3, 3855
 ; SLOW-NEXT: add 3, 3, 4
-; SLOW-NEXT: clrldi 3, 3, 32
+; SLOW-NEXT: clrlwi 3, 3, 27
 ; SLOW-NEXT: blr
 %pop = tail call i16 @llvm.ctpop.i16(i16 %x)
 %z = zext i16 %pop to i32
@@ -278,7 +279,7 @@ define i64 @popa_i16_i64(i16 %x) {
 ; SLOW-NEXT: srwi 4, 3, 4
 ; SLOW-NEXT: add 3, 3, 4
 ; SLOW-NEXT: rlwinm 4, 3, 24, 28, 31
-; SLOW-NEXT: clrlwi 3, 3, 28
+; SLOW-NEXT: andi. 3, 3, 3855
 ; SLOW-NEXT: add 3, 3, 4
 ; SLOW-NEXT: rlwinm 3, 3, 0, 27, 27
 ; SLOW-NEXT: blr
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index da97ac0d74237..fc1daa5f45800 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -142,13 +142,15 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 ; RV32_NOZBB-NEXT: and a1, a0, a2
 ; RV32_NOZBB-NEXT: srli a0, a0, 2
 ; RV32_NOZBB-NEXT: and a0, a0, a2
+; RV32_NOZBB-NEXT: lui a2, 1
 ; RV32_NOZBB-NEXT: add a0, a1, a0
 ; RV32_NOZBB-NEXT: srli a1, a0, 4
 ; RV32_NOZBB-NEXT: add a0, a0, a1
-; RV32_NOZBB-NEXT: andi a1, a0, 15
-; RV32_NOZBB-NEXT: slli a0, a0, 20
-; RV32_NOZBB-NEXT: srli a0, a0, 28
-; RV32_NOZBB-NEXT: add a0, a1, a0
+; RV32_NOZBB-NEXT: addi a1, a2, -241
+; RV32_NOZBB-NEXT: and a0, a0, a1
+; RV32_NOZBB-NEXT: srli a1, a0, 8
+; RV32_NOZBB-NEXT: add a0, a0, a1
+; RV32_NOZBB-NEXT: andi a0, a0, 31
 ; RV32_NOZBB-NEXT: ret
 ; RV32_NOZBB-NEXT: .LBB1_2:
 ; RV32_NOZBB-NEXT: li a0, 16
@@ -172,13 +174,15 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 ; RV64NOZBB-NEXT: and a1, a0, a2
 ; RV64NOZBB-NEXT: srli a0, a0, 2
 ; RV64NOZBB-NEXT: and a0, a0, a2
+; RV64NOZBB-NEXT: lui a2, 1
 ; RV64NOZBB-NEXT: add a0, a1, a0
 ; RV64NOZBB-NEXT: srli a1, a0, 4
 ; RV64NOZBB-NEXT: add a0, a0, a1
-; RV64NOZBB-NEXT: andi a1, a0, 15
-; RV64NOZBB-NEXT: slli a0, a0, 52
-; RV64NOZBB-NEXT: srli a0, a0, 60
-; RV64NOZBB-NEXT: add a0, a1, a0
+; RV64NOZBB-NEXT: addiw a1, a2, -241
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: srli a1, a0, 8
+; RV64NOZBB-NEXT: add a0, a0, a1
+; RV64NOZBB-NEXT: andi a0, a0, 31
 ; RV64NOZBB-NEXT: ret
 ; RV64NOZBB-NEXT: .LBB1_2:
 ; RV64NOZBB-NEXT: li a0, 16
@@ -631,13 +635,15 @@ define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind {
 ; RV32_NOZBB-NEXT: and a1, a0, a2
 ; RV32_NOZBB-NEXT: srli a0, a0, 2
 ; RV32_NOZBB-NEXT: and a0, a0, a2
+; RV32_NOZBB-NEXT: lui a2, 1
 ; RV32_NOZBB-NEXT: add a0, a1, a0
 ; RV32_NOZBB-NEXT: srli a1, a0, 4
 ; RV32_NOZBB-NEXT: add a0, a0, a1
-; RV32_NOZBB-NEXT: andi a1, a0, 15
-; RV32_NOZBB-NEXT: slli a0, a0, 20
-; RV32_NOZBB-NEXT: srli a0, a0, 28
-; RV32_NOZBB-NEXT: add a0, a1, a0
+; RV32_NOZBB-NEXT: addi a1, a2, -241
+; RV32_NOZBB-NEXT: and a0, a0, a1
+; RV32_NOZBB-NEXT: srli a1, a0, 8
+; RV32_NOZBB-NEXT: add a0, a0, a1
+; RV32_NOZBB-NEXT: andi a0, a0, 31
 ; RV32_NOZBB-NEXT: ret
 ;
 ; RV64NOZBB-LABEL: test_cttz_i16_zero_undef:
@@ -655,13 +661,15 @@ define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind {
 ; RV64NOZBB-NEXT: and a1, a0, a2
 ; RV64NOZBB-NEXT: srli a0, a0, 2
 ; RV64NOZBB-NEXT: and a0, a0, a2
+; RV64NOZBB-NEXT: lui a2, 1
 ; RV64NOZBB-NEXT: add a0, a1, a0
 ; RV64NOZBB-NEXT: srli a1, a0, 4
 ; RV64NOZBB-NEXT: add a0, a0, a1
-; RV64NOZBB-NEXT: andi a1, a0, 15
-; RV64NOZBB-NEXT: slli a0, a0, 52
-; RV64NOZBB-NEXT: srli a0, a0, 60
-; RV64NOZBB-NEXT: add a0, a1, a0
+; RV64NOZBB-NEXT: addiw a1, a2, -241
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: srli a1, a0, 8
+; RV64NOZBB-NEXT: add a0, a0, a1
+; RV64NOZBB-NEXT: andi a0, a0, 31
 ; RV64NOZBB-NEXT: ret
 ;
 ; RV32ZBB-LABEL: test_cttz_i16_zero_undef:
@@ -1073,13 +1081,15 @@ define i16 @test_ctlz_i16(i16 %a) nounwind {
 ; RV32_NOZBB-NEXT: and a1, a0, a2
 ; RV32_NOZBB-NEXT: srli a0, a0, 2
 ; RV32_NOZBB-NEXT: and a0, a0, a2
+; RV32_NOZBB-NEXT: lui a2, 1
 ; RV32_NOZBB-NEXT: add a0, a1, a0
 ; RV32_NOZBB-NEXT: srli a1, a0, 4
 ; RV32_NOZBB-NEXT: add a0, a0, a1
-; RV32_NOZBB-NEXT: andi a1, a0, 15
-; RV32_NOZBB-NEXT: slli a0, a0, 20
-; RV32_NOZBB-NEXT: srli a0, a0, 28
-; RV32_NOZBB-NEXT: add a0, a1, a0
+; RV32_NOZBB-NEXT: addi a1, a2, -241
+; RV32_NOZBB-NEXT: and a0, a0, a1
+; RV32_NOZBB-NEXT: srli a1, a0, 8
+; RV32_NOZBB-NEXT: add a0, a0, a1
+; RV32_NOZBB-NEXT: andi a0, a0, 31
 ; RV32_NOZBB-NEXT: ret
 ; RV32_NOZBB-NEXT: .LBB9_2:
 ; RV32_NOZBB-NEXT: li a0, 16
@@ -1112,13 +1122,15 @@ define i16 @test_ctlz_i16(i16 %a) nounwind {
 ; RV64NOZBB-NEXT: and a1, a0, a2
 ; RV64NOZBB-NEXT: srli a0, a0, 2
 ; RV64NOZBB-NEXT: and a0, a0, a2
+; RV64NOZBB-NEXT: lui a2, 1
 ; RV64NOZBB-NEXT: add a0, a1, a0
 ; RV64NOZBB-NEXT: srli a1, a0, 4
 ; RV64NOZBB-NEXT: add a0, a0, a1
-; RV64NOZBB-NEXT: andi a1, a0, 15
-; RV64NOZBB-NEXT: slli a0, a0, 52
-; RV64NOZBB-NEXT: srli a0, a0, 60
-; RV64NOZBB-NEXT: add a0, a1, a0
+; RV64NOZBB-NEXT: addiw a1, a2, -241
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: srli a1, a0, 8
+; RV64NOZBB-NEXT: add a0, a0, a1
+; RV64NOZBB-NEXT: andi a0, a0, 31
 ; RV64NOZBB-NEXT: ret
 ; RV64NOZBB-NEXT: .LBB9_2:
 ; RV64NOZBB-NEXT: li a0, 16
@@ -1722,13 +1734,15 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind {
 ; RV32_NOZBB-NEXT: and a1, a0, a2
 ; RV32_NOZBB-NEXT: srli a0, a0, 2
 ; RV32_NOZBB-NEXT: and a0, a0, a2
+; RV32_NOZBB-NEXT: lui a2, 1
 ; RV32_NOZBB-NEXT: add a0, a1, a0
 ; RV32_NOZBB-NEXT: srli a1, a0, 4
 ; RV32_NOZBB-NEXT: add a0, a0, a1
-; RV32_NOZBB-NEXT: andi a1, a0, 15
-; RV32_NOZBB-NEXT: slli a0, a0, 20
-; RV32_NOZBB-NEXT: srli a0, a0, 28
-; RV32_NOZBB-NEXT: add a0, a1, a0
+; RV32_NOZBB-NEXT: addi a1, a2, -241
+; RV32_NOZBB-NEXT: and a0, a0, a1
+; RV32_NOZBB-NEXT: srli a1, a0, 8
+; RV32_NOZBB-NEXT: add a0, a0, a1
+; RV32_NOZBB-NEXT: andi a0, a0, 31
 ; RV32_NOZBB-NEXT: ret
 ;
 ; RV64NOZBB-LABEL: test_ctlz_i16_zero_undef:
@@ -1756,13 +1770,15 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind {
 ; RV64NOZBB-NEXT: and a1, a0, a2
 ; RV64NOZBB-NEXT: srli a0, a0, 2
 ; RV64NOZBB-NEXT: and a0, a0, a2
+; RV64NOZBB-NEXT: lui a2, 1
 ; RV64NOZBB-NEXT: add a0, a1, a0
 ; RV64NOZBB-NEXT: srli a1, a0, 4
 ; RV64NOZBB-NEXT: add a0, a0, a1
-; RV64NOZBB-NEXT: andi a1, a0, 15
-; RV64NOZBB-NEXT: slli a0, a0, 52
-; RV64NOZBB-NEXT: srli a0, a0, 60
-; RV64NOZBB-NEXT: add a0, a1, a0
+; RV64NOZBB-NEXT: addiw a1, a2, -241
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: srli a1, a0, 8
+; RV64NOZBB-NEXT: add a0, a0, a1
+; RV64NOZBB-NEXT: andi a0, a0, 31
 ; RV64NOZBB-NEXT: ret
 ;
 ; RV32ZBB-LABEL: test_ctlz_i16_zero_undef:
@@ -2310,13 +2326,15 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
 ; RV32_NOZBB-NEXT: and a1, a0, a2
 ; RV32_NOZBB-NEXT: srli a0, a0, 2
 ; RV32_NOZBB-NEXT: and a0, a0, a2
+; RV32_NOZBB-NEXT: lui a2, 1
 ; RV32_NOZBB-NEXT: add a0, a1, a0
 ; RV32_NOZBB-NEXT: srli a1, a0, 4
 ; RV32_NOZBB-NEXT: add a0, a0, a1
-; RV32_NOZBB-NEXT: andi a1, a0, 15
-; RV32_NOZBB-NEXT: slli a0, a0, 20
-; RV32_NOZBB-NEXT: srli a0, a0, 28
-; RV32_NOZBB-NEXT: add a0, a1, a0
+; RV32_NOZBB-NEXT: addi a1, a2, -241
+; RV32_NOZBB-NEXT: and a0, a0, a1
+; RV32_NOZBB-NEXT: srli a1, a0, 8
+; RV32_NOZBB-NEXT: add a0, a0, a1
+; RV32_NOZBB-NEXT: andi a0, a0, 31
 ; RV32_NOZBB-NEXT: ret
 ;
 ; RV64NOZBB-LABEL: test_ctpop_i16:
@@ -2331,13 +2349,15 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
 ; RV64NOZBB-NEXT: and a1, a0, a2
 ; RV64NOZBB-NEXT: srli a0, a0, 2
 ; RV64NOZBB-NEXT: and a0, a0, a2
+; RV64NOZBB-NEXT: lui a2, 1
 ; RV64NOZBB-NEXT: add a0, a1, a0
 ; RV64NOZBB-NEXT: srli a1, a0, 4
 ; RV64NOZBB-NEXT: add a0, a0, a1
-; RV64NOZBB-NEXT: andi a1, a0, 15
-; RV64NOZBB-NEXT: slli a0, a0, 52
-; RV64NOZBB-NEXT: srli a0, a0, 60
-; RV64NOZBB-NEXT: add a0, a1, a0
+; RV64NOZBB-NEXT: addiw a1, a2, -241
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: srli a1, a0, 8
+; RV64NOZBB-NEXT: add a0, a0, a1
+; RV64NOZBB-NEXT: andi a0, a0, 31
 ; RV64NOZBB-NEXT: ret
 ;
 ; RV32ZBB-LABEL: test_ctpop_i16:
@@ -2364,12 +2384,15 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
 ; RV32XTHEADBB-NEXT: and a1, a0, a2
 ; RV32XTHEADBB-NEXT: srli a0, a0, 2
 ; RV32XTHEADBB-NEXT: and a0, a0, a2
+; RV32XTHEADBB-NEXT: lui a2, 1
 ; RV32XTHEADBB-NEXT: add a0, a1, a0
 ; RV32XTHEADBB-NEXT: srli a1, a0, 4
 ; RV32XTHEADBB-NEXT: add a0, a0, a1
-; RV32XTHEADBB-NEXT: th.extu a1, a0, 11, 8
-; RV32XTHEADBB-NEXT: andi a0, a0, 15
+; RV32XTHEADBB-NEXT: addi a1, a2, -241
+; RV32XTHEADBB-NEXT: and a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 8
 ; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: andi a0, a0, 31
 ; RV32XTHEADBB-NEXT: ret
 ;
 ; RV64XTHEADBB-LABEL: test_ctpop_i16:
@@ -2384,12 +2407,15 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
 ; RV64XTHEADBB-NEXT: and a1, a0, a2
 ; RV64XTHEADBB-NEXT: srli a0, a0, 2
 ; RV64XTHEADBB-NEXT: and a0, a0, a2
+; RV64XTHEADBB-NEXT: lui a2, 1
 ; RV64XTHEADBB-NEXT: add a0, a1, a0
 ; RV64XTHEADBB-NEXT: srli a1, a0, 4
 ; RV64XTHEADBB-NEXT: add a0, a0, a1
-; RV64XTHEADBB-NEXT: th.extu a1, a0, 11, 8
-; RV64XTHEADBB-NEXT: andi a0, a0, 15
+; RV64XTHEADBB-NEXT: addiw a1, a2, -241
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: srli a1, a0, 8
 ; RV64XTHEADBB-NEXT: add a0, a0, a1
+; RV64XTHEADBB-NEXT: andi a0, a0, 31
 ; RV64XTHEADBB-NEXT: ret
 %1 = call i16 @llvm.ctpop.i16(i16 %a)
 ret i16 %1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index c628a0d620498..0167d874c1242 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -2636,7 +2636,7 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
 ; RVA22U64-NEXT: or a0, a0, a5
 ; RVA22U64-NEXT: slli a6, a6, 24
 ; RVA22U64-NEXT: or a2, a2, a4
-; RVA22U64-NEXT: add.uw a2, a6, a2
+; RVA22U64-NEXT: or a2, a6, a2
 ; RVA22U64-NEXT: or a0, a0, a1
 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RVA22U64-NEXT: vmv.v.x v8, a2
diff --git a/llvm/test/CodeGen/RISCV/shl-cttz.ll b/llvm/test/CodeGen/RISCV/shl-cttz.ll
index 500673cc29196..3854f02bd8244 100644
--- a/llvm/test/CodeGen/RISCV/shl-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/shl-cttz.ll
@@ -136,13 +136,14 @@ define i16 @shl_cttz_i16(i16 %x, i16 %y) {
 ; RV32I-NEXT: and a2, a1, a3
 ; RV32I-NEXT: srli a1, a1, 2
 ; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: lui a3, 1
 ; RV32I-NEXT: add a1, a2, a1
 ; RV32I-NEXT: srli a2, a1, 4
 ; RV32I-NEXT: add a1, a1, a2
-; RV32I-NEXT: andi a2, a1, 15
-; RV32I-NEXT: slli a1, a1, 20
-; RV32I-NEXT: srli a1, a1, 28
-; RV32I-NEXT: add a1, a2, a1
+; RV32I-NEXT: addi a2, a3, -241
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: srli a2, a1, 8
+; RV32I-NEXT: add a1, a1, a2
 ; RV32I-NEXT: sll a0, a0, a1
 ; RV32I-NEXT: ret
 ;
@@ -167,13 +168,14 @@ define i16 @shl_cttz_i16(i16 %x, i16 %y) {
 ; RV64I-NEXT: and a2, a1, a3
 ; RV64I-NEXT: srli a1, a1, 2
 ; RV64I-NEXT: and a1, a1, a3
+; RV64I-NEXT: lui a3, 1
 ; RV64I-NEXT: add a1, a2, a1
 ; RV64I-NEXT: srli a2, a1, 4
 ; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: andi a2, a1, 15
-; RV64I-NEXT: slli a1, a1, 52
-; RV64I-NEXT: srli a1, a1, 60
-; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: addiw a2, a3, -241
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: srli a2, a1, 8
+; RV64I-NEXT: add a1, a1, a2
 ; RV64I-NEXT: sll a0, a0, a1
 ; RV64I-NEXT: ret
 ;
@@ -204,13 +206,14 @@ define i16 @shl_cttz_constant_i16(i16 %y) {
 ; RV32I-NEXT: and a1, a0, a2
 ; RV32I-NEXT: srli a0, a0, 2
 ; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: lui a2, 1
+; RV32I-NEXT: addi a2, a2, -241
 ; RV32I-NEXT: add a0, a1, a0
 ; RV32I-NEXT: srli a1, a0, 4
 ; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: andi a1, a0, 15
-; RV32I-NEXT: slli a0, a0, 20
-; RV32I-NEXT: srli a0, a0, 28
-; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
 ; RV32I-NEXT: li a1, 4
 ; RV32I-NEXT: sll a0, a1, a0
 ; RV32I-NEXT: ret
@@ -237,13 +240,14 @@ define i16 @shl_cttz_constant_i16(i16 %y) {
 ; RV64I-NEXT: and a1, a0, a2
 ; RV64I-NEXT: srli a0, a0, 2
 ; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: lui a2, 1
+; RV64I-NEXT: addiw a2, a2, -241
 ; RV64I-NEXT: add a0, a1, a0
 ; RV64I-NEXT: srli a1, a0, 4
 ; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: andi a1, a0, 15
-; RV64I-NEXT: slli a0, a0, 52
-; RV64I-NEXT: srli a0, a0, 60
-; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: srli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
 ; RV64I-NEXT: li a1, 4
 ; RV64I-NEXT: sll a0, a1, a0
 ; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/X86/pr38185.ll b/llvm/test/CodeGen/X86/pr38185.ll
index d5591c50738fa..4085c01344b28 100644
--- a/llvm/test/CodeGen/X86/pr38185.ll
+++ b/llvm/test/CodeGen/X86/pr38185.ll
@@ -14,11 +14,10 @@ define void @foo(ptr %a, ptr %b, ptr noalias %c, i64 %s) {
 ; CHECK-NEXT: # %bb.2: # %body
 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT: movl $1, (%rdx,%rax,4)
-; CHECK-NEXT: movzbl (%rdi,%rax,4), %r8d
-; CHECK-NEXT: movzbl (%rsi,%rax,4), %r9d
-; CHECK-NEXT: andl %r8d, %r9d
-; CHECK-NEXT: andl $1, %r9d
-; CHECK-NEXT: movl %r9d, (%rdi,%rax,4)
+; CHECK-NEXT: movl (%rdi,%rax,4), %r8d
+; CHECK-NEXT: andl (%rsi,%rax,4), %r8d
+; CHECK-NEXT: andl $1, %r8d
+; CHECK-NEXT: movl %r8d, (%rdi,%rax,4)
 ; CHECK-NEXT: incq %rax
 ; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT: jmp .LBB0_1
diff --git a/llvm/test/CodeGen/X86/vector-compress.ll b/llvm/test/CodeGen/X86/vector-compress.ll
index 87a948a4f1f7e..bf7497f23bef3 100644
--- a/llvm/test/CodeGen/X86/vector-compress.ll
+++ b/llvm/test/CodeGen/X86/vector-compress.ll
@@ -1814,7 +1814,7 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8>
 ; AVX2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; AVX2-NEXT: movl %ecx, %r13d
 ; AVX2-NEXT: movl %edx, %r15d
-; AVX2-NEXT: movl %esi, %ebx
+; AVX2-NEXT: movl %esi, %r14d
 ; AVX2-NEXT: # kill: def $edi killed $edi def $rdi
 ; AVX2-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; AVX2-NEXT: movl 360(%rbp), %eax
@@ -1932,8 +1932,8 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8>
 ; AVX2-NEXT: vpinsrb $10, %r10d, %xmm6, %xmm6
 ; AVX2-NEXT: movl 56(%rbp), %r11d
 ; AVX2-NEXT: vpinsrb $11, %r11d, %xmm6, %xmm6
-; AVX2-NEXT: movl 64(%rbp), %r14d
-; AVX2-NEXT: vpinsrb $12, %r14d, %xmm6, %xmm6
+; AVX2-NEXT: movl 64(%rbp), %ebx
+; AVX2-NEXT: vpinsrb $12, %ebx, %xmm6, %xmm6
 ; AVX2-NEXT: movl 72(%rbp), %r12d
 ; AVX2-NEXT: vpinsrb $13, %r12d, %xmm6, %xmm6
 ; AVX2-NEXT: movl 80(%rbp), %eax
@@ -1988,20 +1988,20 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8>
 ; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; AVX2-NEXT: andl $1, %eax
 ; AVX2-NEXT: vpextrb $1, %xmm0, (%rsp,%rax)
-; AVX2-NEXT: andl $1, %ebx
-; AVX2-NEXT: addq %rax, %rbx
-; AVX2-NEXT: vpextrb $2, %xmm0, (%rsp,%rbx)
+; AVX2-NEXT: andl $1, %r14d
+; AVX2-NEXT: addq %rax, %r14
+; AVX2-NEXT: vpextrb $2, %xmm0, (%rsp,%r14)
 ; AVX2-NEXT: andl $1, %r15d
-; AVX2-NEXT: addq %rbx, %r15
+; AVX2-NEXT: addq %r14, %r15
 ; AVX2-NEXT: vpextrb $3, %xmm0, (%rsp,%r15)
 ; AVX2-NEXT: andl $1, %r13d
 ; AVX2-NEXT: addq %r15, %r13
 ; AVX2-NEXT: vpextrb $4, %xmm0, (%rsp,%r13)
-; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: addq %r13, %rcx
-; AVX2-NEXT: movl %ecx, %eax
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: addq %r13, %rax
 ; AVX2-NEXT: vpextrb $5, %xmm0, (%rsp,%rax)
+; AVX2-NEXT: movq %rax, %rcx
 ; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; AVX2-NEXT: andl $1, %eax
 ; AVX2-NEXT: addq %rcx, %rax
@@ -2035,16 +2035,16 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8>
 ; AVX2-NEXT: # kill: def $r10d killed $r10d killed $r10 def $r10
 ; AVX2-NEXT: andl $63, %r10d
 ; AVX2-NEXT: vpextrb $11, %xmm0, (%rsp,%r10)
-; AVX2-NEXT: andl $1, %r14d
-; AVX2-NEXT: addq %r11, %r14
+; AVX2-NEXT: andl $1, %ebx
+; AVX2-NEXT: addq %r11, %rbx
 ; AVX2-NEXT: # kill: def $r11d killed $r11d killed $r11 def $r11
 ; AVX2-NEXT: andl $63, %r11d
 ; AVX2-NEXT: vpextrb $12, %xmm0, (%rsp,%r11)
 ; AVX2-NEXT: andl $1, %r12d
-; AVX2-NEXT: addq %r14, %r12
-; AVX2-NEXT: # kill: def $r14d killed $r14d killed $r14 def $r14
-; AVX2-NEXT: andl $63, %r14d
-; AVX2-NEXT: vpextrb $13, %xmm0, (%rsp,%r14)
+; AVX2-NEXT: addq %rbx, %r12
+; AVX2-NEXT: # kill: def $ebx killed $ebx killed $rbx def $rbx
+; AVX2-NEXT: andl $63, %ebx
+; AVX2-NEXT: vpextrb $13, %xmm0, (%rsp,%rbx)
 ; AVX2-NEXT: movl 80(%rbp), %eax
 ; AVX2-NEXT: andl $1, %eax
 ; AVX2-NEXT: addq %r12, %rax
diff --git a/llvm/test/CodeGen/X86/vector-shift-lut.ll b/llvm/test/CodeGen/X86/vector-shift-lut.ll
index 0bf2006090893..c6e7b18dfed6f 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lut.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lut.ll
@@ -1155,7 +1155,6 @@ define <32 x i8> @perlane_lshr_v32i8(<32 x i8> %a) nounwind {
 ; SSE2-NEXT: pandn %xmm3, %xmm5
 ; SSE2-NEXT: psrlw $2, %xmm3
 ; SSE2-NEXT: pand %xmm1, %xmm3
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
 ; SSE2-NEXT: por %xmm5, %xmm3
 ; SSE2-NEXT: paddb %xmm4, %xmm4
 ; SSE2-NEXT: pxor %xmm1, %xmm1
@@ -1518,26 +1517,24 @@ define <64 x i8> @perlane_lshr_v64i8(<64 x i8> %a) nounwind {
 ; SSE2-NEXT: movdqa %xmm1, %xmm6
 ; SSE2-NEXT: paddb %xmm1, %xmm6
 ; SSE2-NEXT: pxor %xmm4, %xmm4
-; SSE2-NEXT: pxor %xmm8, %xmm8
-; SSE2-NEXT: pcmpgtb %xmm6, %xmm8
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtb %xmm6, %xmm1
 ; SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; SSE2-NEXT: movdqa %xmm8, %xmm1
-; SSE2-NEXT: pandn %xmm2, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm7
+; SSE2-NEXT: pandn %xmm2, %xmm7
 ; SSE2-NEXT: psrlw $2, %xmm2
-; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
-; SSE2-NEXT: pand %xmm7, %xmm8
-; SSE2-NEXT: pand %xmm2, %xmm8
-; SSE2-NEXT: por %xmm1, %xmm8
+; SSE2-NEXT: pand %xmm1, %xmm2
+; SSE2-NEXT: por %xmm7, %xmm2
 ; SSE2-NEXT: paddb %xmm6, %xmm6
 ; SSE2-NEXT: pxor %xmm1, %xmm1
 ; SSE2-NEXT: pcmpgtb %xmm6, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm8, %xmm2
-; SSE2-NEXT: psrlw $1, %xmm8
+; SSE2-NEXT: movdqa %xmm1, %xmm7
+; SSE2-NEXT: pandn %xmm2, %xmm7
+; SSE2-NEXT: psrlw $1, %xmm2
 ; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
 ; SSE2-NEXT: pand %xmm6, %xmm1
-; SSE2-NEXT: pand %xmm8, %xmm1
-; SSE2-NEXT: por %xmm2, %xmm1
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: por %xmm7, %xmm1
 ; SSE2-NEXT: psllw $5, %xmm5
 ; SSE2-NEXT: pxor %xmm2, %xmm2
 ; SSE2-NEXT: pcmpgtb %xmm5, %xmm2
@@ -1548,6 +1545,7 @@ define <64 x i8> @perlane_lshr_v64i8(<64 x i8> %a) nounwind {
 ; SSE2-NEXT: movdqa %xmm8, %xmm9
 ; SSE2-NEXT: pandn %xmm2, %xmm9
 ; SSE2-NEXT: psrlw $2, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
 ; SSE2-NEXT: pand %xmm7, %xmm8
 ; SSE2-NEXT: pand %xmm2, %xmm8
 ; SSE2-NEXT: por %xmm9, %xmm8
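
Not part of the patch itself: a minimal standalone sketch of why the new subset check fires in cases the old equality missed, using plain uint64_t masks in place of APInt/KnownBits. The names LHSKnownZero, RHSC and DemandedBits mirror the variables in the TargetLowering.cpp hunk above; the example values are made up for illustration.

#include <cstdint>
#include <cstdio>

// Old condition: among the demanded bits, the known-zero bits of the LHS must
// exactly equal the bits the AND mask clears.
static bool oldAndIsDead(uint64_t LHSKnownZero, uint64_t RHSC, uint64_t DemandedBits) {
  return (LHSKnownZero & DemandedBits) == (~RHSC & DemandedBits);
}

// New condition: every demanded bit is either already known zero on the LHS or
// kept (set) by the mask, i.e. DemandedBits is a subset of (LHSKnownZero | RHSC).
static bool newAndIsDead(uint64_t LHSKnownZero, uint64_t RHSC, uint64_t DemandedBits) {
  return (DemandedBits & ~(LHSKnownZero | RHSC)) == 0;
}

int main() {
  // Bits 0-4 are demanded, the mask keeps bits 0-3 and clears bit 4, and the
  // LHS is known to have bits 3 and 4 zero. The AND cannot change any demanded
  // bit, yet the old equality fails because bit 3 is known zero *and* kept.
  uint64_t DemandedBits = 0x1f;
  uint64_t RHSC = 0x0f;
  uint64_t LHSKnownZero = 0x18;
  printf("old=%d new=%d\n", oldAndIsDead(LHSKnownZero, RHSC, DemandedBits),
         newAndIsDead(LHSKnownZero, RHSC, DemandedBits)); // prints old=0 new=1
  return 0;
}

This is the pattern the regenerated popcount tests exercise: after the nibble sums, the high bits of the value are already known zero, so masks such as 0x0f0f no longer need to be applied exactly and the 'and' can be dropped or shrunk.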