Allow more dead AND operations to be eliminated #123686
Conversation
Signed-off-by: John Lu <[email protected]>
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-backend-powerpc

Author: None (LU-JOHN)

Changes

Previous code removed dead AND operations of the form (AND rX, b11001100) only when LHSKnown.Zero == b00110011, i.e. LHSKnown.Zero had to equal exactly the zero bits of the constant operand. This is unnecessarily restrictive; the check now only requires that LHSKnown.Zero contain the zero bits of the constant operand.

Patch is 23.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123686.diff

11 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 56194e2614af2d..585365faf6d486 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1429,8 +1429,8 @@ bool TargetLowering::SimplifyDemandedBits(
// Do not increment Depth here; that can cause an infinite loop.
KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
// If the LHS already has zeros where RHSC does, this 'and' is dead.
- if ((LHSKnown.Zero & DemandedBits) ==
- (~RHSC->getAPIntValue() & DemandedBits))
+
+ if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSC->getAPIntValue()))
return TLO.CombineTo(Op, Op0);
// If any of the set bits in the RHS are known zero on the LHS, shrink
diff --git a/llvm/test/CodeGen/AMDGPU/fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
index 7a81af5243ee07..054712c1ec1393 100644
--- a/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
@@ -668,7 +668,7 @@ define amdgpu_kernel void @v_extract_fabs_no_fold_v2f16(ptr addrspace(1) %in) #0
; CI-NEXT: flat_load_dword v0, v[0:1]
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_bfe_u32 v1, v0, 16, 15
-; CI-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; CI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
; CI-NEXT: flat_store_short v[0:1], v0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: flat_store_short v[0:1], v1
diff --git a/llvm/test/CodeGen/ARM/popcnt.ll b/llvm/test/CodeGen/ARM/popcnt.ll
index edcae5e141e73a..e048c815c08614 100644
--- a/llvm/test/CodeGen/ARM/popcnt.ll
+++ b/llvm/test/CodeGen/ARM/popcnt.ll
@@ -311,10 +311,12 @@ define i32 @ctpop16(i16 %x) nounwind readnone {
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, lsr #2
; CHECK-NEXT: add r0, r2, r0
+; CHECK-NEXT: mov r1, #15
+; CHECK-NEXT: orr r1, r1, #3840
; CHECK-NEXT: add r0, r0, r0, lsr #4
-; CHECK-NEXT: and r1, r0, #3840
-; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: add r0, r0, r1, lsr #8
+; CHECK-NEXT: and r0, r0, r1
+; CHECK-NEXT: add r0, r0, r0, lsr #8
+; CHECK-NEXT: and r0, r0, #255
; CHECK-NEXT: mov pc, lr
%count = tail call i16 @llvm.ctpop.i16(i16 %x)
%conv = zext i16 %count to i32
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index 161ed573c81f02..c2a5ee625ba808 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -201,9 +201,10 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
; LA32-NEXT: add.w $a0, $a2, $a0
; LA32-NEXT: srli.w $a1, $a0, 4
; LA32-NEXT: add.w $a0, $a0, $a1
-; LA32-NEXT: bstrpick.w $a1, $a0, 11, 8
-; LA32-NEXT: andi $a0, $a0, 15
+; LA32-NEXT: andi $a0, $a0, 3855
+; LA32-NEXT: srli.w $a1, $a0, 8
; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: andi $a0, $a0, 31
; LA32-NEXT: ret
;
; LA64-LABEL: test_ctpop_i16:
diff --git a/llvm/test/CodeGen/PowerPC/popcnt-zext.ll b/llvm/test/CodeGen/PowerPC/popcnt-zext.ll
index d79edb5fc5d37f..af60b43b1892d5 100644
--- a/llvm/test/CodeGen/PowerPC/popcnt-zext.ll
+++ b/llvm/test/CodeGen/PowerPC/popcnt-zext.ll
@@ -24,8 +24,9 @@ define i16 @zpop_i8_i16(i8 %x) {
; SLOW-NEXT: srwi 4, 3, 4
; SLOW-NEXT: add 3, 3, 4
; SLOW-NEXT: rlwinm 4, 3, 24, 28, 31
-; SLOW-NEXT: clrlwi 3, 3, 28
+; SLOW-NEXT: andi. 3, 3, 3855
; SLOW-NEXT: add 3, 3, 4
+; SLOW-NEXT: clrlwi 3, 3, 27
; SLOW-NEXT: blr
%z = zext i8 %x to i16
%pop = tail call i16 @llvm.ctpop.i16(i16 %z)
@@ -173,9 +174,9 @@ define i32 @popz_i16_32(i16 %x) {
; SLOW-NEXT: srwi 4, 3, 4
; SLOW-NEXT: add 3, 3, 4
; SLOW-NEXT: rlwinm 4, 3, 24, 28, 31
-; SLOW-NEXT: clrlwi 3, 3, 28
+; SLOW-NEXT: andi. 3, 3, 3855
; SLOW-NEXT: add 3, 3, 4
-; SLOW-NEXT: clrldi 3, 3, 32
+; SLOW-NEXT: clrlwi 3, 3, 27
; SLOW-NEXT: blr
%pop = tail call i16 @llvm.ctpop.i16(i16 %x)
%z = zext i16 %pop to i32
@@ -278,7 +279,7 @@ define i64 @popa_i16_i64(i16 %x) {
; SLOW-NEXT: srwi 4, 3, 4
; SLOW-NEXT: add 3, 3, 4
; SLOW-NEXT: rlwinm 4, 3, 24, 28, 31
-; SLOW-NEXT: clrlwi 3, 3, 28
+; SLOW-NEXT: andi. 3, 3, 3855
; SLOW-NEXT: add 3, 3, 4
; SLOW-NEXT: rlwinm 3, 3, 0, 27, 27
; SLOW-NEXT: blr
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index da97ac0d742379..fc1daa5f458006 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -142,13 +142,15 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
; RV32_NOZBB-NEXT: and a1, a0, a2
; RV32_NOZBB-NEXT: srli a0, a0, 2
; RV32_NOZBB-NEXT: and a0, a0, a2
+; RV32_NOZBB-NEXT: lui a2, 1
; RV32_NOZBB-NEXT: add a0, a1, a0
; RV32_NOZBB-NEXT: srli a1, a0, 4
; RV32_NOZBB-NEXT: add a0, a0, a1
-; RV32_NOZBB-NEXT: andi a1, a0, 15
-; RV32_NOZBB-NEXT: slli a0, a0, 20
-; RV32_NOZBB-NEXT: srli a0, a0, 28
-; RV32_NOZBB-NEXT: add a0, a1, a0
+; RV32_NOZBB-NEXT: addi a1, a2, -241
+; RV32_NOZBB-NEXT: and a0, a0, a1
+; RV32_NOZBB-NEXT: srli a1, a0, 8
+; RV32_NOZBB-NEXT: add a0, a0, a1
+; RV32_NOZBB-NEXT: andi a0, a0, 31
; RV32_NOZBB-NEXT: ret
; RV32_NOZBB-NEXT: .LBB1_2:
; RV32_NOZBB-NEXT: li a0, 16
@@ -172,13 +174,15 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
; RV64NOZBB-NEXT: and a1, a0, a2
; RV64NOZBB-NEXT: srli a0, a0, 2
; RV64NOZBB-NEXT: and a0, a0, a2
+; RV64NOZBB-NEXT: lui a2, 1
; RV64NOZBB-NEXT: add a0, a1, a0
; RV64NOZBB-NEXT: srli a1, a0, 4
; RV64NOZBB-NEXT: add a0, a0, a1
-; RV64NOZBB-NEXT: andi a1, a0, 15
-; RV64NOZBB-NEXT: slli a0, a0, 52
-; RV64NOZBB-NEXT: srli a0, a0, 60
-; RV64NOZBB-NEXT: add a0, a1, a0
+; RV64NOZBB-NEXT: addiw a1, a2, -241
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: srli a1, a0, 8
+; RV64NOZBB-NEXT: add a0, a0, a1
+; RV64NOZBB-NEXT: andi a0, a0, 31
; RV64NOZBB-NEXT: ret
; RV64NOZBB-NEXT: .LBB1_2:
; RV64NOZBB-NEXT: li a0, 16
@@ -631,13 +635,15 @@ define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind {
; RV32_NOZBB-NEXT: and a1, a0, a2
; RV32_NOZBB-NEXT: srli a0, a0, 2
; RV32_NOZBB-NEXT: and a0, a0, a2
+; RV32_NOZBB-NEXT: lui a2, 1
; RV32_NOZBB-NEXT: add a0, a1, a0
; RV32_NOZBB-NEXT: srli a1, a0, 4
; RV32_NOZBB-NEXT: add a0, a0, a1
-; RV32_NOZBB-NEXT: andi a1, a0, 15
-; RV32_NOZBB-NEXT: slli a0, a0, 20
-; RV32_NOZBB-NEXT: srli a0, a0, 28
-; RV32_NOZBB-NEXT: add a0, a1, a0
+; RV32_NOZBB-NEXT: addi a1, a2, -241
+; RV32_NOZBB-NEXT: and a0, a0, a1
+; RV32_NOZBB-NEXT: srli a1, a0, 8
+; RV32_NOZBB-NEXT: add a0, a0, a1
+; RV32_NOZBB-NEXT: andi a0, a0, 31
; RV32_NOZBB-NEXT: ret
;
; RV64NOZBB-LABEL: test_cttz_i16_zero_undef:
@@ -655,13 +661,15 @@ define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind {
; RV64NOZBB-NEXT: and a1, a0, a2
; RV64NOZBB-NEXT: srli a0, a0, 2
; RV64NOZBB-NEXT: and a0, a0, a2
+; RV64NOZBB-NEXT: lui a2, 1
; RV64NOZBB-NEXT: add a0, a1, a0
; RV64NOZBB-NEXT: srli a1, a0, 4
; RV64NOZBB-NEXT: add a0, a0, a1
-; RV64NOZBB-NEXT: andi a1, a0, 15
-; RV64NOZBB-NEXT: slli a0, a0, 52
-; RV64NOZBB-NEXT: srli a0, a0, 60
-; RV64NOZBB-NEXT: add a0, a1, a0
+; RV64NOZBB-NEXT: addiw a1, a2, -241
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: srli a1, a0, 8
+; RV64NOZBB-NEXT: add a0, a0, a1
+; RV64NOZBB-NEXT: andi a0, a0, 31
; RV64NOZBB-NEXT: ret
;
; RV32ZBB-LABEL: test_cttz_i16_zero_undef:
@@ -1073,13 +1081,15 @@ define i16 @test_ctlz_i16(i16 %a) nounwind {
; RV32_NOZBB-NEXT: and a1, a0, a2
; RV32_NOZBB-NEXT: srli a0, a0, 2
; RV32_NOZBB-NEXT: and a0, a0, a2
+; RV32_NOZBB-NEXT: lui a2, 1
; RV32_NOZBB-NEXT: add a0, a1, a0
; RV32_NOZBB-NEXT: srli a1, a0, 4
; RV32_NOZBB-NEXT: add a0, a0, a1
-; RV32_NOZBB-NEXT: andi a1, a0, 15
-; RV32_NOZBB-NEXT: slli a0, a0, 20
-; RV32_NOZBB-NEXT: srli a0, a0, 28
-; RV32_NOZBB-NEXT: add a0, a1, a0
+; RV32_NOZBB-NEXT: addi a1, a2, -241
+; RV32_NOZBB-NEXT: and a0, a0, a1
+; RV32_NOZBB-NEXT: srli a1, a0, 8
+; RV32_NOZBB-NEXT: add a0, a0, a1
+; RV32_NOZBB-NEXT: andi a0, a0, 31
; RV32_NOZBB-NEXT: ret
; RV32_NOZBB-NEXT: .LBB9_2:
; RV32_NOZBB-NEXT: li a0, 16
@@ -1112,13 +1122,15 @@ define i16 @test_ctlz_i16(i16 %a) nounwind {
; RV64NOZBB-NEXT: and a1, a0, a2
; RV64NOZBB-NEXT: srli a0, a0, 2
; RV64NOZBB-NEXT: and a0, a0, a2
+; RV64NOZBB-NEXT: lui a2, 1
; RV64NOZBB-NEXT: add a0, a1, a0
; RV64NOZBB-NEXT: srli a1, a0, 4
; RV64NOZBB-NEXT: add a0, a0, a1
-; RV64NOZBB-NEXT: andi a1, a0, 15
-; RV64NOZBB-NEXT: slli a0, a0, 52
-; RV64NOZBB-NEXT: srli a0, a0, 60
-; RV64NOZBB-NEXT: add a0, a1, a0
+; RV64NOZBB-NEXT: addiw a1, a2, -241
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: srli a1, a0, 8
+; RV64NOZBB-NEXT: add a0, a0, a1
+; RV64NOZBB-NEXT: andi a0, a0, 31
; RV64NOZBB-NEXT: ret
; RV64NOZBB-NEXT: .LBB9_2:
; RV64NOZBB-NEXT: li a0, 16
@@ -1722,13 +1734,15 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind {
; RV32_NOZBB-NEXT: and a1, a0, a2
; RV32_NOZBB-NEXT: srli a0, a0, 2
; RV32_NOZBB-NEXT: and a0, a0, a2
+; RV32_NOZBB-NEXT: lui a2, 1
; RV32_NOZBB-NEXT: add a0, a1, a0
; RV32_NOZBB-NEXT: srli a1, a0, 4
; RV32_NOZBB-NEXT: add a0, a0, a1
-; RV32_NOZBB-NEXT: andi a1, a0, 15
-; RV32_NOZBB-NEXT: slli a0, a0, 20
-; RV32_NOZBB-NEXT: srli a0, a0, 28
-; RV32_NOZBB-NEXT: add a0, a1, a0
+; RV32_NOZBB-NEXT: addi a1, a2, -241
+; RV32_NOZBB-NEXT: and a0, a0, a1
+; RV32_NOZBB-NEXT: srli a1, a0, 8
+; RV32_NOZBB-NEXT: add a0, a0, a1
+; RV32_NOZBB-NEXT: andi a0, a0, 31
; RV32_NOZBB-NEXT: ret
;
; RV64NOZBB-LABEL: test_ctlz_i16_zero_undef:
@@ -1756,13 +1770,15 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind {
; RV64NOZBB-NEXT: and a1, a0, a2
; RV64NOZBB-NEXT: srli a0, a0, 2
; RV64NOZBB-NEXT: and a0, a0, a2
+; RV64NOZBB-NEXT: lui a2, 1
; RV64NOZBB-NEXT: add a0, a1, a0
; RV64NOZBB-NEXT: srli a1, a0, 4
; RV64NOZBB-NEXT: add a0, a0, a1
-; RV64NOZBB-NEXT: andi a1, a0, 15
-; RV64NOZBB-NEXT: slli a0, a0, 52
-; RV64NOZBB-NEXT: srli a0, a0, 60
-; RV64NOZBB-NEXT: add a0, a1, a0
+; RV64NOZBB-NEXT: addiw a1, a2, -241
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: srli a1, a0, 8
+; RV64NOZBB-NEXT: add a0, a0, a1
+; RV64NOZBB-NEXT: andi a0, a0, 31
; RV64NOZBB-NEXT: ret
;
; RV32ZBB-LABEL: test_ctlz_i16_zero_undef:
@@ -2310,13 +2326,15 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
; RV32_NOZBB-NEXT: and a1, a0, a2
; RV32_NOZBB-NEXT: srli a0, a0, 2
; RV32_NOZBB-NEXT: and a0, a0, a2
+; RV32_NOZBB-NEXT: lui a2, 1
; RV32_NOZBB-NEXT: add a0, a1, a0
; RV32_NOZBB-NEXT: srli a1, a0, 4
; RV32_NOZBB-NEXT: add a0, a0, a1
-; RV32_NOZBB-NEXT: andi a1, a0, 15
-; RV32_NOZBB-NEXT: slli a0, a0, 20
-; RV32_NOZBB-NEXT: srli a0, a0, 28
-; RV32_NOZBB-NEXT: add a0, a1, a0
+; RV32_NOZBB-NEXT: addi a1, a2, -241
+; RV32_NOZBB-NEXT: and a0, a0, a1
+; RV32_NOZBB-NEXT: srli a1, a0, 8
+; RV32_NOZBB-NEXT: add a0, a0, a1
+; RV32_NOZBB-NEXT: andi a0, a0, 31
; RV32_NOZBB-NEXT: ret
;
; RV64NOZBB-LABEL: test_ctpop_i16:
@@ -2331,13 +2349,15 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
; RV64NOZBB-NEXT: and a1, a0, a2
; RV64NOZBB-NEXT: srli a0, a0, 2
; RV64NOZBB-NEXT: and a0, a0, a2
+; RV64NOZBB-NEXT: lui a2, 1
; RV64NOZBB-NEXT: add a0, a1, a0
; RV64NOZBB-NEXT: srli a1, a0, 4
; RV64NOZBB-NEXT: add a0, a0, a1
-; RV64NOZBB-NEXT: andi a1, a0, 15
-; RV64NOZBB-NEXT: slli a0, a0, 52
-; RV64NOZBB-NEXT: srli a0, a0, 60
-; RV64NOZBB-NEXT: add a0, a1, a0
+; RV64NOZBB-NEXT: addiw a1, a2, -241
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: srli a1, a0, 8
+; RV64NOZBB-NEXT: add a0, a0, a1
+; RV64NOZBB-NEXT: andi a0, a0, 31
; RV64NOZBB-NEXT: ret
;
; RV32ZBB-LABEL: test_ctpop_i16:
@@ -2364,12 +2384,15 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
; RV32XTHEADBB-NEXT: and a1, a0, a2
; RV32XTHEADBB-NEXT: srli a0, a0, 2
; RV32XTHEADBB-NEXT: and a0, a0, a2
+; RV32XTHEADBB-NEXT: lui a2, 1
; RV32XTHEADBB-NEXT: add a0, a1, a0
; RV32XTHEADBB-NEXT: srli a1, a0, 4
; RV32XTHEADBB-NEXT: add a0, a0, a1
-; RV32XTHEADBB-NEXT: th.extu a1, a0, 11, 8
-; RV32XTHEADBB-NEXT: andi a0, a0, 15
+; RV32XTHEADBB-NEXT: addi a1, a2, -241
+; RV32XTHEADBB-NEXT: and a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 8
; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: andi a0, a0, 31
; RV32XTHEADBB-NEXT: ret
;
; RV64XTHEADBB-LABEL: test_ctpop_i16:
@@ -2384,12 +2407,15 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
; RV64XTHEADBB-NEXT: and a1, a0, a2
; RV64XTHEADBB-NEXT: srli a0, a0, 2
; RV64XTHEADBB-NEXT: and a0, a0, a2
+; RV64XTHEADBB-NEXT: lui a2, 1
; RV64XTHEADBB-NEXT: add a0, a1, a0
; RV64XTHEADBB-NEXT: srli a1, a0, 4
; RV64XTHEADBB-NEXT: add a0, a0, a1
-; RV64XTHEADBB-NEXT: th.extu a1, a0, 11, 8
-; RV64XTHEADBB-NEXT: andi a0, a0, 15
+; RV64XTHEADBB-NEXT: addiw a1, a2, -241
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: srli a1, a0, 8
; RV64XTHEADBB-NEXT: add a0, a0, a1
+; RV64XTHEADBB-NEXT: andi a0, a0, 31
; RV64XTHEADBB-NEXT: ret
%1 = call i16 @llvm.ctpop.i16(i16 %a)
ret i16 %1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index c628a0d620498a..0167d874c1242e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -2636,7 +2636,7 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
; RVA22U64-NEXT: or a0, a0, a5
; RVA22U64-NEXT: slli a6, a6, 24
; RVA22U64-NEXT: or a2, a2, a4
-; RVA22U64-NEXT: add.uw a2, a6, a2
+; RVA22U64-NEXT: or a2, a6, a2
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.x v8, a2
diff --git a/llvm/test/CodeGen/RISCV/shl-cttz.ll b/llvm/test/CodeGen/RISCV/shl-cttz.ll
index 500673cc29196f..3854f02bd82448 100644
--- a/llvm/test/CodeGen/RISCV/shl-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/shl-cttz.ll
@@ -136,13 +136,14 @@ define i16 @shl_cttz_i16(i16 %x, i16 %y) {
; RV32I-NEXT: and a2, a1, a3
; RV32I-NEXT: srli a1, a1, 2
; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: lui a3, 1
; RV32I-NEXT: add a1, a2, a1
; RV32I-NEXT: srli a2, a1, 4
; RV32I-NEXT: add a1, a1, a2
-; RV32I-NEXT: andi a2, a1, 15
-; RV32I-NEXT: slli a1, a1, 20
-; RV32I-NEXT: srli a1, a1, 28
-; RV32I-NEXT: add a1, a2, a1
+; RV32I-NEXT: addi a2, a3, -241
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: srli a2, a1, 8
+; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: ret
;
@@ -167,13 +168,14 @@ define i16 @shl_cttz_i16(i16 %x, i16 %y) {
; RV64I-NEXT: and a2, a1, a3
; RV64I-NEXT: srli a1, a1, 2
; RV64I-NEXT: and a1, a1, a3
+; RV64I-NEXT: lui a3, 1
; RV64I-NEXT: add a1, a2, a1
; RV64I-NEXT: srli a2, a1, 4
; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: andi a2, a1, 15
-; RV64I-NEXT: slli a1, a1, 52
-; RV64I-NEXT: srli a1, a1, 60
-; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: addiw a2, a3, -241
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: srli a2, a1, 8
+; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: sll a0, a0, a1
; RV64I-NEXT: ret
;
@@ -204,13 +206,14 @@ define i16 @shl_cttz_constant_i16(i16 %y) {
; RV32I-NEXT: and a1, a0, a2
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: lui a2, 1
+; RV32I-NEXT: addi a2, a2, -241
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: andi a1, a0, 15
-; RV32I-NEXT: slli a0, a0, 20
-; RV32I-NEXT: srli a0, a0, 28
-; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: li a1, 4
; RV32I-NEXT: sll a0, a1, a0
; RV32I-NEXT: ret
@@ -237,13 +240,14 @@ define i16 @shl_cttz_constant_i16(i16 %y) {
; RV64I-NEXT: and a1, a0, a2
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: lui a2, 1
+; RV64I-NEXT: addiw a2, a2, -241
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: andi a1, a0, 15
-; RV64I-NEXT: slli a0, a0, 52
-; RV64I-NEXT: srli a0, a0, 60
-; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: srli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: li a1, 4
; RV64I-NEXT: sll a0, a1, a0
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/X86/pr38185.ll b/llvm/test/CodeGen/X86/pr38185.ll
index d5591c50738fa1..4085c01344b285 100644
--- a/llvm/test/CodeGen/X86/pr38185.ll
+++ b/llvm/test/CodeGen/X86/pr38185.ll
@@ -14,11 +14,10 @@ define void @foo(ptr %a, ptr %b, ptr noalias %c, i64 %s) {
; CHECK-NEXT: # %bb.2: # %body
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: movl $1, (%rdx,%rax,4)
-; CHECK-NEXT: movzbl (%rdi,%rax,4), %r8d
-; CHECK-NEXT: movzbl (%rsi,%rax,4), %r9d
-; CHECK-NEXT: andl %r8d, %r9d
-; CHECK-NEXT: andl $1, %r9d
-; CHECK-NEXT: movl %r9d, (%rdi,%rax,4)
+; CHECK-NEXT: movl (%rdi,%rax,4), %r8d
+; CHECK-NEXT: andl (%rsi,%rax,4), %r8d
+; CHECK-NEXT: andl $1, %r8d
+; CHECK-NEXT: movl %r8d, (%rdi,%rax,4)
; CHECK-NEXT: incq %rax
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: jmp .LBB0_1
diff --git a/llvm/test/CodeGen/X86/vector-compress.ll b/llvm/test/CodeGen/X86/vector-compress.ll
index 87a948a4f1f7ee..bf7497f23bef3c 100644
--- a/llvm/test/CodeGen/X86/vector-compress.ll
+++ b/llvm/test/CodeGen/X86/vector-compress.ll
@@ -1814,7 +1814,7 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8>
; AVX2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: movl %ecx, %r13d
; AVX2-NEXT: movl %edx, %r15d
-; AVX2-NEXT: movl %esi, %ebx
+; AVX2-NEXT: movl %esi, %r14d
; AVX2-NEXT: # kill: def $edi killed $edi def $rdi
; AVX2-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: movl 360(%rbp), %eax
@@ -1932,8 +1932,8 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8>
; AVX2-NEXT: vpinsrb $10, %r10d, %xmm6, %xmm6
; AVX2-NEXT: movl 56(%rbp), %r11d
; AVX2-NEXT: vpinsrb $11, %r11d, %xmm6, %xmm6
-; AVX2-NEXT: movl 64(%rbp), %r14d
-; AVX2-NEXT: vpinsrb $12, %r14d, %xmm6, %xmm6
+; AVX2-NEXT: movl 64(%rbp), %ebx
+; AVX2-NEXT: vpinsrb $12, %ebx, %xmm6, %xmm6
; AVX2-NEXT: movl 72(%rbp), %r12d
; AVX2-NEXT: vpinsrb $13, %r12d, %xmm6, %xmm6
; AVX2-NEXT: movl 80(%rbp), %eax
@@ -1988,20 +1988,20 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8>
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpextrb $1, %xmm0, (%rsp,%rax)
-; AVX2-NEXT: andl $1, %ebx
-; AVX2-NEXT: addq %rax, %rbx
-; AVX2-NEXT: vpextrb $2, %xmm0, (%rsp,%rbx)
+; AVX2-NEXT: andl $1, %r14d
+; AVX2-NEXT: addq %rax, %r14
+; AVX2-NEXT: vpextrb $2, %xmm0, (%rsp,%r14)
; AVX2-NEXT: andl $1, %r15d
-; AVX2-NEXT: addq %rbx, %r15
+; AVX2-NEXT: addq %r14, %r15
; AVX2-NEXT: vpextrb $3, %xmm0, (%rsp,%r15)
; AVX2-NEXT: andl $1, %r13d
; AVX2-NEXT: addq %r15, %r13
; AVX2-NEXT: vpextrb $4, %xmm0, (%rsp,%r13)
-; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: addq %r13, %rcx
-; AVX2-NEXT: movl %ecx, %eax
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: addq %r13, %rax
; AVX2-NEXT: vpextrb $5, %xmm0, (%rsp,%rax)
+; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: addq %rcx, %rax
@@ -2035,16 +2035,16 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8>
; AVX2...
[truncated]
arsenm left a comment:
Most of the test changes look like regressions
-; CHECK-NEXT: and r1, r0, #3840
-; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: add r0, r0, r1, lsr #8
+; CHECK-NEXT: and r0, r0, r1
Regression
 ; CI-NEXT: s_waitcnt vmcnt(0)
 ; CI-NEXT: v_bfe_u32 v1, v0, 16, 15
-; CI-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; CI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
Technically no worse but now we're emitting a messier constant
-; LA32-NEXT: bstrpick.w $a1, $a0, 11, 8
-; LA32-NEXT: andi $a0, $a0, 15
+; LA32-NEXT: andi $a0, $a0, 3855
+; LA32-NEXT: srli.w $a1, $a0, 8
Regression
-; SLOW-NEXT: clrlwi 3, 3, 28
+; SLOW-NEXT: andi. 3, 3, 3855
 ; SLOW-NEXT: add 3, 3, 4
+; SLOW-NEXT: clrlwi 3, 3, 27
Regression
Previous code removed dead AND operations of the form
(AND rX, b11001100)
only when LHSKnown.Zero == b00110011, i.e. LHSKnown.Zero had to equal exactly the zero bits of the constant operand. This is unnecessarily restrictive; the check has been relaxed so that LHSKnown.Zero only needs to contain the zero bits of the constant operand (within the demanded bits).
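
For illustration only, here is a minimal standalone sketch of the old equality test versus the relaxed subset test (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSC->getAPIntValue()) in the patch). It models the bit masks with plain uint32_t rather than LLVM's APInt/KnownBits, and the helper names oldCheck/newCheck are hypothetical:

#include <cassert>
#include <cstdint>

// Standalone model of the SimplifyDemandedBits check.
//   lhsKnownZero : bits known to be zero on the AND's LHS
//   rhsConst     : the AND's constant operand
//   demanded     : bits demanded of the AND's result
bool oldCheck(uint32_t lhsKnownZero, uint32_t rhsConst, uint32_t demanded) {
  // Old rule: within the demanded bits, the LHS known-zero bits must equal
  // the constant's zero bits exactly.
  return (lhsKnownZero & demanded) == (~rhsConst & demanded);
}

bool newCheck(uint32_t lhsKnownZero, uint32_t rhsConst, uint32_t demanded) {
  // New rule: every demanded bit is either known zero on the LHS or kept by
  // the constant, so the AND cannot change any demanded bit.
  return (demanded & ~(lhsKnownZero | rhsConst)) == 0;
}

int main() {
  uint32_t rhs = 0xCC /* b11001100 */, demanded = 0xFF;
  // LHS known-zero bits exactly match the constant's zero bits: both rules
  // recognize the AND as dead.
  assert(oldCheck(0x33, rhs, demanded) && newCheck(0x33, rhs, demanded));
  // LHS known-zero bits are a strict superset (bit 3 is also known zero):
  // the AND is still a no-op, but only the relaxed rule eliminates it.
  assert(!oldCheck(0x3B, rhs, demanded) && newCheck(0x3B, rhs, demanded));
  return 0;
}

In the second case the AND leaves every demanded bit equal to the LHS even though LHSKnown.Zero is not exactly the complement of the constant, which is the situation the old equality test missed.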