From cb33af1d2b1933907de639dd57de87f906f31ebb Mon Sep 17 00:00:00 2001
From: Rose
Date: Mon, 23 Jun 2025 12:38:38 -0400
Subject: [PATCH 1/4] Pre-commit test (NFC)

---
 .../AArch64/aarch64-bitwisenot-fold.ll        | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
index 5fbf38b2560d4..28099a76fa34b 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
@@ -96,3 +96,29 @@ define i64 @andnot_sub_with_neg_i64(i64 %a0, i64 %a1) {
   %and = and i64 %diff, %a0
   ret i64 %and
 }
+
+define i32 @and_not_select_eq(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: and_not_select_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w1, w0
+; CHECK-NEXT:    cmp w8, w0
+; CHECK-NEXT:    csel w0, w0, w2, eq
+; CHECK-NEXT:    ret
+  %or = or i32 %b, %a
+  %cmp = icmp eq i32 %or, %a
+  %a.c = select i1 %cmp, i32 %a, i32 %c
+  ret i32 %a.c
+}
+
+define i32 @and_not_select_ne(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: and_not_select_ne:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w1, w0
+; CHECK-NEXT:    cmp w8, w0
+; CHECK-NEXT:    csel w0, w0, w2, ne
+; CHECK-NEXT:    ret
+  %or = or i32 %b, %a
+  %cmp = icmp ne i32 %or, %a
+  %a.c = select i1 %cmp, i32 %a, i32 %c
+  ret i32 %a.c
+}

From d6a014c25b15d5038a30b09313bc4846f615ab16 Mon Sep 17 00:00:00 2001
From: Rose
Date: Mon, 23 Jun 2025 13:22:51 -0400
Subject: [PATCH 2/4] [TargetLowering] Fold (a | b) ==/!= b -> (a & ~b) ==/!=
 0 when and-not exists

This is especially helpful for AArch64, which simplifies and + cmp to tst.

Alive2: https://alive2.llvm.org/ce/z/LLgcJJ
---
 llvm/include/llvm/CodeGen/TargetLowering.h    |  2 +
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 41 +++++++++++++++++++
 .../AArch64/aarch64-bitwisenot-fold.ll        |  6 +--
 .../AArch64/switch-cases-to-branch-and.ll     |  3 +-
 4 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 727526055e592..ff2523b8a2517 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5800,6 +5800,8 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
 private:
   SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                            const SDLoc &DL, DAGCombinerInfo &DCI) const;
+  SDValue foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+                          const SDLoc &DL, DAGCombinerInfo &DCI) const;
   SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
                              ISD::CondCode Cond, const SDLoc &DL,
                              DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 66717135c9adf..5f43b618122ee 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/IR/DataLayout.h"
@@ -37,6 +38,7 @@
 #include
 #include
 using namespace llvm;
+using namespace llvm::SDPatternMatch;

 /// NOTE: The TargetMachine owns TLOF.
 TargetLowering::TargetLowering(const TargetMachine &tm)
@@ -4212,6 +4214,42 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
   return SDValue();
 }

+/// This helper function of SimplifySetCC tries to optimize the comparison when
+/// either operand of the SetCC node is a bitwise-or instruction.
+/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
+SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
+                                        ISD::CondCode Cond, const SDLoc &DL,
+                                        DAGCombinerInfo &DCI) const {
+  if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
+    std::swap(N0, N1);
+
+  SelectionDAG &DAG = DCI.DAG;
+  EVT OpVT = N0.getValueType();
+  if (!N0.hasOneUse() || !OpVT.isInteger() ||
+      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
+    return SDValue();
+
+  // (X | Y) == Y
+  // (X | Y) != Y
+  SDValue X;
+  if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(N1)) {
+    // If the target supports an 'and-not' or 'and-complement' logic operation,
+    // try to use that to make a comparison operation more efficient.
+
+    // Bail out if the compare operand that we want to turn into a zero is
+    // already a zero (otherwise, infinite loop).
+    if (isNullConstant(N1))
+      return SDValue();
+
+    // Transform this into: X & ~Y ==/!= 0.
+    SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
+    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
+    return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
+  }
+
+  return SDValue();
+}
+
 /// There are multiple IR patterns that could be checking whether certain
 /// truncation of a signed number would be lossy or not. The pattern which is
 /// best at IR level, may not lower optimally. Thus, we want to unfold it.
@@ -5507,6 +5545,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,

     if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
       return V;
+
+    if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
+      return V;
   }

   // Fold remainder of division by a constant.
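
Aside (not part of the patch): beyond the Alive2 proof linked in the commit
message, the identity behind foldSetCCWithOr is easy to spot-check with a
small standalone C++ program. This is a sanity-check sketch only; the names
orForm and andNotForm are illustrative, not from the patch.

    // Exhaustive check over all 8-bit pairs: (x | y) == y  <=>  (x & ~y) == 0.
    // High bits of ~y are harmless here because x never has them set.
    #include <cassert>
    #include <cstdio>

    int main() {
      for (unsigned x = 0; x < 256; ++x) {
        for (unsigned y = 0; y < 256; ++y) {
          bool orForm = ((x | y) == y);      // the compare before the fold
          bool andNotForm = ((x & ~y) == 0); // the compare after the fold
          assert(orForm == andNotForm);      // SETNE follows by negating both
        }
      }
      std::puts("identity holds for all 8-bit pairs");
      return 0;
    }

This also shows why the helper handles SETEQ and SETNE uniformly: the two
forms are equivalent as predicates, so negating both sides preserves the fold.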
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
index 28099a76fa34b..2c8a4b5f6be53 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
@@ -100,8 +100,7 @@ define i64 @andnot_sub_with_neg_i64(i64 %a0, i64 %a1) {
 define i32 @and_not_select_eq(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: and_not_select_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr w8, w1, w0
-; CHECK-NEXT:    cmp w8, w0
+; CHECK-NEXT:    bics wzr, w1, w0
 ; CHECK-NEXT:    csel w0, w0, w2, eq
 ; CHECK-NEXT:    ret
   %or = or i32 %b, %a
@@ -113,8 +112,7 @@ define i32 @and_not_select_eq(i32 %a, i32 %b, i32 %c) {
 define i32 @and_not_select_ne(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: and_not_select_ne:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr w8, w1, w0
-; CHECK-NEXT:    cmp w8, w0
+; CHECK-NEXT:    bics wzr, w1, w0
 ; CHECK-NEXT:    csel w0, w0, w2, ne
 ; CHECK-NEXT:    ret
   %or = or i32 %b, %a
diff --git a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
index 4d36a003cbfe4..e99ba4843c452 100644
--- a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
+++ b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
@@ -569,8 +569,7 @@ define void @test_successor_with_loop_phi(ptr %A, ptr %B) {
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    str wzr, [x0]
 ; CHECK-NEXT:    mov x0, x1
-; CHECK-NEXT:    orr w8, w8, #0x4
-; CHECK-NEXT:    cmp w8, #4
+; CHECK-NEXT:    tst w8, #0xfffffffb
 ; CHECK-NEXT:    b.eq LBB7_1
 ; CHECK-NEXT:  ; %bb.2: ; %exit
 ; CHECK-NEXT:    ret

From 5d1440ceb3604ba4c7959ec60511b047e45b0009 Mon Sep 17 00:00:00 2001
From: Rose
Date: Thu, 26 Jun 2025 12:56:49 -0400
Subject: [PATCH 3/4] Add tests

---
 .../AArch64/aarch64-bitwisenot-fold.ll        | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
index 2c8a4b5f6be53..3b1ae8610295d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
@@ -120,3 +120,27 @@ define i32 @and_not_select_ne(i32 %a, i32 %b, i32 %c) {
   %a.c = select i1 %cmp, i32 %a, i32 %c
   ret i32 %a.c
 }
+
+define i32 @and_not_select_eq_swap(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: and_not_select_eq_swap:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bics wzr, w1, w0
+; CHECK-NEXT:    csel w0, w0, w2, eq
+; CHECK-NEXT:    ret
+  %or = or i32 %b, %a
+  %cmp = icmp eq i32 %a, %or
+  %a.c = select i1 %cmp, i32 %a, i32 %c
+  ret i32 %a.c
+}
+
+define i32 @and_not_select_ne_swap(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: and_not_select_ne_swap:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bics wzr, w1, w0
+; CHECK-NEXT:    csel w0, w0, w2, ne
+; CHECK-NEXT:    ret
+  %or = or i32 %a, %b
+  %cmp = icmp eq i32 %a, %or
+  %a.c = select i1 %cmp, i32 %a, i32 %c
+  ret i32 %a.c
+}

From 04fd2871e160fe81144fd71685bd874450217dbc Mon Sep 17 00:00:00 2001
From: Rose
Date: Thu, 26 Jun 2025 13:00:56 -0400
Subject: [PATCH 4/4] Fix

---
 llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
index 3b1ae8610295d..d6401f347f277 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
@@ -140,7 +140,7 @@ define i32 @and_not_select_ne_swap(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    csel w0, w0, w2, ne
 ; CHECK-NEXT:    ret
   %or = or i32 %a, %b
-  %cmp = icmp eq i32 %a, %or
+  %cmp = icmp ne i32 %a, %or
   %a.c = select i1 %cmp, i32 %a, i32 %c
   ret i32 %a.c
 }
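
Note on the tests: the RUN header of aarch64-bitwisenot-fold.ll lies outside
every hunk's context, so it never appears in the diffs above. As an
assumption (the exact triple and flags may differ), autogenerated AArch64
codegen tests of this shape conventionally begin with lines like:

    ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
    ; RUN: llc -mtriple=aarch64 < %s | FileCheck %s

Re-running utils/update_llc_test_checks.py after the TargetLowering change is
what rewrites the orr + cmp pairs in the CHECK lines into the single
flag-setting bics (and tst in switch-cases-to-branch-and.ll).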