From cb33af1d2b1933907de639dd57de87f906f31ebb Mon Sep 17 00:00:00 2001
From: Rose
Date: Mon, 23 Jun 2025 12:38:38 -0400
Subject: [PATCH 1/4] Pre-commit test (NFC)

---
 .../AArch64/aarch64-bitwisenot-fold.ll        | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
index 5fbf38b2560d4..28099a76fa34b 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
@@ -96,3 +96,29 @@ define i64 @andnot_sub_with_neg_i64(i64 %a0, i64 %a1) {
   %and = and i64 %diff, %a0
   ret i64 %and
 }
+
+define i32 @and_not_select_eq(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: and_not_select_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w1, w0
+; CHECK-NEXT:    cmp w8, w0
+; CHECK-NEXT:    csel w0, w0, w2, eq
+; CHECK-NEXT:    ret
+  %or = or i32 %b, %a
+  %cmp = icmp eq i32 %or, %a
+  %a.c = select i1 %cmp, i32 %a, i32 %c
+  ret i32 %a.c
+}
+
+define i32 @and_not_select_ne(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: and_not_select_ne:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w1, w0
+; CHECK-NEXT:    cmp w8, w0
+; CHECK-NEXT:    csel w0, w0, w2, ne
+; CHECK-NEXT:    ret
+  %or = or i32 %b, %a
+  %cmp = icmp ne i32 %or, %a
+  %a.c = select i1 %cmp, i32 %a, i32 %c
+  ret i32 %a.c
+}

From d6a014c25b15d5038a30b09313bc4846f615ab16 Mon Sep 17 00:00:00 2001
From: Rose
Date: Mon, 23 Jun 2025 13:22:51 -0400
Subject: [PATCH 2/4] [TargetLowering] Fold (a | b) ==/!= b -> (a & ~b) ==/!=
 0 when and-not exists

This is especially helpful for AArch64, which simplifies and + cmp to tst.

Alive2: https://alive2.llvm.org/ce/z/LLgcJJ
---
 llvm/include/llvm/CodeGen/TargetLowering.h    |  2 +
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 41 +++++++++++++++++++
 .../AArch64/aarch64-bitwisenot-fold.ll        |  6 +--
 .../AArch64/switch-cases-to-branch-and.ll     |  3 +-
 4 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 727526055e592..ff2523b8a2517 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5800,6 +5800,8 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
 private:
   SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                            const SDLoc &DL, DAGCombinerInfo &DCI) const;
+  SDValue foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+                          const SDLoc &DL, DAGCombinerInfo &DCI) const;
   SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
                              ISD::CondCode Cond, const SDLoc &DL,
                              DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 66717135c9adf..5f43b618122ee 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/IR/DataLayout.h"
@@ -37,6 +38,7 @@
 #include
 #include
 using namespace llvm;
+using namespace llvm::SDPatternMatch;

 /// NOTE: The TargetMachine owns TLOF.
 TargetLowering::TargetLowering(const TargetMachine &tm)
@@ -4212,6 +4214,42 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
   return SDValue();
 }

+/// This helper function of SimplifySetCC tries to optimize the comparison when
+/// either operand of the SetCC node is a bitwise-or instruction.
+/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
+SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
+                                        ISD::CondCode Cond, const SDLoc &DL,
+                                        DAGCombinerInfo &DCI) const {
+  if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
+    std::swap(N0, N1);
+
+  SelectionDAG &DAG = DCI.DAG;
+  EVT OpVT = N0.getValueType();
+  if (!N0.hasOneUse() || !OpVT.isInteger() ||
+      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
+    return SDValue();
+
+  // (X | Y) == Y
+  // (X | Y) != Y
+  SDValue X;
+  if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(N1)) {
+    // If the target supports an 'and-not' or 'and-complement' logic operation,
+    // try to use that to make a comparison operation more efficient.
+
+    // Bail out if the compare operand that we want to turn into a zero is
+    // already a zero (otherwise, infinite loop).
+    if (isNullConstant(N1))
+      return SDValue();
+
+    // Transform this into: X & ~Y ==/!= 0.
+    SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
+    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
+    return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
+  }
+
+  return SDValue();
+}
+
 /// There are multiple IR patterns that could be checking whether certain
 /// truncation of a signed number would be lossy or not. The pattern which is
 /// best at IR level, may not lower optimally. Thus, we want to unfold it.
@@ -5507,6 +5545,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,

     if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
       return V;
+
+    if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
+      return V;
   }

   // Fold remainder of division by a constant.
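
Aside (not part of the patch): beyond the Alive2 proof linked in the commit
message, the identity behind foldSetCCWithOr is easy to spot-check with a
small standalone C++ program. This is a sanity-check sketch only; the names
orForm and andNotForm are illustrative, not from the patch.

    // Exhaustive check over all 8-bit pairs: (x | y) == y  <=>  (x & ~y) == 0.
    // High bits of ~y are harmless here because x never has them set.
    #include <cassert>
    #include <cstdio>

    int main() {
      for (unsigned x = 0; x < 256; ++x) {
        for (unsigned y = 0; y < 256; ++y) {
          bool orForm = ((x | y) == y);      // the compare before the fold
          bool andNotForm = ((x & ~y) == 0); // the compare after the fold
          assert(orForm == andNotForm);      // SETNE follows by negating both
        }
      }
      std::puts("identity holds for all 8-bit pairs");
      return 0;
    }

This also shows why the helper handles SETEQ and SETNE uniformly: the two
forms are equivalent as predicates, so negating both sides preserves the fold.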
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
index 28099a76fa34b..2c8a4b5f6be53 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
@@ -100,8 +100,7 @@ define i64 @andnot_sub_with_neg_i64(i64 %a0, i64 %a1) {
 define i32 @and_not_select_eq(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: and_not_select_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr w8, w1, w0
-; CHECK-NEXT:    cmp w8, w0
+; CHECK-NEXT:    bics wzr, w1, w0
 ; CHECK-NEXT:    csel w0, w0, w2, eq
 ; CHECK-NEXT:    ret
   %or = or i32 %b, %a
@@ -113,8 +112,7 @@ define i32 @and_not_select_eq(i32 %a, i32 %b, i32 %c) {
 define i32 @and_not_select_ne(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: and_not_select_ne:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr w8, w1, w0
-; CHECK-NEXT:    cmp w8, w0
+; CHECK-NEXT:    bics wzr, w1, w0
 ; CHECK-NEXT:    csel w0, w0, w2, ne
 ; CHECK-NEXT:    ret
   %or = or i32 %b, %a
diff --git a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
index 4d36a003cbfe4..e99ba4843c452 100644
--- a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
+++ b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
@@ -569,8 +569,7 @@ define void @test_successor_with_loop_phi(ptr %A, ptr %B) {
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    str wzr, [x0]
 ; CHECK-NEXT:    mov x0, x1
-; CHECK-NEXT:    orr w8, w8, #0x4
-; CHECK-NEXT:    cmp w8, #4
+; CHECK-NEXT:    tst w8, #0xfffffffb
 ; CHECK-NEXT:    b.eq LBB7_1
 ; CHECK-NEXT:  ; %bb.2: ; %exit
 ; CHECK-NEXT:    ret

From 5d1440ceb3604ba4c7959ec60511b047e45b0009 Mon Sep 17 00:00:00 2001
From: Rose
Date: Thu, 26 Jun 2025 12:56:49 -0400
Subject: [PATCH 3/4] Add tests

---
 .../AArch64/aarch64-bitwisenot-fold.ll        | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
index 2c8a4b5f6be53..3b1ae8610295d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
@@ -120,3 +120,27 @@ define i32 @and_not_select_ne(i32 %a, i32 %b, i32 %c) {
   %a.c = select i1 %cmp, i32 %a, i32 %c
   ret i32 %a.c
 }
+
+define i32 @and_not_select_eq_swap(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: and_not_select_eq_swap:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bics wzr, w1, w0
+; CHECK-NEXT:    csel w0, w0, w2, eq
+; CHECK-NEXT:    ret
+  %or = or i32 %b, %a
+  %cmp = icmp eq i32 %a, %or
+  %a.c = select i1 %cmp, i32 %a, i32 %c
+  ret i32 %a.c
+}
+
+define i32 @and_not_select_ne_swap(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: and_not_select_ne_swap:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bics wzr, w1, w0
+; CHECK-NEXT:    csel w0, w0, w2, ne
+; CHECK-NEXT:    ret
+  %or = or i32 %a, %b
+  %cmp = icmp eq i32 %a, %or
+  %a.c = select i1 %cmp, i32 %a, i32 %c
+  ret i32 %a.c
+}

From 04fd2871e160fe81144fd71685bd874450217dbc Mon Sep 17 00:00:00 2001
From: Rose
Date: Thu, 26 Jun 2025 13:00:56 -0400
Subject: [PATCH 4/4] Fix

---
 llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
index 3b1ae8610295d..d6401f347f277 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
@@ -140,7 +140,7 @@ define i32 @and_not_select_ne_swap(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    csel w0, w0, w2, ne
 ; CHECK-NEXT:    ret
   %or = or i32 %a, %b
-  %cmp = icmp eq i32 %a, %or
+  %cmp = icmp ne i32 %a, %or
   %a.c = select i1 %cmp, i32 %a, i32 %c
   ret i32 %a.c
 }
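
Note on the tests: the RUN header of aarch64-bitwisenot-fold.ll lies outside
every hunk's context, so it never appears in the diffs above. As an
assumption (the exact triple and flags may differ), autogenerated AArch64
codegen tests of this shape conventionally begin with lines like:

    ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
    ; RUN: llc -mtriple=aarch64 < %s | FileCheck %s

Re-running utils/update_llc_test_checks.py after the TargetLowering change is
what rewrites the orr + cmp pairs in the CHECK lines into the single
flag-setting bics (and tst in switch-cases-to-branch-and.ll).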