diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5d62ded171f4f..f6d811ddba8ab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -396,6 +396,8 @@ namespace {
     bool PromoteLoad(SDValue Op);
 
     SDValue foldShiftToAvg(SDNode *N);
+    // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
+    SDValue foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT);
 
     SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                 SDValue RHS, SDValue True, SDValue False,
@@ -7541,6 +7543,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     return DAG.getNode(ISD::AND, DL, VT, X,
                        DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
 
+  // Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
+  // Fold (and X, (sub (not Y), Z)) -> (and X, (not (add Y, Z)))
+  if (TLI.hasAndNot(SDValue(N, 0)))
+    if (SDValue Folded = foldBitwiseOpWithNeg(N, DL, VT))
+      return Folded;
+
   // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
   // If we are shifting down an extended sign bit, see if we can simplify
   // this to shifting the MSB directly to expose further simplifications.
@@ -11652,6 +11660,22 @@ SDValue DAGCombiner::foldShiftToAvg(SDNode *N) {
   return DAG.getNode(FloorISD, SDLoc(N), N->getValueType(0), {A, B});
 }
 
+SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT) {
+  unsigned Opc = N->getOpcode();
+  SDValue X, Y, Z;
+  if (sd_match(
+          N, m_BitwiseLogic(m_Value(X), m_Add(m_Not(m_Value(Y)), m_Value(Z)))))
+    return DAG.getNode(Opc, DL, VT, X,
+                       DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
+
+  if (sd_match(N, m_BitwiseLogic(m_Value(X), m_Sub(m_OneUse(m_Not(m_Value(Y))),
+                                                   m_Value(Z)))))
+    return DAG.getNode(Opc, DL, VT, X,
+                       DAG.getNOT(DL, DAG.getNode(ISD::ADD, DL, VT, Y, Z), VT));
+
+  return SDValue();
+}
+
 /// Generate Min/Max node
 SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                          SDValue RHS, SDValue True,
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
new file mode 100644
index 0000000000000..5fbf38b2560d4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-linux | FileCheck %s
+
+define i8 @andnot_add_with_neg_i8(i8 %a0, i8 %a1) {
+; CHECK-LABEL: andnot_add_with_neg_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    bic w0, w0, w8
+; CHECK-NEXT:    ret
+  %not = xor i8 %a0, -1
+  %sum = add i8 %not, %a1
+  %and = and i8 %sum, %a0
+  ret i8 %and
+}
+
+define i8 @andnot_sub_with_neg_i8(i8 %a0, i8 %a1) {
+; CHECK-LABEL: andnot_sub_with_neg_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    bic w0, w0, w8
+; CHECK-NEXT:    ret
+  %not = xor i8 %a0, -1
+  %diff = sub i8 %not, %a1
+  %and = and i8 %diff, %a0
+  ret i8 %and
+}
+
+define i16 @andnot_add_with_neg_i16(i16 %a0, i16 %a1) {
+; CHECK-LABEL: andnot_add_with_neg_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    bic w0, w0, w8
+; CHECK-NEXT:    ret
+  %not = xor i16 %a0, -1
+  %sum = add i16 %not, %a1
+  %and = and i16 %sum, %a0
+  ret i16 %and
+}
+
+define i16 @andnot_sub_with_neg_i16(i16 %a0, i16 %a1) {
+; CHECK-LABEL: andnot_sub_with_neg_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    bic w0, w0, w8
+; CHECK-NEXT:    ret
+  %not = xor i16 %a0, -1
+  %diff = sub i16 %not, %a1
+  %and = and i16 %diff, %a0
+  ret i16 %and
+}
+
+define i32 @andnot_add_with_neg_i32(i32 %a0, i32 %a1) {
+; CHECK-LABEL: andnot_add_with_neg_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    bic w0, w0, w8
+; CHECK-NEXT:    ret
+  %not = xor i32 %a0, -1
+  %sum = add i32 %not, %a1
+  %and = and i32 %sum, %a0
+  ret i32 %and
+}
+
+define i32 @andnot_sub_with_neg_i32(i32 %a0, i32 %a1) {
+; CHECK-LABEL: andnot_sub_with_neg_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    bic w0, w0, w8
+; CHECK-NEXT:    ret
+  %not = xor i32 %a0, -1
+  %diff = sub i32 %not, %a1
+  %and = and i32 %diff, %a0
+  ret i32 %and
+}
+
+define i64 @andnot_add_with_neg_i64(i64 %a0, i64 %a1) {
+; CHECK-LABEL: andnot_add_with_neg_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub x8, x0, x1
+; CHECK-NEXT:    bic x0, x0, x8
+; CHECK-NEXT:    ret
+  %not = xor i64 %a0, -1
+  %sum = add i64 %not, %a1
+  %and = and i64 %sum, %a0
+  ret i64 %and
+}
+
+define i64 @andnot_sub_with_neg_i64(i64 %a0, i64 %a1) {
+; CHECK-LABEL: andnot_sub_with_neg_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, x1
+; CHECK-NEXT:    bic x0, x0, x8
+; CHECK-NEXT:    ret
+  %not = xor i64 %a0, -1
+  %diff = sub i64 %not, %a1
+  %and = and i64 %diff, %a0
+  ret i64 %and
+}
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index e564d7bddea6f..27be02c50f1c7 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -885,9 +885,8 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 define i8 @test_not_cttz_i8(i8 %a) nounwind {
 ; LA32R-LABEL: test_not_cttz_i8:
 ; LA32R:       # %bb.0:
-; LA32R-NEXT:    nor $a1, $a0, $zero
-; LA32R-NEXT:    addi.w $a1, $a1, -1
-; LA32R-NEXT:    and $a0, $a0, $a1
+; LA32R-NEXT:    addi.w $a1, $a0, 1
+; LA32R-NEXT:    andn $a0, $a0, $a1
 ; LA32R-NEXT:    srli.w $a1, $a0, 1
 ; LA32R-NEXT:    andi $a1, $a1, 85
 ; LA32R-NEXT:    sub.w $a0, $a0, $a1
@@ -921,9 +920,8 @@ define i8 @test_not_cttz_i8(i8 %a) nounwind {
 define i16 @test_not_cttz_i16(i16 %a) nounwind {
 ; LA32R-LABEL: test_not_cttz_i16:
 ; LA32R:       # %bb.0:
-; LA32R-NEXT:    nor $a1, $a0, $zero
-; LA32R-NEXT:    addi.w $a1, $a1, -1
-; LA32R-NEXT:    and $a0, $a0, $a1
+; LA32R-NEXT:    addi.w $a1, $a0, 1
+; LA32R-NEXT:    andn $a0, $a0, $a1
 ; LA32R-NEXT:    srli.w $a1, $a0, 1
 ; LA32R-NEXT:    lu12i.w $a2, 5
 ; LA32R-NEXT:    ori $a2, $a2, 1365
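
Reference note (not part of the patch): the fold is sound because, in two's complement, ~b == -b - 1, so ~b + c == -(b - c) - 1 == ~(b - c) and ~b - c == -(b + c) - 1 == ~(b + c). Rewriting this way keeps the NOT directly on an operand of the bitwise op, which lets targets that report TLI.hasAndNot() select a single and-not instruction (bic on AArch64, andn on LoongArch), as the updated tests show. Below is a minimal standalone C++ sketch that exhaustively checks both identities over 8-bit values; it is illustrative only, every name in it is made up, and none of it is LLVM API.

    // Exhaustive check of the identities behind the fold:
    //   ~b + c == ~(b - c)   and   ~b - c == ~(b + c)   (mod 2^8)
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
      for (unsigned B = 0; B < 256; ++B) {
        for (unsigned C = 0; C < 256; ++C) {
          auto b = static_cast<std::uint8_t>(B);
          auto c = static_cast<std::uint8_t>(C);
          // add form: (~b) + c vs ~(b - c), both truncated to 8 bits
          auto AddLHS = static_cast<std::uint8_t>(~b + c);
          auto AddRHS = static_cast<std::uint8_t>(~static_cast<std::uint8_t>(b - c));
          assert(AddLHS == AddRHS && "~b + c != ~(b - c)");
          // sub form: (~b) - c vs ~(b + c), both truncated to 8 bits
          auto SubLHS = static_cast<std::uint8_t>(~b - c);
          auto SubRHS = static_cast<std::uint8_t>(~static_cast<std::uint8_t>(b + c));
          assert(SubLHS == SubRHS && "~b - c != ~(b + c)");
        }
      }
      std::printf("both identities hold for all 8-bit values\n");
      return 0;
    }

The same wrap-around argument holds at any fixed bit width, which is why the new AArch64 tests cover i8 through i64.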