From 13c0f6d967accbcef8a0b1495b236faeef3a6a13 Mon Sep 17 00:00:00 2001 From: Yui5427 <785369607@qq.com> Date: Sun, 24 Aug 2025 13:34:01 +0800 Subject: [PATCH 01/10] [DAGCombiner] add fold (xor (smin(x, C), C)) -> select (x < C), xor(x, C), 0 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 42 +++++++++++ llvm/test/CodeGen/AArch64/xor-smin-smax.ll | 75 +++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/xor-smin-smax.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d130efe96b56b..80565b5d0bd7d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10092,6 +10092,48 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N)) return Combined; + // fold (xor (smin(x, C), C)) -> select (x < C), xor(x, C), 0 + // fold (xor (smin(C, x), C)) -> select (x < C), xor(x, C), 0 + if (N0.getOpcode() == ISD::SMIN && N0.hasOneUse()) { + SDValue Op0 = N0.getOperand(0); + SDValue Op1 = N0.getOperand(1); + + if (Op1 != N1) { + std::swap(Op0, Op1); + } + + if (Op1 == N1) { + if (isa(N1)) { + EVT CCVT = getSetCCResultType(VT); + SDValue Cmp = DAG.getSetCC(SDLoc(N), CCVT, Op0, N1, ISD::SETLT); + SDValue XorXC = DAG.getNode(ISD::XOR, SDLoc(N), VT, Op0, N1); + SDValue Zero = DAG.getConstant(0, SDLoc(N), VT); + return DAG.getSelect(SDLoc(N), VT, Cmp, XorXC, Zero); + } + } + } + + // fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0 + // fold (xor (smax(C, x), C)) -> select (x > C), xor(x, C), 0 + if (N0.getOpcode() == ISD::SMAX && N0.hasOneUse()) { + SDValue Op0 = N0.getOperand(0); + SDValue Op1 = N0.getOperand(1); + + if (Op1 != N1) { + std::swap(Op0, Op1); + } + + if (Op1 == N1) { + if (isa(N1)) { + EVT CCVT = getSetCCResultType(VT); + SDValue Cmp = DAG.getSetCC(SDLoc(N), CCVT, Op0, N1, ISD::SETGT); + SDValue XorXC = DAG.getNode(ISD::XOR, SDLoc(N), VT, 
Op0, N1); + SDValue Zero = DAG.getConstant(0, SDLoc(N), VT); + return DAG.getSelect(SDLoc(N), VT, Cmp, XorXC, Zero); + } + } + } + return SDValue(); } diff --git a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll new file mode 100644 index 0000000000000..cfdec2da61c7a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s + +; Test for DAGCombiner optimization: fold (xor (smin(x, C), C)) -> select (x < C), xor (x, C), 0 + +define i64 @test_smin_neg_one(i64 %a) { +; CHECK-LABEL: test_smin_neg_one: +; CHECK: // %bb.0: +; CHECK-NEXT: cmn x0, #1 +; CHECK-NEXT: csinv x0, xzr, x0, ge +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.smin.i64(i64 %a, i64 -1) + %retval.0 = xor i64 %1, -1 + ret i64 %retval.0 +} + +define i64 @test_smin_zero(i64 %a) { +; CHECK-LABEL: test_smin_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: and x0, x0, x0, asr #63 +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.smin.i64(i64 %a, i64 0) + %retval.0 = xor i64 %1, 0 + ret i64 %retval.0 +} + +define i64 @test_smin_constant(i64 %a) { +; CHECK-LABEL: test_smin_constant: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x0, #0x8 +; CHECK-NEXT: cmp x0, #8 +; CHECK-NEXT: csel x0, x8, xzr, lt +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.smin.i64(i64 %a, i64 8) + %retval.0 = xor i64 %1, 8 + ret i64 %retval.0 +} + +; Test for DAGCombiner optimization: fold (xor (smax(x, C), C)) -> select (x > C), xor (x, C), 0 + +define i64 @test_smax_neg_one(i64 %a) { +; CHECK-LABEL: test_smax_neg_one: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: bic x0, x8, x0, asr #63 +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.smax.i64(i64 %a, i64 -1) + %retval.0 = xor i64 %1, -1 + ret i64 %retval.0 +} + +define i64 @test_smax_zero(i64 %a) { +; CHECK-LABEL: test_smax_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: bic x0, x0, x0, asr #63 
+; CHECK-NEXT: ret + %1 = tail call i64 @llvm.smax.i64(i64 %a, i64 0) + %retval.0 = xor i64 %1, 0 + ret i64 %retval.0 +} + +define i64 @test_smax_constant(i64 %a) { +; CHECK-LABEL: test_smax_constant: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x0, #0x8 +; CHECK-NEXT: cmp x0, #8 +; CHECK-NEXT: csel x0, x8, xzr, gt +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.smax.i64(i64 %a, i64 8) + %retval.0 = xor i64 %1, 8 + ret i64 %retval.0 +} + +declare i64 @llvm.smin.i64(i64, i64) +declare i64 @llvm.smax.i64(i64, i64) \ No newline at end of file From b83f0f90692d6ddb90245fa88459be0fb2036800 Mon Sep 17 00:00:00 2001 From: Yui5427 <785369607@qq.com> Date: Mon, 25 Aug 2025 21:42:42 +0800 Subject: [PATCH 02/10] Combining umax umin, smin, smax --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 59 ++--- llvm/test/CodeGen/AArch64/xor-smin-smax.ll | 205 +++++++++++++++++- 2 files changed, 235 insertions(+), 29 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 80565b5d0bd7d..e8c404f2b471d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10093,40 +10093,43 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return Combined; // fold (xor (smin(x, C), C)) -> select (x < C), xor(x, C), 0 - // fold (xor (smin(C, x), C)) -> select (x < C), xor(x, C), 0 - if (N0.getOpcode() == ISD::SMIN && N0.hasOneUse()) { - SDValue Op0 = N0.getOperand(0); - SDValue Op1 = N0.getOperand(1); - - if (Op1 != N1) { - std::swap(Op0, Op1); - } - - if (Op1 == N1) { - if (isa(N1)) { - EVT CCVT = getSetCCResultType(VT); - SDValue Cmp = DAG.getSetCC(SDLoc(N), CCVT, Op0, N1, ISD::SETLT); - SDValue XorXC = DAG.getNode(ISD::XOR, SDLoc(N), VT, Op0, N1); - SDValue Zero = DAG.getConstant(0, SDLoc(N), VT); - return DAG.getSelect(SDLoc(N), VT, Cmp, XorXC, Zero); - } - } - } - // fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0 - // fold (xor (smax(C, x), C)) -> select (x > C), 
xor(x, C), 0 - if (N0.getOpcode() == ISD::SMAX && N0.hasOneUse()) { + // fold (xor (umin(x, C), C)) -> select (x < C), xor(x, C), 0 + // fold (xor (umax(x, C), C)) -> select (x > C), xor(x, C), 0 + if ((N0.getOpcode() == ISD::SMIN || N0.getOpcode() == ISD::SMAX || + N0.getOpcode() == ISD::UMIN || N0.getOpcode() == ISD::UMAX) && + N0.hasOneUse()) { SDValue Op0 = N0.getOperand(0); SDValue Op1 = N0.getOperand(1); - if (Op1 != N1) { - std::swap(Op0, Op1); - } - if (Op1 == N1) { - if (isa(N1)) { + if (isa(N1) || + ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) { + // For vectors, only optimize when the constant is zero or all-ones to + // avoid generating more instructions + if (VT.isVector()) { + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (!N1C || (!N1C->isZero() && !N1C->isAllOnes())) + return SDValue(); + } + EVT CCVT = getSetCCResultType(VT); - SDValue Cmp = DAG.getSetCC(SDLoc(N), CCVT, Op0, N1, ISD::SETGT); + ISD::CondCode CC; + switch (N0.getOpcode()) { + case ISD::SMIN: + CC = ISD::SETLT; + break; + case ISD::SMAX: + CC = ISD::SETGT; + break; + case ISD::UMIN: + CC = ISD::SETULT; + break; + case ISD::UMAX: + CC = ISD::SETUGT; + break; + } + SDValue Cmp = DAG.getSetCC(SDLoc(N), CCVT, Op0, N1, CC); SDValue XorXC = DAG.getNode(ISD::XOR, SDLoc(N), VT, Op0, N1); SDValue Zero = DAG.getConstant(0, SDLoc(N), VT); return DAG.getSelect(SDLoc(N), VT, Cmp, XorXC, Zero); diff --git a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll index cfdec2da61c7a..74d80eeaefd4a 100644 --- a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll +++ b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll @@ -71,5 +71,208 @@ define i64 @test_smax_constant(i64 %a) { ret i64 %retval.0 } +define i64 @test_umin_neg_one(i64 %a) { +; CHECK-LABEL: test_umin_neg_one: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x0, x0 +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.umin.i64(i64 %a, i64 -1) + %retval.0 = xor i64 %1, -1 + ret i64 %retval.0 +} + +define i64 @test_umin_zero(i64 
%a) { +; CHECK-LABEL: test_umin_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, xzr +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.umin.i64(i64 %a, i64 0) + %retval.0 = xor i64 %1, 0 + ret i64 %retval.0 +} + +define i64 @test_umin_constant(i64 %a) { +; CHECK-LABEL: test_umin_constant: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x0, #0x8 +; CHECK-NEXT: cmp x0, #8 +; CHECK-NEXT: csel x0, x8, xzr, lo +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.umin.i64(i64 %a, i64 8) + %retval.0 = xor i64 %1, 8 + ret i64 %retval.0 +} + +define i64 @test_umax_neg_one(i64 %a) { +; CHECK-LABEL: test_umax_neg_one: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, xzr +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.umax.i64(i64 %a, i64 -1) + %retval.0 = xor i64 %1, -1 + ret i64 %retval.0 +} + +define i64 @test_umax_zero(i64 %a) { +; CHECK-LABEL: test_umax_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.umax.i64(i64 %a, i64 0) + %retval.0 = xor i64 %1, 0 + ret i64 %retval.0 +} + +define i64 @test_umax_constant(i64 %a) { +; CHECK-LABEL: test_umax_constant: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x0, #0x8 +; CHECK-NEXT: cmp x0, #8 +; CHECK-NEXT: csel x0, x8, xzr, hi +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.umax.i64(i64 %a, i64 8) + %retval.0 = xor i64 %1, 8 + ret i64 %retval.0 +} + +; Test vector cases + +define <4 x i32> @test_smin_vector_neg_one(<4 x i32> %a) { +; CHECK-LABEL: test_smin_vector_neg_one: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-NEXT: cmgt v1.4s, v1.4s, v0.4s +; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ret + %1 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> ) + %retval.0 = xor <4 x i32> %1, + ret <4 x i32> %retval.0 +} + +define <4 x i32> @test_smin_vector_zero(<4 x i32> %a) { +; CHECK-LABEL: test_smin_vector_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %1 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> 
%a, <4 x i32> ) + %retval.0 = xor <4 x i32> %1, + ret <4 x i32> %retval.0 +} + +define <4 x i32> @test_smin_vector_constant(<4 x i32> %a) { +; CHECK-LABEL: test_smin_vector_constant: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.4s, #8 +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %1 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> ) + %retval.0 = xor <4 x i32> %1, + ret <4 x i32> %retval.0 +} + +define <4 x i32> @test_smax_vector_neg_one(<4 x i32> %a) { +; CHECK-LABEL: test_smax_vector_neg_one: +; CHECK: // %bb.0: +; CHECK-NEXT: cmge v1.4s, v0.4s, #0 +; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ret + %1 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> ) + %retval.0 = xor <4 x i32> %1, + ret <4 x i32> %retval.0 +} + +define <4 x i32> @test_smax_vector_zero(<4 x i32> %a) { +; CHECK-LABEL: test_smax_vector_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %1 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> ) + %retval.0 = xor <4 x i32> %1, + ret <4 x i32> %retval.0 +} + +define <4 x i32> @test_smax_vector_constant(<4 x i32> %a) { +; CHECK-LABEL: test_smax_vector_constant: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.4s, #8 +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %1 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> ) + %retval.0 = xor <4 x i32> %1, + ret <4 x i32> %retval.0 +} + +define <4 x i32> @test_umin_vector_neg_one(<4 x i32> %a) { +; CHECK-LABEL: test_umin_vector_neg_one: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: ret + %1 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> ) + %retval.0 = xor <4 x i32> %1, + ret <4 x i32> %retval.0 +} + +define <4 x i32> @test_umin_vector_zero(<4 x i32> %a) { +; CHECK-LABEL: test_umin_vector_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: 
movi v0.2d, #0000000000000000 +; CHECK-NEXT: ret + %1 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> ) + %retval.0 = xor <4 x i32> %1, + ret <4 x i32> %retval.0 +} + +define <4 x i32> @test_umin_vector_constant(<4 x i32> %a) { +; CHECK-LABEL: test_umin_vector_constant: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.4s, #8 +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %1 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> ) + %retval.0 = xor <4 x i32> %1, + ret <4 x i32> %retval.0 +} + +define <4 x i32> @test_umax_vector_neg_one(<4 x i32> %a) { +; CHECK-LABEL: test_umax_vector_neg_one: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: ret + %1 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> ) + %retval.0 = xor <4 x i32> %1, + ret <4 x i32> %retval.0 +} + +define <4 x i32> @test_umax_vector_zero(<4 x i32> %a) { +; CHECK-LABEL: test_umax_vector_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %1 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> ) + %retval.0 = xor <4 x i32> %1, + ret <4 x i32> %retval.0 +} + +define <4 x i32> @test_umax_vector_constant(<4 x i32> %a) { +; CHECK-LABEL: test_umax_vector_constant: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.4s, #8 +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %1 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> ) + %retval.0 = xor <4 x i32> %1, + ret <4 x i32> %retval.0 +} + declare i64 @llvm.smin.i64(i64, i64) -declare i64 @llvm.smax.i64(i64, i64) \ No newline at end of file +declare i64 @llvm.smax.i64(i64, i64) +declare i64 @llvm.umin.i64(i64, i64) +declare i64 @llvm.umax.i64(i64, i64) +declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) \ No 
newline at end of file From 841ae7ca9ebe91ad89826661929acf9aa050fe2c Mon Sep 17 00:00:00 2001 From: Yui5427 <785369607@qq.com> Date: Wed, 27 Aug 2025 18:10:36 +0800 Subject: [PATCH 03/10] Use sd_match --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e8c404f2b471d..05f25b199389e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10096,11 +10096,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0 // fold (xor (umin(x, C), C)) -> select (x < C), xor(x, C), 0 // fold (xor (umax(x, C), C)) -> select (x > C), xor(x, C), 0 - if ((N0.getOpcode() == ISD::SMIN || N0.getOpcode() == ISD::SMAX || - N0.getOpcode() == ISD::UMIN || N0.getOpcode() == ISD::UMAX) && - N0.hasOneUse()) { - SDValue Op0 = N0.getOperand(0); - SDValue Op1 = N0.getOperand(1); + SDValue Op0, Op1; + if ((sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Value(Op1)), + m_SMax(m_Value(Op0), m_Value(Op1)), + m_UMin(m_Value(Op0), m_Value(Op1)), + m_UMax(m_Value(Op0), m_Value(Op1))))))) { if (Op1 == N1) { if (isa(N1) || From 7e1effab1635ed9341cc57ba6a0ca2d4bf40e706 Mon Sep 17 00:00:00 2001 From: Yui5427 <785369607@qq.com> Date: Wed, 27 Aug 2025 22:08:37 +0800 Subject: [PATCH 04/10] Use m_specific and DL --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 70 +++++++++---------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 05f25b199389e..c401841c8c21b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10096,44 +10096,42 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0 // fold (xor 
(umin(x, C), C)) -> select (x < C), xor(x, C), 0 // fold (xor (umax(x, C), C)) -> select (x > C), xor(x, C), 0 - SDValue Op0, Op1; - if ((sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Value(Op1)), - m_SMax(m_Value(Op0), m_Value(Op1)), - m_UMin(m_Value(Op0), m_Value(Op1)), - m_UMax(m_Value(Op0), m_Value(Op1))))))) { - - if (Op1 == N1) { - if (isa(N1) || - ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) { - // For vectors, only optimize when the constant is zero or all-ones to - // avoid generating more instructions - if (VT.isVector()) { - ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (!N1C || (!N1C->isZero() && !N1C->isAllOnes())) - return SDValue(); - } + SDValue Op0; + if ((sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Specific(N1)), + m_SMax(m_Value(Op0), m_Specific(N1)), + m_UMin(m_Value(Op0), m_Specific(N1)), + m_UMax(m_Value(Op0), m_Specific(N1))))))) { + + if (isa(N1) || + ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) { + // For vectors, only optimize when the constant is zero or all-ones to + // avoid generating more instructions + if (VT.isVector()) { + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (!N1C || (!N1C->isZero() && !N1C->isAllOnes())) + return SDValue(); + } - EVT CCVT = getSetCCResultType(VT); - ISD::CondCode CC; - switch (N0.getOpcode()) { - case ISD::SMIN: - CC = ISD::SETLT; - break; - case ISD::SMAX: - CC = ISD::SETGT; - break; - case ISD::UMIN: - CC = ISD::SETULT; - break; - case ISD::UMAX: - CC = ISD::SETUGT; - break; - } - SDValue Cmp = DAG.getSetCC(SDLoc(N), CCVT, Op0, N1, CC); - SDValue XorXC = DAG.getNode(ISD::XOR, SDLoc(N), VT, Op0, N1); - SDValue Zero = DAG.getConstant(0, SDLoc(N), VT); - return DAG.getSelect(SDLoc(N), VT, Cmp, XorXC, Zero); + EVT CCVT = getSetCCResultType(VT); + ISD::CondCode CC; + switch (N0.getOpcode()) { + case ISD::SMIN: + CC = ISD::SETLT; + break; + case ISD::SMAX: + CC = ISD::SETGT; + break; + case ISD::UMIN: + CC = ISD::SETULT; + break; + case ISD::UMAX: + CC = ISD::SETUGT; + 
break; } + SDValue Cmp = DAG.getSetCC(DL, CCVT, Op0, N1, CC); + SDValue XorXC = DAG.getNode(ISD::XOR, DL, VT, Op0, N1); + SDValue Zero = DAG.getConstant(0, DL, VT); + return DAG.getSelect(DL, VT, Cmp, XorXC, Zero); } } From a3a1b2b9309f0aee9de16a6fc1f2263ad1b1b7d6 Mon Sep 17 00:00:00 2001 From: guan jian <148229859+rez5427@users.noreply.github.com> Date: Thu, 28 Aug 2025 00:09:25 +0800 Subject: [PATCH 05/10] Update llvm/test/CodeGen/AArch64/xor-smin-smax.ll Co-authored-by: Matt Arsenault --- llvm/test/CodeGen/AArch64/xor-smin-smax.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll index 74d80eeaefd4a..904397a23afd1 100644 --- a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll +++ b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll @@ -275,4 +275,4 @@ declare i64 @llvm.umax.i64(i64, i64) declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) \ No newline at end of file +declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) From 8fe2c394cb93683ab7c91e4849d3931153d7ee9d Mon Sep 17 00:00:00 2001 From: Yui5427 <785369607@qq.com> Date: Fri, 29 Aug 2025 20:48:32 +0800 Subject: [PATCH 06/10] Use freeze N1 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c401841c8c21b..72ab54cc7220f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10128,8 +10128,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { CC = ISD::SETUGT; break; } - SDValue Cmp = DAG.getSetCC(DL, CCVT, Op0, N1, CC); - SDValue XorXC = DAG.getNode(ISD::XOR, DL, VT, Op0, N1); + SDValue FN1 = DAG.getFreeze(N1); + SDValue Cmp 
= DAG.getSetCC(DL, CCVT, Op0, FN1, CC); + SDValue XorXC = DAG.getNode(ISD::XOR, DL, VT, Op0, FN1); SDValue Zero = DAG.getConstant(0, DL, VT); return DAG.getSelect(DL, VT, Cmp, XorXC, Zero); } From baae52b8a8d43b7d80b28cdcb388768100cbd4cf Mon Sep 17 00:00:00 2001 From: Yui5427 <785369607@qq.com> Date: Thu, 4 Sep 2025 20:47:41 +0800 Subject: [PATCH 07/10] remove redundant bracket --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 72ab54cc7220f..54e05cf1cb352 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10097,10 +10097,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold (xor (umin(x, C), C)) -> select (x < C), xor(x, C), 0 // fold (xor (umax(x, C), C)) -> select (x > C), xor(x, C), 0 SDValue Op0; - if ((sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Specific(N1)), - m_SMax(m_Value(Op0), m_Specific(N1)), - m_UMin(m_Value(Op0), m_Specific(N1)), - m_UMax(m_Value(Op0), m_Specific(N1))))))) { + if (sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Specific(N1)), + m_SMax(m_Value(Op0), m_Specific(N1)), + m_UMin(m_Value(Op0), m_Specific(N1)), + m_UMax(m_Value(Op0), m_Specific(N1)))))) { if (isa(N1) || ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) { From 9d5885b90a6953413d2dd3d5ce44ba4df82b8ece Mon Sep 17 00:00:00 2001 From: rez5427 Date: Mon, 8 Sep 2025 01:19:28 +0800 Subject: [PATCH 08/10] Add legal --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +++++ llvm/test/CodeGen/AArch64/xor-smin-smax.ll | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 54e05cf1cb352..4b20b756f8a15 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ 
-10112,6 +10112,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return SDValue(); } + // Avoid the fold if the minmax operation is legal and select is expensive + if (TLI.isOperationLegal(N0.getOpcode(), VT) && + TLI.isPredictableSelectExpensive()) + return SDValue(); + EVT CCVT = getSetCCResultType(VT); ISD::CondCode CC; switch (N0.getOpcode()) { diff --git a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll index 904397a23afd1..012a2094f8197 100644 --- a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll +++ b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cortex-a53 | FileCheck %s ; Test for DAGCombiner optimization: fold (xor (smin(x, C), C)) -> select (x < C), xor (x, C), 0 @@ -141,7 +141,7 @@ define <4 x i32> @test_smin_vector_neg_one(<4 x i32> %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: cmgt v1.4s, v1.4s, v0.4s -; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b +; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %1 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> ) %retval.0 = xor <4 x i32> %1, @@ -175,7 +175,7 @@ define <4 x i32> @test_smax_vector_neg_one(<4 x i32> %a) { ; CHECK-LABEL: test_smax_vector_neg_one: ; CHECK: // %bb.0: ; CHECK-NEXT: cmge v1.4s, v0.4s, #0 -; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b +; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %1 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> ) %retval.0 = xor <4 x i32> %1, From 1fe7fd5e8f126549d7db20b5a2f6cf78a48c1c0d Mon Sep 17 00:00:00 2001 From: rez5427 Date: Mon, 8 Sep 2025 22:25:50 +0800 Subject: [PATCH 09/10] remove constant zero tests --- llvm/test/CodeGen/AArch64/xor-smin-smax.ll | 82 ---------------------- 1 file changed, 82 deletions(-) diff --git 
a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll index 012a2094f8197..2d6696e1c556e 100644 --- a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll +++ b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll @@ -14,16 +14,6 @@ define i64 @test_smin_neg_one(i64 %a) { ret i64 %retval.0 } -define i64 @test_smin_zero(i64 %a) { -; CHECK-LABEL: test_smin_zero: -; CHECK: // %bb.0: -; CHECK-NEXT: and x0, x0, x0, asr #63 -; CHECK-NEXT: ret - %1 = tail call i64 @llvm.smin.i64(i64 %a, i64 0) - %retval.0 = xor i64 %1, 0 - ret i64 %retval.0 -} - define i64 @test_smin_constant(i64 %a) { ; CHECK-LABEL: test_smin_constant: ; CHECK: // %bb.0: @@ -37,7 +27,6 @@ define i64 @test_smin_constant(i64 %a) { } ; Test for DAGCombiner optimization: fold (xor (smax(x, C), C)) -> select (x > C), xor (x, C), 0 - define i64 @test_smax_neg_one(i64 %a) { ; CHECK-LABEL: test_smax_neg_one: ; CHECK: // %bb.0: @@ -49,16 +38,6 @@ define i64 @test_smax_neg_one(i64 %a) { ret i64 %retval.0 } -define i64 @test_smax_zero(i64 %a) { -; CHECK-LABEL: test_smax_zero: -; CHECK: // %bb.0: -; CHECK-NEXT: bic x0, x0, x0, asr #63 -; CHECK-NEXT: ret - %1 = tail call i64 @llvm.smax.i64(i64 %a, i64 0) - %retval.0 = xor i64 %1, 0 - ret i64 %retval.0 -} - define i64 @test_smax_constant(i64 %a) { ; CHECK-LABEL: test_smax_constant: ; CHECK: // %bb.0: @@ -81,16 +60,6 @@ define i64 @test_umin_neg_one(i64 %a) { ret i64 %retval.0 } -define i64 @test_umin_zero(i64 %a) { -; CHECK-LABEL: test_umin_zero: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, xzr -; CHECK-NEXT: ret - %1 = tail call i64 @llvm.umin.i64(i64 %a, i64 0) - %retval.0 = xor i64 %1, 0 - ret i64 %retval.0 -} - define i64 @test_umin_constant(i64 %a) { ; CHECK-LABEL: test_umin_constant: ; CHECK: // %bb.0: @@ -113,15 +82,6 @@ define i64 @test_umax_neg_one(i64 %a) { ret i64 %retval.0 } -define i64 @test_umax_zero(i64 %a) { -; CHECK-LABEL: test_umax_zero: -; CHECK: // %bb.0: -; CHECK-NEXT: ret - %1 = tail call i64 @llvm.umax.i64(i64 %a, i64 0) - 
%retval.0 = xor i64 %1, 0 - ret i64 %retval.0 -} - define i64 @test_umax_constant(i64 %a) { ; CHECK-LABEL: test_umax_constant: ; CHECK: // %bb.0: @@ -135,7 +95,6 @@ define i64 @test_umax_constant(i64 %a) { } ; Test vector cases - define <4 x i32> @test_smin_vector_neg_one(<4 x i32> %a) { ; CHECK-LABEL: test_smin_vector_neg_one: ; CHECK: // %bb.0: @@ -148,17 +107,6 @@ define <4 x i32> @test_smin_vector_neg_one(<4 x i32> %a) { ret <4 x i32> %retval.0 } -define <4 x i32> @test_smin_vector_zero(<4 x i32> %a) { -; CHECK-LABEL: test_smin_vector_zero: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ret - %1 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> ) - %retval.0 = xor <4 x i32> %1, - ret <4 x i32> %retval.0 -} - define <4 x i32> @test_smin_vector_constant(<4 x i32> %a) { ; CHECK-LABEL: test_smin_vector_constant: ; CHECK: // %bb.0: @@ -182,17 +130,6 @@ define <4 x i32> @test_smax_vector_neg_one(<4 x i32> %a) { ret <4 x i32> %retval.0 } -define <4 x i32> @test_smax_vector_zero(<4 x i32> %a) { -; CHECK-LABEL: test_smax_vector_zero: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ret - %1 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> ) - %retval.0 = xor <4 x i32> %1, - ret <4 x i32> %retval.0 -} - define <4 x i32> @test_smax_vector_constant(<4 x i32> %a) { ; CHECK-LABEL: test_smax_vector_constant: ; CHECK: // %bb.0: @@ -215,16 +152,6 @@ define <4 x i32> @test_umin_vector_neg_one(<4 x i32> %a) { ret <4 x i32> %retval.0 } -define <4 x i32> @test_umin_vector_zero(<4 x i32> %a) { -; CHECK-LABEL: test_umin_vector_zero: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ret - %1 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> ) - %retval.0 = xor <4 x i32> %1, - ret <4 x i32> %retval.0 -} - define <4 x i32> @test_umin_vector_constant(<4 x i32> %a) { ; 
CHECK-LABEL: test_umin_vector_constant: ; CHECK: // %bb.0: @@ -247,15 +174,6 @@ define <4 x i32> @test_umax_vector_neg_one(<4 x i32> %a) { ret <4 x i32> %retval.0 } -define <4 x i32> @test_umax_vector_zero(<4 x i32> %a) { -; CHECK-LABEL: test_umax_vector_zero: -; CHECK: // %bb.0: -; CHECK-NEXT: ret - %1 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> ) - %retval.0 = xor <4 x i32> %1, - ret <4 x i32> %retval.0 -} - define <4 x i32> @test_umax_vector_constant(<4 x i32> %a) { ; CHECK-LABEL: test_umax_vector_constant: ; CHECK: // %bb.0: From 8793ff37cbaa6bd866e32494ba640edb5acd2afd Mon Sep 17 00:00:00 2001 From: rez5427 Date: Tue, 16 Sep 2025 00:19:38 +0800 Subject: [PATCH 10/10] rename xor-smin-smax to xor-min-max --- llvm/test/CodeGen/AArch64/{xor-smin-smax.ll => xor-min-max.ll} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename llvm/test/CodeGen/AArch64/{xor-smin-smax.ll => xor-min-max.ll} (100%) diff --git a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll b/llvm/test/CodeGen/AArch64/xor-min-max.ll similarity index 100% rename from llvm/test/CodeGen/AArch64/xor-smin-smax.ll rename to llvm/test/CodeGen/AArch64/xor-min-max.ll