From c68c3429514f990bc17063cea81f4401ca6821d1 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Mon, 28 Jul 2025 07:32:49 -0700 Subject: [PATCH 1/6] [AArch64] Improve lowering of scalar abs(sub(a, b)). This patch avoids a comparison against zero when lowering abs(sub(a, b)) patterns, instead reusing the condition codes generated by a subs of the operands directly. For example, currently: ``` sxtb w8, w0 sub w8, w8, w1, sxtb cmp w8, #0 cneg w0, w8, pl ``` becomes: ``` sxtb w8, w0 sxtb w9, w1 subs w8, w9, w8 cneg w0, w8, gt ``` Whilst this doesn't decrease the number of instructions utilised, the new version exposes more ILP and uses ``cheaper'' instructions, typically having lower latency and/or higher throughput. This patch also includes a somewhat orthogonal change in performNegCSelCombine, which I included to avoid a code generation regression in CodeGen/AArch64/abd[su]-neg.ll due to the combine negating the operands of the csel, leading to an extra sub instruction. If preferable, I can open a separate PR for this. --- .../Target/AArch64/AArch64ISelLowering.cpp | 33 ++++++--- llvm/test/CodeGen/AArch64/abds-neg.ll | 30 ++++---- llvm/test/CodeGen/AArch64/abds.ll | 74 +++++++++---------- llvm/test/CodeGen/AArch64/abdu-neg.ll | 30 ++++---- llvm/test/CodeGen/AArch64/abdu.ll | 74 +++++++++---------- 5 files changed, 128 insertions(+), 113 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4f9471ca4aa6a7..1b98df3d802de6 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7118,12 +7118,21 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU); SDLoc DL(Op); - SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), - Op.getOperand(0)); - // Generate SUBS & CSEL. - SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), - Op.getOperand(0), DAG.getConstant(0, DL, VT)); - return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg, + SDValue Val = Op.getOperand(0); + SDValue Neg = DAG.getNegative(Val, DL, VT); + SDValue Cmp; + + // For abs(sub(lhs, rhs)), we can compare lhs and rhs directly. This allows + // reusing the subs operation for the calculation and comparison. + if (Val.getOpcode() == ISD::SUB) + Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), + Val.getOperand(0), Val.getOperand(1)); + else + // Otherwise, compare with zero. + Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), Val, + DAG.getConstant(0, DL, VT)); + + return DAG.getNode(AArch64ISD::CSEL, DL, VT, Val, Neg, DAG.getConstant(AArch64CC::PL, DL, MVT::i32), Cmp.getValue(1)); } @@ -20851,11 +20860,17 @@ static SDValue performNegCSelCombine(SDNode *N, SelectionDAG &DAG) { if (!isNegatedInteger(N0) && !isNegatedInteger(N1)) return SDValue(); - SDValue N0N = getNegatedInteger(N0, DAG); - SDValue N1N = getNegatedInteger(N1, DAG); - SDLoc DL(N); EVT VT = CSel.getValueType(); + + // If the operands are negations of each other, reverse them. + if ((isNegatedInteger(N0) && N0.getOperand(1) == N1) || + (isNegatedInteger(N1) && N1.getOperand(1) == N0)) + return DAG.getNode(AArch64ISD::CSEL, DL, VT, N1, N0, CSel.getOperand(2), + CSel.getOperand(3)); + + SDValue N0N = getNegatedInteger(N0, DAG); + SDValue N1N = getNegatedInteger(N1, DAG); return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0N, N1N, CSel.getOperand(2), CSel.getOperand(3)); } diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll index 75247823ee7939..045edeb8aaac98 100644 --- a/llvm/test/CodeGen/AArch64/abds-neg.ll +++ b/llvm/test/CodeGen/AArch64/abds-neg.ll @@ -8,9 +8,9 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: sxtb w8, w1 +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -25,9 +25,9 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: sxth w8, w1 +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -42,9 +42,9 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: sxtb w8, w1 +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -59,9 +59,9 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: sxth w8, w1 +; CHECK-NEXT: sxth w9, w0 +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i16 %a to i64 @@ -92,9 +92,9 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: sxth w8, w1 +; CHECK-NEXT: sxth w9, w0 +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i16 %a to i64 diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll index bbdb1168517100..cad82f2c7114aa 100644 --- a/llvm/test/CodeGen/AArch64/abds.ll +++ b/llvm/test/CodeGen/AArch64/abds.ll @@ -8,9 +8,9 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: sxtb w8, w1 +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -24,9 +24,9 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: sxth w8, w1 +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -40,9 +40,9 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: sxtb w8, w1 +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -56,9 +56,9 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: sxth w8, w1 +; CHECK-NEXT: sxth w9, w0 +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i16 %a to i64 @@ -87,9 +87,9 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: sxth w8, w1 +; CHECK-NEXT: sxth w9, w0 +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i16 %a to i64 @@ -214,9 +214,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_minmax_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: sxtb w8, w1 +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i8 @llvm.smin.i8(i8 %a, i8 %b) @@ -228,9 +228,9 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_minmax_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: sxth w8, w1 +; CHECK-NEXT: sxth w9, w0 +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i16 @llvm.smin.i16(i16 %a, i16 %b) @@ -286,9 +286,9 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_cmp_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: sxtb w8, w1 +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sgt i8 %a, %b @@ -301,9 +301,9 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_cmp_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: sxth w8, w1 +; CHECK-NEXT: sxth w9, w0 +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sge i16 %a, %b @@ -507,11 +507,11 @@ define i64 @vector_legalized(i16 %a, i16 %b) { ; CHECK-LABEL: vector_legalized: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: addp d0, v0.2d -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: sxth w8, w1 +; CHECK-NEXT: sxth w9, w0 +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w8, w8, mi +; CHECK-NEXT: addp d0, v0.2d ; CHECK-NEXT: fmov x9, d0 ; CHECK-NEXT: add x0, x9, x8 ; CHECK-NEXT: ret @@ -532,9 +532,9 @@ define i64 @vector_legalized(i16 %a, i16 %b) { define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_select_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: sxtb w8, w1 +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp slt i8 %a, %b @@ -547,9 +547,9 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_select_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: sxth w8, w1 +; CHECK-NEXT: sxth w9, w0 +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sle i16 %a, %b diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll index d07f099a536ab6..8f79a46ba78016 100644 --- a/llvm/test/CodeGen/AArch64/abdu-neg.ll +++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll @@ -8,9 +8,9 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: and w8, w1, #0xff +; CHECK-NEXT: and w9, w0, #0xff +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -25,9 +25,9 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: and w8, w1, #0xffff +; CHECK-NEXT: and w9, w0, #0xff +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -42,9 +42,9 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: and w8, w1, #0xff +; CHECK-NEXT: and w9, w0, #0xff +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -59,9 +59,9 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: and w8, w1, #0xffff +; CHECK-NEXT: and w9, w0, #0xffff +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i16 %a to i64 @@ -92,9 +92,9 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: and w8, w1, #0xffff +; CHECK-NEXT: and w9, w0, #0xffff +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i16 %a to i64 diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll index 1045ee20dc7345..bb60f6d0c94cde 100644 --- a/llvm/test/CodeGen/AArch64/abdu.ll +++ b/llvm/test/CodeGen/AArch64/abdu.ll @@ -8,9 +8,9 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: and w8, w1, #0xff +; CHECK-NEXT: and w9, w0, #0xff +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -24,9 +24,9 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: and w8, w1, #0xffff +; CHECK-NEXT: and w9, w0, #0xff +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -40,9 +40,9 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: and w8, w1, #0xff +; CHECK-NEXT: and w9, w0, #0xff +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -56,9 +56,9 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: and w8, w1, #0xffff +; CHECK-NEXT: and w9, w0, #0xffff +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i16 %a to i64 @@ -87,9 +87,9 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: and w8, w1, #0xffff +; CHECK-NEXT: and w9, w0, #0xffff +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i16 %a to i64 @@ -218,9 +218,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_minmax_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: and w8, w1, #0xff +; CHECK-NEXT: and w9, w0, #0xff +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i8 @llvm.umin.i8(i8 %a, i8 %b) @@ -232,9 +232,9 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_minmax_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: and w8, w1, #0xffff +; CHECK-NEXT: and w9, w0, #0xffff +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i16 @llvm.umin.i16(i16 %a, i16 %b) @@ -292,9 +292,9 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_cmp_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: and w8, w1, #0xff +; CHECK-NEXT: and w9, w0, #0xff +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ugt i8 %a, %b @@ -307,9 +307,9 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_cmp_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: and w8, w1, #0xffff +; CHECK-NEXT: and w9, w0, #0xffff +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp uge i16 %a, %b @@ -372,11 +372,11 @@ define i64 @vector_legalized(i16 %a, i16 %b) { ; CHECK-LABEL: vector_legalized: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: and w8, w1, #0xffff +; CHECK-NEXT: and w9, w0, #0xffff +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w8, w8, mi +; CHECK-NEXT: addp d0, v0.2d ; CHECK-NEXT: fmov x9, d0 ; CHECK-NEXT: add x0, x9, x8 ; CHECK-NEXT: ret @@ -397,9 +397,9 @@ define i64 @vector_legalized(i16 %a, i16 %b) { define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_select_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: and w8, w1, #0xff +; CHECK-NEXT: and w9, w0, #0xff +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ult i8 %a, %b @@ -412,9 +412,9 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_select_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: and w8, w1, #0xffff +; CHECK-NEXT: and w9, w0, #0xffff +; CHECK-NEXT: subs w8, w9, w8 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ule i16 %a, %b From 73365468b38bd9dc785eb363d4d976e9fffeb7db Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Wed, 30 Jul 2025 08:48:23 -0700 Subject: [PATCH 2/6] Combine subs(sub(a,b), 0) -> subs(a,b) in performCSELCombine. --- .../Target/AArch64/AArch64ISelLowering.cpp | 39 ++++++++++++------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 1b98df3d802de6..4e2efe71108648 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7118,21 +7118,12 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU); SDLoc DL(Op); - SDValue Val = Op.getOperand(0); - SDValue Neg = DAG.getNegative(Val, DL, VT); - SDValue Cmp; - - // For abs(sub(lhs, rhs)), we can compare lhs and rhs directly. This allows - // reusing the subs operation for the calculation and comparison. - if (Val.getOpcode() == ISD::SUB) - Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), - Val.getOperand(0), Val.getOperand(1)); - else - // Otherwise, compare with zero. - Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), Val, - DAG.getConstant(0, DL, VT)); - - return DAG.getNode(AArch64ISD::CSEL, DL, VT, Val, Neg, + SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), + Op.getOperand(0)); + // Generate SUBS & CSEL. + SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), + Op.getOperand(0), DAG.getConstant(0, DL, VT)); + return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg, DAG.getConstant(AArch64CC::PL, DL, MVT::i32), Cmp.getValue(1)); } @@ -25479,6 +25470,24 @@ static SDValue performCSELCombine(SDNode *N, } } + // CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) if cc doesn't + // use overflow flags to avoid the comparison with zero. + if (Cond.getOpcode() == AArch64ISD::SUBS && + isNullConstant(Cond.getOperand(1))) { + SDValue Sub = Cond.getOperand(0); + AArch64CC::CondCode CC = + static_cast(N->getConstantOperandVal(2)); + if (Sub.getOpcode() == ISD::SUB && + (CC == AArch64CC::EQ || CC == AArch64CC::NE || CC == AArch64CC::MI || + CC == AArch64CC::PL)) { + SDLoc DL(N); + SDValue Subs = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(), + Sub.getOperand(0), Sub.getOperand(1)); + return DAG.getNode(AArch64ISD::CSEL, DL, N->getVTList(), N->getOperand(0), + N->getOperand(1), N->getOperand(2), Subs.getValue(1)); + } + } + // CSEL (LASTB P, Z), X, NE(ANY P) -> CLASTB P, X, Z if (SDValue CondLast = foldCSELofLASTB(N, DAG)) return CondLast; From e8efeabf751dd2912fb14b14f667ed12ee1f8ca9 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Wed, 30 Jul 2025 10:16:07 -0700 Subject: [PATCH 3/6] Update tests after rebase. --- llvm/test/CodeGen/AArch64/abds-neg.ll | 24 ++++++++++++------------ llvm/test/CodeGen/AArch64/abdu-neg.ll | 24 ++++++++++++------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll index 045edeb8aaac98..3c111e1dd52729 100644 --- a/llvm/test/CodeGen/AArch64/abds-neg.ll +++ b/llvm/test/CodeGen/AArch64/abds-neg.ll @@ -77,8 +77,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: subs w8, w1, w8 -; CHECK-NEXT: cneg w0, w8, ge +; CHECK-NEXT: subs w8, w8, w1 +; CHECK-NEXT: cneg w0, w8, gt ; CHECK-NEXT: ret %aext = sext i16 %a to i64 %bext = sext i32 %b to i64 @@ -109,8 +109,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w1, w0 -; CHECK-NEXT: cneg w0, w8, ge +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, gt ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -125,8 +125,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: subs w8, w8, w0 -; CHECK-NEXT: cneg w0, w8, ge +; CHECK-NEXT: subs w8, w0, w8 +; CHECK-NEXT: cneg w0, w8, gt ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i16 %b to i64 @@ -140,8 +140,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w1, w0 -; CHECK-NEXT: cneg w0, w8, ge +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, gt ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -155,8 +155,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x1, x0 -; CHECK-NEXT: cneg x0, x8, ge +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, gt ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 @@ -170,8 +170,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x1, x0 -; CHECK-NEXT: cneg x0, x8, ge +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, gt ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll index 8f79a46ba78016..1592009d9519a4 100644 --- a/llvm/test/CodeGen/AArch64/abdu-neg.ll +++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll @@ -77,8 +77,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: subs w8, w1, w8 -; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: subs w8, w8, w1 +; CHECK-NEXT: cneg w0, w8, hi ; CHECK-NEXT: ret %aext = zext i16 %a to i64 %bext = zext i32 %b to i64 @@ -109,8 +109,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w1, w0 -; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, hi ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -125,8 +125,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: subs w8, w8, w0 -; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: subs w8, w0, w8 +; CHECK-NEXT: cneg w0, w8, hi ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i16 %b to i64 @@ -140,8 +140,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w1, w0 -; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, hi ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -155,8 +155,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x1, x0 -; CHECK-NEXT: cneg x0, x8, hs +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, hi ; CHECK-NEXT: ret %aext = zext i64 %a to i128 %bext = zext i64 %b to i128 @@ -170,8 +170,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x1, x0 -; CHECK-NEXT: cneg x0, x8, hs +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, hi ; CHECK-NEXT: ret %aext = zext i64 %a to i128 %bext = zext i64 %b to i128 From cba370bf5e46e712b2d898a429138c561f6a2093 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Wed, 30 Jul 2025 10:19:14 -0700 Subject: [PATCH 4/6] Simplify performNegCSelCombine changes. Instead of reversing the operands when they are negations of each other, peek through already existing negations. This has the same effect when the operands are negations of each other but should be more generally useful. --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4e2efe71108648..c5a42763066414 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -20822,6 +20822,9 @@ static bool isNegatedInteger(SDValue Op) { } static SDValue getNegatedInteger(SDValue Op, SelectionDAG &DAG) { + if (isNegatedInteger(Op)) + return Op.getOperand(1); + SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue Zero = DAG.getConstant(0, DL, VT); @@ -20851,17 +20854,11 @@ static SDValue performNegCSelCombine(SDNode *N, SelectionDAG &DAG) { if (!isNegatedInteger(N0) && !isNegatedInteger(N1)) return SDValue(); - SDLoc DL(N); - EVT VT = CSel.getValueType(); - - // If the operands are negations of each other, reverse them. - if ((isNegatedInteger(N0) && N0.getOperand(1) == N1) || - (isNegatedInteger(N1) && N1.getOperand(1) == N0)) - return DAG.getNode(AArch64ISD::CSEL, DL, VT, N1, N0, CSel.getOperand(2), - CSel.getOperand(3)); - SDValue N0N = getNegatedInteger(N0, DAG); SDValue N1N = getNegatedInteger(N1, DAG); + + SDLoc DL(N); + EVT VT = CSel.getValueType(); return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0N, N1N, CSel.getOperand(2), CSel.getOperand(3)); } From 78619a52ac8ef30ba9978a6651c7b7bc44475896 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Thu, 31 Jul 2025 04:50:00 -0700 Subject: [PATCH 5/6] Combine Cond and Sub with Subs. --- .../Target/AArch64/AArch64ISelLowering.cpp | 13 +- llvm/test/CodeGen/AArch64/abds-neg.ll | 25 ++-- llvm/test/CodeGen/AArch64/abds.ll | 62 ++++------ llvm/test/CodeGen/AArch64/abdu-neg.ll | 25 ++-- llvm/test/CodeGen/AArch64/abdu.ll | 60 ++++------ .../CodeGen/AArch64/csel-subs-dag-combine.ll | 112 ++++++++++++++++++ llvm/test/CodeGen/AArch64/midpoint-int.ll | 28 ++--- 7 files changed, 202 insertions(+), 123 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c5a42763066414..1811af8c5cccce 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -25468,8 +25468,12 @@ static SDValue performCSELCombine(SDNode *N, } // CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) if cc doesn't - // use overflow flags to avoid the comparison with zero. - if (Cond.getOpcode() == AArch64ISD::SUBS && + // use overflow flags, to avoid the comparison with zero. In case of success, + // this also replaces the original SUB(x,y) with the newly created SUBS(x,y). + // NOTE: Perhaps in the future use performFlagSettingCombine to replace SUB + // nodes with their SUBS equivalent as is already done for other flag-setting + // operators, in which case doing the replacement here becomes redundant. + if (Cond.getOpcode() == AArch64ISD::SUBS && Cond->hasNUsesOfValue(1, 1) && isNullConstant(Cond.getOperand(1))) { SDValue Sub = Cond.getOperand(0); AArch64CC::CondCode CC = @@ -25480,8 +25484,9 @@ static SDValue performCSELCombine(SDNode *N, SDLoc DL(N); SDValue Subs = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(), Sub.getOperand(0), Sub.getOperand(1)); - return DAG.getNode(AArch64ISD::CSEL, DL, N->getVTList(), N->getOperand(0), - N->getOperand(1), N->getOperand(2), Subs.getValue(1)); + DCI.CombineTo(Sub.getNode(), Subs); + DCI.CombineTo(Cond.getNode(), Subs, Subs.getValue(1)); + return SDValue(N, 0); } } diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll index 3c111e1dd52729..b7d1b3e323d59f 100644 --- a/llvm/test/CodeGen/AArch64/abds-neg.ll +++ b/llvm/test/CodeGen/AArch64/abds-neg.ll @@ -8,9 +8,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w1 -; CHECK-NEXT: sxtb w9, w0 -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -25,9 +24,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: sxtb w9, w0 -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -42,9 +40,8 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w1 -; CHECK-NEXT: sxtb w9, w0 -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -59,9 +56,8 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: sxth w9, w0 -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i16 %a to i64 @@ -92,9 +88,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: sxth w9, w0 -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i16 %a to i64 diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll index cad82f2c7114aa..bf52e71ec21fe1 100644 --- a/llvm/test/CodeGen/AArch64/abds.ll +++ b/llvm/test/CodeGen/AArch64/abds.ll @@ -8,9 +8,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w1 -; CHECK-NEXT: sxtb w9, w0 -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -24,9 +23,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: sxtb w9, w0 -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -40,9 +38,8 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w1 -; CHECK-NEXT: sxtb w9, w0 -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -56,9 +53,8 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: sxth w9, w0 -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i16 %a to i64 @@ -87,9 +83,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: sxth w9, w0 -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i16 %a to i64 @@ -214,9 +209,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_minmax_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w1 -; CHECK-NEXT: sxtb w9, w0 -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i8 @llvm.smin.i8(i8 %a, i8 %b) @@ -228,9 +222,8 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_minmax_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: sxth w9, w0 -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i16 @llvm.smin.i16(i16 %a, i16 %b) @@ -286,9 +279,8 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_cmp_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w1 -; CHECK-NEXT: sxtb w9, w0 -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sgt i8 %a, %b @@ -301,9 +293,8 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_cmp_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: sxth w9, w0 -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sge i16 %a, %b @@ -507,11 +498,10 @@ define i64 @vector_legalized(i16 %a, i16 %b) { ; CHECK-LABEL: vector_legalized: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: sxth w9, w0 -; CHECK-NEXT: subs w8, w9, w8 -; CHECK-NEXT: cneg w8, w8, mi +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: cneg w8, w8, mi ; CHECK-NEXT: fmov x9, d0 ; CHECK-NEXT: add x0, x9, x8 ; CHECK-NEXT: ret @@ -532,9 +522,8 @@ define i64 @vector_legalized(i16 %a, i16 %b) { define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_select_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w1 -; CHECK-NEXT: sxtb w9, w0 -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp slt i8 %a, %b @@ -547,9 +536,8 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_select_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: sxth w9, w0 -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sle i16 %a, %b diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll index 1592009d9519a4..943d02cc29bf1e 100644 --- a/llvm/test/CodeGen/AArch64/abdu-neg.ll +++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll @@ -8,9 +8,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xff -; CHECK-NEXT: and w9, w0, #0xff -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -25,9 +24,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: and w9, w0, #0xff -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -42,9 +40,8 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xff -; CHECK-NEXT: and w9, w0, #0xff -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -59,9 +56,8 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: and w9, w0, #0xffff -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i16 %a to i64 @@ -92,9 +88,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: and w9, w0, #0xffff -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i16 %a to i64 diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll index bb60f6d0c94cde..8d2b0b0742d7de 100644 --- a/llvm/test/CodeGen/AArch64/abdu.ll +++ b/llvm/test/CodeGen/AArch64/abdu.ll @@ -8,9 +8,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xff -; CHECK-NEXT: and w9, w0, #0xff -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -24,9 +23,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: and w9, w0, #0xff -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -40,9 +38,8 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xff -; CHECK-NEXT: and w9, w0, #0xff -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -56,9 +53,8 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: and w9, w0, #0xffff -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i16 %a to i64 @@ -87,9 +83,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: and w9, w0, #0xffff -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i16 %a to i64 @@ -218,9 +213,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_minmax_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xff -; CHECK-NEXT: and w9, w0, #0xff -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i8 @llvm.umin.i8(i8 %a, i8 %b) @@ -232,9 +226,8 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_minmax_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: and w9, w0, #0xffff -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i16 @llvm.umin.i16(i16 %a, i16 %b) @@ -292,9 +285,8 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_cmp_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xff -; CHECK-NEXT: and w9, w0, #0xff -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ugt i8 %a, %b @@ -307,9 +299,8 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_cmp_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: and w9, w0, #0xffff -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp uge i16 %a, %b @@ -372,9 +363,8 @@ define i64 @vector_legalized(i16 %a, i16 %b) { ; CHECK-LABEL: vector_legalized: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: and w9, w0, #0xffff -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w8, w8, mi ; CHECK-NEXT: addp d0, v0.2d ; CHECK-NEXT: fmov x9, d0 @@ -397,9 +387,8 @@ define i64 @vector_legalized(i16 %a, i16 %b) { define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_select_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xff -; CHECK-NEXT: and w9, w0, #0xff -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ult i8 %a, %b @@ -412,9 +401,8 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_select_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: and w9, w0, #0xffff -; CHECK-NEXT: subs w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ule i16 %a, %b diff --git a/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll b/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll new file mode 100644 index 00000000000000..5036be9c45e69f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll @@ -0,0 +1,112 @@ +; RUN: llc -debug-only=isel -o /dev/null < %s 2>&1 | FileCheck %s + +; REQUIRES: asserts + +; These tests ensure that we don't combine +; CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) +; if the flags set by SUBS(SUB(x,y), 0) have more than one use. +; +; This restriction exists because combining SUBS(SUB(x,y), 0) -> SUBS(x,y) is +; only valid if there are no users of the overflow flags (C/V) generated by the +; SUBS. Currently, we only check the flags used by the CSEL, and therefore we +; conservatively reject cases where the SUBS's flags have other uses. + +target triple = "aarch64-unknown-linux-gnu" + +; CHECK-LABEL: Legalized selection DAG: %bb.0 'combine_subs:' +; CHECK-NEXT: SelectionDAG has 13 nodes: +; CHECK-NEXT: t0: ch,glue = EntryToken +; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 +; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 +; CHECK-NEXT: t5: i32 = sub t2, t4 +; CHECK-NEXT: t14: i32,i32 = AArch64ISD::SUBS t5, Constant:i32<0> +; CHECK-NEXT: t16: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t14:1 +; CHECK-NEXT: t11: ch,glue = CopyToReg t0, Register:i32 $w0, t16 +; CHECK-NEXT: t12: ch = AArch64ISD::RET_GLUE t11, Register:i32 $w0, t11:1 + +; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'combine_subs:' +; CHECK-NEXT: SelectionDAG has 11 nodes: +; CHECK-NEXT: t0: ch,glue = EntryToken +; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 +; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 +; CHECK-NEXT: t18: i32,i32 = AArch64ISD::SUBS t2, t4 +; CHECK-NEXT: t16: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t18:1 +; CHECK-NEXT: t11: ch,glue = CopyToReg t0, Register:i32 $w0, t16 +; CHECK-NEXT: t12: ch = AArch64ISD::RET_GLUE t11, Register:i32 $w0, t11:1 + +define i32 @combine_subs(i32 %a, i32 %b) { + %sub = sub i32 %a, %b + %cc = icmp ne i32 %sub, 0 + %sel = select i1 %cc, i32 %a, i32 %b + ret i32 %sel +} + +; CHECK-LABEL: Legalized selection DAG: %bb.0 'combine_subs_multiple_sub_uses:' +; CHECK-NEXT: SelectionDAG has 14 nodes: +; CHECK-NEXT: t0: ch,glue = EntryToken +; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 +; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 +; CHECK-NEXT: t5: i32 = sub t2, t4 +; CHECK-NEXT: t15: i32,i32 = AArch64ISD::SUBS t5, Constant:i32<0> +; CHECK-NEXT: t17: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t15:1 +; CHECK-NEXT: t10: i32 = add t17, t5 +; CHECK-NEXT: t12: ch,glue = CopyToReg t0, Register:i32 $w0, t10 +; CHECK-NEXT: t13: ch = AArch64ISD::RET_GLUE t12, Register:i32 $w0, t12:1 + +; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'combine_subs_multiple_sub_uses:' +; CHECK-NEXT: SelectionDAG has 12 nodes: +; CHECK-NEXT: t0: ch,glue = EntryToken +; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 +; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 +; CHECK-NEXT: t17: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t19:1 +; CHECK-NEXT: t10: i32 = add t17, t19 +; CHECK-NEXT: t12: ch,glue = CopyToReg t0, Register:i32 $w0, t10 +; CHECK-NEXT: t19: i32,i32 = AArch64ISD::SUBS t2, t4 +; CHECK-NEXT: t13: ch = AArch64ISD::RET_GLUE t12, Register:i32 $w0, t12:1 + +define i32 @combine_subs_multiple_sub_uses(i32 %a, i32 %b) { + %sub = sub i32 %a, %b + %cc = icmp ne i32 %sub, 0 + %sel = select i1 %cc, i32 %a, i32 %b + %add = add i32 %sel, %sub + ret i32 %add +} + +; CHECK-LABEL: Legalized selection DAG: %bb.0 'do_not_combine_subs_multiple_flag_uses:' +; CHECK-NEXT: SelectionDAG has 19 nodes: +; CHECK-NEXT: t0: ch,glue = EntryToken +; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 +; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 +; CHECK-NEXT: t24: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t21:1 +; CHECK-NEXT: t6: i32,ch = CopyFromReg t0, Register:i32 %2 +; CHECK-NEXT: t8: i32,ch = CopyFromReg t0, Register:i32 %3 +; CHECK-NEXT: t23: i32 = AArch64ISD::CSEL t6, t8, Constant:i32<1>, t21:1 +; CHECK-NEXT: t15: i32 = add t24, t23 +; CHECK-NEXT: t17: ch,glue = CopyToReg t0, Register:i32 $w0, t15 +; CHECK-NEXT: t9: i32 = sub t2, t4 +; CHECK-NEXT: t21: i32,i32 = AArch64ISD::SUBS t9, Constant:i32<0> +; CHECK-NEXT: t18: ch = AArch64ISD::RET_GLUE t17, Register:i32 $w0, t17:1 + +; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'do_not_combine_subs_multiple_flag_uses:' +; CHECK-NEXT: SelectionDAG has 19 nodes: +; CHECK-NEXT: t0: ch,glue = EntryToken +; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 +; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 +; CHECK-NEXT: t24: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t21:1 +; CHECK-NEXT: t6: i32,ch = CopyFromReg t0, Register:i32 %2 +; CHECK-NEXT: t8: i32,ch = CopyFromReg t0, Register:i32 %3 +; CHECK-NEXT: t23: i32 = AArch64ISD::CSEL t6, t8, Constant:i32<1>, t21:1 +; CHECK-NEXT: t15: i32 = add t24, t23 +; CHECK-NEXT: t17: ch,glue = CopyToReg t0, Register:i32 $w0, t15 +; CHECK-NEXT: t9: i32 = sub t2, t4 +; CHECK-NEXT: t21: i32,i32 = AArch64ISD::SUBS t9, Constant:i32<0> +; CHECK-NEXT: t18: ch = AArch64ISD::RET_GLUE t17, Register:i32 $w0, t17:1 + +define i32 @do_not_combine_subs_multiple_flag_uses(i32 %a, i32 %b, i32 %c, i32 %d) { + %sub = sub i32 %a, %b + %cc = icmp ne i32 %sub, 0 + %sel = select i1 %cc, i32 %a, i32 %b + %other = select i1 %cc, i32 %c, i32 %d + %add = add i32 %sel, %other + ret i32 %add +} diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll index 15c1dffae749e9..d881e998db2bc0 100644 --- a/llvm/test/CodeGen/AArch64/midpoint-int.ll +++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll @@ -255,12 +255,11 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind { ; CHECK-LABEL: scalar_i16_signed_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w9, w1 -; CHECK-NEXT: sxth w10, w0 +; CHECK-NEXT: sxth w9, w0 ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: subs w9, w10, w9 -; CHECK-NEXT: cneg w9, w9, mi +; CHECK-NEXT: subs w9, w9, w1, sxth ; CHECK-NEXT: cneg w8, w8, le +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret @@ -278,12 +277,11 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind { define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind { ; CHECK-LABEL: scalar_i16_unsigned_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: and w9, w1, #0xffff -; CHECK-NEXT: and w10, w0, #0xffff +; CHECK-NEXT: and w9, w0, #0xffff ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: subs w9, w10, w9 -; CHECK-NEXT: cneg w9, w9, mi +; CHECK-NEXT: subs w9, w9, w1, uxth ; CHECK-NEXT: cneg w8, w8, ls +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret @@ -382,12 +380,11 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { ; CHECK-LABEL: scalar_i8_signed_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w9, w1 -; CHECK-NEXT: sxtb w10, w0 +; CHECK-NEXT: sxtb w9, w0 ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: subs w9, w10, w9 -; CHECK-NEXT: cneg w9, w9, mi +; CHECK-NEXT: subs w9, w9, w1, sxtb ; CHECK-NEXT: cneg w8, w8, le +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret @@ -405,12 +402,11 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { ; CHECK-LABEL: scalar_i8_unsigned_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: and w9, w1, #0xff -; CHECK-NEXT: and w10, w0, #0xff +; CHECK-NEXT: and w9, w0, #0xff ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: subs w9, w10, w9 -; CHECK-NEXT: cneg w9, w9, mi +; CHECK-NEXT: subs w9, w9, w1, uxtb ; CHECK-NEXT: cneg w8, w8, ls +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret From 1741900dc4fbe023b63dea579d979b93289f7da0 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Thu, 31 Jul 2025 06:32:43 -0700 Subject: [PATCH 6/6] Revert performNegCSelCombine-related changes. By combining the original SUB node to the SUBS created when folding subs(sub(a,b), 0) -> subs(a,b), we no longer create ``swapped'' SUB nodes, such as sub(b,a), after performNegCSelCombine. Therefore, the changes to avoid code gen regressions in some cases are no longer required. --- .../Target/AArch64/AArch64ISelLowering.cpp | 3 --- llvm/test/CodeGen/AArch64/abds-neg.ll | 24 +++++++++---------- llvm/test/CodeGen/AArch64/abdu-neg.ll | 24 +++++++++---------- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 1811af8c5cccce..c790df26bc8f9d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -20822,9 +20822,6 @@ static bool isNegatedInteger(SDValue Op) { } static SDValue getNegatedInteger(SDValue Op, SelectionDAG &DAG) { - if (isNegatedInteger(Op)) - return Op.getOperand(1); - SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue Zero = DAG.getConstant(0, DL, VT); diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll index b7d1b3e323d59f..02c76ba7343a09 100644 --- a/llvm/test/CodeGen/AArch64/abds-neg.ll +++ b/llvm/test/CodeGen/AArch64/abds-neg.ll @@ -73,8 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: subs w8, w8, w1 -; CHECK-NEXT: cneg w0, w8, gt +; CHECK-NEXT: subs w8, w1, w8 +; CHECK-NEXT: cneg w0, w8, ge ; CHECK-NEXT: ret %aext = sext i16 %a to i64 %bext = sext i32 %b to i64 @@ -104,8 +104,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, gt +; CHECK-NEXT: subs w8, w1, w0 +; CHECK-NEXT: cneg w0, w8, ge ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -120,8 +120,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: subs w8, w0, w8 -; CHECK-NEXT: cneg w0, w8, gt +; CHECK-NEXT: subs w8, w8, w0 +; CHECK-NEXT: cneg w0, w8, ge ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i16 %b to i64 @@ -135,8 +135,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, gt +; CHECK-NEXT: subs w8, w1, w0 +; CHECK-NEXT: cneg w0, w8, ge ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -150,8 +150,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, gt +; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: cneg x0, x8, ge ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 @@ -165,8 +165,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, gt +; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: cneg x0, x8, ge ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll index 943d02cc29bf1e..400031b64cb84f 100644 --- a/llvm/test/CodeGen/AArch64/abdu-neg.ll +++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll @@ -73,8 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: subs w8, w8, w1 -; CHECK-NEXT: cneg w0, w8, hi +; CHECK-NEXT: subs w8, w1, w8 +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i16 %a to i64 %bext = zext i32 %b to i64 @@ -104,8 +104,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, hi +; CHECK-NEXT: subs w8, w1, w0 +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -120,8 +120,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: subs w8, w0, w8 -; CHECK-NEXT: cneg w0, w8, hi +; CHECK-NEXT: subs w8, w8, w0 +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i16 %b to i64 @@ -135,8 +135,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, hi +; CHECK-NEXT: subs w8, w1, w0 +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -150,8 +150,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, hi +; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: cneg x0, x8, hs ; CHECK-NEXT: ret %aext = zext i64 %a to i128 %bext = zext i64 %b to i128 @@ -165,8 +165,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, hi +; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: cneg x0, x8, hs ; CHECK-NEXT: ret %aext = zext i64 %a to i128 %bext = zext i64 %b to i128