From abc561de981f9bccee54cb0a8bba76c1ae34bd05 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 4 Jul 2025 14:26:40 +0100 Subject: [PATCH] [DAG] foldABSToABD - fallback to value tracking if the (ABS (SUB LHS, RHS)) operands aren't extended ISD::ABDS can be used if the subtraction will not overflow (this is an extension to handle cases where the NSW flag has been lost) ISD::ABDU can be used if both operands have at least 1 zero sign bit. Fixes #147049 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 15 ++++-- llvm/test/CodeGen/AArch64/abd-combine.ll | 53 +++++++------------ llvm/test/CodeGen/AArch64/sve-abd.ll | 3 +- llvm/test/CodeGen/RISCV/rvv/abd.ll | 10 ++-- 4 files changed, 36 insertions(+), 45 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 586eb2f3cf45e..6b5d19458c7fd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11402,18 +11402,25 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) { SDValue AbsOp0 = N->getOperand(0); unsigned Opc0 = Op0.getOpcode(); - // Check if the operands of the sub are (zero|sign)-extended. - // TODO: Should we use ValueTracking instead? + // Check if the operands of the sub are (zero|sign)-extended, otherwise + // fallback to ValueTracking. if (Opc0 != Op1.getOpcode() || (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND && Opc0 != ISD::SIGN_EXTEND_INREG)) { // fold (abs (sub nsw x, y)) -> abds(x, y) // Don't fold this for unsupported types as we lose the NSW handling. 
- if (AbsOp0->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) && - TLI.preferABDSToABSWithNSW(VT)) { + if (hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) && + (AbsOp0->getFlags().hasNoSignedWrap() || + DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) { SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1); return DAG.getZExtOrTrunc(ABD, DL, SrcVT); } + // fold (abs (sub x, y)) -> abdu(x, y) + if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) && + DAG.SignBitIsZero(Op1)) { + SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1); + return DAG.getZExtOrTrunc(ABD, DL, SrcVT); + } return SDValue(); } diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll index e48680f4be98b..d0257890d2c43 100644 --- a/llvm/test/CodeGen/AArch64/abd-combine.ll +++ b/llvm/test/CodeGen/AArch64/abd-combine.ll @@ -18,13 +18,11 @@ define <8 x i16> @abdu_const(<8 x i16> %src1) { ; CHECK-LABEL: abdu_const: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: sub v1.4s, v2.4s, v1.4s -; CHECK-NEXT: abs v1.4s, v1.4s -; CHECK-NEXT: abs v0.4s, v0.4s -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ushll v2.4s, v0.4h, #0 +; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0 +; CHECK-NEXT: uabd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: uabd v1.4s, v2.4s, v1.4s +; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h ; CHECK-NEXT: ret %zextsrc1 = zext <8 x i16> %src1 to <8 x i32> %sub = sub <8 x i32> %zextsrc1, @@ -37,10 +35,10 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) { ; CHECK-LABEL: abdu_const_lhs: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: usubw v2.4s, v1.4s, v0.4h -; CHECK-NEXT: usubw2 v0.4s, v1.4s, v0.8h -; CHECK-NEXT: abs v0.4s, v0.4s -; CHECK-NEXT: abs v1.4s, v2.4s +; CHECK-NEXT: ushll v2.4s, v0.4h, #0 +; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0 +; CHECK-NEXT: uabd v0.4s, v0.4s, v1.4s +; 
CHECK-NEXT: uabd v1.4s, v2.4s, v1.4s ; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h ; CHECK-NEXT: ret %zextsrc1 = zext <8 x i16> %src1 to <8 x i32> @@ -53,13 +51,6 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) { define <8 x i16> @abdu_const_zero(<8 x i16> %src1) { ; CHECK-LABEL: abdu_const_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: ushll v2.4s, v0.4h, #0 -; CHECK-NEXT: usubw2 v0.4s, v1.4s, v0.8h -; CHECK-NEXT: neg v1.4s, v2.4s -; CHECK-NEXT: abs v0.4s, v0.4s -; CHECK-NEXT: abs v1.4s, v1.4s -; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h ; CHECK-NEXT: ret %zextsrc1 = zext <8 x i16> %src1 to <8 x i32> %sub = sub <8 x i32> , %zextsrc1 @@ -328,13 +319,11 @@ define <8 x i16> @abds_const(<8 x i16> %src1) { ; CHECK-LABEL: abds_const: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0 -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: sub v1.4s, v2.4s, v1.4s -; CHECK-NEXT: abs v1.4s, v1.4s -; CHECK-NEXT: abs v0.4s, v0.4s -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: sshll v2.4s, v0.4h, #0 +; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0 +; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: sabd v1.4s, v2.4s, v1.4s +; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h ; CHECK-NEXT: ret %zextsrc1 = sext <8 x i16> %src1 to <8 x i32> %sub = sub <8 x i32> %zextsrc1, @@ -347,10 +336,10 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) { ; CHECK-LABEL: abds_const_lhs: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: ssubw v2.4s, v1.4s, v0.4h -; CHECK-NEXT: ssubw2 v0.4s, v1.4s, v0.8h -; CHECK-NEXT: abs v0.4s, v0.4s -; CHECK-NEXT: abs v1.4s, v2.4s +; CHECK-NEXT: sshll v2.4s, v0.4h, #0 +; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0 +; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: sabd v1.4s, v2.4s, v1.4s ; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h ; CHECK-NEXT: ret %zextsrc1 = sext <8 x i16> %src1 to <8 x i32> @@ -363,10 +352,8 @@ define <8 x i16> @abds_const_lhs(<8 x i16> 
%src1) { define <8 x i16> @abds_const_zero(<8 x i16> %src1) { ; CHECK-LABEL: abds_const_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: sshll v2.4s, v0.4h, #0 -; CHECK-NEXT: ssubw2 v0.4s, v1.4s, v0.8h -; CHECK-NEXT: neg v1.4s, v2.4s +; CHECK-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0 ; CHECK-NEXT: abs v0.4s, v0.4s ; CHECK-NEXT: abs v1.4s, v1.4s ; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h diff --git a/llvm/test/CodeGen/AArch64/sve-abd.ll b/llvm/test/CodeGen/AArch64/sve-abd.ll index 72790155d046f..92bbd4e7f2759 100644 --- a/llvm/test/CodeGen/AArch64/sve-abd.ll +++ b/llvm/test/CodeGen/AArch64/sve-abd.ll @@ -283,8 +283,7 @@ define @uabd_non_matching_promotion( %a, %a to %b.zext = sext %b to diff --git a/llvm/test/CodeGen/RISCV/rvv/abd.ll b/llvm/test/CodeGen/RISCV/rvv/abd.ll index 583d872238df7..be4292c9902eb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/abd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/abd.ll @@ -316,12 +316,10 @@ define @uabd_non_matching_promotion( %a, %a to %b.zext = sext %b to