diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 55f994f357b75..d4ad4d3a09381 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11402,18 +11402,25 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
   SDValue AbsOp0 = N->getOperand(0);
   unsigned Opc0 = Op0.getOpcode();
 
-  // Check if the operands of the sub are (zero|sign)-extended.
-  // TODO: Should we use ValueTracking instead?
+  // Check if the operands of the sub are (zero|sign)-extended; otherwise
+  // fall back to ValueTracking.
   if (Opc0 != Op1.getOpcode() ||
       (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
        Opc0 != ISD::SIGN_EXTEND_INREG)) {
     // fold (abs (sub nsw x, y)) -> abds(x, y)
     // Don't fold this for unsupported types as we lose the NSW handling.
-    if (AbsOp0->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
-        TLI.preferABDSToABSWithNSW(VT)) {
+    if (hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) &&
+        (AbsOp0->getFlags().hasNoSignedWrap() ||
+         DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
       SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
       return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
     }
+    // fold (abs (sub x, y)) -> abdu(x, y)
+    if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
+        DAG.SignBitIsZero(Op1)) {
+      SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
+      return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
+    }
     return SDValue();
   }
 
diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
index e48680f4be98b..d0257890d2c43 100644
--- a/llvm/test/CodeGen/AArch64/abd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -18,13 +18,11 @@ define <8 x i16> @abdu_const(<8 x i16> %src1) {
 ; CHECK-LABEL: abdu_const:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
-; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    abs v1.4s, v1.4s
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uabd v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -37,10 +35,10 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
 ; CHECK-LABEL: abdu_const_lhs:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    usubw v2.4s, v1.4s, v0.4h
-; CHECK-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    abs v1.4s, v2.4s
+; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uabd v1.4s, v2.4s, v1.4s
 ; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
@@ -53,13 +51,6 @@ define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
 define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
 ; CHECK-LABEL: abdu_const_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
-; CHECK-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT:    neg v1.4s, v2.4s
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    abs v1.4s, v1.4s
-; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
@@ -328,13 +319,11 @@ define <8 x i16> @abds_const(<8 x i16> %src1) {
 ; CHECK-LABEL: abds_const:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
-; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    abs v1.4s, v1.4s
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    sabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    sabd v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT:    ret
   %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -347,10 +336,10 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
 ; CHECK-LABEL: abds_const_lhs:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    ssubw v2.4s, v1.4s, v0.4h
-; CHECK-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    abs v1.4s, v2.4s
+; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    sabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    sabd v1.4s, v2.4s, v1.4s
 ; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT:    ret
   %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
@@ -363,10 +352,8 @@ define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
 define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
 ; CHECK-LABEL: abds_const_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
-; CHECK-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT:    neg v1.4s, v2.4s
+; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
 ; CHECK-NEXT:    abs v0.4s, v0.4s
 ; CHECK-NEXT:    abs v1.4s, v1.4s
 ; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
diff --git a/llvm/test/CodeGen/AArch64/sve-abd.ll b/llvm/test/CodeGen/AArch64/sve-abd.ll
index 72790155d046f..92bbd4e7f2759 100644
--- a/llvm/test/CodeGen/AArch64/sve-abd.ll
+++ b/llvm/test/CodeGen/AArch64/sve-abd.ll
@@ -283,8 +283,7 @@ define @uabd_non_matching_promotion( %a,
   %a.zext = zext %a to
   %b.zext = sext %b to
diff --git a/llvm/test/CodeGen/RISCV/rvv/abd.ll b/llvm/test/CodeGen/RISCV/rvv/abd.ll
index 583d872238df7..be4292c9902eb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/abd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/abd.ll
@@ -316,12 +316,10 @@ define @uabd_non_matching_promotion( %a,
   %a.zext = zext %a to
   %b.zext = sext %b to
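The DAGCombiner change above rests on a simple range argument: when both operands of the sub have a known-zero sign bit, the signed subtraction cannot wrap and |x - y| is exactly the unsigned absolute difference, so an ABD node can be formed even without an nsw flag or matching extend operands. Below is a reduced, standalone IR sketch in the spirit of the updated abdu_const test; the function name and the constant splat are illustrative and not part of this patch. On a target with legal ABD operations (e.g. llc -mtriple=aarch64), this input would now be expected to select uabd rather than the earlier sub+abs sequence.

; Illustrative example (not from this patch): the sub has no nsw flag and its
; operands are not matching extends (a zero-extend vs. a constant splat), but
; both operands are known non-negative, so abs(sub(x, y)) can safely become an
; absolute-difference node.
define <8 x i16> @abs_sub_known_nonnegative(<8 x i16> %src) {
  %zext = zext <8 x i16> %src to <8 x i32>
  %sub = sub <8 x i32> %zext, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 false)
  %res = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %res
}
declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)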