[AArch64][SVE] Add AArch64ISD nodes for wide add instructions #115895
Conversation
@llvm/pr-subscribers-backend-aarch64

Author: James Chesterman (JamesChesterman)

Changes:
When lowering a partial reduction to a pair of wide adds, the corresponding SVE intrinsics were previously returned as generic intrinsic (INTRINSIC_WO_CHAIN) nodes. Dedicated AArch64ISD nodes are now returned instead.

Full diff: https://github.com/llvm/llvm-project/pull/115895.diff

3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e7923ff02de704..a0629e1839dd22 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2755,6 +2755,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::UADDV)
MAKE_CASE(AArch64ISD::UADDLV)
MAKE_CASE(AArch64ISD::SADDLV)
+ MAKE_CASE(AArch64ISD::SADDWT)
+ MAKE_CASE(AArch64ISD::SADDWB)
+ MAKE_CASE(AArch64ISD::UADDWT)
+ MAKE_CASE(AArch64ISD::UADDWB)
MAKE_CASE(AArch64ISD::SDOT)
MAKE_CASE(AArch64ISD::UDOT)
MAKE_CASE(AArch64ISD::USDOT)
@@ -21825,17 +21829,10 @@ SDValue tryLowerPartialReductionToWideAdd(SDNode *N,
return SDValue();
bool InputIsSigned = ExtInputOpcode == ISD::SIGN_EXTEND;
- auto BottomIntrinsic = InputIsSigned ? Intrinsic::aarch64_sve_saddwb
- : Intrinsic::aarch64_sve_uaddwb;
- auto TopIntrinsic = InputIsSigned ? Intrinsic::aarch64_sve_saddwt
- : Intrinsic::aarch64_sve_uaddwt;
-
- auto BottomID = DAG.getTargetConstant(BottomIntrinsic, DL, AccElemVT);
- auto BottomNode =
- DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, AccVT, BottomID, Acc, Input);
- auto TopID = DAG.getTargetConstant(TopIntrinsic, DL, AccElemVT);
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, AccVT, TopID, BottomNode,
- Input);
+ auto BottomISD = InputIsSigned ? AArch64ISD::SADDWB : AArch64ISD::UADDWB;
+ auto TopISD = InputIsSigned ? AArch64ISD::SADDWT : AArch64ISD::UADDWT;
+ auto BottomNode = DAG.getNode(BottomISD, DL, AccVT, Acc, Input);
+ return DAG.getNode(TopISD, DL, AccVT, BottomNode, Input);
}
static SDValue performIntrinsicCombine(SDNode *N,
@@ -22015,6 +22012,18 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_sve_bic_u:
return DAG.getNode(AArch64ISD::BIC, SDLoc(N), N->getValueType(0),
N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_saddwb:
+ return DAG.getNode(AArch64ISD::SADDWB, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_sve_saddwt:
+ return DAG.getNode(AArch64ISD::SADDWT, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_sve_uaddwb:
+ return DAG.getNode(AArch64ISD::UADDWB, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_sve_uaddwt:
+ return DAG.getNode(AArch64ISD::UADDWT, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_eor_u:
return DAG.getNode(ISD::XOR, SDLoc(N), N->getValueType(0), N->getOperand(2),
N->getOperand(3));
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index d696355bb062a8..00de5efdaa8e6d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -273,6 +273,12 @@ enum NodeType : unsigned {
UADDLV,
SADDLV,
+ // Wide adds
+ SADDWT,
+ SADDWB,
+ UADDWT,
+ UADDWB,
+
// Add Pairwise of two vectors
ADDP,
// Add Long Pairwise
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c10653e05841cd..f4ab21f50e5598 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -430,6 +430,22 @@ def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [
def AArch64bic_node : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>;
+def SDT_AArch64addw : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
+
+def AArch64saddwt_node : SDNode<"AArch64ISD::SADDWT", SDT_AArch64addw>;
+def AArch64saddwb_node : SDNode<"AArch64ISD::SADDWB", SDT_AArch64addw>;
+def AArch64uaddwt_node : SDNode<"AArch64ISD::UADDWT", SDT_AArch64addw>;
+def AArch64uaddwb_node : SDNode<"AArch64ISD::UADDWB", SDT_AArch64addw>;
+
+def AArch64saddwt : PatFrag<(ops node:$op1, node:$op2),
+ (AArch64saddwt_node node:$op1, node:$op2)>;
+def AArch64saddwb : PatFrag<(ops node:$op1, node:$op2),
+ (AArch64saddwb_node node:$op1, node:$op2)>;
+def AArch64uaddwt : PatFrag<(ops node:$op1, node:$op2),
+ (AArch64uaddwt_node node:$op1, node:$op2)>;
+def AArch64uaddwb : PatFrag<(ops node:$op1, node:$op2),
+ (AArch64uaddwb_node node:$op1, node:$op2)>;
+
def AArch64bic : PatFrags<(ops node:$op1, node:$op2),
[(and node:$op1, (xor node:$op2, (splat_vector (i32 -1)))),
(and node:$op1, (xor node:$op2, (splat_vector (i64 -1)))),
@@ -3674,10 +3690,10 @@ let Predicates = [HasSVE2orSME] in {
defm UABDLT_ZZZ : sve2_wide_int_arith_long<0b01111, "uabdlt", int_aarch64_sve_uabdlt>;
// SVE2 integer add/subtract wide
- defm SADDWB_ZZZ : sve2_wide_int_arith_wide<0b000, "saddwb", int_aarch64_sve_saddwb>;
- defm SADDWT_ZZZ : sve2_wide_int_arith_wide<0b001, "saddwt", int_aarch64_sve_saddwt>;
- defm UADDWB_ZZZ : sve2_wide_int_arith_wide<0b010, "uaddwb", int_aarch64_sve_uaddwb>;
- defm UADDWT_ZZZ : sve2_wide_int_arith_wide<0b011, "uaddwt", int_aarch64_sve_uaddwt>;
+ defm SADDWB_ZZZ : sve2_wide_int_arith_wide<0b000, "saddwb", AArch64saddwb>;
+ defm SADDWT_ZZZ : sve2_wide_int_arith_wide<0b001, "saddwt", AArch64saddwt>;
+ defm UADDWB_ZZZ : sve2_wide_int_arith_wide<0b010, "uaddwb", AArch64uaddwb>;
+ defm UADDWT_ZZZ : sve2_wide_int_arith_wide<0b011, "uaddwt", AArch64uaddwt>;
defm SSUBWB_ZZZ : sve2_wide_int_arith_wide<0b100, "ssubwb", int_aarch64_sve_ssubwb>;
defm SSUBWT_ZZZ : sve2_wide_int_arith_wide<0b101, "ssubwt", int_aarch64_sve_ssubwt>;
defm USUBWB_ZZZ : sve2_wide_int_arith_wide<0b110, "usubwb", int_aarch64_sve_usubwb>;
SamTebbs33 left a comment:
Looks good to me.
Force-pushed from 056bb1e to 0cadef8.
Remove unnecessary code and rename variables.
paulwalker-arm left a comment:
Patch looks good to my eye, but do you mind also updating AArch64TargetLowering::verifyTargetSDNode() to verify that the new ISD nodes are correct when generating them?
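For reference, a minimal sketch of what such a check might look like, assuming a new case is added to the AArch64TargetLowering::verifyTargetSDNode() switch mentioned above (assert builds only). The helper name and the exact assertions are illustrative, not the committed patch:

```cpp
// Hypothetical helper sketching the checks that could back
// AArch64TargetLowering::verifyTargetSDNode() for the new wide-add nodes.
#include "AArch64ISelLowering.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <cassert>

using namespace llvm;

static void verifyWideAddNode(const SDNode *N) {
  switch (N->getOpcode()) {
  default:
    return;
  case AArch64ISD::SADDWT:
  case AArch64ISD::SADDWB:
  case AArch64ISD::UADDWT:
  case AArch64ISD::UADDWB: {
    assert(N->getNumValues() == 1 && "Expected one result");
    assert(N->getNumOperands() == 2 && "Expected two operands");
    EVT VT = N->getValueType(0);
    EVT AccVT = N->getOperand(0).getValueType();
    EVT InputVT = N->getOperand(1).getValueType();
    // All three types are integer vectors.
    assert(VT.isVector() && AccVT.isVector() && InputVT.isVector() &&
           VT.isInteger() && AccVT.isInteger() && InputVT.isInteger() &&
           "Expected integer vector types");
    // The accumulator carries the wide element type and matches the result;
    // the second operand holds twice as many narrower elements, of which the
    // bottom/top half are widened and added.
    assert(VT == AccVT && "Accumulator type must match the result type");
    assert(AccVT.getVectorElementCount() * 2 == InputVT.getVectorElementCount() &&
           "Input must have twice as many elements as the result");
    break;
  }
  }
}
```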
MacDue left a comment:
LGTM!
paulwalker-arm left a comment:
A couple of suggestions but otherwise this looks good to me.