Skip to content

Commit 4e1db6a

Browse files
[AArch64][SVE] Add AArch64ISD nodes for wide add instructions (#115895)
Previously, when a partial reduction was lowered to a pair of wide adds, the corresponding SVE intrinsics were emitted as INTRINSIC_WO_CHAIN nodes. The lowering now emits dedicated AArch64ISD nodes (SADDWB/SADDWT/UADDWB/UADDWT) instead.
1 parent e9fc2fa commit 4e1db6a

File tree

3 files changed

+58
-15
lines changed

3 files changed

+58
-15
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2768,6 +2768,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
27682768
MAKE_CASE(AArch64ISD::UADDV)
27692769
MAKE_CASE(AArch64ISD::UADDLV)
27702770
MAKE_CASE(AArch64ISD::SADDLV)
2771+
MAKE_CASE(AArch64ISD::SADDWT)
2772+
MAKE_CASE(AArch64ISD::SADDWB)
2773+
MAKE_CASE(AArch64ISD::UADDWT)
2774+
MAKE_CASE(AArch64ISD::UADDWB)
27712775
MAKE_CASE(AArch64ISD::SDOT)
27722776
MAKE_CASE(AArch64ISD::UDOT)
27732777
MAKE_CASE(AArch64ISD::USDOT)
@@ -21907,17 +21911,10 @@ SDValue tryLowerPartialReductionToWideAdd(SDNode *N,
2190721911
return SDValue();
2190821912

2190921913
bool InputIsSigned = ExtInputOpcode == ISD::SIGN_EXTEND;
21910-
auto BottomIntrinsic = InputIsSigned ? Intrinsic::aarch64_sve_saddwb
21911-
: Intrinsic::aarch64_sve_uaddwb;
21912-
auto TopIntrinsic = InputIsSigned ? Intrinsic::aarch64_sve_saddwt
21913-
: Intrinsic::aarch64_sve_uaddwt;
21914-
21915-
auto BottomID = DAG.getTargetConstant(BottomIntrinsic, DL, AccElemVT);
21916-
auto BottomNode =
21917-
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, AccVT, BottomID, Acc, Input);
21918-
auto TopID = DAG.getTargetConstant(TopIntrinsic, DL, AccElemVT);
21919-
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, AccVT, TopID, BottomNode,
21920-
Input);
21914+
auto BottomOpcode = InputIsSigned ? AArch64ISD::SADDWB : AArch64ISD::UADDWB;
21915+
auto TopOpcode = InputIsSigned ? AArch64ISD::SADDWT : AArch64ISD::UADDWT;
21916+
auto BottomNode = DAG.getNode(BottomOpcode, DL, AccVT, Acc, Input);
21917+
return DAG.getNode(TopOpcode, DL, AccVT, BottomNode, Input);
2192121918
}
2192221919

2192321920
static SDValue performIntrinsicCombine(SDNode *N,
@@ -22097,6 +22094,18 @@ static SDValue performIntrinsicCombine(SDNode *N,
2209722094
case Intrinsic::aarch64_sve_bic_u:
2209822095
return DAG.getNode(AArch64ISD::BIC, SDLoc(N), N->getValueType(0),
2209922096
N->getOperand(2), N->getOperand(3));
22097+
case Intrinsic::aarch64_sve_saddwb:
22098+
return DAG.getNode(AArch64ISD::SADDWB, SDLoc(N), N->getValueType(0),
22099+
N->getOperand(1), N->getOperand(2));
22100+
case Intrinsic::aarch64_sve_saddwt:
22101+
return DAG.getNode(AArch64ISD::SADDWT, SDLoc(N), N->getValueType(0),
22102+
N->getOperand(1), N->getOperand(2));
22103+
case Intrinsic::aarch64_sve_uaddwb:
22104+
return DAG.getNode(AArch64ISD::UADDWB, SDLoc(N), N->getValueType(0),
22105+
N->getOperand(1), N->getOperand(2));
22106+
case Intrinsic::aarch64_sve_uaddwt:
22107+
return DAG.getNode(AArch64ISD::UADDWT, SDLoc(N), N->getValueType(0),
22108+
N->getOperand(1), N->getOperand(2));
2210022109
case Intrinsic::aarch64_sve_eor_u:
2210122110
return DAG.getNode(ISD::XOR, SDLoc(N), N->getValueType(0), N->getOperand(2),
2210222111
N->getOperand(3));
@@ -29702,6 +29711,27 @@ void AArch64TargetLowering::verifyTargetSDNode(const SDNode *N) const {
2970229711
switch (N->getOpcode()) {
2970329712
default:
2970429713
break;
29714+
case AArch64ISD::SADDWT:
29715+
case AArch64ISD::SADDWB:
29716+
case AArch64ISD::UADDWT:
29717+
case AArch64ISD::UADDWB: {
29718+
assert(N->getNumValues() == 1 && "Expected one result!");
29719+
assert(N->getNumOperands() == 2 && "Expected two operands!");
29720+
EVT VT = N->getValueType(0);
29721+
EVT Op0VT = N->getOperand(0).getValueType();
29722+
EVT Op1VT = N->getOperand(1).getValueType();
29723+
assert(VT.isVector() && Op0VT.isVector() && Op1VT.isVector() &&
29724+
VT.isInteger() && Op0VT.isInteger() && Op1VT.isInteger() &&
29725+
"Expected integer vectors!");
29726+
assert(VT == Op0VT &&
29727+
"Expected result and first input to have the same type!");
29728+
assert(Op0VT.getSizeInBits() == Op1VT.getSizeInBits() &&
29729+
"Expected vectors of equal size!");
29730+
assert(Op0VT.getVectorElementCount() * 2 == Op1VT.getVectorElementCount() &&
29731+
"Expected result vector and first input vector to have half the "
29732+
"lanes of the second input vector!");
29733+
break;
29734+
}
2970529735
case AArch64ISD::SUNPKLO:
2970629736
case AArch64ISD::SUNPKHI:
2970729737
case AArch64ISD::UUNPKLO:

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,12 @@ enum NodeType : unsigned {
273273
UADDLV,
274274
SADDLV,
275275

276+
// Wide adds
277+
SADDWT,
278+
SADDWB,
279+
UADDWT,
280+
UADDWB,
281+
276282
// Add Pairwise of two vectors
277283
ADDP,
278284
// Add Long Pairwise

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,13 @@ def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [
430430

431431
def AArch64bic_node : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>;
432432

433+
def SDT_AArch64addw : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
434+
435+
def AArch64saddwt : SDNode<"AArch64ISD::SADDWT", SDT_AArch64addw>;
436+
def AArch64saddwb : SDNode<"AArch64ISD::SADDWB", SDT_AArch64addw>;
437+
def AArch64uaddwt : SDNode<"AArch64ISD::UADDWT", SDT_AArch64addw>;
438+
def AArch64uaddwb : SDNode<"AArch64ISD::UADDWB", SDT_AArch64addw>;
439+
433440
def AArch64bic : PatFrags<(ops node:$op1, node:$op2),
434441
[(and node:$op1, (xor node:$op2, (splat_vector (i32 -1)))),
435442
(and node:$op1, (xor node:$op2, (splat_vector (i64 -1)))),
@@ -3674,10 +3681,10 @@ let Predicates = [HasSVE2orSME] in {
36743681
defm UABDLT_ZZZ : sve2_wide_int_arith_long<0b01111, "uabdlt", int_aarch64_sve_uabdlt>;
36753682

36763683
// SVE2 integer add/subtract wide
3677-
defm SADDWB_ZZZ : sve2_wide_int_arith_wide<0b000, "saddwb", int_aarch64_sve_saddwb>;
3678-
defm SADDWT_ZZZ : sve2_wide_int_arith_wide<0b001, "saddwt", int_aarch64_sve_saddwt>;
3679-
defm UADDWB_ZZZ : sve2_wide_int_arith_wide<0b010, "uaddwb", int_aarch64_sve_uaddwb>;
3680-
defm UADDWT_ZZZ : sve2_wide_int_arith_wide<0b011, "uaddwt", int_aarch64_sve_uaddwt>;
3684+
defm SADDWB_ZZZ : sve2_wide_int_arith_wide<0b000, "saddwb", AArch64saddwb>;
3685+
defm SADDWT_ZZZ : sve2_wide_int_arith_wide<0b001, "saddwt", AArch64saddwt>;
3686+
defm UADDWB_ZZZ : sve2_wide_int_arith_wide<0b010, "uaddwb", AArch64uaddwb>;
3687+
defm UADDWT_ZZZ : sve2_wide_int_arith_wide<0b011, "uaddwt", AArch64uaddwt>;
36813688
defm SSUBWB_ZZZ : sve2_wide_int_arith_wide<0b100, "ssubwb", int_aarch64_sve_ssubwb>;
36823689
defm SSUBWT_ZZZ : sve2_wide_int_arith_wide<0b101, "ssubwt", int_aarch64_sve_ssubwt>;
36833690
defm USUBWB_ZZZ : sve2_wide_int_arith_wide<0b110, "usubwb", int_aarch64_sve_usubwb>;

0 commit comments

Comments
 (0)