-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[AArch64][SME2] Improve register allocation of multi-vector SME intrinsics #116399
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
fc0d224
6992da2
0d4c931
7188a2d
ebc97b7
43939b9
426253c
645e30b
d7ccfe1
0f0bc84
7f3e687
6cb5c5d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2808,6 +2808,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { | |
| MAKE_CASE(AArch64ISD::FMUL_PRED) | ||
| MAKE_CASE(AArch64ISD::FSUB_PRED) | ||
| MAKE_CASE(AArch64ISD::RDSVL) | ||
| MAKE_CASE(AArch64ISD::FORM_STRIDED_TUPLE_X2) | ||
| MAKE_CASE(AArch64ISD::FORM_STRIDED_TUPLE_X4) | ||
MacDue marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| MAKE_CASE(AArch64ISD::BIC) | ||
| MAKE_CASE(AArch64ISD::CBZ) | ||
| MAKE_CASE(AArch64ISD::CBNZ) | ||
|
|
@@ -5709,6 +5711,46 @@ SDValue AArch64TargetLowering::getRuntimePStateSM(SelectionDAG &DAG, | |
| Mask); | ||
| } | ||
|
|
||
| static unsigned getIntrinsicID(const SDNode *N); | ||
kmclaughlin-arm marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| SDValue TryLowerMultiVecSMEDotIntrinsic(SDValue Op, SelectionDAG &DAG, | ||
kmclaughlin-arm marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| unsigned Size) { | ||
| assert((Size == 2 || Size == 4) && "Invalid Tuple Size"); | ||
| auto IsStridedLoad = [Size](SDValue Op) -> bool { | ||
| unsigned Intrinsic = getIntrinsicID(Op.getNode()); | ||
| if (Size == 2) | ||
| return Intrinsic == Intrinsic::aarch64_sve_ld1_pn_x2; | ||
| else | ||
| return Intrinsic == Intrinsic::aarch64_sve_ld1_pn_x4; | ||
| }; | ||
|
|
||
| SmallVector<SDValue> Ops; | ||
| unsigned LastLoadIdx = Size == 2 ? 5 : 7; | ||
| unsigned LoadResNo = Op.getOperand(3).getResNo(); | ||
| for (unsigned I = 3; I < LastLoadIdx; I++) { | ||
| if (!IsStridedLoad(Op->getOperand(I)) || | ||
| Op.getOperand(I).getResNo() != LoadResNo) | ||
| return SDValue(); | ||
| Ops.push_back(Op->getOperand(I)); | ||
| } | ||
|
|
||
| EVT VT = Op->getOperand(3).getValueType(); | ||
| SDVTList VTList = | ||
| Size == 2 ? DAG.getVTList(VT, VT) : DAG.getVTList(VT, VT, VT, VT); | ||
| unsigned Opc = Size == 2 ? AArch64ISD::FORM_STRIDED_TUPLE_X2 | ||
| : AArch64ISD::FORM_STRIDED_TUPLE_X4; | ||
| SDLoc DL(Op); | ||
| SDValue Pseudo = DAG.getNode(Opc, DL, VTList, Ops); | ||
|
|
||
| SmallVector<SDValue> DotOps = {Op.getOperand(0), Op->getOperand(1), | ||
| Op->getOperand(2)}; | ||
| for (unsigned I = 0; I < Size; I++) | ||
| DotOps.push_back(Pseudo.getValue(I)); | ||
| DotOps.push_back(Op->getOperand(DotOps.size())); | ||
| DotOps.push_back(Op->getOperand(DotOps.size())); | ||
| return DAG.getNode(Op->getOpcode(), DL, MVT::Other, DotOps); | ||
| } | ||
|
|
||
| // Lower an SME LDR/STR ZA intrinsic | ||
| // Case 1: If the vector number (vecnum) is an immediate in range, it gets | ||
| // folded into the instruction | ||
|
|
@@ -5898,6 +5940,22 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op, | |
| Op->getOperand(0), // Chain | ||
| DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32), | ||
| DAG.getConstant(AArch64SME::Always, DL, MVT::i64)); | ||
| case Intrinsic::aarch64_sme_uvdot_lane_za32_vg1x4: | ||
| case Intrinsic::aarch64_sme_suvdot_lane_za32_vg1x4: | ||
| case Intrinsic::aarch64_sme_usvdot_lane_za32_vg1x4: | ||
| case Intrinsic::aarch64_sme_svdot_lane_za32_vg1x4: | ||
| case Intrinsic::aarch64_sme_usdot_lane_za32_vg1x4: | ||
| case Intrinsic::aarch64_sme_udot_lane_za32_vg1x4: | ||
| case Intrinsic::aarch64_sme_sudot_lane_za32_vg1x4: | ||
| case Intrinsic::aarch64_sme_sdot_lane_za32_vg1x4: | ||
| return TryLowerMultiVecSMEDotIntrinsic(Op, DAG, 4); | ||
| case Intrinsic::aarch64_sme_uvdot_lane_za32_vg1x2: | ||
| case Intrinsic::aarch64_sme_sdot_lane_za32_vg1x2: | ||
| case Intrinsic::aarch64_sme_svdot_lane_za32_vg1x2: | ||
| case Intrinsic::aarch64_sme_usdot_lane_za32_vg1x2: | ||
| case Intrinsic::aarch64_sme_sudot_lane_za32_vg1x2: | ||
| case Intrinsic::aarch64_sme_udot_lane_za32_vg1x2: | ||
| return TryLowerMultiVecSMEDotIntrinsic(Op, DAG, 2); | ||
|
||
| } | ||
| } | ||
|
|
||
|
|
@@ -7639,6 +7697,11 @@ static unsigned getIntrinsicID(const SDNode *N) { | |
| return IID; | ||
| return Intrinsic::not_intrinsic; | ||
| } | ||
| case ISD::INTRINSIC_W_CHAIN: { | ||
| unsigned IID = N->getConstantOperandVal(1); | ||
| if (IID < Intrinsic::num_intrinsics) | ||
| return IID; | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,6 +28,17 @@ def AArch64_restore_zt : SDNode<"AArch64ISD::RESTORE_ZT", SDTypeProfile<0, 2, | |
// SDNode for AArch64ISD::SAVE_ZT: no results and two operands (operand 0 is an
// integer, operand 1 is a pointer). The node is chained, has side effects and
// may store.
def AArch64_save_zt
    : SDNode<"AArch64ISD::SAVE_ZT",
             SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
             [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;
|
|
||
|
||
// Type profile for the two-vector strided tuple pseudo: two results and two
// operands, all of the same vector type. Indices 0-1 are the results and 2-3
// are the operands, matching how the C++ lowering builds the X2 node (two
// result types, two inputs).
def SDT_FORM_STRIDED_TUPLE_X2 : SDTypeProfile<2, 2,
                             [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                              SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>;
|
|
||
// Type profile with four results and four operands, all constrained to the
// same vector type as result 0 (indices 0-3 are results, 4-7 are operands).
// NOTE(review): presumably consumed by the FORM_STRIDED_TUPLE_X4 node
// definition, which is not visible in this hunk.
def SDT_FORM_STRIDED_TUPLE_X4
    : SDTypeProfile<4, 4,
                    [SDTCisVec<0>,
                     SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
                     SDTCisSameAs<0, 4>, SDTCisSameAs<0, 5>, SDTCisSameAs<0, 6>,
                     SDTCisSameAs<0, 7>]>;
|
|
||
// SDNode for AArch64ISD::COALESCER_BARRIER: one result and one operand with no
// type constraints; it may take incoming glue and produces outgoing glue.
def AArch64CoalescerBarrier
    : SDNode<"AArch64ISD::COALESCER_BARRIER",
             SDTypeProfile<1, 1, []>,
             [SDNPOptInGlue, SDNPOutGlue]>;
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: