@@ -1813,8 +1813,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
18131813
18141814bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
18151815 EVT OpVT) const {
1816- // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
1817- if (!Subtarget->hasSVE ())
1816+ // Only SVE/SME has a 1:1 mapping from intrinsic -> instruction (whilelo).
1817+ if (!Subtarget->hasSVEorSME ())
18181818 return true;
18191819
18201820 // We can only support legal predicate result types. We can use the SVE
@@ -20004,47 +20004,98 @@ static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
2000420004 return SDValue();
2000520005}
2000620006
20007- static SDValue performIntrinsicCombine (SDNode *N,
20008- TargetLowering::DAGCombinerInfo &DCI,
20009- const AArch64Subtarget *Subtarget) {
// Combine for the get_active_lane_mask intrinsic node N (dispatched from the
// Intrinsic::get_active_lane_mask case of performIntrinsicCombine).
//  * Fixed-length result type: rewrite as aarch64_sve_whilelo producing a
//    scalable i1 vector with the same minimum element count, sign-extend it to
//    the promoted vector type, extract the fixed-width subvector at index 0 and
//    truncate back to the result type.
//  * Otherwise, when the subtarget has SVE2p1 or SME2 and value 0 of N has
//    exactly two uses that are EXTRACT_SUBVECTORs at offsets 0 and HalfSize,
//    replace both extracts with the two results of one aarch64_sve_whilelo_x2.
// Returns an empty SDValue when neither rewrite applies.
20007+ static SDValue tryCombineGetActiveLaneMask (SDNode *N,
20008+ TargetLowering::DAGCombinerInfo &DCI,
20009+ const AArch64Subtarget *Subtarget) {
2001020010 SelectionDAG &DAG = DCI.DAG;
20011- unsigned IID = getIntrinsicID(N);
20012- switch (IID) {
20013- default:
20014- break;
20015- case Intrinsic::get_active_lane_mask: {
20016- SDValue Res = SDValue();
20017- EVT VT = N->getValueType(0);
20018- if (VT.isFixedLengthVector()) {
20019- // We can use the SVE whilelo instruction to lower this intrinsic by
20020- // creating the appropriate sequence of scalable vector operations and
20021- // then extracting a fixed-width subvector from the scalable vector.
20011+ EVT VT = N->getValueType(0);
20012+ if (VT.isFixedLengthVector()) {
20013+ // We can use the SVE whilelo instruction to lower this intrinsic by
20014+ // creating the appropriate sequence of scalable vector operations and
20015+ // then extracting a fixed-width subvector from the scalable vector.
20016+ SDLoc DL(N);
20017+ SDValue ID =
20018+ DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);
2002220019
20023- SDLoc DL(N);
20024- SDValue ID =
20025- DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64 );
20020+ EVT WhileVT =
20021+ EVT::getVectorVT(*DAG.getContext(), MVT::i1,
20022+ ElementCount::getScalable(VT.getVectorNumElements()) );
2002620023
20027- EVT WhileVT = EVT::getVectorVT(
20028- *DAG.getContext(), MVT::i1,
20029- ElementCount::getScalable(VT.getVectorNumElements()));
20024+ // Get promoted scalable vector VT, i.e. promote nxv4i1 -> nxv4i32.
20025+ EVT PromVT = getPromotedVTForPredicate(WhileVT);
2003020026
20031- // Get promoted scalable vector VT, i.e. promote nxv4i1 -> nxv4i32.
20032- EVT PromVT = getPromotedVTForPredicate(WhileVT);
20027+ // Get the fixed-width equivalent of PromVT for extraction.
20028+ EVT ExtVT =
20029+ EVT::getVectorVT(*DAG.getContext(), PromVT.getVectorElementType(),
20030+ VT.getVectorElementCount());
2003320031
20034- // Get the fixed-width equivalent of PromVT for extraction.
20035- EVT ExtVT =
20036- EVT::getVectorVT(*DAG.getContext(), PromVT.getVectorElementType(),
20037- VT.getVectorElementCount());
20032+ SDValue Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WhileVT, ID,
20033+ N->getOperand(1), N->getOperand(2));
20034+ Res = DAG.getNode(ISD::SIGN_EXTEND, DL, PromVT, Res);
20035+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, Res,
20036+ DAG.getConstant(0, DL, MVT::i64));
20037+ Res = DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
2003820038
20039- Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WhileVT, ID,
20040- N->getOperand(1), N->getOperand(2));
20041- Res = DAG.getNode(ISD::SIGN_EXTEND, DL, PromVT, Res);
20042- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, Res,
20043- DAG.getConstant(0, DL, MVT::i64));
20044- Res = DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
20045- }
2004620039 return Res;
2004720040 }
20041+
// From here on VT is not a fixed-length vector. The paired form built below
// (aarch64_sve_whilelo_x2) is only attempted on SVE2p1 or SME2 subtargets.
20042+ if (!Subtarget->hasSVE2p1() && !Subtarget->hasSME2())
20043+ return SDValue();
20044+
// Value 0 of N must have exactly two uses; they are the candidate low/high
// half extracts.
20045+ if (!N->hasNUsesOfValue(2, 0))
20046+ return SDValue();
20047+
// The users are taken in use-list order, so Lo/Hi are provisional until they
// are sorted by offset further down.
20048+ auto It = N->use_begin();
20049+ SDNode *Lo = *It++;
20050+ SDNode *Hi = *It;
20051+
// NOTE(review): HalfSize has no lower bound here; for a 1-element minimum it
// is 0 and the final "OffLo != 0 || OffHi != HalfSize" test would accept two
// extracts at offset 0 - confirm whether that case can reach this code.
20052+ const uint64_t HalfSize = VT.getVectorMinNumElements() / 2;
20053+ uint64_t OffLo, OffHi;
// Both users must be EXTRACT_SUBVECTOR with an immediate index of 0 or
// HalfSize. OffLo/OffHi are only read after the corresponding isIntImmediate
// call in the same short-circuit chain.
20054+ if (Lo->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
20055+ !isIntImmediate(Lo->getOperand(1).getNode(), OffLo) ||
20056+ (OffLo != 0 && OffLo != HalfSize) ||
20057+ Hi->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
20058+ !isIntImmediate(Hi->getOperand(1).getNode(), OffHi) ||
20059+ (OffHi != 0 && OffHi != HalfSize))
20060+ return SDValue();
20061+
// Order the pair so that Lo is the extract with the smaller offset.
20062+ if (OffLo > OffHi) {
20063+ std::swap(Lo, Hi);
20064+ std::swap(OffLo, OffHi);
20065+ }
20066+
// Reject the remaining combinations (both at 0 or both at HalfSize).
20067+ if (OffLo != 0 || OffHi != HalfSize)
20068+ return SDValue();
20069+
20070+ SDLoc DL(N);
20071+ SDValue ID =
20072+ DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo_x2, DL, MVT::i64);
20073+ SDValue Idx = N->getOperand(1);
20074+ SDValue TC = N->getOperand(2);
// Both scalar operands are converted to i64, but only Idx's type is tested.
// NOTE(review): presumably Idx and TC always share a type - confirm.
20075+ if (Idx.getValueType() != MVT::i64) {
20076+ Idx = DAG.getZExtOrTrunc(Idx, DL, MVT::i64);
20077+ TC = DAG.getZExtOrTrunc(TC, DL, MVT::i64);
20078+ }
// The two results take the value types of the extract nodes as-is.
// NOTE(review): those types are not compared with each other or checked to be
// exactly half of VT - confirm whether mismatched extract types are possible.
20079+ auto R =
20080+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL,
20081+ {Lo->getValueType(0), Hi->getValueType(0)}, {ID, Idx, TC});
20082+
// Replace each extract with the matching result of the paired node.
20083+ DCI.CombineTo(Lo, R.getValue(0));
20084+ DCI.CombineTo(Hi, R.getValue(1));
20085+
// N itself is returned rather than a new value; presumably this signals to the
// combiner that the replacements were already made via CombineTo - confirm.
20086+ return SDValue(N, 0);
20087+ }
20088+
20089+ static SDValue performIntrinsicCombine(SDNode *N,
20090+ TargetLowering::DAGCombinerInfo &DCI,
20091+ const AArch64Subtarget *Subtarget) {
20092+ SelectionDAG &DAG = DCI.DAG;
20093+ unsigned IID = getIntrinsicID(N);
20094+ switch (IID) {
20095+ default:
20096+ break;
20097+ case Intrinsic::get_active_lane_mask:
20098+ return tryCombineGetActiveLaneMask(N, DCI, Subtarget);
2004820099 case Intrinsic::aarch64_neon_vcvtfxs2fp:
2004920100 case Intrinsic::aarch64_neon_vcvtfxu2fp:
2005020101 return tryCombineFixedPointConvert(N, DCI, DAG);
0 commit comments