@@ -1834,8 +1834,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
18341834
18351835bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
18361836 EVT OpVT) const {
1837- // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
1838- if (!Subtarget->hasSVE ())
1837+ // Only SVE/SME has a 1:1 mapping from intrinsic -> instruction (whilelo).
1838+ if (!Subtarget->hasSVEorSME ())
18391839 return true;
18401840
18411841 // We can only support legal predicate result types. We can use the SVE
@@ -20535,6 +20535,61 @@ static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
2053520535 return SDValue();
2053620536}
2053720537
20538+ static SDValue tryCombineWhileLo(SDNode *N,
20539+ TargetLowering::DAGCombinerInfo &DCI,
20540+ const AArch64Subtarget *Subtarget) {
20541+ if (DCI.isBeforeLegalize())
20542+ return SDValue();
20543+
20544+ if (!Subtarget->hasSVE2p1() && !Subtarget->hasSME2())
20545+ return SDValue();
20546+
20547+ if (!N->hasNUsesOfValue(2, 0))
20548+ return SDValue();
20549+
20550+ const uint64_t HalfSize = N->getValueType(0).getVectorMinNumElements() / 2;
20551+ if (HalfSize < 2)
20552+ return SDValue();
20553+
20554+ auto It = N->use_begin();
20555+ SDNode *Lo = *It++;
20556+ SDNode *Hi = *It;
20557+
20558+ uint64_t OffLo, OffHi;
20559+ if (Lo->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
20560+ !isIntImmediate(Lo->getOperand(1).getNode(), OffLo) ||
20561+ Hi->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
20562+ !isIntImmediate(Hi->getOperand(1).getNode(), OffHi))
20563+ return SDValue();
20564+
20565+ if (OffLo > OffHi) {
20566+ std::swap(Lo, Hi);
20567+ std::swap(OffLo, OffHi);
20568+ }
20569+
20570+ if (OffLo != 0 || OffHi != HalfSize)
20571+ return SDValue();
20572+
20573+ SelectionDAG &DAG = DCI.DAG;
20574+ SDLoc DL(N);
20575+ SDValue ID =
20576+ DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo_x2, DL, MVT::i64);
20577+ SDValue Idx = N->getOperand(1);
20578+ SDValue TC = N->getOperand(2);
20579+ if (Idx.getValueType() != MVT::i64) {
20580+ Idx = DAG.getZExtOrTrunc(Idx, DL, MVT::i64);
20581+ TC = DAG.getZExtOrTrunc(TC, DL, MVT::i64);
20582+ }
20583+ auto R =
20584+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL,
20585+ {Lo->getValueType(0), Hi->getValueType(0)}, {ID, Idx, TC});
20586+
20587+ DCI.CombineTo(Lo, R.getValue(0));
20588+ DCI.CombineTo(Hi, R.getValue(1));
20589+
20590+ return SDValue(N, 0);
20591+ }
20592+
2053820593static SDValue performIntrinsicCombine(SDNode *N,
2053920594 TargetLowering::DAGCombinerInfo &DCI,
2054020595 const AArch64Subtarget *Subtarget) {
@@ -20832,6 +20887,8 @@ static SDValue performIntrinsicCombine(SDNode *N,
2083220887 case Intrinsic::aarch64_sve_ptest_last:
2083320888 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
2083420889 AArch64CC::LAST_ACTIVE);
20890+ case Intrinsic::aarch64_sve_whilelo:
20891+ return tryCombineWhileLo(N, DCI, Subtarget);
2083520892 }
2083620893 return SDValue();
2083720894}
0 commit comments