@@ -1873,8 +1873,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
18731873
18741874bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
18751875 EVT OpVT) const {
1876- // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
1877- if (!Subtarget->hasSVE ())
1876+ // Only SVE/SME has a 1:1 mapping from intrinsic -> instruction (whilelo).
1877+ if (!Subtarget->hasSVEorSME ())
18781878 return true;
18791879
18801880 // We can only support legal predicate result types. We can use the SVE
@@ -20507,6 +20507,61 @@ static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
2050720507 return SDValue();
2050820508}
2050920509
20510+ static SDValue tryCombineWhileLo(SDNode *N,
20511+ TargetLowering::DAGCombinerInfo &DCI,
20512+ const AArch64Subtarget *Subtarget) {
20513+ if (DCI.isBeforeLegalize())
20514+ return SDValue();
20515+
20516+ if (!Subtarget->hasSVE2p1() && !Subtarget->hasSME2())
20517+ return SDValue();
20518+
20519+ if (!N->hasNUsesOfValue(2, 0))
20520+ return SDValue();
20521+
20522+ const uint64_t HalfSize = N->getValueType(0).getVectorMinNumElements() / 2;
20523+ if (HalfSize < 2)
20524+ return SDValue();
20525+
20526+ auto It = N->use_begin();
20527+ SDNode *Lo = *It++;
20528+ SDNode *Hi = *It;
20529+
20530+ uint64_t OffLo, OffHi;
20531+ if (Lo->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
20532+ !isIntImmediate(Lo->getOperand(1).getNode(), OffLo) ||
20533+ Hi->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
20534+ !isIntImmediate(Hi->getOperand(1).getNode(), OffHi))
20535+ return SDValue();
20536+
20537+ if (OffLo > OffHi) {
20538+ std::swap(Lo, Hi);
20539+ std::swap(OffLo, OffHi);
20540+ }
20541+
20542+ if (OffLo != 0 || OffHi != HalfSize)
20543+ return SDValue();
20544+
20545+ SelectionDAG &DAG = DCI.DAG;
20546+ SDLoc DL(N);
20547+ SDValue ID =
20548+ DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo_x2, DL, MVT::i64);
20549+ SDValue Idx = N->getOperand(1);
20550+ SDValue TC = N->getOperand(2);
20551+ if (Idx.getValueType() != MVT::i64) {
20552+ Idx = DAG.getZExtOrTrunc(Idx, DL, MVT::i64);
20553+ TC = DAG.getZExtOrTrunc(TC, DL, MVT::i64);
20554+ }
20555+ auto R =
20556+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL,
20557+ {Lo->getValueType(0), Hi->getValueType(0)}, {ID, Idx, TC});
20558+
20559+ DCI.CombineTo(Lo, R.getValue(0));
20560+ DCI.CombineTo(Hi, R.getValue(1));
20561+
20562+ return SDValue(N, 0);
20563+ }
20564+
2051020565static SDValue performIntrinsicCombine(SDNode *N,
2051120566 TargetLowering::DAGCombinerInfo &DCI,
2051220567 const AArch64Subtarget *Subtarget) {
@@ -20837,6 +20892,8 @@ static SDValue performIntrinsicCombine(SDNode *N,
2083720892 case Intrinsic::aarch64_sve_ptest_last:
2083820893 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
2083920894 AArch64CC::LAST_ACTIVE);
20895+ case Intrinsic::aarch64_sve_whilelo:
20896+ return tryCombineWhileLo(N, DCI, Subtarget);
2084020897 }
2084120898 return SDValue();
2084220899}
0 commit comments