@@ -2160,6 +2160,25 @@ bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
21602160 return false;
21612161}
21622162
2163+ bool AArch64TargetLowering::shouldExpandGetAliasLaneMask(
2164+ EVT VT, EVT PtrVT, unsigned EltSize) const {
2165+ if (!Subtarget->hasSVE2())
2166+ return true;
2167+
2168+ if (PtrVT != MVT::i64)
2169+ return true;
2170+
2171+ if (VT == MVT::v2i1 || VT == MVT::nxv2i1)
2172+ return EltSize != 8;
2173+ if (VT == MVT::v4i1 || VT == MVT::nxv4i1)
2174+ return EltSize != 4;
2175+ if (VT == MVT::v8i1 || VT == MVT::nxv8i1)
2176+ return EltSize != 2;
2177+ if (VT == MVT::v16i1 || VT == MVT::nxv16i1)
2178+ return EltSize != 1;
2179+ return true;
2180+ }
2181+
21632182bool AArch64TargetLowering::shouldExpandPartialReductionIntrinsic(
21642183 const IntrinsicInst *I) const {
21652184 assert(I->getIntrinsicID() ==
@@ -5987,6 +6006,18 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
59876006 EVT PtrVT = getPointerTy(DAG.getDataLayout());
59886007 return DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
59896008 }
6009+ case Intrinsic::aarch64_sve_whilewr_b:
6010+ case Intrinsic::aarch64_sve_whilewr_h:
6011+ case Intrinsic::aarch64_sve_whilewr_s:
6012+ case Intrinsic::aarch64_sve_whilewr_d:
6013+ return DAG.getNode(AArch64ISD::WHILEWR, dl, Op.getValueType(),
6014+ Op.getOperand(1), Op.getOperand(2));
6015+ case Intrinsic::aarch64_sve_whilerw_b:
6016+ case Intrinsic::aarch64_sve_whilerw_h:
6017+ case Intrinsic::aarch64_sve_whilerw_s:
6018+ case Intrinsic::aarch64_sve_whilerw_d:
6019+ return DAG.getNode(AArch64ISD::WHILERW, dl, Op.getValueType(),
6020+ Op.getOperand(1), Op.getOperand(2));
59906021 case Intrinsic::aarch64_neon_abs: {
59916022 EVT Ty = Op.getValueType();
59926023 if (Ty == MVT::i64) {
@@ -6461,6 +6492,52 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
64616492 return DAG.getNode(AArch64ISD::USDOT, DL, Op.getValueType(),
64626493 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
64636494 }
6495+ case Intrinsic::experimental_get_alias_lane_mask: {
6496+ unsigned IntrinsicID = 0;
6497+ uint64_t EltSize = Op.getOperand(3)->getAsZExtVal();
6498+ bool IsWriteAfterRead = Op.getOperand(4)->getAsZExtVal() == 1;
6499+ switch (EltSize) {
6500+ case 1:
6501+ IntrinsicID = IsWriteAfterRead ? Intrinsic::aarch64_sve_whilewr_b
6502+ : Intrinsic::aarch64_sve_whilerw_b;
6503+ break;
6504+ case 2:
6505+ IntrinsicID = IsWriteAfterRead ? Intrinsic::aarch64_sve_whilewr_h
6506+ : Intrinsic::aarch64_sve_whilerw_h;
6507+ break;
6508+ case 4:
6509+ IntrinsicID = IsWriteAfterRead ? Intrinsic::aarch64_sve_whilewr_s
6510+ : Intrinsic::aarch64_sve_whilerw_s;
6511+ break;
6512+ case 8:
6513+ IntrinsicID = IsWriteAfterRead ? Intrinsic::aarch64_sve_whilewr_d
6514+ : Intrinsic::aarch64_sve_whilerw_d;
6515+ break;
6516+ default:
6517+ llvm_unreachable("Unexpected element size for get.alias.lane.mask");
6518+ break;
6519+ }
6520+ SDValue ID = DAG.getTargetConstant(IntrinsicID, dl, MVT::i64);
6521+
6522+ EVT VT = Op.getValueType();
6523+ if (VT.isScalableVector())
6524+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID, Op.getOperand(1),
6525+ Op.getOperand(2));
6526+
6527+ // We can use the SVE whilewr/whilerw instruction to lower this
6528+ // intrinsic by creating the appropriate sequence of scalable vector
6529+ // operations and then extracting a fixed-width subvector from the scalable
6530+ // vector.
6531+
6532+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
6533+ EVT WhileVT = ContainerVT.changeElementType(MVT::i1);
6534+
6535+ SDValue Mask = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, WhileVT, ID,
6536+ Op.getOperand(1), Op.getOperand(2));
6537+ SDValue MaskAsInt = DAG.getNode(ISD::SIGN_EXTEND, dl, ContainerVT, Mask);
6538+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, MaskAsInt,
6539+ DAG.getVectorIdxConstant(0, dl));
6540+ }
64646541 case Intrinsic::aarch64_neon_saddlv:
64656542 case Intrinsic::aarch64_neon_uaddlv: {
64666543 EVT OpVT = Op.getOperand(1).getValueType();
@@ -19961,7 +20038,10 @@ static bool isPredicateCCSettingOp(SDValue N) {
1996120038 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
1996220039 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
1996320040 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
19964- N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt)))
20041+ N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
20042+ // get_alias_lane_mask is lowered to a whilewr/rw instruction.
20043+ N.getConstantOperandVal(0) ==
20044+ Intrinsic::experimental_get_alias_lane_mask)))
1996520045 return true;
1996620046
1996720047 return false;
@@ -28232,7 +28312,8 @@ void AArch64TargetLowering::ReplaceNodeResults(
2823228312 DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, RuntimePStateSM));
2823328313 return;
2823428314 }
28235- case Intrinsic::experimental_vector_match: {
28315+ case Intrinsic::experimental_vector_match:
28316+ case Intrinsic::experimental_get_alias_lane_mask: {
2823628317 if (!VT.isFixedLengthVector() || VT.getVectorElementType() != MVT::i1)
2823728318 return;
2823828319
0 commit comments