@@ -985,6 +985,33 @@ static bool isAllActivePredicate(Value *Pred) {
                          m_ConstantInt<AArch64SVEPredPattern::all>()));
 }
 
+// Simplify a unary operation whose predicate has all inactive lanes by
+// replacing the instruction with a zeroed object.
+static std::optional<Instruction *>
+instCombineSVENoActiveUnaryZero(InstCombiner &IC, IntrinsicInst &II) {
+  if (match(II.getOperand(0), m_ZeroInt())) {
+    Constant *Node;
+    Type *RetTy = II.getType();
+    if (RetTy->isStructTy()) {
+      auto StructT = cast<StructType>(RetTy);
+      auto VecT = StructT->getElementType(0);
+      SmallVector<llvm::Constant *, 4> ZerVec;
+      for (unsigned i = 0; i < StructT->getNumElements(); i++) {
+        ZerVec.push_back(VecT->isFPOrFPVectorTy() ? ConstantFP::get(VecT, 0.0)
+                                                  : ConstantInt::get(VecT, 0));
+      }
+      Node = ConstantStruct::get(StructT, ZerVec);
+    } else if (RetTy->isFPOrFPVectorTy())
+      Node = ConstantFP::get(RetTy, 0.0);
+    else
+      Node = ConstantInt::get(II.getType(), 0);
+
+    IC.replaceInstUsesWith(II, Node);
+    return IC.eraseInstFromFunction(II);
+  }
+  return std::nullopt;
+}
+
 static std::optional<Instruction *> instCombineSVESel(InstCombiner &IC,
                                                       IntrinsicInst &II) {
   // svsel(ptrue, x, y) => x
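Note: the new helper keys only on the predicate operand (operand 0) being a zero constant, so it applies uniformly to all of the load intrinsics wired up further down. As a rough illustration (not taken from the patch's tests, and assuming the usual predicate-plus-pointer signature and type mangling), a call such as

    %r = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base)

has its uses replaced with zeroinitializer and the call itself erased.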
@@ -1398,6 +1425,10 @@ instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
   Value *PtrOp = II.getOperand(1);
   Type *VecTy = II.getType();
 
+  // Replace with a zero constant when all lanes are inactive.
+  if (auto II_NA = instCombineSVENoActiveUnaryZero(IC, II))
+    return II_NA;
+
   if (isAllActivePredicate(Pred)) {
     LoadInst *Load = IC.Builder.CreateLoad(VecTy, PtrOp);
     Load->copyMetadata(II);
@@ -1745,6 +1776,10 @@ instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II) {
   Type *Ty = II.getType();
   Value *PassThru = ConstantAggregateZero::get(Ty);
 
+  // Replace with a zero constant when all lanes are inactive.
+  if (auto II_NA = instCombineSVENoActiveUnaryZero(IC, II))
+    return II_NA;
+
   // Contiguous gather => masked load.
   // (sve.ld1.gather.index Mask BasePtr (sve.index IndexBase 1))
   // => (masked.load (gep BasePtr IndexBase) Align Mask zeroinitializer)
@@ -1971,6 +2006,41 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   switch (IID) {
   default:
     break;
+
+  case Intrinsic::aarch64_sve_ld1_gather:
+  case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_ld1_gather_sxtw:
+  case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
+  case Intrinsic::aarch64_sve_ld1_gather_uxtw:
+  case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
+  case Intrinsic::aarch64_sve_ld1q_gather_index:
+  case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
+  case Intrinsic::aarch64_sve_ld1ro:
+  case Intrinsic::aarch64_sve_ld1rq:
+  case Intrinsic::aarch64_sve_ld1udq:
+  case Intrinsic::aarch64_sve_ld1uwq:
+  case Intrinsic::aarch64_sve_ld2_sret:
+  case Intrinsic::aarch64_sve_ld2q_sret:
+  case Intrinsic::aarch64_sve_ld3_sret:
+  case Intrinsic::aarch64_sve_ld3q_sret:
+  case Intrinsic::aarch64_sve_ld4_sret:
+  case Intrinsic::aarch64_sve_ld4q_sret:
+  case Intrinsic::aarch64_sve_ldff1:
+  case Intrinsic::aarch64_sve_ldff1_gather:
+  case Intrinsic::aarch64_sve_ldff1_gather_index:
+  case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
+  case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
+  case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
+  case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
+  case Intrinsic::aarch64_sve_ldnf1:
+  case Intrinsic::aarch64_sve_ldnt1:
+  case Intrinsic::aarch64_sve_ldnt1_gather:
+  case Intrinsic::aarch64_sve_ldnt1_gather_index:
+  case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
+    return instCombineSVENoActiveUnaryZero(IC, II);
   case Intrinsic::aarch64_neon_fmaxnm:
   case Intrinsic::aarch64_neon_fminnm:
     return instCombineMaxMinNM(IC, II);
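For the structure-returning variants (the *_sret cases above), the helper builds one zero vector per struct element and replaces the call with the resulting constant struct. A sketch of the expected effect, again with an assumed mangled name rather than a test from the commit:

    %r = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base)

folds to an all-zero two-element struct constant.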