@@ -135,6 +135,7 @@ class VectorCombine {
135135 bool foldShuffleOfIntrinsics (Instruction &I);
136136 bool foldShuffleToIdentity (Instruction &I);
137137 bool foldShuffleFromReductions (Instruction &I);
138+ bool foldShuffleChainsToReduce (Instruction &I);
138139 bool foldCastFromReductions (Instruction &I);
139140 bool foldSelectShuffle (Instruction &I, bool FromReduction = false );
140141 bool foldInterleaveIntrinsics (Instruction &I);
@@ -3129,6 +3130,179 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
31293130 return MadeChanges;
31303131}
31313132
3133+ bool VectorCombine::foldShuffleChainsToReduce (Instruction &I) {
3134+ auto *EEI = dyn_cast<ExtractElementInst>(&I);
3135+ if (!EEI)
3136+ return false ;
3137+
3138+ std::queue<Value *> InstWorklist;
3139+ Value *InitEEV = nullptr ;
3140+ Intrinsic::ID CommonOp = 0 ;
3141+
3142+ bool IsFirstCallInst = true ;
3143+ bool ShouldBeCallInst = true ;
3144+
3145+ SmallVector<Value *, 3 > PrevVecV (3 , nullptr );
3146+ int64_t ShuffleMaskHalf = -1 , ExpectedShuffleMaskHalf = 1 ;
3147+ int64_t VecSize = -1 ;
3148+
3149+ Value *VecOp;
3150+ if (!match (&I, m_ExtractElt (m_Value (VecOp), m_Zero ())))
3151+ return false ;
3152+
3153+ auto *FVT = dyn_cast<FixedVectorType>(VecOp->getType ());
3154+ if (!FVT)
3155+ return false ;
3156+
3157+ VecSize = FVT->getNumElements ();
3158+ if (VecSize < 2 || (VecSize % 2 ) != 0 )
3159+ return false ;
3160+
3161+ ShuffleMaskHalf = 1 ;
3162+ PrevVecV[2 ] = VecOp;
3163+ InitEEV = EEI;
3164+
3165+ InstWorklist.push (PrevVecV[2 ]);
3166+
3167+ while (!InstWorklist.empty ()) {
3168+ Value *V = InstWorklist.front ();
3169+ InstWorklist.pop ();
3170+
3171+ auto *CI = dyn_cast<Instruction>(V);
3172+ if (!CI)
3173+ return false ;
3174+
3175+ if (auto *CallI = dyn_cast<CallInst>(CI)) {
3176+ if (!ShouldBeCallInst || !PrevVecV[2 ])
3177+ return false ;
3178+
3179+ if (!IsFirstCallInst &&
3180+ any_of (PrevVecV, [](Value *VecV) { return VecV == nullptr ; }))
3181+ return false ;
3182+
3183+ if (CallI != (IsFirstCallInst ? PrevVecV[2 ] : PrevVecV[0 ]))
3184+ return false ;
3185+ IsFirstCallInst = false ;
3186+
3187+ auto *II = dyn_cast<IntrinsicInst>(CallI);
3188+ if (!II)
3189+ return false ;
3190+
3191+ if (!CommonOp)
3192+ CommonOp = II->getIntrinsicID ();
3193+ if (II->getIntrinsicID () != CommonOp)
3194+ return false ;
3195+
3196+ switch (II->getIntrinsicID ()) {
3197+ case Intrinsic::umin:
3198+ case Intrinsic::umax:
3199+ case Intrinsic::smin:
3200+ case Intrinsic::smax: {
3201+ auto *Op0 = CallI->getOperand (0 );
3202+ auto *Op1 = CallI->getOperand (1 );
3203+ PrevVecV[0 ] = Op0;
3204+ PrevVecV[1 ] = Op1;
3205+ break ;
3206+ }
3207+ default :
3208+ return false ;
3209+ }
3210+ ShouldBeCallInst ^= 1 ;
3211+
3212+ if (!isa<ShuffleVectorInst>(PrevVecV[1 ]))
3213+ std::swap (PrevVecV[0 ], PrevVecV[1 ]);
3214+ InstWorklist.push (PrevVecV[1 ]);
3215+ InstWorklist.push (PrevVecV[0 ]);
3216+ } else if (auto *SVInst = dyn_cast<ShuffleVectorInst>(CI)) {
3217+ if (ShouldBeCallInst ||
3218+ any_of (PrevVecV, [](Value *VecV) { return VecV == nullptr ; }))
3219+ return false ;
3220+
3221+ if (SVInst != PrevVecV[1 ])
3222+ return false ;
3223+
3224+ auto *ShuffleVec = SVInst->getOperand (0 );
3225+ if (!ShuffleVec || ShuffleVec != PrevVecV[0 ])
3226+ return false ;
3227+
3228+ SmallVector<int > CurMask;
3229+ SVInst->getShuffleMask (CurMask);
3230+
3231+ if (ShuffleMaskHalf != ExpectedShuffleMaskHalf)
3232+ return false ;
3233+ ExpectedShuffleMaskHalf *= 2 ;
3234+
3235+ for (int Mask = 0 , MaskSize = CurMask.size (); Mask != MaskSize; ++Mask) {
3236+ if (Mask < ShuffleMaskHalf && CurMask[Mask] != ShuffleMaskHalf + Mask)
3237+ return false ;
3238+ if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1 )
3239+ return false ;
3240+ }
3241+ ShuffleMaskHalf *= 2 ;
3242+ if (ExpectedShuffleMaskHalf == VecSize)
3243+ break ;
3244+ ShouldBeCallInst ^= 1 ;
3245+ } else {
3246+ return false ;
3247+ }
3248+ }
3249+
3250+ if (ShouldBeCallInst)
3251+ return false ;
3252+
3253+ assert (VecSize != -1 && ExpectedShuffleMaskHalf == VecSize &&
3254+ " Expected Match for Vector Size and Mask Half" );
3255+
3256+ Value *FinalVecV = PrevVecV[0 ];
3257+ auto *FinalVecVTy = dyn_cast<FixedVectorType>(FinalVecV->getType ());
3258+
3259+ if (!InitEEV || !FinalVecV)
3260+ return false ;
3261+
3262+ assert (FinalVecVTy && " Expected non-null value for Vector Type" );
3263+
3264+ Intrinsic::ID ReducedOp = 0 ;
3265+ switch (CommonOp) {
3266+ case Intrinsic::umin:
3267+ ReducedOp = Intrinsic::vector_reduce_umin;
3268+ break ;
3269+ case Intrinsic::umax:
3270+ ReducedOp = Intrinsic::vector_reduce_umax;
3271+ break ;
3272+ case Intrinsic::smin:
3273+ ReducedOp = Intrinsic::vector_reduce_smin;
3274+ break ;
3275+ case Intrinsic::smax:
3276+ ReducedOp = Intrinsic::vector_reduce_smax;
3277+ break ;
3278+ default :
3279+ return false ;
3280+ }
3281+
3282+ InstructionCost OrigCost = 0 ;
3283+ unsigned int NumLevels = Log2_64 (VecSize);
3284+
3285+ for (unsigned int Level = 0 ; Level < NumLevels; ++Level) {
3286+ OrigCost += TTI.getShuffleCost (TargetTransformInfo::SK_PermuteSingleSrc,
3287+ FinalVecVTy, FinalVecVTy);
3288+ OrigCost += TTI.getArithmeticInstrCost (Instruction::ICmp, FinalVecVTy);
3289+ }
3290+ OrigCost += TTI.getVectorInstrCost (Instruction::ExtractElement, FinalVecVTy,
3291+ CostKind, 0 );
3292+
3293+ IntrinsicCostAttributes ICA (ReducedOp, FinalVecVTy, {FinalVecV});
3294+ InstructionCost NewCost = TTI.getIntrinsicInstrCost (ICA, CostKind);
3295+
3296+ if (NewCost >= OrigCost)
3297+ return false ;
3298+
3299+ auto *ReducedResult =
3300+ Builder.CreateIntrinsic (ReducedOp, {FinalVecV->getType ()}, {FinalVecV});
3301+ replaceValue (*InitEEV, *ReducedResult);
3302+
3303+ return true ;
3304+ }
3305+
/// Determine if it's more efficient to fold:
///   reduce(trunc(x)) -> trunc(reduce(x)).
///   reduce(sext(x)) -> sext(reduce(x)).
@@ -4216,6 +4390,9 @@ bool VectorCombine::run() {
42164390 if (foldCastFromReductions (I))
42174391 return true ;
42184392 break ;
4393+ case Instruction::ExtractElement:
4394+ MadeChange |= foldShuffleChainsToReduce (I);
4395+ break ;
42194396 case Instruction::ICmp:
42204397 case Instruction::FCmp:
42214398 if (foldExtractExtract (I))
0 commit comments