@@ -3177,14 +3177,18 @@ bool VectorCombine::foldCastFromReductions(Instruction &I) {
31773177/// Returns true if this ShuffleVectorInst eventually feeds into a
31783178/// vector reduction intrinsic (e.g., vector_reduce_add) by only following
31793179/// chains of shuffles and binary operators (in any combination/order).
3180- static bool feedsIntoVectorReduction (ShuffleVectorInst *SVI) {
3180+ /// The search gives up and returns false once a followed chain is more than Depth steps away from SVI.
3181+ static bool feedsIntoVectorReduction (ShuffleVectorInst *SVI, unsigned Depth) {
31813182 SmallPtrSet<Instruction *, 8 > Visited;
3182- SmallVector<Instruction *, 4 > WorkList;
3183+ SmallVector<std::pair< Instruction *, unsigned > , 4 > WorkList;
31833184 bool FoundReduction = false ;
31843185
3185- WorkList.push_back (SVI);
3186+ WorkList.push_back ({ SVI, 0 } );
31863187 while (!WorkList.empty ()) {
3187- Instruction *I = WorkList.pop_back_val ();
3188+ auto [I, CurDepth] = WorkList.pop_back_val ();
3189+ if (CurDepth > Depth)
3190+ return false ;
3191+
31883192 for (User *U : I->users ()) {
31893193 auto *UI = dyn_cast<Instruction>(U);
31903194 if (!UI || !Visited.insert (UI).second )
@@ -3199,6 +3203,10 @@ static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI) {
31993203 case Intrinsic::vector_reduce_and:
32003204 case Intrinsic::vector_reduce_or:
32013205 case Intrinsic::vector_reduce_xor:
3206+ case Intrinsic::vector_reduce_smin:
3207+ case Intrinsic::vector_reduce_smax:
3208+ case Intrinsic::vector_reduce_umin:
3209+ case Intrinsic::vector_reduce_umax:
32023210 FoundReduction = true ;
32033211 continue ;
32043212 default :
@@ -3208,8 +3216,7 @@ static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI) {
32083216
32093217 if (!isa<BinaryOperator>(UI) && !isa<ShuffleVectorInst>(UI))
32103218 return false ;
3211-
3212- WorkList.emplace_back (UI);
3219+ WorkList.emplace_back (UI, CurDepth + 1 );
32133220 }
32143221 }
32153222 return FoundReduction;
@@ -3481,9 +3488,9 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
34813488 unsigned NumGroups = Mask.size () / MaxElementsInVector;
34823489 // For each group of MaxElementsInVector contiguous elements,
34833490 // collect their shuffle pattern and insert into the set of unique patterns.
3484- for (unsigned k = 0 ; k < NumFullVectors; ++k ) {
3485- for (unsigned l = 0 ; l < MaxElementsInVector; ++l )
3486- SubShuffle[l ] = Mask[MaxElementsInVector * k + l ];
3491+ for (unsigned I = 0 ; I < NumFullVectors; ++I ) {
3492+ for (unsigned J = 0 ; J < MaxElementsInVector; ++J )
3493+ SubShuffle[J ] = Mask[MaxElementsInVector * I + J ];
34873494 if (UniqueShuffles.insert (SubShuffle).second )
34883495 NumUniqueGroups += 1 ;
34893496 }
@@ -3552,8 +3559,8 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
35523559 LLVM_DEBUG (dbgs () << " Found a binop select shuffle pattern: " << I << " \n " );
35533560 LLVM_DEBUG (dbgs () << " CostBefore: " << CostBefore
35543561 << " vs CostAfter: " << CostAfter << " \n " );
3555- if (CostBefore < CostAfter || CostBefore == 0 ||
3556- (CostBefore == CostAfter && !feedsIntoVectorReduction (SVI)))
3562+ if (CostBefore < CostAfter ||
3563+ (CostBefore == CostAfter && !feedsIntoVectorReduction (SVI, 8 )))
35573564 return false ;
35583565
35593566 // The cost model has passed, create the new instructions.
0 commit comments