@@ -2346,6 +2346,12 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
23462346 // End if-block.
23472347 VPRegionBlock *Parent = RepRecipe->getParent ()->getParent ();
23482348 bool IfPredicateInstr = Parent ? Parent->isReplicator () : false ;
2349+ assert ((Parent || all_of (RepRecipe->operands (),
2350+ [](VPValue *Op) {
2351+ return Op->isDefinedOutsideLoopRegions ();
2352+ })) &&
2353+ " Expected a recipe is either within a region or all of its operands "
2354+ " are defined outside the vectorized region." );
23492355 if (IfPredicateInstr)
23502356 PredicatedInstructions.push_back (Cloned);
23512357}
@@ -8950,6 +8956,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
89508956 bool NeedsBlends = BB != HeaderBB && !BB->phis ().empty ();
89518957 return Legal->blockNeedsPredication (BB) || NeedsBlends;
89528958 });
8959+ auto *MiddleVPBB =
8960+ cast<VPBasicBlock>(Plan->getVectorLoopRegion ()->getSingleSuccessor ());
8961+ VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi ();
89538962 for (BasicBlock *BB : make_range (DFS.beginRPO (), DFS.endRPO ())) {
89548963 // Relevant instructions from basic block BB will be grouped into VPRecipe
89558964 // ingredients and fill a new VPBasicBlock.
@@ -8976,12 +8985,21 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
89768985 Operands = {OpRange.begin (), OpRange.end ()};
89778986 }
89788987
8979- // Invariant stores inside loop will be deleted and a single store
8980- // with the final reduction value will be added to the exit block
8988+ // Stores to the invariant address of a reduction get no recipe inside the
8989+ // loop. Instead, a uniform store recipe for the final invariant store of
8990+ // the reduction is created in the middle block, after the vector loop.
89818991 StoreInst *SI;
89828992 if ((SI = dyn_cast<StoreInst>(&I)) &&
8983- Legal->isInvariantAddressOfReduction (SI->getPointerOperand ()))
8993+ Legal->isInvariantAddressOfReduction (SI->getPointerOperand ())) {
8994+ // Only create a recipe for the final invariant store of the reduction;
8994+ // any other store to the invariant address is skipped.
8995+ if (!Legal->isInvariantStoreOfReduction (SI))
8996+ continue ;
8997+ auto *Recipe = new VPReplicateRecipe (
8998+ SI, RecipeBuilder.mapToVPValues (Instr->operands ()),
8999+ true /* IsUniform */ );
9000+ Recipe->insertBefore (*MiddleVPBB, MBIP);
89849001 continue ;
9002+ }
89859003
89869004 VPRecipeBase *Recipe =
89879005 RecipeBuilder.tryToCreateWidenRecipe (Instr, Operands, Range, VPBB);
@@ -9150,45 +9168,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
91509168 using namespace VPlanPatternMatch ;
91519169 VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion ();
91529170 VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock ();
9153- // Gather all VPReductionPHIRecipe and sort them so that Intermediate stores
9154- // sank outside of the loop would keep the same order as they had in the
9155- // original loop.
9156- SmallVector<VPReductionPHIRecipe *> ReductionPHIList;
9157- for (VPRecipeBase &R : Header->phis ()) {
9158- if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
9159- ReductionPHIList.emplace_back (ReductionPhi);
9160- }
9161- bool HasIntermediateStore = false ;
9162- stable_sort (ReductionPHIList,
9163- [this , &HasIntermediateStore](const VPReductionPHIRecipe *R1,
9164- const VPReductionPHIRecipe *R2) {
9165- auto *IS1 = R1->getRecurrenceDescriptor ().IntermediateStore ;
9166- auto *IS2 = R2->getRecurrenceDescriptor ().IntermediateStore ;
9167- HasIntermediateStore |= IS1 || IS2;
9168-
9169- // If neither of the recipes has an intermediate store, keep the
9170- // order the same.
9171- if (!IS1 && !IS2)
9172- return false ;
9173-
9174- // If only one of the recipes has an intermediate store, then
9175- // move it towards the beginning of the list.
9176- if (IS1 && !IS2)
9177- return true ;
9178-
9179- if (!IS1 && IS2)
9180- return false ;
9181-
9182- // If both recipes have an intermediate store, then the recipe
9183- // with the later store should be processed earlier. So it
9184- // should go to the beginning of the list.
9185- return DT->dominates (IS2, IS1);
9186- });
9187-
9188- if (HasIntermediateStore && ReductionPHIList.size () > 1 )
9189- for (VPRecipeBase *R : ReductionPHIList)
9190- R->moveBefore (*Header, Header->getFirstNonPhi ());
9191-
9171+ VPBasicBlock *MiddleVPBB =
9172+ cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor ());
91929173 for (VPRecipeBase &R : Header->phis ()) {
91939174 auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
91949175 if (!PhiR || !PhiR->isInLoop () || (MinVF.isScalar () && !PhiR->isOrdered ()))
@@ -9207,9 +9188,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
92079188 for (VPUser *U : Cur->users ()) {
92089189 auto *UserRecipe = cast<VPSingleDefRecipe>(U);
92099190 if (!UserRecipe->getParent ()->getEnclosingLoopRegion ()) {
9210- assert (match (U, m_Binary<VPInstruction::ExtractFromEnd>(
9211- m_VPValue (), m_VPValue ())) &&
9212- " U must be an ExtractFromEnd VPInstruction" );
9191+ assert (UserRecipe->getParent () == MiddleVPBB &&
9192+ " U must be either in the loop region or the middle block." );
92139193 continue ;
92149194 }
92159195 Worklist.insert (UserRecipe);
@@ -9314,8 +9294,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
93149294 }
93159295 VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock ();
93169296 Builder.setInsertPoint (&*LatchVPBB->begin ());
9317- VPBasicBlock *MiddleVPBB =
9318- cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor ());
93199297 VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi ();
93209298 for (VPRecipeBase &R :
93219299 Plan->getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
@@ -9390,12 +9368,13 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
93909368 // also modeled in VPlan.
93919369 auto *FinalReductionResult = new VPInstruction (
93929370 VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
9371+ // Update all users outside the vector region.
9372+ OrigExitingVPV->replaceUsesWithIf (
9373+ FinalReductionResult, [](VPUser &User, unsigned ) {
9374+ auto *Parent = cast<VPRecipeBase>(&User)->getParent ();
9375+ return Parent && !Parent->getParent ();
9376+ });
93939377 FinalReductionResult->insertBefore (*MiddleVPBB, IP);
9394- OrigExitingVPV->replaceUsesWithIf (FinalReductionResult, [](VPUser &User,
9395- unsigned ) {
9396- return match (&User, m_Binary<VPInstruction::ExtractFromEnd>(m_VPValue (),
9397- m_VPValue ()));
9398- });
93999378
94009379 // Adjust AnyOf reductions; replace the reduction phi for the selected value
94019380 // with a boolean reduction phi node to check if the condition is true in
0 commit comments