@@ -7255,7 +7255,10 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
72557255 // Get the VPInstruction computing the reduction result in the middle block.
72567256 // The first operand may not be from the middle block if it is not connected
72577257 // to the scalar preheader. In that case, there's nothing to fix.
7258- auto *EpiRedResult = dyn_cast<VPInstruction>(EpiResumePhiR->getOperand (0 ));
7258+ VPValue *Incoming = EpiResumePhiR->getOperand (0 );
7259+ match (Incoming, VPlanPatternMatch::m_ZExtOrSExt (
7260+ VPlanPatternMatch::m_VPValue (Incoming)));
7261+ auto *EpiRedResult = dyn_cast<VPInstruction>(Incoming);
72597262 if (!EpiRedResult ||
72607263 (EpiRedResult->getOpcode () != VPInstruction::ComputeAnyOfResult &&
72617264 EpiRedResult->getOpcode () != VPInstruction::ComputeReductionResult &&
@@ -9206,28 +9209,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
92069209 PhiR->setOperand (1 , NewExitingVPV);
92079210 }
92089211
9209- // If the vector reduction can be performed in a smaller type, we truncate
9210- // then extend the loop exit value to enable InstCombine to evaluate the
9211- // entire expression in the smaller type.
9212- if (MinVF.isVector () && PhiTy != RdxDesc.getRecurrenceType () &&
9213- !RecurrenceDescriptor::isAnyOfRecurrenceKind (
9214- RdxDesc.getRecurrenceKind ())) {
9215- assert (!PhiR->isInLoop () && " Unexpected truncated inloop reduction!" );
9216- Type *RdxTy = RdxDesc.getRecurrenceType ();
9217- auto *Trunc =
9218- new VPWidenCastRecipe (Instruction::Trunc, NewExitingVPV, RdxTy);
9219- auto *Extnd =
9220- RdxDesc.isSigned ()
9221- ? new VPWidenCastRecipe (Instruction::SExt, Trunc, PhiTy)
9222- : new VPWidenCastRecipe (Instruction::ZExt, Trunc, PhiTy);
9223-
9224- Trunc->insertAfter (NewExitingVPV->getDefiningRecipe ());
9225- Extnd->insertAfter (Trunc);
9226- if (PhiR->getOperand (1 ) == NewExitingVPV)
9227- PhiR->setOperand (1 , Extnd->getVPSingleValue ());
9228- NewExitingVPV = Extnd;
9229- }
9230-
92319212 // We want code in the middle block to appear to execute on the location of
92329213 // the scalar loop's latch terminator because: (a) it is all compiler
92339214 // generated, (b) these instructions are always executed after evaluating
@@ -9266,6 +9247,31 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
92669247 Builder.createNaryOp (VPInstruction::ComputeReductionResult,
92679248 {PhiR, NewExitingVPV}, Flags, ExitDL);
92689249 }
9250+ // If the vector reduction can be performed in a smaller type, we truncate
9251+ // then extend the loop exit value to enable InstCombine to evaluate the
9252+ // entire expression in the smaller type.
9253+ if (MinVF.isVector () && PhiTy != RdxDesc.getRecurrenceType () &&
9254+ !RecurrenceDescriptor::isAnyOfRecurrenceKind (
9255+ RdxDesc.getRecurrenceKind ())) {
9256+ assert (!PhiR->isInLoop () && " Unexpected truncated inloop reduction!" );
9257+ Type *RdxTy = RdxDesc.getRecurrenceType ();
9258+ auto *Trunc =
9259+ new VPWidenCastRecipe (Instruction::Trunc, NewExitingVPV, RdxTy);
9260+ Instruction::CastOps ExtendOpc =
9261+ RdxDesc.isSigned () ? Instruction::SExt : Instruction::ZExt;
9262+ auto *Extnd = new VPWidenCastRecipe (ExtendOpc, Trunc, PhiTy);
9263+ Trunc->insertAfter (NewExitingVPV->getDefiningRecipe ());
9264+ Extnd->insertAfter (Trunc);
9265+ if (PhiR->getOperand (1 ) == NewExitingVPV)
9266+ PhiR->setOperand (1 , Extnd->getVPSingleValue ());
9267+
9268+ // Update ComputeReductionResult with the truncated exiting value and
9269+ // extend its result.
9270+ FinalReductionResult->setOperand (1 , Trunc);
9271+ FinalReductionResult =
9272+ Builder.createScalarCast (ExtendOpc, FinalReductionResult, PhiTy, {});
9273+ }
9274+
92699275 // Update all users outside the vector region. Also replace redundant
92709276 // ExtractLastElement.
92719277 for (auto *U : to_vector (OrigExitingVPV->users ())) {
0 commit comments