@@ -1024,10 +1024,10 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
10241024
10251025 // Calculate the widest type required for known TC, VF and UF.
10261026 auto ComputeBitWidth = [](APInt TC, uint64_t Align) {
1027- auto AlignedTC =
1027+ APInt AlignedTC =
10281028 Align * APIntOps::RoundingUDiv (TC, APInt (TC.getBitWidth (), Align),
10291029 APInt::Rounding::UP);
1030- auto MaxVal = AlignedTC - 1 ;
1030+ APInt MaxVal = AlignedTC - 1 ;
10311031 return std::max<unsigned >(PowerOf2Ceil (MaxVal.getActiveBits ()), 8 );
10321032 };
10331033 unsigned NewBitWidth =
@@ -1041,6 +1041,10 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
10411041 VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
10421042 for (VPRecipeBase &Phi : HeaderVPBB->phis ()) {
10431043 auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
1044+
1045+ // Only canonical IVs are handled for now, as replacing the start and stop
1046+ // values is trivial, and the optimisation is performed only when the IV's
1047+ // sole user is the comparison controlling the loop's control flow.
10441048 if (!WideIV || !WideIV->isCanonical () ||
10451049 WideIV->hasMoreThanOneUniqueUser () ||
10461050 NewIVTy == WideIV->getScalarType ())
@@ -1064,7 +1068,7 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
10641068 auto *NewBTC = new VPWidenCastRecipe (
10651069 Instruction::Trunc, Plan.getOrCreateBackedgeTakenCount (), NewIVTy);
10661070 Plan.getVectorPreheader ()->appendRecipe (NewBTC);
1067- auto *Cmp = dyn_cast <VPInstruction>(*WideIV->user_begin ());
1071+ auto *Cmp = cast <VPInstruction>(*WideIV->user_begin ());
10681072 Cmp->setOperand (1 , NewBTC);
10691073
10701074 MadeChange = true ;
0 commit comments