2020#include " VPlanPatternMatch.h"
2121#include " VPlanUtils.h"
2222#include " VPlanVerifier.h"
23+ #include " llvm/ADT/APInt.h"
2324#include " llvm/ADT/PostOrderIterator.h"
2425#include " llvm/ADT/STLExtras.h"
2526#include " llvm/ADT/SetVector.h"
2930#include " llvm/Analysis/VectorUtils.h"
3031#include " llvm/IR/Intrinsics.h"
3132#include " llvm/IR/PatternMatch.h"
33+ #include " llvm/Support/Casting.h"
34+ #include " llvm/Support/TypeSize.h"
3235
3336using namespace llvm ;
3437
@@ -1086,11 +1089,84 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
10861089 }
10871090}
10881091
1089- void VPlanTransforms::optimizeForVFAndUF (VPlan &Plan, ElementCount BestVF,
1090- unsigned BestUF,
1091- PredicatedScalarEvolution &PSE) {
1092- assert (Plan.hasVF (BestVF) && " BestVF is not available in Plan" );
1093- assert (Plan.hasUF (BestUF) && " BestUF is not available in Plan" );
1092+ // / Optimize the width of vector induction variables in \p Plan based on a known
1093+ // / constant Trip Count, \p BestVF and \p BestUF.
1094+ static bool optimizeVectorInductionWidthForTCAndVFUF (VPlan &Plan,
1095+ ElementCount BestVF,
1096+ unsigned BestUF) {
1097+ // Only proceed if we have not completely removed the vector region.
1098+ if (!Plan.getVectorLoopRegion ())
1099+ return false ;
1100+
1101+ if (!Plan.getTripCount ()->isLiveIn ())
1102+ return false ;
1103+ auto *TC = dyn_cast_if_present<ConstantInt>(
1104+ Plan.getTripCount ()->getUnderlyingValue ());
1105+ if (!TC || !BestVF.isFixed ())
1106+ return false ;
1107+
1108+ // Calculate the minimum power-of-2 bit width that can fit the known TC, VF
1109+ // and UF. Returns at least 8.
1110+ auto ComputeBitWidth = [](APInt TC, uint64_t Align) {
1111+ APInt AlignedTC =
1112+ Align * APIntOps::RoundingUDiv (TC, APInt (TC.getBitWidth (), Align),
1113+ APInt::Rounding::UP);
1114+ APInt MaxVal = AlignedTC - 1 ;
1115+ return std::max<unsigned >(PowerOf2Ceil (MaxVal.getActiveBits ()), 8 );
1116+ };
1117+ unsigned NewBitWidth =
1118+ ComputeBitWidth (TC->getValue (), BestVF.getKnownMinValue () * BestUF);
1119+
1120+ LLVMContext &Ctx = Plan.getCanonicalIV ()->getScalarType ()->getContext ();
1121+ auto *NewIVTy = IntegerType::get (Ctx, NewBitWidth);
1122+
1123+ bool MadeChange = false ;
1124+
1125+ VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
1126+ for (VPRecipeBase &Phi : HeaderVPBB->phis ()) {
1127+ auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
1128+
1129+ // Currently only handle canonical IVs as it is trivial to replace the start
1130+ // and stop values, and we currently only perform the optimization when the
1131+ // IV has a single use.
1132+ if (!WideIV || !WideIV->isCanonical () ||
1133+ WideIV->hasMoreThanOneUniqueUser () ||
1134+ NewIVTy == WideIV->getScalarType ())
1135+ continue ;
1136+
1137+ // Currently only handle cases where the single user is a header-mask
1138+ // comparison with the backedge-taken-count.
1139+ using namespace VPlanPatternMatch ;
1140+ if (!match (
1141+ *WideIV->user_begin (),
1142+ m_Binary<Instruction::ICmp>(
1143+ m_Specific (WideIV),
1144+ m_Broadcast (m_Specific (Plan.getOrCreateBackedgeTakenCount ())))))
1145+ continue ;
1146+
1147+ // Update IV operands and comparison bound to use new narrower type.
1148+ auto *NewStart = Plan.getOrAddLiveIn (ConstantInt::get (NewIVTy, 0 ));
1149+ WideIV->setStartValue (NewStart);
1150+ auto *NewStep = Plan.getOrAddLiveIn (ConstantInt::get (NewIVTy, 1 ));
1151+ WideIV->setStepValue (NewStep);
1152+
1153+ auto *NewBTC = new VPWidenCastRecipe (
1154+ Instruction::Trunc, Plan.getOrCreateBackedgeTakenCount (), NewIVTy);
1155+ Plan.getVectorPreheader ()->appendRecipe (NewBTC);
1156+ auto *Cmp = cast<VPInstruction>(*WideIV->user_begin ());
1157+ Cmp->setOperand (1 , NewBTC);
1158+
1159+ MadeChange = true ;
1160+ }
1161+
1162+ return MadeChange;
1163+ }
1164+
1165+ // / Try to simplify the branch condition of \p Plan. This may restrict the
1166+ // / resulting plan to \p BestVF and \p BestUF.
1167+ static bool simplifyBranchConditionForVFAndUF (VPlan &Plan, ElementCount BestVF,
1168+ unsigned BestUF,
1169+ PredicatedScalarEvolution &PSE) {
10941170 VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion ();
10951171 VPBasicBlock *ExitingVPBB = VectorRegion->getExitingBasicBlock ();
10961172 auto *Term = &ExitingVPBB->back ();
@@ -1103,7 +1179,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
11031179 if (!match (Term, m_BranchOnCount (m_VPValue (), m_VPValue ())) &&
11041180 !match (Term,
11051181 m_BranchOnCond (m_Not (m_ActiveLaneMask (m_VPValue (), m_VPValue ())))))
1106- return ;
1182+ return false ;
11071183
11081184 ScalarEvolution &SE = *PSE.getSE ();
11091185 const SCEV *TripCount =
@@ -1114,7 +1190,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
11141190 const SCEV *C = SE.getElementCount (TripCount->getType (), NumElements);
11151191 if (TripCount->isZero () ||
11161192 !SE.isKnownPredicate (CmpInst::ICMP_ULE, TripCount, C))
1117- return ;
1193+ return false ;
11181194
11191195 // The vector loop region only executes once. If possible, completely remove
11201196 // the region, otherwise replace the terminator controlling the latch with
@@ -1140,7 +1216,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
11401216
11411217 VPBlockUtils::connectBlocks (Preheader, Header);
11421218 VPBlockUtils::connectBlocks (ExitingVPBB, Exit);
1143- simplifyRecipes (Plan, *CanIVTy);
1219+ VPlanTransforms:: simplifyRecipes (Plan, *CanIVTy);
11441220 } else {
11451221 // The vector region contains header phis for which we cannot remove the
11461222 // loop region yet.
@@ -1153,8 +1229,23 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
11531229
11541230 Term->eraseFromParent ();
11551231
1156- Plan.setVF (BestVF);
1157- assert (Plan.getUF () == BestUF && " BestUF must match the Plan's UF" );
1232+ return true ;
1233+ }
1234+
1235+ void VPlanTransforms::optimizeForVFAndUF (VPlan &Plan, ElementCount BestVF,
1236+ unsigned BestUF,
1237+ PredicatedScalarEvolution &PSE) {
1238+ assert (Plan.hasVF (BestVF) && " BestVF is not available in Plan" );
1239+ assert (Plan.hasUF (BestUF) && " BestUF is not available in Plan" );
1240+
1241+ bool MadeChange =
1242+ simplifyBranchConditionForVFAndUF (Plan, BestVF, BestUF, PSE);
1243+ MadeChange |= optimizeVectorInductionWidthForTCAndVFUF (Plan, BestVF, BestUF);
1244+
1245+ if (MadeChange) {
1246+ Plan.setVF (BestVF);
1247+ assert (Plan.getUF () == BestUF && " BestUF must match the Plan's UF" );
1248+ }
11581249 // TODO: Further simplifications are possible
11591250 // 1. Replace inductions with constants.
11601251 // 2. Replace vector loop region with VPBasicBlock.
0 commit comments