@@ -1334,23 +1334,10 @@ class LoopVectorizationCostModel {
13341334 : ChosenTailFoldingStyle->second ;
13351335 }
13361336
1337- RTCheckStyle getRTCheckStyle (TailFoldingStyle TFStyle,
1338- const TargetTransformInfo &TTI) const {
1339- if (!TTI.useSafeEltsMask ())
1340- return RTCheckStyle::ScalarDifference;
1341-
1342- switch (TFStyle) {
1343- case TailFoldingStyle::Data:
1344- case TailFoldingStyle::DataAndControlFlow:
1345- case TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck:
1346- return RTCheckStyle::UseSafeEltsMask;
1347- default :
1348- return RTCheckStyle::ScalarDifference;
1349- }
1350- }
1351-
13521337 RTCheckStyle getRTCheckStyle (const TargetTransformInfo &TTI) const {
1353- return getRTCheckStyle (getTailFoldingStyle (), TTI);
1338+ if (TTI.useSafeEltsMask ())
1339+ return RTCheckStyle::UseSafeEltsMask;
1340+ return RTCheckStyle::ScalarDifference;
13541341 }
13551342
13561343 // / Selects and saves TailFoldingStyle for 2 options - if IV update may
@@ -1883,10 +1870,7 @@ class GeneratedRTChecks {
18831870 " vector.memcheck" );
18841871
18851872 auto DiffChecks = RtPtrChecking.getDiffChecks ();
1886- if (UseSafeEltsMask) {
1887- MemRuntimeCheckCond = addSafeEltsRuntimeChecks (
1888- MemCheckBlock->getTerminator (), *DiffChecks, MemCheckExp, VF);
1889- } else if (DiffChecks) {
1873+ if (DiffChecks) {
18901874 Value *RuntimeVF = nullptr ;
18911875 MemRuntimeCheckCond = addDiffRuntimeChecks (
18921876 MemCheckBlock->getTerminator (), *DiffChecks, MemCheckExp,
@@ -8585,7 +8569,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
85858569 std::optional<ArrayRef<PointerDiffInfo>> RTChecks =
85868570 CM.Legal ->getRuntimePointerChecking ()->getDiffChecks ();
85878571 if (RTChecks.has_value () &&
8588- CM.getRTCheckStyle (Style, TTI) == RTCheckStyle::UseSafeEltsMask)
8572+ CM.getRTCheckStyle (TTI) == RTCheckStyle::UseSafeEltsMask)
85898573 DiffChecks = *RTChecks;
85908574
85918575 VPlanTransforms::addActiveLaneMask (*Plan, ForControlFlow,
@@ -9008,11 +8992,91 @@ void LoopVectorizationPlanner::attachRuntimeChecks(
90088992 assert ((!CM.OptForSize ||
90098993 CM.Hints ->getForce () == LoopVectorizeHints::FK_Enabled) &&
90108994 " Cannot SCEV check stride or overflow when optimizing for size" );
9011- VPlanTransforms::attachCheckBlock (Plan, SCEVCheckCond, SCEVCheckBlock,
8995+ VPlanTransforms::attachCheckBlock (Plan, Plan.getOrAddLiveIn (SCEVCheckCond),
8996+ Plan.createVPIRBasicBlock (SCEVCheckBlock),
90128997 HasBranchWeights);
90138998 }
90148999 const auto &[MemCheckCond, MemCheckBlock] = RTChecks.getMemRuntimeChecks ();
90159000 if (MemCheckBlock && MemCheckBlock->hasNPredecessors (0 )) {
9001+ VPValue *MemCheckCondVPV = Plan.getOrAddLiveIn (MemCheckCond);
9002+ VPBasicBlock *MemCheckBlockVP = Plan.createVPIRBasicBlock (MemCheckBlock);
9003+ std::optional<ArrayRef<PointerDiffInfo>> ChecksOpt =
9004+ CM.Legal ->getRuntimePointerChecking ()->getDiffChecks ();
9005+
9006+ // Create a mask enabling safe elements for each iteration.
9007+ if (CM.getRTCheckStyle (TTI) == RTCheckStyle::UseSafeEltsMask &&
9008+ ChecksOpt.has_value () && ChecksOpt->size () > 0 ) {
9009+ ArrayRef<PointerDiffInfo> Checks = *ChecksOpt;
9010+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion ();
9011+ VPBasicBlock *LoopBody = LoopRegion->getEntryBasicBlock ();
9012+ VPBuilder Builder (MemCheckBlockVP);
9013+
9014+ // / Create a mask for each possibly-aliasing pointer pair, ANDing them if
9015+ // / there's more than one pair.
9016+ VPValue *AliasMask = nullptr ;
9017+ for (PointerDiffInfo Check : Checks) {
9018+ VPValue *Sink =
9019+ vputils::getOrCreateVPValueForSCEVExpr (Plan, Check.SinkStart );
9020+ VPValue *Src =
9021+ vputils::getOrCreateVPValueForSCEVExpr (Plan, Check.SrcStart );
9022+ VPAliasLaneMaskRecipe *M = new VPAliasLaneMaskRecipe (
9023+ Src, Sink, Check.AccessSize , Check.WriteAfterRead );
9024+ MemCheckBlockVP->appendRecipe (M);
9025+ if (AliasMask)
9026+ AliasMask = Builder.createAnd (AliasMask, M);
9027+ else
9028+ AliasMask = M;
9029+ }
9030+ assert (AliasMask && " Expected an alias mask to have been created" );
9031+
9032+ // Replace uses of the loop body's active lane mask phi with an AND of the
9033+ // phi and the alias mask.
9034+ for (VPRecipeBase &R : *LoopBody) {
9035+ auto *MaskPhi = dyn_cast<VPActiveLaneMaskPHIRecipe>(&R);
9036+ if (!MaskPhi)
9037+ continue ;
9038+ VPInstruction *And = new VPInstruction (Instruction::BinaryOps::And,
9039+ {MaskPhi, AliasMask});
9040+ MaskPhi->replaceUsesWithIf (And, [And](VPUser &U, unsigned ) {
9041+ auto *UR = dyn_cast<VPRecipeBase>(&U);
9042+ // If this is the first user, insert the AND.
9043+ if (UR && !And->getParent ())
9044+ And->insertBefore (UR);
9045+ bool Replace = UR != And;
9046+ return Replace;
9047+ });
9048+ }
9049+
9050+ // An empty mask would cause an infinite loop since the induction variable
9051+ // is updated with the number of set elements in the mask. Make sure we
9052+ // don't execute the vector loop when the mask is empty.
9053+ VPInstruction *PopCount =
9054+ new VPInstruction (VPInstruction::PopCount, {AliasMask});
9055+ PopCount->insertAfter (AliasMask->getDefiningRecipe ());
9056+ VPValue *Cmp =
9057+ Builder.createICmp (CmpInst::Predicate::ICMP_EQ, PopCount,
9058+ Plan.getOrAddLiveIn (ConstantInt::get (
9059+ IntegerType::get (Plan.getContext (), 64 ), 0 )));
9060+ MemCheckCondVPV = Cmp;
9061+
9062+ // Update the IV by the number of active lanes in the mask.
9063+ auto *CanonicalIVPHI = LoopRegion->getCanonicalIV ();
9064+ auto *CanonicalIVIncrement =
9065+ cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue ());
9066+
9067+ // Increment phi by correct amount.
9068+ VPValue *IncrementBy = PopCount;
9069+ Type *IVType = CanonicalIVPHI->getScalarType ();
9070+
9071+ if (IVType->getScalarSizeInBits () < 64 ) {
9072+ Builder.setInsertPoint (CanonicalIVIncrement);
9073+ IncrementBy =
9074+ Builder.createScalarCast (Instruction::Trunc, IncrementBy, IVType,
9075+ CanonicalIVIncrement->getDebugLoc ());
9076+ }
9077+ CanonicalIVIncrement->setOperand (1 , IncrementBy);
9078+ }
9079+
90169080 // VPlan-native path does not do any analysis for runtime checks
90179081 // currently.
90189082 assert ((!EnableVPlanNativePath || OrigLoop->isInnermost ()) &&
@@ -9033,7 +9097,7 @@ void LoopVectorizationPlanner::attachRuntimeChecks(
90339097 " (e.g., adding 'restrict')." ;
90349098 });
90359099 }
9036- VPlanTransforms::attachCheckBlock (Plan, MemCheckCond, MemCheckBlock ,
9100+ VPlanTransforms::attachCheckBlock (Plan, MemCheckCondVPV, MemCheckBlockVP ,
90379101 HasBranchWeights);
90389102 }
90399103}
@@ -10001,9 +10065,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1000110065 // Optimistically generate runtime checks if they are needed. Drop them if
1000210066 // they turn out to not be profitable.
1000310067 if (VF.Width .isVector () || SelectedIC > 1 ) {
10004- TailFoldingStyle TFStyle = CM.getTailFoldingStyle ();
1000510068 bool UseSafeEltsMask =
10006- CM.getRTCheckStyle (TFStyle, *TTI) == RTCheckStyle::UseSafeEltsMask;
10069+ CM.getRTCheckStyle (*TTI) == RTCheckStyle::UseSafeEltsMask;
1000710070 if (UseSafeEltsMask)
1000810071 LoopsAliasMasked++;
1000910072 Checks.create (L, *LVL.getLAI (), PSE.getPredicate (), VF.Width , SelectedIC,
0 commit comments