@@ -130,6 +130,41 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
   return true;
 }
 
+// Check if a load can be hoisted by verifying it doesn't alias with any stores
+// in blocks between FirstBB and LastBB using scoped noalias metadata.
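+// Illustrative sketch of what the metadata check relies on (the scope names
+// are hypothetical): a load carrying "!alias.scope !LoadScopes" may only be
+// hoisted past a store carrying "!noalias !StoreNoAlias" if the store's
+// noalias scopes prove it cannot access the memory described by LoadScopes;
+// any may-write recipe whose location or metadata cannot establish this
+// causes the function to return false.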
+static bool canHoistLoadWithNoAliasCheck(VPReplicateRecipe *Load,
+                                         VPBasicBlock *FirstBB,
+                                         VPBasicBlock *LastBB) {
+  // Get the load's memory location and check if it aliases with any stores
+  // using scoped noalias metadata.
+  auto LoadLoc = vputils::getMemoryLocation(*Load);
+  if (!LoadLoc || !LoadLoc->AATags.Scope)
+    return false;
+
+  const AAMDNodes &LoadAA = LoadLoc->AATags;
+  for (VPBlockBase *Block = FirstBB; Block;
+       Block = Block->getSingleSuccessor()) {
+    // This function assumes a simple linear chain of blocks. If there are
+    // multiple successors, we would need more complex analysis.
+    assert(Block->getNumSuccessors() <= 1 &&
+           "Expected at most one successor in block chain");
+    auto *VPBB = cast<VPBasicBlock>(Block);
+    for (VPRecipeBase &R : *VPBB) {
+      if (R.mayWriteToMemory()) {
+        auto Loc = vputils::getMemoryLocation(R);
+        // Bail out if we can't get the location or if the scoped noalias
+        // metadata indicates potential aliasing.
+        if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes(
+                        LoadAA.Scope, Loc->AATags.NoAlias))
+          return false;
+      }
+    }
+    if (Block == LastBB)
+      break;
+  }
+  return true;
+}
+
 static bool sinkScalarOperands(VPlan &Plan) {
   auto Iter = vp_depth_first_deep(Plan.getEntry());
   bool Changed = false;
@@ -3151,6 +3186,124 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
     }
   }
 }
+
+// Returns the intersection of metadata from a group of loads.
+static VPIRMetadata getCommonLoadMetadata(ArrayRef<VPReplicateRecipe *> Loads) {
+  VPIRMetadata CommonMetadata = *Loads.front();
+  for (VPReplicateRecipe *Load : drop_begin(Loads))
+    CommonMetadata.intersect(*Load);
+  return CommonMetadata;
+}
+
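+// Sketch of the transformation (the recipe and value names below are
+// illustrative, not taken from an actual VPlan dump): two predicated loads of
+// the same address under complementary masks,
+//   REPLICATE ir<%a> = load ir<%p>, mask ir<%m>
+//   REPLICATE ir<%b> = load ir<%p>, mask not(ir<%m>)
+// are replaced by a single unpredicated load placed at the earliest
+// dominating position, whose result both original users consume.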
+void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
+                                           const Loop *L) {
+  using namespace VPlanPatternMatch;
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+  VPTypeAnalysis TypeInfo(Plan);
+  VPDominatorTree VPDT(Plan);
+
+  // Group predicated loads by their address SCEV.
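+  // Keying on the address SCEV (rather than VPValue identity) lets loads
+  // whose addresses are computed separately in different predicated blocks,
+  // but evaluate to the same expression, land in the same group.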
+  DenseMap<const SCEV *, SmallVector<VPReplicateRecipe *>> LoadsByAddress;
+  for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) {
+    auto *VPBB = cast<VPBasicBlock>(Block);
+    for (VPRecipeBase &R : *VPBB) {
+      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
+      if (!RepR || RepR->getOpcode() != Instruction::Load ||
+          !RepR->isPredicated())
+        continue;
+
+      VPValue *Addr = RepR->getOperand(0);
+      const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, SE, L);
+      if (!isa<SCEVCouldNotCompute>(AddrSCEV))
+        LoadsByAddress[AddrSCEV].push_back(RepR);
+    }
+  }
+
+  // For each address, collect loads with complementary masks, sort by
+  // dominance, and use the earliest load.
+  for (auto &[Addr, Loads] : LoadsByAddress) {
+    if (Loads.size() < 2)
+      continue;
+
+    // Collect groups of loads with complementary masks.
+    SmallVector<SmallVector<VPReplicateRecipe *, 4>> LoadGroups;
+    for (VPReplicateRecipe *&LoadI : Loads) {
+      if (!LoadI)
+        continue;
+
+      VPValue *MaskI = LoadI->getMask();
+      Type *TypeI = TypeInfo.inferScalarType(LoadI);
+      SmallVector<VPReplicateRecipe *, 4> Group;
+      Group.push_back(LoadI);
+      LoadI = nullptr;
+
+      // Find all loads with the same type.
+      for (VPReplicateRecipe *&LoadJ : Loads) {
+        if (!LoadJ)
+          continue;
+
+        Type *TypeJ = TypeInfo.inferScalarType(LoadJ);
+        if (TypeI == TypeJ) {
+          Group.push_back(LoadJ);
+          LoadJ = nullptr;
+        }
+      }
+
+      // Check if any load in the group has a complementary mask with another,
+      // that is M1 == NOT(M2) or M2 == NOT(M1).
+      bool HasComplementaryMask =
+          any_of(drop_begin(Group), [MaskI](VPReplicateRecipe *Load) {
+            VPValue *MaskJ = Load->getMask();
+            return match(MaskI, m_Not(m_Specific(MaskJ))) ||
+                   match(MaskJ, m_Not(m_Specific(MaskI)));
+          });
+
+      if (HasComplementaryMask)
+        LoadGroups.push_back(std::move(Group));
+    }
+
+    // For each group, check memory dependencies and hoist the earliest load.
+    for (auto &Group : LoadGroups) {
+      // Sort loads by dominance order, with earliest (most dominating) first.
+      sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
+        return VPDT.properlyDominates(A, B);
+      });
+
+      VPReplicateRecipe *EarliestLoad = Group.front();
+      VPBasicBlock *FirstBB = EarliestLoad->getParent();
+      VPBasicBlock *LastBB = Group.back()->getParent();
+
+      // Check that the load doesn't alias with stores between first and last.
+      if (!canHoistLoadWithNoAliasCheck(EarliestLoad, FirstBB, LastBB))
+        continue;
+
+      // Find the load with minimum alignment to use.
+      auto *LoadWithMinAlign =
+          *min_element(Group, [](VPReplicateRecipe *A, VPReplicateRecipe *B) {
+            return cast<LoadInst>(A->getUnderlyingInstr())->getAlign() <
+                   cast<LoadInst>(B->getUnderlyingInstr())->getAlign();
+          });
+
+      // Collect common metadata from all loads in the group.
+      VPIRMetadata CommonMetadata = getCommonLoadMetadata(Group);
+
+      // Create an unpredicated load with minimum alignment using the earliest
+      // dominating address and common metadata.
+      auto *UnpredicatedLoad = new VPReplicateRecipe(
+          LoadWithMinAlign->getUnderlyingInstr(), EarliestLoad->getOperand(0),
+          /*IsSingleScalar=*/false, /*Mask=*/nullptr,
+          CommonMetadata);
+      UnpredicatedLoad->insertBefore(EarliestLoad);
+
+      // Replace all loads in the group with the unpredicated load.
+      for (VPReplicateRecipe *Load : Group) {
+        Load->replaceAllUsesWith(UnpredicatedLoad);
+        Load->eraseFromParent();
+      }
+    }
+  }
+}
+
 /// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be
 /// converted to a narrower recipe. \p V is used by a wide recipe that feeds a
 /// store interleave group at index \p Idx, \p WideMember0 is the recipe feeding