@@ -139,6 +139,41 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
139139 return true ;
140140}
141141
142+ // Check if a load can be hoisted by verifying it doesn't alias with any stores
143+ // in blocks between FirstBB and LastBB using scoped noalias metadata.
144+ static bool canHoistLoadWithNoAliasCheck (VPReplicateRecipe *Load,
145+ VPBasicBlock *FirstBB,
146+ VPBasicBlock *LastBB) {
147+ // Get the load's memory location and check if it aliases with any stores
148+ // using scoped noalias metadata.
149+ auto LoadLoc = vputils::getMemoryLocation (*Load);
150+ if (!LoadLoc || !LoadLoc->AATags .Scope )
151+ return false ;
152+
153+ const AAMDNodes &LoadAA = LoadLoc->AATags ;
154+ for (VPBlockBase *Block = FirstBB; Block;
155+ Block = Block->getSingleSuccessor ()) {
156+ // This function assumes a simple linear chain of blocks. If there are
157+ // multiple successors, we would need more complex analysis.
158+ assert (Block->getNumSuccessors () <= 1 &&
159+ " Expected at most one successor in block chain" );
160+ auto *VPBB = cast<VPBasicBlock>(Block);
161+ for (VPRecipeBase &R : *VPBB) {
162+ if (R.mayWriteToMemory ()) {
163+ auto Loc = vputils::getMemoryLocation (R);
164+ // Bail out if we can't get the location or if the scoped noalias
165+ // metadata indicates potential aliasing.
166+ if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes (
167+ LoadAA.Scope , Loc->AATags .NoAlias ))
168+ return false ;
169+ }
170+ }
171+ if (Block == LastBB)
172+ break ;
173+ }
174+ return true ;
175+ }
176+
142177// / Return true if we do not know how to (mechanically) hoist or sink \p R out
143178// / of a loop region.
144179static bool cannotHoistOrSinkRecipe (const VPRecipeBase &R) {
@@ -4010,6 +4045,122 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
40104045 }
40114046}
40124047
4048+ // Returns the intersection of metadata from a group of loads.
4049+ static VPIRMetadata getCommonLoadMetadata (ArrayRef<VPReplicateRecipe *> Loads) {
4050+ VPIRMetadata CommonMetadata = *Loads.front ();
4051+ for (VPReplicateRecipe *Load : drop_begin (Loads))
4052+ CommonMetadata.intersect (*Load);
4053+ return CommonMetadata;
4054+ }
4055+
4056+ void VPlanTransforms::hoistPredicatedLoads (VPlan &Plan, ScalarEvolution &SE,
4057+ const Loop *L) {
4058+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion ();
4059+ VPTypeAnalysis TypeInfo (Plan);
4060+ VPDominatorTree VPDT (Plan);
4061+
4062+ // Group predicated loads by their address SCEV.
4063+ DenseMap<const SCEV *, SmallVector<VPReplicateRecipe *>> LoadsByAddress;
4064+ for (VPBlockBase *Block : vp_depth_first_shallow (LoopRegion->getEntry ())) {
4065+ auto *VPBB = cast<VPBasicBlock>(Block);
4066+ for (VPRecipeBase &R : *VPBB) {
4067+ auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
4068+ if (!RepR || RepR->getOpcode () != Instruction::Load ||
4069+ !RepR->isPredicated ())
4070+ continue ;
4071+
4072+ VPValue *Addr = RepR->getOperand (0 );
4073+ const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue (Addr, SE, L);
4074+ if (!isa<SCEVCouldNotCompute>(AddrSCEV))
4075+ LoadsByAddress[AddrSCEV].push_back (RepR);
4076+ }
4077+ }
4078+
4079+ // For each address, collect loads with complementary masks, sort by
4080+ // dominance, and use the earliest load.
4081+ for (auto &[Addr, Loads] : LoadsByAddress) {
4082+ if (Loads.size () < 2 )
4083+ continue ;
4084+
4085+ // Collect groups of loads with complementary masks.
4086+ SmallVector<SmallVector<VPReplicateRecipe *, 4 >> LoadGroups;
4087+ for (VPReplicateRecipe *&LoadI : Loads) {
4088+ if (!LoadI)
4089+ continue ;
4090+
4091+ VPValue *MaskI = LoadI->getMask ();
4092+ Type *TypeI = TypeInfo.inferScalarType (LoadI);
4093+ SmallVector<VPReplicateRecipe *, 4 > Group;
4094+ Group.push_back (LoadI);
4095+ LoadI = nullptr ;
4096+
4097+ // Find all loads with the same type.
4098+ for (VPReplicateRecipe *&LoadJ : Loads) {
4099+ if (!LoadJ)
4100+ continue ;
4101+
4102+ Type *TypeJ = TypeInfo.inferScalarType (LoadJ);
4103+ if (TypeI == TypeJ) {
4104+ Group.push_back (LoadJ);
4105+ LoadJ = nullptr ;
4106+ }
4107+ }
4108+
4109+ // Check if any load in the group has a complementary mask with another,
4110+ // that is M1 == NOT(M2) or M2 == NOT(M1).
4111+ bool HasComplementaryMask =
4112+ any_of (drop_begin (Group), [MaskI](VPReplicateRecipe *Load) {
4113+ VPValue *MaskJ = Load->getMask ();
4114+ return match (MaskI, m_Not (m_Specific (MaskJ))) ||
4115+ match (MaskJ, m_Not (m_Specific (MaskI)));
4116+ });
4117+
4118+ if (HasComplementaryMask)
4119+ LoadGroups.push_back (std::move (Group));
4120+ }
4121+
4122+ // For each group, check memory dependencies and hoist the earliest load.
4123+ for (auto &Group : LoadGroups) {
4124+ // Sort loads by dominance order, with earliest (most dominating) first.
4125+ sort (Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
4126+ return VPDT.properlyDominates (A, B);
4127+ });
4128+
4129+ VPReplicateRecipe *EarliestLoad = Group.front ();
4130+ VPBasicBlock *FirstBB = EarliestLoad->getParent ();
4131+ VPBasicBlock *LastBB = Group.back ()->getParent ();
4132+
4133+ // Check that the load doesn't alias with stores between first and last.
4134+ if (!canHoistLoadWithNoAliasCheck (EarliestLoad, FirstBB, LastBB))
4135+ continue ;
4136+
4137+ // Find the load with minimum alignment to use.
4138+ auto *LoadWithMinAlign =
4139+ *min_element (Group, [](VPReplicateRecipe *A, VPReplicateRecipe *B) {
4140+ return cast<LoadInst>(A->getUnderlyingInstr ())->getAlign () <
4141+ cast<LoadInst>(B->getUnderlyingInstr ())->getAlign ();
4142+ });
4143+
4144+ // Collect common metadata from all loads in the group.
4145+ VPIRMetadata CommonMetadata = getCommonLoadMetadata (Group);
4146+
4147+ // Create an unpredicated load with minimum alignment using the earliest
4148+ // dominating address and common metadata.
4149+ auto *UnpredicatedLoad = new VPReplicateRecipe (
4150+ LoadWithMinAlign->getUnderlyingInstr (), EarliestLoad->getOperand (0 ),
4151+ /* IsSingleScalar=*/ false , /* Mask=*/ nullptr , /* Flags=*/ {},
4152+ CommonMetadata);
4153+ UnpredicatedLoad->insertBefore (EarliestLoad);
4154+
4155+ // Replace all loads in the group with the unpredicated load.
4156+ for (VPReplicateRecipe *Load : Group) {
4157+ Load->replaceAllUsesWith (UnpredicatedLoad);
4158+ Load->eraseFromParent ();
4159+ }
4160+ }
4161+ }
4162+ }
4163+
40134164void VPlanTransforms::materializeConstantVectorTripCount (
40144165 VPlan &Plan, ElementCount BestVF, unsigned BestUF,
40154166 PredicatedScalarEvolution &PSE) {
0 commit comments