@@ -3968,6 +3968,151 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
39683968 }
39693969}
39703970
3971+ // Returns the intersection of metadata from a group of loads.
3972+ static VPIRMetadata getCommonLoadMetadata (ArrayRef<VPReplicateRecipe *> Loads) {
3973+ VPIRMetadata CommonMetadata = *Loads.front ();
3974+ for (VPReplicateRecipe *Load : drop_begin (Loads))
3975+ CommonMetadata.intersect (*Load);
3976+ return CommonMetadata;
3977+ }
3978+
3979+ // Check if a load can be hoisted by verifying it doesn't alias with any stores
3980+ // in blocks between FirstBB and LastBB using scoped noalias metadata.
3981+ static bool canHoistLoadWithNoAliasCheck (VPReplicateRecipe *Load,
3982+ VPBasicBlock *FirstBB,
3983+ VPBasicBlock *LastBB) {
3984+ // Get the load's memory location and check if it aliases with any stores
3985+ // using scoped noalias metadata.
3986+ auto LoadLoc = vputils::getMemoryLocation (*Load);
3987+ if (!LoadLoc || !LoadLoc->AATags .Scope )
3988+ return false ;
3989+
3990+ const AAMDNodes &LoadAA = LoadLoc->AATags ;
3991+ for (VPBlockBase *Block = FirstBB; Block;
3992+ Block = Block->getSingleSuccessor ()) {
3993+ // This function assumes a simple linear chain of blocks. If there are
3994+ // multiple successors, we would need more complex analysis.
3995+ assert (Block->getNumSuccessors () <= 1 &&
3996+ " Expected at most one successor in block chain" );
3997+ auto *VPBB = cast<VPBasicBlock>(Block);
3998+ for (VPRecipeBase &R : *VPBB) {
3999+ if (R.mayWriteToMemory ()) {
4000+ auto Loc = vputils::getMemoryLocation (R);
4001+ // Bail out if we can't get the location or if the scoped noalias
4002+ // metadata indicates potential aliasing.
4003+ if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes (
4004+ LoadAA.Scope , Loc->AATags .NoAlias ))
4005+ return false ;
4006+ }
4007+ }
4008+
4009+ if (Block == LastBB)
4010+ break ;
4011+ }
4012+ return true ;
4013+ }
4014+
4015+ void VPlanTransforms::hoistPredicatedLoads (VPlan &Plan, ScalarEvolution &SE,
4016+ const Loop *L) {
4017+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion ();
4018+ VPTypeAnalysis TypeInfo (Plan);
4019+ VPDominatorTree VPDT (Plan);
4020+
4021+ // Group predicated loads by their address SCEV.
4022+ MapVector<const SCEV *, SmallVector<VPReplicateRecipe *>> LoadsByAddress;
4023+ for (VPBlockBase *Block : vp_depth_first_shallow (LoopRegion->getEntry ())) {
4024+ auto *VPBB = cast<VPBasicBlock>(Block);
4025+ for (VPRecipeBase &R : *VPBB) {
4026+ auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
4027+ if (!RepR || RepR->getOpcode () != Instruction::Load ||
4028+ !RepR->isPredicated ())
4029+ continue ;
4030+
4031+ VPValue *Addr = RepR->getOperand (0 );
4032+ const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue (Addr, SE, L);
4033+ if (!isa<SCEVCouldNotCompute>(AddrSCEV))
4034+ LoadsByAddress[AddrSCEV].push_back (RepR);
4035+ }
4036+ }
4037+
4038+ // For each address, collect loads with complementary masks, sort by
4039+ // dominance, and use the earliest load.
4040+ for (auto &[Addr, Loads] : LoadsByAddress) {
4041+ if (Loads.size () < 2 )
4042+ continue ;
4043+
4044+ // Collect groups of loads with complementary masks.
4045+ SmallVector<SmallVector<VPReplicateRecipe *, 4 >> LoadGroups;
4046+ for (VPReplicateRecipe *&LoadI : Loads) {
4047+ if (!LoadI)
4048+ continue ;
4049+
4050+ VPValue *MaskI = LoadI->getMask ();
4051+ Type *TypeI = TypeInfo.inferScalarType (LoadI);
4052+ SmallVector<VPReplicateRecipe *, 4 > Group;
4053+ Group.push_back (LoadI);
4054+ LoadI = nullptr ;
4055+
4056+ // Find all loads with the same type.
4057+ for (VPReplicateRecipe *&LoadJ : Loads) {
4058+ if (!LoadJ)
4059+ continue ;
4060+
4061+ Type *TypeJ = TypeInfo.inferScalarType (LoadJ);
4062+ if (TypeI == TypeJ) {
4063+ Group.push_back (LoadJ);
4064+ LoadJ = nullptr ;
4065+ }
4066+ }
4067+
4068+ // Check if any load in the group has a complementary mask with another,
4069+ // that is M1 == NOT(M2) or M2 == NOT(M1).
4070+ bool HasComplementaryMask =
4071+ any_of (drop_begin (Group), [MaskI](VPReplicateRecipe *Load) {
4072+ VPValue *MaskJ = Load->getMask ();
4073+ return match (MaskI, m_Not (m_Specific (MaskJ))) ||
4074+ match (MaskJ, m_Not (m_Specific (MaskI)));
4075+ });
4076+
4077+ if (HasComplementaryMask)
4078+ LoadGroups.push_back (std::move (Group));
4079+ }
4080+
4081+ // For each group, check memory dependencies and hoist the earliest load.
4082+ for (auto &Group : LoadGroups) {
4083+ // Sort loads by dominance order, with earliest (most dominating) first.
4084+ sort (Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
4085+ return VPDT.properlyDominates (A, B);
4086+ });
4087+
4088+ VPReplicateRecipe *EarliestLoad = Group.front ();
4089+ VPBasicBlock *FirstBB = EarliestLoad->getParent ();
4090+ VPBasicBlock *LastBB = Group.back ()->getParent ();
4091+
4092+ // Check that the load doesn't alias with stores between first and last.
4093+ if (!canHoistLoadWithNoAliasCheck (EarliestLoad, FirstBB, LastBB))
4094+ continue ;
4095+
4096+ // Collect common metadata from all loads in the group.
4097+ VPIRMetadata CommonMetadata = getCommonLoadMetadata (Group);
4098+
4099+ // Create an unpredicated version of the earliest load with common
4100+ // metadata.
4101+ auto *UnpredicatedLoad = new VPReplicateRecipe (
4102+ EarliestLoad->getUnderlyingInstr (), {EarliestLoad->getOperand (0 )},
4103+ /* IsSingleScalar=*/ false , /* Mask=*/ nullptr , CommonMetadata);
4104+
4105+ UnpredicatedLoad->insertBefore (EarliestLoad);
4106+
4107+ // Replace all loads in the group with the unpredicated load.
4108+ for (VPReplicateRecipe *Load : Group) {
4109+ Load->replaceAllUsesWith (UnpredicatedLoad);
4110+ Load->eraseFromParent ();
4111+ }
4112+ }
4113+ }
4114+ }
4115+
39714116void VPlanTransforms::materializeConstantVectorTripCount (
39724117 VPlan &Plan, ElementCount BestVF, unsigned BestUF,
39734118 PredicatedScalarEvolution &PSE) {
0 commit comments