@@ -4010,6 +4010,152 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
40104010 }
40114011}
40124012
4013+ // Returns the intersection of metadata from a group of loads.
4014+ static VPIRMetadata getCommonLoadMetadata (ArrayRef<VPReplicateRecipe *> Loads) {
4015+ VPIRMetadata CommonMetadata = *Loads.front ();
4016+ for (VPReplicateRecipe *Load : drop_begin (Loads))
4017+ CommonMetadata.intersect (*Load);
4018+ return CommonMetadata;
4019+ }
4020+
4021+ // Check if a load can be hoisted by verifying it doesn't alias with any stores
4022+ // in blocks between FirstBB and LastBB using scoped noalias metadata.
4023+ static bool canHoistLoadWithNoAliasCheck (VPReplicateRecipe *Load,
4024+ VPBasicBlock *FirstBB,
4025+ VPBasicBlock *LastBB) {
4026+ // Get the load's memory location and check if it aliases with any stores
4027+ // using scoped noalias metadata.
4028+ auto LoadLoc = vputils::getMemoryLocation (*Load);
4029+ if (!LoadLoc || !LoadLoc->AATags .Scope )
4030+ return false ;
4031+
4032+ const AAMDNodes &LoadAA = LoadLoc->AATags ;
4033+ for (VPBlockBase *Block = FirstBB; Block;
4034+ Block = Block->getSingleSuccessor ()) {
4035+ // This function assumes a simple linear chain of blocks. If there are
4036+ // multiple successors, we would need more complex analysis.
4037+ assert (Block->getNumSuccessors () <= 1 &&
4038+ " Expected at most one successor in block chain" );
4039+ auto *VPBB = cast<VPBasicBlock>(Block);
4040+ for (VPRecipeBase &R : *VPBB) {
4041+ if (R.mayWriteToMemory ()) {
4042+ auto Loc = vputils::getMemoryLocation (R);
4043+ // Bail out if we can't get the location or if the scoped noalias
4044+ // metadata indicates potential aliasing.
4045+ if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes (
4046+ LoadAA.Scope , Loc->AATags .NoAlias ))
4047+ return false ;
4048+ }
4049+ }
4050+
4051+ if (Block == LastBB)
4052+ break ;
4053+ }
4054+ return true ;
4055+ }
4056+
// Hoist groups of predicated replicate loads of the same address: when two
// loads of that address execute under complementary masks (M and NOT(M)),
// the location is read on every path, so the whole group can be replaced by
// a single unpredicated load — provided no intervening store may alias it
// (checked via scoped noalias metadata in canHoistLoadWithNoAliasCheck).
void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
                                           const Loop *L) {
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  VPTypeAnalysis TypeInfo(Plan);
  VPDominatorTree VPDT(Plan);

  // Group predicated loads by their address SCEV, skipping any address whose
  // SCEV cannot be computed.
  MapVector<const SCEV *, SmallVector<VPReplicateRecipe *>> LoadsByAddress;
  for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) {
    auto *VPBB = cast<VPBasicBlock>(Block);
    for (VPRecipeBase &R : *VPBB) {
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (!RepR || RepR->getOpcode() != Instruction::Load ||
          !RepR->isPredicated())
        continue;

      // Operand 0 of a replicate load is its address.
      VPValue *Addr = RepR->getOperand(0);
      const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, SE, L);
      if (!isa<SCEVCouldNotCompute>(AddrSCEV))
        LoadsByAddress[AddrSCEV].push_back(RepR);
    }
  }

  // For each address, collect loads with complementary masks, sort by
  // dominance, and use the earliest load.
  for (auto &[Addr, Loads] : LoadsByAddress) {
    if (Loads.size() < 2)
      continue;

    // Collect groups of loads with complementary masks. Entries in Loads are
    // nulled out once assigned to a group, so each load joins at most one
    // group.
    SmallVector<SmallVector<VPReplicateRecipe *, 4>> LoadGroups;
    for (VPReplicateRecipe *&LoadI : Loads) {
      if (!LoadI)
        continue;

      VPValue *MaskI = LoadI->getMask();
      Type *TypeI = TypeInfo.inferScalarType(LoadI);
      SmallVector<VPReplicateRecipe *, 4> Group;
      Group.push_back(LoadI);
      LoadI = nullptr;

      // Find all remaining loads with the same scalar type; they become
      // candidates for this group.
      for (VPReplicateRecipe *&LoadJ : Loads) {
        if (!LoadJ)
          continue;

        Type *TypeJ = TypeInfo.inferScalarType(LoadJ);
        if (TypeI == TypeJ) {
          Group.push_back(LoadJ);
          LoadJ = nullptr;
        }
      }

      // Check if any load in the group has a complementary mask with another,
      // that is M1 == NOT(M2) or M2 == NOT(M1). Only the first load's mask is
      // compared against the others'.
      bool HasComplementaryMask =
          any_of(drop_begin(Group), [MaskI](VPReplicateRecipe *Load) {
            VPValue *MaskJ = Load->getMask();
            return match(MaskI, m_Not(m_Specific(MaskJ))) ||
                   match(MaskJ, m_Not(m_Specific(MaskI)));
          });

      if (HasComplementaryMask)
        LoadGroups.push_back(std::move(Group));
    }

    // For each group, check memory dependencies and hoist the earliest load.
    for (auto &Group : LoadGroups) {
      // Sort loads by dominance order, with earliest (most dominating) first.
      sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
        return VPDT.properlyDominates(A, B);
      });

      VPReplicateRecipe *EarliestLoad = Group.front();
      VPBasicBlock *FirstBB = EarliestLoad->getParent();
      VPBasicBlock *LastBB = Group.back()->getParent();

      // Check that the load doesn't alias with stores between first and last.
      if (!canHoistLoadWithNoAliasCheck(EarliestLoad, FirstBB, LastBB))
        continue;

      // Collect common metadata from all loads in the group, since the single
      // replacement load must not carry metadata valid for only one of them.
      VPIRMetadata CommonMetadata = getCommonLoadMetadata(Group);

      // Create an unpredicated version of the earliest load with common
      // metadata.
      auto *UnpredicatedLoad = new VPReplicateRecipe(
          EarliestLoad->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
          /*IsSingleScalar=*/false, /*Mask=*/nullptr, /*Flags=*/{},
          CommonMetadata);

      UnpredicatedLoad->insertBefore(EarliestLoad);

      // Replace all loads in the group with the unpredicated load and erase
      // the now-dead predicated recipes.
      for (VPReplicateRecipe *Load : Group) {
        Load->replaceAllUsesWith(UnpredicatedLoad);
        Load->eraseFromParent();
      }
    }
  }
}
4158+
40134159void VPlanTransforms::materializeConstantVectorTripCount (
40144160 VPlan &Plan, ElementCount BestVF, unsigned BestUF,
40154161 PredicatedScalarEvolution &PSE) {
0 commit comments