|
42 | 42 | #include "llvm/Support/TypeSize.h" |
43 | 43 | #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" |
44 | 44 |
|
| 45 | +#define DEBUG_TYPE "loop-vectorize" |
| 46 | + |
45 | 47 | using namespace llvm; |
46 | 48 | using namespace VPlanPatternMatch; |
47 | 49 |
|
@@ -3974,6 +3976,151 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) { |
3974 | 3976 | } |
3975 | 3977 | } |
3976 | 3978 |
|
| 3979 | +// Returns the intersection of metadata from a group of loads. |
| 3980 | +static VPIRMetadata getCommonLoadMetadata(ArrayRef<VPReplicateRecipe *> Loads) { |
| 3981 | + VPIRMetadata CommonMetadata = *Loads.front(); |
| 3982 | + for (VPReplicateRecipe *Load : drop_begin(Loads)) |
| 3983 | + CommonMetadata.intersect(*Load); |
| 3984 | + return CommonMetadata; |
| 3985 | +} |
| 3986 | + |
| 3987 | +// Check if a load can be hoisted by verifying it doesn't alias with any stores |
| 3988 | +// in blocks between FirstBB and LastBB using scoped noalias metadata. |
| 3989 | +static bool canHoistLoadWithNoAliasCheck(VPReplicateRecipe *Load, |
| 3990 | + VPBasicBlock *FirstBB, |
| 3991 | + VPBasicBlock *LastBB) { |
| 3992 | + // Get the load's memory location and check if it aliases with any stores |
| 3993 | + // using scoped noalias metadata. |
| 3994 | + auto LoadLoc = vputils::getMemoryLocation(*Load); |
| 3995 | + if (!LoadLoc || !LoadLoc->AATags.Scope) |
| 3996 | + return false; |
| 3997 | + |
| 3998 | + const AAMDNodes &LoadAA = LoadLoc->AATags; |
| 3999 | + for (VPBlockBase *Block = FirstBB; Block; |
| 4000 | + Block = Block->getSingleSuccessor()) { |
| 4001 | + // This function assumes a simple linear chain of blocks. If there are |
| 4002 | + // multiple successors, we would need more complex analysis. |
| 4003 | + assert(Block->getNumSuccessors() <= 1 && |
| 4004 | + "Expected at most one successor in block chain"); |
| 4005 | + auto *VPBB = cast<VPBasicBlock>(Block); |
| 4006 | + for (VPRecipeBase &R : *VPBB) { |
| 4007 | + if (R.mayWriteToMemory()) { |
| 4008 | + auto Loc = vputils::getMemoryLocation(R); |
| 4009 | + // Bail out if we can't get the location or if the scoped noalias |
| 4010 | + // metadata indicates potential aliasing. |
| 4011 | + if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes( |
| 4012 | + LoadAA.Scope, Loc->AATags.NoAlias)) |
| 4013 | + return false; |
| 4014 | + } |
| 4015 | + } |
| 4016 | + |
| 4017 | + if (Block == LastBB) |
| 4018 | + break; |
| 4019 | + } |
| 4020 | + return true; |
| 4021 | +} |
| 4022 | + |
| 4023 | +void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, |
| 4024 | + const Loop *L) { |
| 4025 | + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); |
| 4026 | + VPTypeAnalysis TypeInfo(Plan); |
| 4027 | + VPDominatorTree VPDT(Plan); |
| 4028 | + |
| 4029 | + // Group predicated loads by their address SCEV. |
| 4030 | + MapVector<const SCEV *, SmallVector<VPReplicateRecipe *>> LoadsByAddress; |
| 4031 | + for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) { |
| 4032 | + auto *VPBB = cast<VPBasicBlock>(Block); |
| 4033 | + for (VPRecipeBase &R : *VPBB) { |
| 4034 | + auto *RepR = dyn_cast<VPReplicateRecipe>(&R); |
| 4035 | + if (!RepR || RepR->getOpcode() != Instruction::Load || |
| 4036 | + !RepR->isPredicated()) |
| 4037 | + continue; |
| 4038 | + |
| 4039 | + VPValue *Addr = RepR->getOperand(0); |
| 4040 | + const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, SE, L); |
| 4041 | + if (!isa<SCEVCouldNotCompute>(AddrSCEV)) |
| 4042 | + LoadsByAddress[AddrSCEV].push_back(RepR); |
| 4043 | + } |
| 4044 | + } |
| 4045 | + |
| 4046 | + // For each address, collect loads with complementary masks, sort by |
| 4047 | + // dominance, and use the earliest load. |
| 4048 | + for (auto &[Addr, Loads] : LoadsByAddress) { |
| 4049 | + if (Loads.size() < 2) |
| 4050 | + continue; |
| 4051 | + |
| 4052 | + // Collect groups of loads with complementary masks. |
| 4053 | + SmallVector<SmallVector<VPReplicateRecipe *, 4>> LoadGroups; |
| 4054 | + for (VPReplicateRecipe *&LoadI : Loads) { |
| 4055 | + if (!LoadI) |
| 4056 | + continue; |
| 4057 | + |
| 4058 | + VPValue *MaskI = LoadI->getMask(); |
| 4059 | + Type *TypeI = TypeInfo.inferScalarType(LoadI); |
| 4060 | + SmallVector<VPReplicateRecipe *, 4> Group; |
| 4061 | + Group.push_back(LoadI); |
| 4062 | + LoadI = nullptr; |
| 4063 | + |
| 4064 | + // Find all loads with the same type. |
| 4065 | + for (VPReplicateRecipe *&LoadJ : Loads) { |
| 4066 | + if (!LoadJ) |
| 4067 | + continue; |
| 4068 | + |
| 4069 | + Type *TypeJ = TypeInfo.inferScalarType(LoadJ); |
| 4070 | + if (TypeI == TypeJ) { |
| 4071 | + Group.push_back(LoadJ); |
| 4072 | + LoadJ = nullptr; |
| 4073 | + } |
| 4074 | + } |
| 4075 | + |
| 4076 | + // Check if any load in the group has a complementary mask with another, |
| 4077 | + // that is M1 == NOT(M2) or M2 == NOT(M1). |
| 4078 | + bool HasComplementaryMask = |
| 4079 | + any_of(drop_begin(Group), [MaskI](VPReplicateRecipe *Load) { |
| 4080 | + VPValue *MaskJ = Load->getMask(); |
| 4081 | + return match(MaskI, m_Not(m_Specific(MaskJ))) || |
| 4082 | + match(MaskJ, m_Not(m_Specific(MaskI))); |
| 4083 | + }); |
| 4084 | + |
| 4085 | + if (HasComplementaryMask) |
| 4086 | + LoadGroups.push_back(std::move(Group)); |
| 4087 | + } |
| 4088 | + |
| 4089 | + // For each group, check memory dependencies and hoist the earliest load. |
| 4090 | + for (auto &Group : LoadGroups) { |
| 4091 | + // Sort loads by dominance order, with earliest (most dominating) first. |
| 4092 | + sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) { |
| 4093 | + return VPDT.properlyDominates(A, B); |
| 4094 | + }); |
| 4095 | + |
| 4096 | + VPReplicateRecipe *EarliestLoad = Group.front(); |
| 4097 | + VPBasicBlock *FirstBB = EarliestLoad->getParent(); |
| 4098 | + VPBasicBlock *LastBB = Group.back()->getParent(); |
| 4099 | + |
| 4100 | + // Check that the load doesn't alias with stores between first and last. |
| 4101 | + if (!canHoistLoadWithNoAliasCheck(EarliestLoad, FirstBB, LastBB)) |
| 4102 | + continue; |
| 4103 | + |
| 4104 | + // Collect common metadata from all loads in the group. |
| 4105 | + VPIRMetadata CommonMetadata = getCommonLoadMetadata(Group); |
| 4106 | + |
| 4107 | + // Create an unpredicated version of the earliest load with common |
| 4108 | + // metadata. |
| 4109 | + auto *UnpredicatedLoad = new VPReplicateRecipe( |
| 4110 | + EarliestLoad->getUnderlyingInstr(), {EarliestLoad->getOperand(0)}, |
| 4111 | + /*IsSingleScalar=*/false, /*Mask=*/nullptr, *EarliestLoad, CommonMetadata); |
| 4112 | + |
| 4113 | + UnpredicatedLoad->insertBefore(EarliestLoad); |
| 4114 | + |
| 4115 | + // Replace all loads in the group with the unpredicated load. |
| 4116 | + for (VPReplicateRecipe *Load : Group) { |
| 4117 | + Load->replaceAllUsesWith(UnpredicatedLoad); |
| 4118 | + Load->eraseFromParent(); |
| 4119 | + } |
| 4120 | + } |
| 4121 | + } |
| 4122 | +} |
| 4123 | + |
3977 | 4124 | void VPlanTransforms::materializeConstantVectorTripCount( |
3978 | 4125 | VPlan &Plan, ElementCount BestVF, unsigned BestUF, |
3979 | 4126 | PredicatedScalarEvolution &PSE) { |
|
0 commit comments