Skip to content

Commit bd19976

Browse files
authored
Merge pull request swiftlang#11955 from fhahn/pick-pred-load-hoisting-store-sinking
[LV] Pick predicated load/store hoisting/sinking changes. rdar://163931465
2 parents a50354c + 0946ab3 commit bd19976

30 files changed

+3512
-219
lines changed

llvm/include/llvm/Analysis/ScopedNoAliasAA.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,12 @@ class ScopedNoAliasAAResult : public AAResultBase {
4646
LLVM_ABI ModRefInfo getModRefInfo(const CallBase *Call1,
4747
const CallBase *Call2, AAQueryInfo &AAQI);
4848

49-
LLVM_ABI void
49+
LLVM_ABI static void
5050
collectScopedDomains(const MDNode *NoAlias,
51-
SmallPtrSetImpl<const MDNode *> &Domains) const;
51+
SmallPtrSetImpl<const MDNode *> &Domains);
5252

53-
private:
54-
bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
53+
LLVM_ABI static bool mayAliasInScopes(const MDNode *Scopes,
54+
const MDNode *NoAlias);
5555
};
5656

5757
/// Analysis pass providing a never-invalidated alias analysis result.

llvm/lib/Analysis/ScopedNoAliasAA.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
116116

117117
/// Collect the set of scoped domains relevant to the noalias scopes.
118118
void ScopedNoAliasAAResult::collectScopedDomains(
119-
const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) const {
119+
const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) {
120120
if (!NoAlias)
121121
return;
122122
assert(Domains.empty() && "Domains should be empty");
@@ -127,7 +127,7 @@ void ScopedNoAliasAAResult::collectScopedDomains(
127127
}
128128

129129
bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
130-
const MDNode *NoAlias) const {
130+
const MDNode *NoAlias) {
131131
if (!Scopes || !NoAlias)
132132
return true;
133133

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4018,8 +4018,8 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
40184018
if (VF.isScalar())
40194019
continue;
40204020

4021-
VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(),
4022-
CM, CM.CostKind);
4021+
VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
4022+
*CM.PSE.getSE(), OrigLoop);
40234023
precomputeCosts(*Plan, VF, CostCtx);
40244024
auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry());
40254025
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
@@ -4273,8 +4273,8 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
42734273

42744274
// Add on other costs that are modelled in VPlan, but not in the legacy
42754275
// cost model.
4276-
VPCostContext CostCtx(CM.TTI, *CM.TLI, CM.Legal->getWidestInductionType(),
4277-
CM, CM.CostKind);
4276+
VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind,
4277+
*CM.PSE.getSE(), OrigLoop);
42784278
VPRegionBlock *VectorRegion = P->getVectorLoopRegion();
42794279
assert(VectorRegion && "Expected to have a vector region!");
42804280
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
@@ -6874,8 +6874,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
68746874

68756875
InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
68766876
ElementCount VF) const {
6877-
VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM,
6878-
CM.CostKind);
6877+
VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE(),
6878+
OrigLoop);
68796879
InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx);
68806880

68816881
// Now compute and add the VPlan-based cost.
@@ -7075,13 +7075,14 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
70757075
// simplifications not accounted for in the legacy cost model. If that's the
70767076
// case, don't trigger the assertion, as the extra simplifications may cause a
70777077
// different VF to be picked by the VPlan-based cost model.
7078-
VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM,
7079-
CM.CostKind);
7078+
VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind,
7079+
*CM.PSE.getSE(), OrigLoop);
70807080
precomputeCosts(BestPlan, BestFactor.Width, CostCtx);
70817081
// Verify that the VPlan-based and legacy cost models agree, except for VPlans
70827082
// with early exits and plans with additional VPlan simplifications. The
70837083
// legacy cost model doesn't properly model costs for such loops.
70847084
assert((BestFactor.Width == LegacyVF.Width || BestPlan.hasEarlyExit() ||
7085+
!Legal->getLAI()->getSymbolicStrides().empty() ||
70857086
planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width),
70867087
CostCtx, OrigLoop,
70877088
BestFactor.Width) ||
@@ -8337,6 +8338,8 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
83378338
std::unique_ptr<VPlan>(VPlan0->duplicate()), SubRange, &LVer)) {
83388339
bool HasScalarVF = Plan->hasScalarVFOnly();
83398340
// Now optimize the initial VPlan.
8341+
VPlanTransforms::hoistPredicatedLoads(*Plan, *PSE.getSE(), OrigLoop);
8342+
VPlanTransforms::sinkPredicatedStores(*Plan, *PSE.getSE(), OrigLoop);
83408343
if (!HasScalarVF)
83418344
VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
83428345
*Plan, CM.getMinimalBitwidths());
@@ -8832,8 +8835,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
88328835
// TODO: Enable following transform when the EVL-version of extended-reduction
88338836
// and mulacc-reduction are implemented.
88348837
if (!CM.foldTailWithEVL()) {
8835-
VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM,
8836-
CM.CostKind);
8838+
VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
8839+
*CM.PSE.getSE(), OrigLoop);
88378840
VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan,
88388841
CostCtx, Range);
88398842
}
@@ -10108,8 +10111,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1010810111
// Check if it is profitable to vectorize with runtime checks.
1010910112
bool ForceVectorization =
1011010113
Hints.getForce() == LoopVectorizeHints::FK_Enabled;
10111-
VPCostContext CostCtx(CM.TTI, *CM.TLI, CM.Legal->getWidestInductionType(),
10112-
CM, CM.CostKind);
10114+
VPCostContext CostCtx(CM.TTI, *CM.TLI, LVP.getPlanFor(VF.Width), CM,
10115+
CM.CostKind, *CM.PSE.getSE(), L);
1011310116
if (!ForceVectorization &&
1011410117
!isOutsideLoopWorkProfitable(Checks, VF, L, PSE, CostCtx,
1011510118
LVP.getPlanFor(VF.Width), SEL,

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "llvm/ADT/ilist.h"
3535
#include "llvm/ADT/ilist_node.h"
3636
#include "llvm/Analysis/IVDescriptors.h"
37+
#include "llvm/Analysis/MemoryLocation.h"
3738
#include "llvm/Analysis/VectorUtils.h"
3839
#include "llvm/IR/DebugLoc.h"
3940
#include "llvm/IR/FMF.h"
@@ -931,13 +932,29 @@ class VPIRMetadata {
931932
/// Copy constructor for cloning.
932933
VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {}
933934

935+
VPIRMetadata &operator=(const VPIRMetadata &Other) {
936+
Metadata = Other.Metadata;
937+
return *this;
938+
}
939+
934940
/// Add all metadata to \p I.
935941
void applyMetadata(Instruction &I) const;
936942

937943
/// Add metadata with kind \p Kind and \p Node.
938944
void addMetadata(unsigned Kind, MDNode *Node) {
939945
Metadata.emplace_back(Kind, Node);
940946
}
947+
948+
/// Intersect this VPIRMetadata object with \p MD, keeping only metadata
949+
/// nodes that are common to both.
950+
void intersect(const VPIRMetadata &MD);
951+
952+
/// Get metadata of kind \p Kind. Returns nullptr if not found.
953+
MDNode *getMetadata(unsigned Kind) const {
954+
auto It =
955+
find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
956+
return It != Metadata.end() ? It->second : nullptr;
957+
}
941958
};
942959

943960
/// This is a concrete Recipe that models a single VPlan-level instruction.
@@ -2362,7 +2379,8 @@ class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe {
23622379
/// or stores into one wide load/store and shuffles. The first operand of a
23632380
/// VPInterleave recipe is the address, followed by the stored values, followed
23642381
/// by an optional mask.
2365-
class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase {
2382+
class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase,
2383+
public VPIRMetadata {
23662384
const InterleaveGroup<Instruction> *IG;
23672385

23682386
/// Indicates if the interleave group is in a conditional block and requires a
@@ -2376,10 +2394,8 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase {
23762394
public:
23772395
VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
23782396
ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2379-
bool NeedsMaskForGaps, DebugLoc DL)
2380-
: VPRecipeBase(VPDef::VPInterleaveSC, {Addr},
2381-
DL),
2382-
2397+
bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2398+
: VPRecipeBase(VPDef::VPInterleaveSC, {Addr}, DL), VPIRMetadata(MD),
23832399
IG(IG), NeedsMaskForGaps(NeedsMaskForGaps) {
23842400
// TODO: extend the masked interleaved-group support to reversed access.
23852401
assert((!Mask || !IG->isReverse()) &&
@@ -2402,7 +2418,7 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase {
24022418

24032419
VPInterleaveRecipe *clone() override {
24042420
return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2405-
NeedsMaskForGaps, getDebugLoc());
2421+
NeedsMaskForGaps, *this, getDebugLoc());
24062422
}
24072423

24082424
VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
@@ -4052,6 +4068,10 @@ class VPlan {
40524068
/// Returns VF * UF of the vector loop region.
40534069
VPValue &getVFxUF() { return VFxUF; }
40544070

4071+
LLVMContext &getContext() const {
4072+
return getScalarHeader()->getIRBasicBlock()->getContext();
4073+
}
4074+
40554075
void addVF(ElementCount VF) { VFs.insert(VF); }
40564076

40574077
void setVF(ElementCount VF) {

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -568,11 +568,11 @@ void VPlanTransforms::prepareForVectorization(
568568
VPBuilder Builder(MiddleVPBB);
569569
VPValue *Cmp;
570570
if (!RequiresScalarEpilogueCheck)
571-
Cmp = Plan.getOrAddLiveIn(ConstantInt::getFalse(
572-
IntegerType::getInt1Ty(TripCount->getType()->getContext())));
571+
Cmp = Plan.getOrAddLiveIn(
572+
ConstantInt::getFalse(IntegerType::getInt1Ty(Plan.getContext())));
573573
else if (TailFolded)
574-
Cmp = Plan.getOrAddLiveIn(ConstantInt::getTrue(
575-
IntegerType::getInt1Ty(TripCount->getType()->getContext())));
574+
Cmp = Plan.getOrAddLiveIn(
575+
ConstantInt::getTrue(IntegerType::getInt1Ty(Plan.getContext())));
576576
else
577577
Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
578578
&Plan.getVectorTripCount(), LatchDL, "cmp.n");
@@ -622,7 +622,7 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
622622
.createNaryOp(VPInstruction::BranchOnCond, {CondVPV},
623623
Plan.getCanonicalIV()->getDebugLoc());
624624
if (AddBranchWeights) {
625-
MDBuilder MDB(Plan.getScalarHeader()->getIRBasicBlock()->getContext());
625+
MDBuilder MDB(Plan.getContext());
626626
MDNode *BranchWeights =
627627
MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false);
628628
Term->addMetadata(LLVMContext::MD_prof, BranchWeights);

llvm/lib/Transforms/Vectorize/VPlanHelpers.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -349,12 +349,15 @@ struct VPCostContext {
349349
LoopVectorizationCostModel &CM;
350350
SmallPtrSet<Instruction *, 8> SkipCostComputation;
351351
TargetTransformInfo::TargetCostKind CostKind;
352+
ScalarEvolution &SE;
353+
const Loop *L;
352354

353355
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
354-
Type *CanIVTy, LoopVectorizationCostModel &CM,
355-
TargetTransformInfo::TargetCostKind CostKind)
356-
: TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
357-
CM(CM), CostKind(CostKind) {}
356+
const VPlan &Plan, LoopVectorizationCostModel &CM,
357+
TargetTransformInfo::TargetCostKind CostKind,
358+
ScalarEvolution &SE, const Loop *L)
359+
: TTI(TTI), TLI(TLI), Types(Plan), LLVMCtx(Plan.getContext()), CM(CM),
360+
CostKind(CostKind), SE(SE), L(L) {}
358361

359362
/// Return the cost for \p UI with \p VF using the legacy cost model as
360363
/// fallback until computing the cost of all recipes migrates to VPlan.

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -608,6 +608,11 @@ template <Intrinsic::ID IntrID> inline IntrinsicID_match m_Intrinsic() {
608608
return IntrinsicID_match(IntrID);
609609
}
610610

611+
/// Match intrinsic calls with a runtime intrinsic ID.
612+
inline IntrinsicID_match m_Intrinsic(Intrinsic::ID IntrID) {
613+
return IntrinsicID_match(IntrID);
614+
}
615+
611616
template <Intrinsic::ID IntrID, typename T0>
612617
inline typename m_Intrinsic_Ty<T0>::Ty m_Intrinsic(const T0 &Op0) {
613618
return m_CombineAnd(m_Intrinsic<IntrID>(), m_Argument<0>(Op0));

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,13 @@ bool VPRecipeBase::mayHaveSideEffects() const {
156156
case VPPredInstPHISC:
157157
case VPVectorEndPointerSC:
158158
return false;
159-
case VPInstructionSC:
159+
case VPInstructionSC: {
160+
auto *VPI = cast<VPInstruction>(this);
161+
if (VPI->getOpcode() == VPInstruction::BranchOnCond ||
162+
VPI->getOpcode() == VPInstruction::BranchOnCount)
163+
return true;
160164
return mayWriteToMemory();
165+
}
161166
case VPWidenCallSC: {
162167
Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
163168
return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
@@ -1038,6 +1043,8 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
10381043
case Instruction::ICmp:
10391044
case Instruction::Select:
10401045
case VPInstruction::AnyOf:
1046+
case VPInstruction::BranchOnCond:
1047+
case VPInstruction::BranchOnCount:
10411048
case VPInstruction::BuildStructVector:
10421049
case VPInstruction::BuildVector:
10431050
case VPInstruction::CalculateTripCountMinusVF:
@@ -1410,6 +1417,19 @@ void VPIRMetadata::applyMetadata(Instruction &I) const {
14101417
I.setMetadata(Kind, Node);
14111418
}
14121419

1420+
void VPIRMetadata::intersect(const VPIRMetadata &Other) {
1421+
SmallVector<std::pair<unsigned, MDNode *>> MetadataIntersection;
1422+
for (const auto &[KindA, MDA] : Metadata) {
1423+
for (const auto &[KindB, MDB] : Other.Metadata) {
1424+
if (KindA == KindB && MDA == MDB) {
1425+
MetadataIntersection.emplace_back(KindA, MDA);
1426+
break;
1427+
}
1428+
}
1429+
}
1430+
Metadata = std::move(MetadataIntersection);
1431+
}
1432+
14131433
void VPWidenCallRecipe::execute(VPTransformState &State) {
14141434
assert(State.VF.isVector() && "not widening");
14151435
assert(Variant != nullptr && "Can't create vector function.");
@@ -2832,12 +2852,12 @@ static void scalarizeInstruction(const Instruction *Instr,
28322852
Instruction *Cloned = Instr->clone();
28332853
if (!IsVoidRetTy) {
28342854
Cloned->setName(Instr->getName() + ".cloned");
2835-
#if !defined(NDEBUG)
2836-
// Verify that VPlan type inference results agree with the type of the
2837-
// generated values.
2838-
assert(State.TypeAnalysis.inferScalarType(RepRecipe) == Cloned->getType() &&
2839-
"inferred type and type from generated instructions do not match");
2840-
#endif
2855+
Type *ResultTy = State.TypeAnalysis.inferScalarType(RepRecipe);
2856+
// The operands of the replicate recipe may have been narrowed, resulting in
2857+
// a narrower result type. Update the type of the cloned instruction to the
2858+
// correct type.
2859+
if (ResultTy != Cloned->getType())
2860+
Cloned->mutateType(ResultTy);
28412861
}
28422862

28432863
RepRecipe->applyFlags(*Cloned);
@@ -3490,6 +3510,8 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
34903510
} else
34913511
NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
34923512
Group->getAlign(), "wide.vec");
3513+
applyMetadata(*NewLoad);
3514+
// TODO: Also manage existing metadata using VPIRMetadata.
34933515
Group->addMetadata(NewLoad);
34943516

34953517
ArrayRef<VPValue *> VPDefs = definedValues();
@@ -3610,6 +3632,8 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
36103632
NewStoreInstr =
36113633
State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());
36123634

3635+
applyMetadata(*NewStoreInstr);
3636+
// TODO: Also manage existing metadata using VPIRMetadata.
36133637
Group->addMetadata(NewStoreInstr);
36143638
}
36153639

0 commit comments

Comments
 (0)