[SLPVectorizer][NFC] Avoid calling calculateRtStride twice.
#152359
Conversation
Before this patch, we call `calculateRtStride` once when we check whether a run-time strided load is legal, and a second time when we generate the code. Instead, we add a `StrideSCEV` field to `TreeEntry`: when `calculateRtStride` succeeds, this field is set to the SCEV of the run-time stride and we return `true`.
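As a rough illustration of the pattern (not the actual LLVM code), the standalone C++ sketch below runs a stride analysis once during the legality check, caches the result on the tree node, and reuses it at code-generation time. All names in it (`StrideExpr`, `TreeNode`, `analyzeStride`, `emitStridedLoad`) are hypothetical stand-ins for `const SCEV *`, `TreeEntry`, `calculateRtStride`, and the strided-load codegen in `vectorizeTree`.

```cpp
// Minimal standalone sketch of the caching pattern, assuming nothing about
// the real SLPVectorizer types: analyze once, store the result on the node,
// reuse it when generating code.
#include <cassert>
#include <vector>

struct StrideExpr {                 // stand-in for `const SCEV *`
  long Value;
};

struct TreeNode {                   // stand-in for BoUpSLP::TreeEntry
  std::vector<long> PointerOps;
  const StrideExpr *StrideSCEV = nullptr;  // cached by the legality check
};

// Legality check: returns true and, if requested, reports the stride.
static bool analyzeStride(const std::vector<long> &Ptrs,
                          const StrideExpr **StrideOut) {
  if (Ptrs.size() < 2)
    return false;
  static StrideExpr S;              // placeholder for an analysis-owned object
  S.Value = Ptrs[1] - Ptrs[0];
  if (StrideOut)
    *StrideOut = &S;                // cache instead of recomputing later
  return true;
}

// Code generation: reuses the cached stride, no second analysis call.
static long emitStridedLoad(const TreeNode &N) {
  assert(N.StrideSCEV && "strided node must carry a cached stride");
  return N.StrideSCEV->Value;
}

int main() {
  TreeNode N{{0, 8, 16, 24}};
  if (!analyzeStride(N.PointerOps, &N.StrideSCEV))
    return 1;
  return emitStridedLoad(N) == 8 ? 0 : 1;  // codegen path uses the cached value
}
```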
@llvm/pr-subscribers-vectorizers
Author: Mikhail Gudim (mgudim)
Changes: Before this patch, we call `calculateRtStride` once when we check whether a run-time strided load is legal, and a second time when we generate the code. Instead, we add a `StrideSCEV` field to `TreeEntry`: when `calculateRtStride` succeeds, this field is set to the SCEV of the run-time stride and we return `true`.
Full diff: https://github.com/llvm/llvm-project/pull/152359.diff (1 file affected)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5d0e2f9518a51..c4198159e039a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2199,6 +2199,7 @@ class BoUpSLP {
LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
SmallVectorImpl<unsigned> &Order,
SmallVectorImpl<Value *> &PointerOps,
+ const SCEV **StrideSCEV = nullptr,
unsigned *BestVF = nullptr,
bool TryRecursiveCheck = true) const;
@@ -3879,6 +3880,10 @@ class BoUpSLP {
};
EntryState State;
+ // If we decide to generate a strided load for this Entry, `StrideSCEV`
+ // will be set.
+ const SCEV *StrideSCEV = nullptr;
+
/// List of combined opcodes supported by the vectorizer.
enum CombinedOpcode {
NotCombinedOp = -1,
@@ -4430,11 +4435,10 @@ class BoUpSLP {
/// Checks if the specified list of the instructions/values can be vectorized
/// and fills required data before actual scheduling of the instructions.
- TreeEntry::EntryState
- getScalarsVectorizationState(const InstructionsState &S, ArrayRef<Value *> VL,
- bool IsScatterVectorizeUserTE,
- OrdersType &CurrentOrder,
- SmallVectorImpl<Value *> &PointerOps);
+ TreeEntry::EntryState getScalarsVectorizationState(
+ const InstructionsState &S, ArrayRef<Value *> VL,
+ bool IsScatterVectorizeUserTE, OrdersType &CurrentOrder,
+ SmallVectorImpl<Value *> &PointerOps, const SCEV **StrideSCEV);
/// Maps a specific scalar to its tree entry(ies).
SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarToTreeEntries;
@@ -5734,17 +5738,13 @@ static bool isReverseOrder(ArrayRef<unsigned> Order) {
}
/// Checks if the provided list of pointers \p Pointers represents the strided
-/// pointers for type ElemTy. If they are not, std::nullopt is returned.
-/// Otherwise, if \p Inst is not specified, just initialized optional value is
-/// returned to show that the pointers represent strided pointers. If \p Inst
-/// specified, the runtime stride is materialized before the given \p Inst.
-/// \returns std::nullopt if the pointers are not pointers with the runtime
-/// stride, nullptr or actual stride value, otherwise.
-static std::optional<Value *>
-calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
- const DataLayout &DL, ScalarEvolution &SE,
- SmallVectorImpl<unsigned> &SortedIndices,
- Instruction *Inst = nullptr) {
+/// pointers for type ElemTy. If they are not, `false` is returned.
+/// Otherwise, `true` is returned. If `StrideSCEV` is not nullptr,
+/// it is set to the SCEV of the run-time stride.
+static bool calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
+ const DataLayout &DL, ScalarEvolution &SE,
+ SmallVectorImpl<unsigned> &SortedIndices,
+ const SCEV **StrideSCEV) {
SmallVector<const SCEV *> SCEVs;
const SCEV *PtrSCEVLowest = nullptr;
const SCEV *PtrSCEVHighest = nullptr;
@@ -5753,7 +5753,7 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
for (Value *Ptr : PointerOps) {
const SCEV *PtrSCEV = SE.getSCEV(Ptr);
if (!PtrSCEV)
- return std::nullopt;
+ return false;
SCEVs.push_back(PtrSCEV);
if (!PtrSCEVLowest && !PtrSCEVHighest) {
PtrSCEVLowest = PtrSCEVHighest = PtrSCEV;
@@ -5761,14 +5761,14 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
}
const SCEV *Diff = SE.getMinusSCEV(PtrSCEV, PtrSCEVLowest);
if (isa<SCEVCouldNotCompute>(Diff))
- return std::nullopt;
+ return false;
if (Diff->isNonConstantNegative()) {
PtrSCEVLowest = PtrSCEV;
continue;
}
const SCEV *Diff1 = SE.getMinusSCEV(PtrSCEVHighest, PtrSCEV);
if (isa<SCEVCouldNotCompute>(Diff1))
- return std::nullopt;
+ return false;
if (Diff1->isNonConstantNegative()) {
PtrSCEVHighest = PtrSCEV;
continue;
@@ -5777,7 +5777,7 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
// Dist = PtrSCEVHighest - PtrSCEVLowest;
const SCEV *Dist = SE.getMinusSCEV(PtrSCEVHighest, PtrSCEVLowest);
if (isa<SCEVCouldNotCompute>(Dist))
- return std::nullopt;
+ return false;
int Size = DL.getTypeStoreSize(ElemTy);
auto TryGetStride = [&](const SCEV *Dist,
const SCEV *Multiplier) -> const SCEV * {
@@ -5798,10 +5798,10 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
const SCEV *Sz = SE.getConstant(Dist->getType(), Size * (SCEVs.size() - 1));
Stride = TryGetStride(Dist, Sz);
if (!Stride)
- return std::nullopt;
+ return false;
}
if (!Stride || isa<SCEVConstant>(Stride))
- return std::nullopt;
+ return false;
// Iterate through all pointers and check if all distances are
// unique multiple of Stride.
using DistOrdPair = std::pair<int64_t, int>;
@@ -5815,28 +5815,28 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
const SCEV *Diff = SE.getMinusSCEV(PtrSCEV, PtrSCEVLowest);
const SCEV *Coeff = TryGetStride(Diff, Stride);
if (!Coeff)
- return std::nullopt;
+ return false;
const auto *SC = dyn_cast<SCEVConstant>(Coeff);
if (!SC || isa<SCEVCouldNotCompute>(SC))
- return std::nullopt;
+ return false;
if (!SE.getMinusSCEV(PtrSCEV, SE.getAddExpr(PtrSCEVLowest,
SE.getMulExpr(Stride, SC)))
->isZero())
- return std::nullopt;
+ return false;
Dist = SC->getAPInt().getZExtValue();
}
// If the strides are not the same or repeated, we can't vectorize.
if ((Dist / Size) * Size != Dist || (Dist / Size) >= SCEVs.size())
- return std::nullopt;
+ return false;
auto Res = Offsets.emplace(Dist, Cnt);
if (!Res.second)
- return std::nullopt;
+ return false;
// Consecutive order if the inserted element is the last one.
IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
++Cnt;
}
if (Offsets.size() != SCEVs.size())
- return std::nullopt;
+ return false;
SortedIndices.clear();
if (!IsConsecutive) {
// Fill SortedIndices array only if it is non-consecutive.
@@ -5847,10 +5847,9 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
++Cnt;
}
}
- if (!Inst)
- return nullptr;
- SCEVExpander Expander(SE, DL, "strided-load-vec");
- return Expander.expandCodeFor(Stride, Stride->getType(), Inst);
+ if (StrideSCEV)
+ *StrideSCEV = Stride;
+ return true;
}
static std::pair<InstructionCost, InstructionCost>
@@ -6246,11 +6245,10 @@ static bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
return false;
}
-BoUpSLP::LoadsState
-BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
- SmallVectorImpl<unsigned> &Order,
- SmallVectorImpl<Value *> &PointerOps,
- unsigned *BestVF, bool TryRecursiveCheck) const {
+BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
+ ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order,
+ SmallVectorImpl<Value *> &PointerOps, const SCEV **StrideSCEV,
+ unsigned *BestVF, bool TryRecursiveCheck) const {
// Check that a vectorized load would load the same memory as a scalar
// load. For example, we don't want to vectorize loads that are smaller
// than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
@@ -6289,7 +6287,7 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
if (!IsSorted) {
if (Sz > MinProfitableStridedLoads && TTI->isTypeLegal(VecTy)) {
if (TTI->isLegalStridedLoadStore(VecTy, CommonAlignment) &&
- calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order))
+ calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order, StrideSCEV))
return LoadsState::StridedVectorize;
}
@@ -6418,9 +6416,9 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
SmallVector<unsigned> Order;
SmallVector<Value *> PointerOps;
- LoadsState LS =
- canVectorizeLoads(Slice, Slice.front(), Order, PointerOps, BestVF,
- /*TryRecursiveCheck=*/false);
+ LoadsState LS = canVectorizeLoads(Slice, Slice.front(), Order,
+ PointerOps, StrideSCEV, BestVF,
+ /*TryRecursiveCheck=*/false);
// Check that the sorted loads are consecutive.
if (LS == LoadsState::Gather) {
if (BestVF) {
@@ -8565,8 +8563,9 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
// Try to build vector load.
ArrayRef<Value *> Values(
reinterpret_cast<Value *const *>(Slice.begin()), Slice.size());
- LoadsState LS = canVectorizeLoads(Values, Slice.front(), CurrentOrder,
- PointerOps, &BestVF);
+ LoadsState LS =
+ canVectorizeLoads(Values, Slice.front(), CurrentOrder, PointerOps,
+ /*StrideSCEV = */ nullptr, &BestVF);
if (LS != LoadsState::Gather ||
(BestVF > 1 && static_cast<unsigned>(NumElts) == 2 * BestVF)) {
if (LS == LoadsState::ScatterVectorize) {
@@ -9171,7 +9170,7 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
const InstructionsState &S, ArrayRef<Value *> VL,
bool IsScatterVectorizeUserTE, OrdersType &CurrentOrder,
- SmallVectorImpl<Value *> &PointerOps) {
+ SmallVectorImpl<Value *> &PointerOps, const SCEV **StrideSCEV) {
assert(S.getMainOp() &&
"Expected instructions with same/alternate opcodes only.");
@@ -9273,7 +9272,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
});
});
};
- switch (canVectorizeLoads(VL, VL0, CurrentOrder, PointerOps)) {
+ switch (canVectorizeLoads(VL, VL0, CurrentOrder, PointerOps, StrideSCEV)) {
case LoadsState::Vectorize:
return TreeEntry::Vectorize;
case LoadsState::CompressVectorize:
@@ -10710,8 +10709,9 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
OrdersType CurrentOrder;
SmallVector<Value *> PointerOps;
+ const SCEV *StrideSCEV = nullptr;
TreeEntry::EntryState State = getScalarsVectorizationState(
- S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps);
+ S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps, &StrideSCEV);
if (State == TreeEntry::NeedToGather) {
newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
return;
@@ -10871,6 +10871,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
+ TE->StrideSCEV = StrideSCEV;
LLVM_DEBUG(dbgs() << "SLP: added a new TreeEntry (strided LoadInst).\n";
TE->dump());
break;
@@ -18711,16 +18712,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ConstantInt::get(StrideTy, (IsReverseOrder ? -1 : 1) * Stride *
DL->getTypeAllocSize(ScalarTy));
} else {
- SmallVector<Value *> PointerOps(E->Scalars.size(), nullptr);
- transform(E->Scalars, PointerOps.begin(), [](Value *V) {
- return cast<LoadInst>(V)->getPointerOperand();
- });
- OrdersType Order;
- std::optional<Value *> Stride =
- calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order,
- &*Builder.GetInsertPoint());
+ const SCEV *StrideSCEV = E->StrideSCEV;
+ assert(StrideSCEV);
+ SCEVExpander Expander(*SE, *DL, "strided-load-vec");
+ Value *Stride = Expander.expandCodeFor(
+ StrideSCEV, StrideSCEV->getType(), &*Builder.GetInsertPoint());
Value *NewStride =
- Builder.CreateIntCast(*Stride, StrideTy, /*isSigned=*/true);
+ Builder.CreateIntCast(Stride, StrideTy, /*isSigned=*/true);
StrideVal = Builder.CreateMul(
NewStride,
ConstantInt::get(
Why do you want to do this? It does not affect compile time much.
Eventually I want to merge a patch addressing this missed opportunity: that patch would look really ugly without this refactoring. Also, I think this is a good thing by itself; it simplifies the code somewhat.
I'll post the other patches soon.
Could you try to post the patch without this change first? It does not simplify the code; we have just 2 function calls. Instead, it will add an extra 8 bytes to all nodes, even non-load ones, for all targets, increasing the memory footprint.
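For a concrete sense of the footprint concern, the small sketch below shows how adding one pointer-sized member grows every node by 8 bytes on a typical 64-bit target, whether or not the node is a load. `NodeBefore` and `NodeAfter` are hypothetical stand-ins, not the real `TreeEntry` layout.

```cpp
// Tiny standalone illustration of the per-node cost of the new field:
// one extra pointer per node, typically 8 bytes on 64-bit targets.
#include <cstdio>

struct NodeBefore {
  int State;
  void *Operands;
};

struct NodeAfter {
  int State;
  void *Operands;
  const void *StrideSCEV = nullptr;  // the newly added per-node member
};

int main() {
  std::printf("before: %zu bytes, after: %zu bytes\n", sizeof(NodeBefore),
              sizeof(NodeAfter));
  return 0;
}
```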
I posted the whole patch here: #153074 |