From ca833d49da3b04920ebfcb3303baf8311ae61b03 Mon Sep 17 00:00:00 2001 From: Alexander Kornienko Date: Thu, 17 Apr 2025 15:53:06 +0200 Subject: [PATCH 1/2] Replace most uses of for_each with range-for loops. This removes a bit of complexity from the code, where it doesn't seem to be justified. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 102 +++++++++--------- 1 file changed, 54 insertions(+), 48 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index fd23fb6c81c2c..4d2968ace17c6 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -14584,13 +14584,13 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals, }; if (!ValueToExtUses) { ValueToExtUses.emplace(); - for_each(enumerate(ExternalUses), [&](const auto &P) { + for (const auto &P : enumerate(ExternalUses)) { // Ignore phis in loops. if (IsPhiInLoop(P.value())) - return; + continue; ValueToExtUses->try_emplace(P.value().Scalar, P.index()); - }); + } } // Can use original instruction, if no operands vectorized or they are // marked as externally used already. @@ -14668,13 +14668,13 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals, } if (KeepScalar) { ExternalUsesAsOriginalScalar.insert(EU.Scalar); - for_each(Inst->operands(), [&](Value *V) { + for (Value *V : Inst->operands()) { auto It = ValueToExtUses->find(V); if (It != ValueToExtUses->end()) { // Replace all uses to avoid compiler crash. ExternalUses[It->second].User = nullptr; } - }); + } ExtraCost = ScalarCost; if (!IsPhiInLoop(EU)) ExtractsCount[Entry].insert(Inst); @@ -14683,13 +14683,13 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals, // Update the users of the operands of the cast operand to avoid // compiler crash. if (auto *IOp = dyn_cast(Inst->getOperand(0))) { - for_each(IOp->operands(), [&](Value *V) { + for (Value *V : IOp->operands()) { auto It = ValueToExtUses->find(V); if (It != ValueToExtUses->end()) { // Replace all uses to avoid compiler crash. ExternalUses[It->second].User = nullptr; } - }); + } } } } @@ -15325,7 +15325,9 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( // tree. Entries.push_back(FirstEntries.front()); // Update mapping between values and corresponding tree entries. - for_each(UsedValuesEntry, [&](auto &P) { P.second = 0; }); + for (auto &P : UsedValuesEntry) { + P.second = 0; + } VF = FirstEntries.front()->getVectorFactor(); } else { // Try to find nodes with the same vector factor. @@ -15375,13 +15377,13 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( ValuesToEntries.emplace_back().insert(E->Scalars.begin(), E->Scalars.end()); // Update mapping between values and corresponding tree entries. - for_each(UsedValuesEntry, [&](auto &P) { + for (auto &P : UsedValuesEntry) { for (unsigned Idx : seq(ValuesToEntries.size())) if (ValuesToEntries[Idx].contains(P.first)) { P.second = Idx; break; } - }); + } } bool IsSplatOrUndefs = isSplat(VL) || all_of(VL, IsaPred); @@ -15527,12 +15529,12 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( (MaxElement % VF) - (MinElement % VF) + 1)); if (NewVF < VF) { - for_each(SubMask, [&](int &Idx) { + for (int &Idx : SubMask) { if (Idx == PoisonMaskElem) - return; + continue; Idx = ((Idx % VF) - (((MinElement % VF) / NewVF) * NewVF)) % NewVF + (Idx >= static_cast(VF) ? NewVF : 0); - }); + } } else { NewVF = VF; } @@ -19304,8 +19306,11 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, // whole bundle might not be ready. ReadyInsts.remove(BundleMember); if (ArrayRef Bundles = getScheduleBundles(V); - !Bundles.empty()) - for_each(Bundles, [&](ScheduleBundle *B) { ReadyInsts.remove(B); }); + !Bundles.empty()) { + for (ScheduleBundle *B : Bundles) { + ReadyInsts.remove(B); + } + } if (!BundleMember->isScheduled()) continue; @@ -19630,23 +19635,23 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleBundle &Bundle, } continue; } - for_each(Bundles, [&](ScheduleBundle *Bundle) { + for (ScheduleBundle *Bundle : Bundles) { if (!Visited.insert(Bundle).second || Bundle->hasValidDependencies()) - return; + continue; assert(isInSchedulingRegion(*Bundle) && "ScheduleData not in scheduling region"); for_each(Bundle->getBundle(), ProcessNode); - }); + } if (InsertInReadyList && SD->isReady()) { - for_each(Bundles, [&](ScheduleBundle *Bundle) { + for (ScheduleBundle *Bundle : Bundles) { assert(isInSchedulingRegion(*Bundle) && "ScheduleData not in scheduling region"); if (!Bundle->isReady()) - return; + continue; ReadyInsts.insert(Bundle); LLVM_DEBUG(dbgs() << "SLP: gets ready on update: " << *Bundle << "\n"); - }); + } } } } @@ -20030,8 +20035,9 @@ bool BoUpSLP::collectValuesToDemote( if (Operands.empty()) { if (!IsTruncRoot) MaxDepthLevel = 1; - (void)for_each(E.Scalars, std::bind(IsPotentiallyTruncated, _1, - std::ref(BitWidth))); + for (Value *V : E.Scalars) { + (void)IsPotentiallyTruncated(V, BitWidth); + } } else { // Several vectorized uses? Check if we can truncate it, otherwise - // exit. @@ -21032,17 +21038,17 @@ bool SLPVectorizerPass::vectorizeStores( unsigned Sz = 1 + Log2_32(MaxVF) - Log2_32(MinVF); SmallVector CandidateVFs(Sz + (NonPowerOf2VF > 0 ? 1 : 0)); unsigned Size = MinVF; - for_each(reverse(CandidateVFs), [&](unsigned &VF) { + for (unsigned &VF : reverse(CandidateVFs)) { VF = Size > MaxVF ? NonPowerOf2VF : Size; Size *= 2; - }); + } unsigned End = Operands.size(); unsigned Repeat = 0; constexpr unsigned MaxAttempts = 4; OwningArrayRef> RangeSizes(Operands.size()); - for_each(RangeSizes, [](std::pair &P) { + for (std::pair &P : RangeSizes) { P.first = P.second = 1; - }); + } DenseMap> NonSchedulable; auto IsNotVectorized = [](bool First, const std::pair &P) { @@ -21118,22 +21124,22 @@ bool SLPVectorizerPass::vectorizeStores( AnyProfitableGraph = RepeatChanged = Changed = true; // If we vectorized initial block, no need to try to vectorize // it again. - for_each(RangeSizes.slice(Cnt, Size), - [](std::pair &P) { - P.first = P.second = 0; - }); + for (std::pair &P : + RangeSizes.slice(Cnt, Size)) { + P.first = P.second = 0; + } if (Cnt < StartIdx + MinVF) { - for_each(RangeSizes.slice(StartIdx, Cnt - StartIdx), - [](std::pair &P) { - P.first = P.second = 0; - }); + for (std::pair &P : + RangeSizes.slice(StartIdx, Cnt - StartIdx)) { + P.first = P.second = 0; + } StartIdx = Cnt + Size; } if (Cnt > Sz - Size - MinVF) { - for_each(RangeSizes.slice(Cnt + Size, Sz - (Cnt + Size)), - [](std::pair &P) { - P.first = P.second = 0; - }); + for (std::pair &P : + RangeSizes.slice(Cnt + Size, Sz - (Cnt + Size))) { + P.first = P.second = 0; + } if (Sz == End) End = Cnt; Sz = Cnt; @@ -21159,13 +21165,13 @@ bool SLPVectorizerPass::vectorizeStores( continue; } if (TreeSize > 1) - for_each(RangeSizes.slice(Cnt, Size), - [&](std::pair &P) { - if (Size >= MaxRegVF) - P.second = std::max(P.second, TreeSize); - else - P.first = std::max(P.first, TreeSize); - }); + for (std::pair &P : + RangeSizes.slice(Cnt, Size)) { + if (Size >= MaxRegVF) + P.second = std::max(P.second, TreeSize); + else + P.first = std::max(P.first, TreeSize); + } ++Cnt; AnyProfitableGraph = true; } @@ -21207,10 +21213,10 @@ bool SLPVectorizerPass::vectorizeStores( CandidateVFs.push_back(Limit); if (VF > MaxTotalNum || VF >= StoresLimit) break; - for_each(RangeSizes, [&](std::pair &P) { + for (std::pair &P : RangeSizes) { if (P.first != 0) P.first = std::max(P.second, P.first); - }); + } // Last attempt to vectorize max number of elements, if all previous // attempts were unsuccessful because of the cost issues. CandidateVFs.push_back(VF); From 596e043b115d4fbdaeef050e952765875052e8de Mon Sep 17 00:00:00 2001 From: Alexander Kornienko Date: Thu, 17 Apr 2025 21:29:31 +0200 Subject: [PATCH 2/2] Remove extra braces. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 21 +++++++------------ 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index f269bde36426a..13c1eb572ef92 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15334,9 +15334,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( // tree. Entries.push_back(FirstEntries.front()); // Update mapping between values and corresponding tree entries. - for (auto &P : UsedValuesEntry) { + for (auto &P : UsedValuesEntry) P.second = 0; - } VF = FirstEntries.front()->getVectorFactor(); } else { // Try to find nodes with the same vector factor. @@ -19309,9 +19308,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, ReadyInsts.remove(BundleMember); if (ArrayRef Bundles = getScheduleBundles(V); !Bundles.empty()) { - for (ScheduleBundle *B : Bundles) { + for (ScheduleBundle *B : Bundles) ReadyInsts.remove(B); - } } if (!BundleMember->isScheduled()) @@ -20037,9 +20035,8 @@ bool BoUpSLP::collectValuesToDemote( if (Operands.empty()) { if (!IsTruncRoot) MaxDepthLevel = 1; - for (Value *V : E.Scalars) { + for (Value *V : E.Scalars) (void)IsPotentiallyTruncated(V, BitWidth); - } } else { // Several vectorized uses? Check if we can truncate it, otherwise - // exit. @@ -21048,9 +21045,8 @@ bool SLPVectorizerPass::vectorizeStores( unsigned Repeat = 0; constexpr unsigned MaxAttempts = 4; OwningArrayRef> RangeSizes(Operands.size()); - for (std::pair &P : RangeSizes) { + for (std::pair &P : RangeSizes) P.first = P.second = 1; - } DenseMap> NonSchedulable; auto IsNotVectorized = [](bool First, const std::pair &P) { @@ -21127,21 +21123,18 @@ bool SLPVectorizerPass::vectorizeStores( // If we vectorized initial block, no need to try to vectorize // it again. for (std::pair &P : - RangeSizes.slice(Cnt, Size)) { + RangeSizes.slice(Cnt, Size)) P.first = P.second = 0; - } if (Cnt < StartIdx + MinVF) { for (std::pair &P : - RangeSizes.slice(StartIdx, Cnt - StartIdx)) { + RangeSizes.slice(StartIdx, Cnt - StartIdx)) P.first = P.second = 0; - } StartIdx = Cnt + Size; } if (Cnt > Sz - Size - MinVF) { for (std::pair &P : - RangeSizes.slice(Cnt + Size, Sz - (Cnt + Size))) { + RangeSizes.slice(Cnt + Size, Sz - (Cnt + Size))) P.first = P.second = 0; - } if (Sz == End) End = Cnt; Sz = Cnt;