/*========================== begin_copyright_notice ============================

Copyright (C) 2024 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

/*========================== begin_copyright_notice ============================

Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 with LLVM-exception

============================= end_copyright_notice ===========================*/

From 424403326ad5ab2c4cc1f387b1ba17e27934c72d Mon Sep 17 00:00:00 2001
From: Florian Hahn <[email protected]>
Date: Fri, 9 Dec 2022 22:14:03 +0000
Subject: [PATCH 3/5] [SCEV] Cache ZExt SCEV expressions.

When creating SCEV expressions for ZExt, there's quite a bit of
reasoning done and in many places the reasoning in turn will try to
create new SCEVs for other ZExts.

This can have a huge compile-time impact. The attached test from #58402
takes an excessive amount of compile time; without the patch, the test
doesn't complete in 1500+ seconds, but with the patch it completes in 1
second.

To speed up this case, cache created ZExt expressions for given (SCEV, Ty) pairs.
Caching just ZExts is relatively straight-forward, but it might make
sense to extend it to other expressions in the future.

This has a slight positive impact on CTMark:
* O3: -0.03%
* ReleaseThinLTO: -0.03%
* ReleaseLTO-g: 0.00%

The patch also improves compile-time for some internal real-world workloads
where time spent in SCEV goes from ~300 seconds to ~3 seconds.

There are a few cases where computing & caching the result earlier may
return more pessimistic results, but the compile-time savings seem to
outweigh that.

Fixes #58402.

Reviewed By: mkazantsev

Differential Revision: https://reviews.llvm.org/D137505
---
 llvm/include/llvm/Analysis/ScalarEvolution.h | 68 +++++++++
 llvm/lib/Analysis/ScalarEvolution.cpp | 61 ++++++++
 .../pr58402-large-number-of-zext-exprs.ll | 131 ++++++++++++++++++
 .../IndVarSimplify/AArch64/widen-loop-comp.ll | 24 ++--
 4 files changed, 272 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/Analysis/ScalarEvolution/pr58402-large-number-of-zext-exprs.ll

diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index b16aa7017719..f0c3ba40a8d6 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -567,6 +567,8 @@ public:
 const SCEV *getPtrToIntExpr(const SCEV *Op, Type *Ty);
 const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
 const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
+ const SCEV *getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
+ unsigned Depth = 0);
 const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
 const SCEV *getCastExpr(SCEVTypes Kind, const SCEV *Op, Type *Ty);
 const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty);
@@ -1209,6 +1211,45 @@ public:
 /// to be infinite, it must also be undefined.
 bool loopIsFiniteByAssumption(const Loop *L);

+ class FoldID {
+ SmallVector<unsigned, 4> Bits;
+
+ public:
+ void addInteger(unsigned long I) { Bits.push_back(I); }
+ void addInteger(unsigned I) { Bits.push_back(I); }
+ void addInteger(int I) { Bits.push_back(I); }
+
+ void addInteger(unsigned long long I) {
+ addInteger(unsigned(I));
+ addInteger(unsigned(I >> 32));
+ }
+
+ void addPointer(const void *Ptr) {
+ // Note: this adds pointers to the hash using sizes and endianness that
+ // depend on the host. It doesn't matter, however, because hashing on
+ // pointer values is inherently unstable. Nothing should depend on the
+ // ordering of nodes in the folding set.
+ static_assert(sizeof(uintptr_t) <= sizeof(unsigned long long),
+ "unexpected pointer size");
+ addInteger(reinterpret_cast<uintptr_t>(Ptr));
+ }
+
+ unsigned computeHash() const {
+ unsigned Hash = Bits.size();
+ for (unsigned I = 0; I != Bits.size(); ++I)
+ Hash = detail::combineHashValue(Hash, Bits[I]);
+ return Hash;
+ }
+ bool operator==(const FoldID &RHS) const {
+ if (Bits.size() != RHS.Bits.size())
+ return false;
+ for (unsigned I = 0; I != Bits.size(); ++I)
+ if (Bits[I] != RHS.Bits[I])
+ return false;
+ return true;
+ }
+ };
+
 private:
 /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
 /// Value is deleted.
@@ -1289,6 +1330,11 @@ private:
 /// This is a cache of the values we have analyzed so far.
 ValueExprMapType ValueExprMap;

+ /// This is a cache for expressions that got folded to a different existing
+ /// SCEV.
+ DenseMap<FoldID, const SCEV *> FoldCache;
+ DenseMap<const SCEV *, SmallVector<FoldID, 2>> FoldCacheUser;
+
 /// Mark predicate values currently being processed by isImpliedCond.
 SmallPtrSet<const Value *, 6> PendingLoopPredicates;

@@ -2263,6 +2309,28 @@ private:
 const SCEV *BackedgeCount = nullptr;
 };

+template <> struct DenseMapInfo<ScalarEvolution::FoldID> {
+ static inline ScalarEvolution::FoldID getEmptyKey() {
+ ScalarEvolution::FoldID ID;
+ ID.addInteger(~0ULL);
+ return ID;
+ }
+ static inline ScalarEvolution::FoldID getTombstoneKey() {
+ ScalarEvolution::FoldID ID;
+ ID.addInteger(~0ULL - 1ULL);
+ return ID;
+ }
+
+ static unsigned getHashValue(const ScalarEvolution::FoldID &Val) {
+ return Val.computeHash();
+ }
+
+ static bool isEqual(const ScalarEvolution::FoldID &LHS,
+ const ScalarEvolution::FoldID &RHS) {
+ return LHS == RHS;
+ }
+};
+
 } // end namespace llvm

 #endif // LLVM_ANALYSIS_SCALAREVOLUTION_H
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 495cd73d8eef..145e0cd2e3bc 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1587,6 +1587,30 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
 assert(!Op->getType()->isPointerTy() && "Can't extend pointer!");
 Ty = getEffectiveSCEVType(Ty);

+ FoldID ID;
+ ID.addInteger(scZeroExtend);
+ ID.addPointer(Op);
+ ID.addPointer(Ty);
+ auto Iter = FoldCache.find(ID);
+ if (Iter != FoldCache.end())
+ return Iter->second;
+
+ const SCEV *S = getZeroExtendExprImpl(Op, Ty, Depth);
+ if (!isa<SCEVZeroExtendExpr>(S)) {
+ FoldCache.insert({ID, S});
+ auto R = FoldCacheUser.insert({S, {}});
+ R.first->second.push_back(ID);
+ }
+ return S;
+}
+
+const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
+ unsigned Depth) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!");
+ assert(!Op->getType()->isPointerTy() && "Can't extend pointer!");
+
 // Fold if the operand is constant.
 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
 return getConstant(
@@ -7840,6 +7864,8 @@ void ScalarEvolution::forgetAllLoops() {
 HasRecMap.clear();
 MinTrailingZerosCache.clear();
 PredicatedSCEVRewrites.clear();
+ FoldCache.clear();
+ FoldCacheUser.clear();
 }

 void ScalarEvolution::forgetLoop(const Loop *L) {
@@ -13236,6 +13262,12 @@ void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) {
 forgetBackedgeTakenCounts(Pair.getPointer(), Pair.getInt());
 BECountUsers.erase(BEUsersIt);
 }
+
+ auto FoldUser = FoldCacheUser.find(S);
+ if (FoldUser != FoldCacheUser.end())
+ for (auto &KV : FoldUser->second)
+ FoldCache.erase(KV);
+ FoldCacheUser.erase(S);
 }

 void
@@ -13483,6 +13515,35 @@ void ScalarEvolution::verify() const {
 }
 }
 }
+
+ // Verify FoldCache/FoldCacheUser caches.
+ for (auto [FoldID, Expr] : FoldCache) {
+ auto I = FoldCacheUser.find(Expr);
+ if (I == FoldCacheUser.end()) {
+ dbgs() << "Missing entry in FoldCacheUser for cached expression " << *Expr
+ << "!\n";
+ std::abort();
+ }
+ if (!is_contained(I->second, FoldID)) {
+ dbgs() << "Missing FoldID in cached users of " << *Expr << "!\n";
+ std::abort();
+ }
+ }
+ for (auto [Expr, IDs] : FoldCacheUser) {
+ for (auto &FoldID : IDs) {
+ auto I = FoldCache.find(FoldID);
+ if (I == FoldCache.end()) {
+ dbgs() << "Missing entry in FoldCache for expression " << *Expr
+ << "!\n";
+ std::abort();
+ }
+ if (I->second != Expr) {
+ dbgs() << "Entry in FoldCache doesn't match FoldCacheUser: "
+ << *I->second << " != " << *Expr << "!\n";
+ std::abort();
+ }
+ }
+ }
 }

 bool ScalarEvolution::invalidate(
--
2.34.1

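The following is not part of the patch above. It is a minimal, self-contained C++ sketch of the caching pattern the patch introduces, assuming only the standard library. FoldCacheDemo, FoldKey, getOrCompute and forget are hypothetical names that merely mirror the shape of FoldCache/FoldCacheUser, the new prologue of getZeroExtendExpr and the invalidation added to forgetMemoizedResultsImpl; the real implementation uses LLVM's DenseMap and FoldID rather than std::map and std::tuple.

#include <cstddef>
#include <iostream>
#include <map>
#include <string>
#include <tuple>
#include <vector>

// Stand-in for 'const SCEV *'; real SCEV nodes are uniqued LLVM objects.
struct Expr {
  std::string Name;
};

// Analogous to a FoldID built from (scZeroExtend, Op, Ty) in the patch.
using FoldKey = std::tuple<unsigned, const Expr *, const Expr *>;

class FoldCacheDemo {
  std::map<FoldKey, const Expr *> Cache;              // role of FoldCache
  std::map<const Expr *, std::vector<FoldKey>> Users; // role of FoldCacheUser

public:
  // Lookup-or-compute, mirroring the new prologue of getZeroExtendExpr():
  // consult the cache first; otherwise compute, memoize the result, and
  // record a back-reference so the entry can be invalidated later.
  template <typename ComputeFn>
  const Expr *getOrCompute(const FoldKey &K, ComputeFn Compute) {
    auto It = Cache.find(K);
    if (It != Cache.end())
      return It->second;
    const Expr *Result = Compute();
    Cache.insert({K, Result});
    Users[Result].push_back(K);
    return Result;
  }

  // Mirrors the forgetMemoizedResultsImpl() hunk: when an expression is
  // forgotten, drop every cache entry that resolved to it.
  void forget(const Expr *E) {
    auto It = Users.find(E);
    if (It == Users.end())
      return;
    for (const FoldKey &K : It->second)
      Cache.erase(K);
    Users.erase(It);
  }

  std::size_t size() const { return Cache.size(); }
};

int main() {
  Expr Op{"%x (i32)"}, Ty{"i64"}, Folded{"folded form of zext i32 %x to i64"};
  FoldCacheDemo FC;
  FoldKey Key{/*kind: scZeroExtend*/ 1u, &Op, &Ty};

  // First call computes and caches; the second is a pure cache hit.
  FC.getOrCompute(Key, [&] { return &Folded; });
  std::cout << FC.getOrCompute(Key, [&] { return &Folded; })->Name << "\n";
  std::cout << "cached entries: " << FC.size() << "\n"; // 1

  // Invalidating the folded expression also erases the entry keyed on it.
  FC.forget(&Folded);
  std::cout << "cached entries: " << FC.size() << "\n"; // 0
  return 0;
}

The reverse map is what keeps such a cache coherent: without it, forgetting an expression would leave dangling entries in the forward cache, which is exactly the invariant the patch's new verify() checks guard against.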