|
17 | 17 | #include "llvm/ADT/SmallSet.h"
|
18 | 18 | #include "llvm/ADT/SmallVector.h"
|
19 | 19 | #include "llvm/ADT/Statistic.h"
|
| 20 | +#include "llvm/ADT/StringMap.h" |
20 | 21 | #include "llvm/ADT/StringRef.h"
|
21 |
| -#include "llvm/ADT/StringSet.h" |
22 | 22 | #include "llvm/Analysis/DependenceAnalysis.h"
|
23 | 23 | #include "llvm/Analysis/LoopCacheAnalysis.h"
|
24 | 24 | #include "llvm/Analysis/LoopInfo.h"
|
@@ -70,6 +70,13 @@ namespace {
|
70 | 70 |
|
71 | 71 | using LoopVector = SmallVector<Loop *, 8>;
|
72 | 72 |
|
| 73 | +/// A list of direction vectors. Each entry represents a direction vector |
| 74 | +/// corresponding to one or more dependencies existing in the loop nest. All |
| 75 | +/// direction vectors have the same length, N + 1, where N is the depth of |
| 76 | +/// the loop nest. The first N elements give the dependency direction for |
| 77 | +/// each of the N loops. The last one indicates whether this entry is known |
| 78 | +/// to be a forward dependency ('<') or not ('*'). The term "forward" aligns |
| 79 | +/// with what is defined in LoopAccessAnalysis. |
73 | 80 | // TODO: Check if we can use a sparse matrix here.
|
74 | 81 | using CharMatrix = std::vector<std::vector<char>>;
|
75 | 82 |
|
@@ -126,11 +133,33 @@ static bool noDuplicateRulesAndIgnore(ArrayRef<RuleTy> Rules) {
|
126 | 133 |
|
127 | 134 | static void printDepMatrix(CharMatrix &DepMatrix) {
|
128 | 135 | for (auto &Row : DepMatrix) {
|
129 |
| - for (auto D : Row) |
| 136 | + // Drop the last element because it is a flag indicating whether this is |
| 137 | + // forward dependency or not, which doesn't affect the legality check. |
| 138 | + for (char D : drop_end(Row)) |
130 | 139 | LLVM_DEBUG(dbgs() << D << " ");
|
131 | 140 | LLVM_DEBUG(dbgs() << "\n");
|
132 | 141 | }
|
133 | 142 | }
|
| 143 | + |
| 144 | +/// Return true if \p Src appears before \p Dst in the same basic block. |
| 145 | +/// Precondition: \p Src and \Dst are distinct instructions within the same |
| 146 | +/// basic block. |
| 147 | +static bool inThisOrder(const Instruction *Src, const Instruction *Dst) { |
| 148 | + assert(Src->getParent() == Dst->getParent() && Src != Dst && |
| 149 | + "Expected Src and Dst to be different instructions in the same BB"); |
| 150 | + |
| 151 | + bool FoundSrc = false; |
| 152 | + for (const Instruction &I : *(Src->getParent())) { |
| 153 | + if (&I == Src) { |
| 154 | + FoundSrc = true; |
| 155 | + continue; |
| 156 | + } |
| 157 | + if (&I == Dst) |
| 158 | + return FoundSrc; |
| 159 | + } |
| 160 | + |
| 161 | + llvm_unreachable("Dst not found"); |
| 162 | +} |
134 | 163 | #endif
|
135 | 164 |
|
136 | 165 | static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
|
@@ -174,7 +203,10 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
|
174 | 203 | return false;
|
175 | 204 | }
|
176 | 205 | ValueVector::iterator I, IE, J, JE;
|
177 |
| - StringSet<> Seen; |
| 206 | + |
| 207 | + // Manage direction vectors that are already seen. Map each direction vector |
| 208 | + // to an index of DepMatrix at which it is stored. |
| 209 | + StringMap<unsigned> Seen; |
178 | 210 |
|
179 | 211 | for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
|
180 | 212 | for (J = I, JE = MemInstr.end(); J != JE; ++J) {
|
@@ -228,9 +260,49 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
|
228 | 260 | Dep.push_back('I');
|
229 | 261 | }
|
230 | 262 |
|
| 263 | + // Test whether the dependency is forward or not. |
| 264 | + bool IsKnownForward = true; |
| 265 | + if (Src->getParent() != Dst->getParent()) { |
| 266 | + // In general, when Src and Dst are in different BBs, the execution |
| 267 | + // order of them within a single iteration is not guaranteed. Treat |
| 268 | + // conservatively as not-forward dependency in this case. |
| 269 | + IsKnownForward = false; |
| 270 | + } else { |
| 271 | + // Src and Dst are in the same BB. If they are the different |
| 272 | + // instructions, Src should appear before Dst in the BB as they are |
| 273 | + // stored to MemInstr in that order. |
| 274 | + assert((Src == Dst || inThisOrder(Src, Dst)) && |
| 275 | + "Unexpected instructions"); |
| 276 | + |
| 277 | + // If the Dependence object is reversed (due to normalization), it |
| 278 | + // represents the dependency from Dst to Src, meaning it is a backward |
| 279 | + // dependency. Otherwise it should be a forward dependency. |
| 280 | + bool IsReversed = D->getSrc() != Src; |
| 281 | + if (IsReversed) |
| 282 | + IsKnownForward = false; |
| 283 | + } |
| 284 | + |
| 285 | + // Initialize the last element. Assume forward dependencies only; it |
| 286 | + // will be updated later if there is any non-forward dependency. |
| 287 | + Dep.push_back('<'); |
| 288 | + |
| 289 | + // The last element should express the "summary" among one or more |
| 290 | + // direction vectors whose first N elements are the same (where N is |
| 291 | + // the depth of the loop nest). Hence we exclude the last element from |
| 292 | + // the Seen map. |
| 293 | + auto [Ite, Inserted] = Seen.try_emplace( |
| 294 | + StringRef(Dep.data(), Dep.size() - 1), DepMatrix.size()); |
| 295 | + |
231 | 296 | // Make sure we only add unique entries to the dependency matrix.
|
232 |
| - if (Seen.insert(StringRef(Dep.data(), Dep.size())).second) |
| 297 | + if (Inserted) |
233 | 298 | DepMatrix.push_back(Dep);
|
| 299 | + |
| 300 | + // If we cannot prove that this dependency is forward, change the last |
| 301 | + // element of the corresponding entry. Since a `[... *]` dependency |
| 302 | + // includes a `[... <]` dependency, we do not need to keep both and |
| 303 | + // change the existing entry instead. |
| 304 | + if (!IsKnownForward) |
| 305 | + DepMatrix[Ite->second].back() = '*'; |
234 | 306 | }
|
235 | 307 | }
|
236 | 308 | }
|
@@ -281,11 +353,12 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
|
281 | 353 | continue;
|
282 | 354 |
|
283 | 355 | // Check if the direction vector is lexicographically positive (or zero)
|
284 |
| - // for both before/after exchanged. |
285 |
| - if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size()) == false) |
| 356 | + // for both before/after exchanged. Ignore the last element because it |
| 357 | + // doesn't affect the legality. |
| 358 | + if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size() - 1) == false) |
286 | 359 | return false;
|
287 | 360 | std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
|
288 |
| - if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size()) == false) |
| 361 | + if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size() - 1) == false) |
289 | 362 | return false;
|
290 | 363 | }
|
291 | 364 | return true;
|
@@ -1334,22 +1407,35 @@ LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
|
1334 | 1407 | static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId) {
|
1335 | 1408 | for (const auto &Dep : DepMatrix) {
|
1336 | 1409 | char Dir = Dep[LoopId];
|
1337 |
| - if (Dir != 'I' && Dir != '=') |
1338 |
| - return false; |
| 1410 | + char DepType = Dep.back(); |
| 1411 | + assert((DepType == '<' || DepType == '*') && |
| 1412 | + "Unexpected element in dependency vector"); |
| 1413 | + |
| 1414 | + // There are no loop-carried dependencies. |
| 1415 | + if (Dir == '=' || Dir == 'I') |
| 1416 | + continue; |
| 1417 | + |
| 1418 | + // DepType being '<' means that this direction vector represents a forward |
| 1419 | + // dependency. In principle, a loop with '<' direction can be vectorized in |
| 1420 | + // this case. |
| 1421 | + if (Dir == '<' && DepType == '<') |
| 1422 | + continue; |
| 1423 | + |
| 1424 | + // We cannot prove that the loop is vectorizable. |
| 1425 | + return false; |
1339 | 1426 | }
|
1340 | 1427 | return true;
|
1341 | 1428 | }
|
1342 | 1429 |
|
1343 | 1430 | std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
|
1344 | 1431 | unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) {
|
1345 |
| - // If the outer loop is not loop independent it is not profitable to move |
1346 |
| - // this to inner position, since doing so would not enable inner loop |
1347 |
| - // parallelism. |
| 1432 | + // If the outer loop cannot be vectorized, it is not profitable to move this |
| 1433 | + // to inner position. |
1348 | 1434 | if (!canVectorize(DepMatrix, OuterLoopId))
|
1349 | 1435 | return false;
|
1350 | 1436 |
|
1351 |
| - // If inner loop has dependence and outer loop is loop independent then it is |
1352 |
| - // profitable to interchange to enable inner loop parallelism. |
| 1437 | + // If the inner loop cannot be vectorized but the outer loop can be, then it |
| 1438 | + // is profitable to interchange to enable inner loop parallelism. |
1353 | 1439 | if (!canVectorize(DepMatrix, InnerLoopId))
|
1354 | 1440 | return true;
|
1355 | 1441 |
|
|
0 commit comments