-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LoopInterchange] Don't consider loops with BTC=0 #167113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
e72522a
f51d9f6
e3abad5
5d9326a
56c3e9b
7d68d9c
9d548a5
738e6b6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -46,6 +46,7 @@ | |
| #include "llvm/Transforms/Utils/Local.h" | ||
| #include "llvm/Transforms/Utils/LoopUtils.h" | ||
| #include <cassert> | ||
| #include <map> | ||
| #include <utility> | ||
| #include <vector> | ||
|
|
||
|
|
@@ -421,9 +422,11 @@ static bool hasSupportedLoopDepth(ArrayRef<Loop *> LoopList, | |
| } | ||
|
|
||
| static bool isComputableLoopNest(ScalarEvolution *SE, | ||
| ArrayRef<Loop *> LoopList) { | ||
| ArrayRef<Loop *> LoopList, | ||
| std::map<const Loop *, const SCEV *> &LoopBTC) { | ||
| for (Loop *L : LoopList) { | ||
| const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L); | ||
| LoopBTC[L] = ExitCountOuter; | ||
| if (isa<SCEVCouldNotCompute>(ExitCountOuter)) { | ||
| LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n"); | ||
| return false; | ||
|
|
@@ -545,7 +548,8 @@ class LoopInterchangeProfitability { | |
| /// Check if the loop interchange is profitable. | ||
| bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop, | ||
| unsigned InnerLoopId, unsigned OuterLoopId, | ||
| CharMatrix &DepMatrix, CacheCostManager &CCM); | ||
| CharMatrix &DepMatrix, CacheCostManager &CCM, | ||
| std::map<const Loop *, const SCEV *> &LoopBTC); | ||
|
|
||
| private: | ||
| int getInstrOrderCost(); | ||
|
|
@@ -602,14 +606,17 @@ struct LoopInterchange { | |
| DependenceInfo *DI = nullptr; | ||
| DominatorTree *DT = nullptr; | ||
| LoopStandardAnalysisResults *AR = nullptr; | ||
|
|
||
| /// Interface to emit optimization remarks. | ||
| OptimizationRemarkEmitter *ORE; | ||
| // A cache to avoid recalculating the backedge-taken count for a loop. | ||
| std::map<const Loop *, const SCEV *> LoopBTC; | ||
Meinersbur marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI, | ||
| DominatorTree *DT, LoopStandardAnalysisResults *AR, | ||
| OptimizationRemarkEmitter *ORE) | ||
| : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE) {} | ||
| OptimizationRemarkEmitter *ORE, | ||
| std::map<const Loop *, const SCEV *> &&LoopBTC) | ||
| : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE), | ||
| LoopBTC(std::move(LoopBTC)) {} | ||
|
|
||
| bool run(Loop *L) { | ||
| if (L->getParentLoop()) | ||
|
|
@@ -701,7 +708,7 @@ struct LoopInterchange { | |
| LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n"); | ||
| LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE); | ||
| if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId, | ||
| DependencyMatrix, CCM)) { | ||
| DependencyMatrix, CCM, LoopBTC)) { | ||
| LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n"); | ||
| return false; | ||
| } | ||
|
|
@@ -1461,7 +1468,30 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization( | |
|
|
||
| bool LoopInterchangeProfitability::isProfitable( | ||
| const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId, | ||
| unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) { | ||
| unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM, | ||
| std::map<const Loop *, const SCEV *> &LoopBTC) { | ||
|
|
||
| auto *InnerBTC = LoopBTC[InnerLoop]; | ||
| auto *OuterBTC = LoopBTC[OuterLoop]; | ||
| assert(InnerBTC && OuterBTC && | ||
| "Loop BTC should exist in cache but not found"); | ||
| // A loop whose backedge is never taken, e.g. one whose latch branches on a | ||
| // constant condition, isn't really a loop and we don't want to consider it as a | ||
|
||
| // candidate. | ||
| // TODO: when interchange is forced, we should probably also allow | ||
| // interchange for these loops, and thus this logic should be moved just | ||
| // below the cost-model ignore check below. But this check is done first | ||
| // to avoid the issue in #163954. | ||
| if (InnerBTC && InnerBTC->isZero()) { | ||
| LLVM_DEBUG(dbgs() << "Inner loop back-edge isn't taken, rejecting " | ||
| "single iteration loop\n"); | ||
| return false; | ||
| } | ||
| if (OuterBTC && OuterBTC->isZero()) { | ||
| LLVM_DEBUG(dbgs() << "Outer loop back-edge isn't taken, rejecting " | ||
| "single iteration loop\n"); | ||
| return false; | ||
| } | ||
|
|
||
| // Return true if interchange is forced and the cost-model ignored. | ||
| if (Profitabilities.size() == 1 && Profitabilities[0] == RuleTy::Ignore) | ||
|
|
@@ -2098,6 +2128,7 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN, | |
| LPMUpdater &U) { | ||
| Function &F = *LN.getParent(); | ||
| SmallVector<Loop *, 8> LoopList(LN.getLoops()); | ||
| std::map<const Loop *, const SCEV *> LoopBTC; | ||
|
||
|
|
||
| if (MaxMemInstrCount < 1) { | ||
| LLVM_DEBUG(dbgs() << "MaxMemInstrCount should be at least 1"); | ||
|
|
@@ -2109,7 +2140,7 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN, | |
| if (!hasSupportedLoopDepth(LoopList, ORE)) | ||
| return PreservedAnalyses::all(); | ||
| // Ensure computable loop nest. | ||
| if (!isComputableLoopNest(&AR.SE, LoopList)) { | ||
| if (!isComputableLoopNest(&AR.SE, LoopList, LoopBTC)) { | ||
| LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n"); | ||
| return PreservedAnalyses::all(); | ||
| } | ||
|
|
@@ -2122,7 +2153,9 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN, | |
| }); | ||
|
|
||
| DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); | ||
| if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &AR, &ORE).run(LN)) | ||
| if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &AR, &ORE, | ||
| std::move(LoopBTC)) | ||
| .run(LN)) | ||
| return PreservedAnalyses::all(); | ||
| U.markLoopNestChanged(true); | ||
| return getLoopPassPreservedAnalyses(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,73 @@ | ||||||
| ; RUN: opt < %s -passes=loop-interchange -verify-dom-info -verify-loop-info \ | ||||||
| ; RUN: -pass-remarks-output=%t -pass-remarks='loop-interchange' -S | ||||||
| ; RUN: cat %t | FileCheck %s | ||||||
|
|
||||||
| target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||||||
|
|
||||||
| @D = common global [100 x [100 x [100 x i32]]] zeroinitializer | ||||||
|
|
||||||
| ; Test for interchange in | ||||||
|
||||||
| ; | ||||||
| ; for(int i=0;i<1;i++) | ||||||
| ; for(int j=0;j<100;j++) | ||||||
| ; for(int k=0;k<100;k++) | ||||||
| ; D[i][k][j] = D[i][k][j]+t; | ||||||
| ; | ||||||
|
|
||||||
| ; CHECK: --- !Analysis | ||||||
| ; CHECK-NEXT: Pass: loop-interchange | ||||||
| ; CHECK-NEXT: Name: Dependence | ||||||
| ; CHECK-NEXT: Function: interchange_i_and_j | ||||||
| ; CHECK-NEXT: Args: | ||||||
| ; CHECK-NEXT: - String: Computed dependence info, invoking the transform. | ||||||
| ; CHECK-NEXT: ... | ||||||
| ; CHECK-NEXT: --- !Passed | ||||||
| ; CHECK-NEXT: Pass: loop-interchange | ||||||
| ; CHECK-NEXT: Name: Interchanged | ||||||
| ; CHECK-NEXT: Function: interchange_i_and_j | ||||||
| ; CHECK-NEXT: Args: | ||||||
| ; CHECK-NEXT: - String: Loop interchanged with enclosing loop. | ||||||
| ; CHECK-NEXT: ... | ||||||
| ; CHECK-NEXT: --- !Missed | ||||||
| ; CHECK-NEXT: Pass: loop-interchange | ||||||
| ; CHECK-NEXT: Name: InterchangeNotProfitable | ||||||
| ; CHECK-NEXT: Function: interchange_i_and_j | ||||||
| ; CHECK-NEXT: Args: | ||||||
| ; CHECK-NEXT: - String: Insufficient information to calculate the cost of loop for interchange. | ||||||
| ; CHECK-NEXT: ... | ||||||
|
|
||||||
| define void @interchange_i_and_j(i32 %t){ | ||||||
| entry: | ||||||
| br label %outer.header | ||||||
|
|
||||||
| outer.header: | ||||||
| %i = phi i64 [ 0, %entry ], [ %inc16, %for.inc15 ] | ||||||
| br label %inner1.header | ||||||
|
|
||||||
| inner1.header: | ||||||
| %j = phi i64 [ 0, %outer.header ], [ %inc13, %for.inc12 ] | ||||||
| br label %inner2.body | ||||||
|
|
||||||
| inner2.body: | ||||||
| %k = phi i64 [ 0, %inner1.header ], [ %inc, %inner2.body ] | ||||||
| %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], ptr @D, i64 0, i64 %i, i64 %k, i64 %j | ||||||
|
||||||
| %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], ptr @D, i64 0, i64 %i, i64 %k, i64 %j | |
| %arrayidx8 = getelementptr inbounds [100 x [100 x i32]], ptr @D, i64 %i, i64 %k, i64 %j |
I remember hearing somewhere that this is the canonical form.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe unnecessary?