-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LP] Assign weights when peeling last iteration. #166858
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[LP] Assign weights when peeling last iteration. #166858
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-llvm-transforms Author: Mircea Trofin (mtrofin) ChangesAssigning weights when we peel the last iteration. The probability of going into the loop or exiting should stay the same. Looking at the BFI of before/after the peel, we notice that, indeed, the total count of the blocks corresponding to e.g. the header stays the same. Full diff: https://github.com/llvm/llvm-project/pull/166858.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index e1dcaa85a5780..3c3ce7b73f305 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -54,6 +54,7 @@ using namespace llvm::SCEVPatternMatch;
STATISTIC(NumPeeled, "Number of loops peeled");
STATISTIC(NumPeeledEnd, "Number of loops peeled from end");
+namespace llvm {
static cl::opt<unsigned> UnrollPeelCount(
"unroll-peel-count", cl::Hidden,
cl::desc("Set the unroll peeling count, for testing purposes"));
@@ -87,6 +88,9 @@ static cl::opt<bool> EnablePeelingForIV(
static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
+
// Check whether we are capable of peeling this loop.
bool llvm::canPeel(const Loop *L) {
// Make sure the loop is in simplified form
@@ -1190,7 +1194,19 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
IRBuilder<> B(PreHeaderBR);
Value *Cond =
B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0));
- B.CreateCondBr(Cond, NewPreHeader, InsertTop);
+ auto *BI = B.CreateCondBr(Cond, NewPreHeader, InsertTop);
+ SmallVector<uint32_t> Weights;
+ auto *OrigLatchBr = Latch->getTerminator();
+ auto HasBranchWeights = !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*OrigLatchBr, Weights);
+ if (HasBranchWeights) {
+ // The probability of going into the loop or exiting should stay the
+ // same, but we may need to flip the weights. For BI, InsertTop
+ // (position 1) is towards the exit.
+ if (L->getExitBlock() == OrigLatchBr->getSuccessor(0))
+ std::swap(Weights[0], Weights[1]);
+ setBranchWeights(*BI, Weights, /*IsExpected=*/false);
+ }
PreHeaderBR->eraseFromParent();
// PreHeader now dominates InsertTop.
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-bfi.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-bfi.ll
new file mode 100644
index 0000000000000..38fece4d56e34
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-bfi.ll
@@ -0,0 +1,64 @@
+; RUN: opt -p "print<block-freq>,loop-unroll,print<block-freq>" -scev-cheap-expansion-budget=3 -S %s -profcheck-disable-metadata-fixes 2>&1 | FileCheck %s --check-prefixes=COMMON,BAD
+; RUN: opt -p "print<block-freq>,loop-unroll,print<block-freq>" -scev-cheap-expansion-budget=3 -S %s 2>&1 | FileCheck %s --check-prefixes=COMMON,GOOD
+
+define i32 @test_expansion_cost_2(i32 %start, i32 %end) !prof !0 {
+entry:
+ %sub = add i32 %end, -1
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ]
+ %c = icmp eq i32 %iv, %sub
+ br i1 %c, label %then, label %loop.latch, !prof !1
+
+then:
+ br label %loop.latch
+
+loop.latch:
+ %iv.next = add nsw i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, %end
+ br i1 %ec, label %exit, label %loop.header, !prof !2
+
+exit:
+ ret i32 0
+}
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 2, i32 3}
+!2 = !{!"branch_weights", i32 1, i32 50}
+
+; COMMON: block-frequency-info: test_expansion_cost_2
+; COMMON-NEXT: entry: float = 1.0
+; COMMON-NEXT: loop.header: float = 51.0
+; COMMON-NEXT: then: float = 20.4
+; COMMON-NEXT: loop.latch: float = 51.0
+; COMMON-NEXT: exit: float = 1.0
+
+; COMMON: block-frequency-info: test_expansion_cost_2
+; GOOD-NEXT: entry: float = 1.0
+; GOOD-NEXT: entry.split: float = 0.98039
+; GOOD-NEXT: loop.header: float = 50.0
+; GOOD-NEXT: then: float = 20.0
+; GOOD-NEXT: loop.latch: float = 50.0
+; GOOD-NEXT: exit.peel.begin.loopexit: float = 0.98039
+; GOOD-NEXT: exit.peel.begin: float = 1.0
+; GOOD-NEXT: loop.header.peel: float = 1.0
+; GOOD-NEXT: then.peel: float = 0.4
+; GOOD-NEXT: loop.latch.peel: float = 1.0
+; GOOD-NEXT: exit.peel.next: float = 1.0
+; GOOD-NEXT: loop.header.peel.next: float = 1.0
+; GOOD-NEXT: exit: float = 1.0
+
+; BAD-NEXT: entry: float = 1.0
+; BAD-NEXT: entry.split: float = 0.625
+; BAD-NEXT: loop.header: float = 31.875
+; BAD-NEXT: then: float = 12.75
+; BAD-NEXT: loop.latch: float = 31.875
+; BAD-NEXT: exit.peel.begin.loopexit: float = 0.625
+; BAD-NEXT: exit.peel.begin: float = 1.0
+; BAD-NEXT: loop.header.peel: float = 1.0
+; BAD-NEXT: then.peel: float = 0.4
+; BAD-NEXT: loop.latch.peel: float = 1.0
+; BAD-NEXT: exit.peel.next: float = 1.0
+; BAD-NEXT: loop.header.peel.next: float = 1.0
+; BAD-NEXT: exit: float = 1.0
\ No newline at end of file
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll
index f3910f9bfc399..9b1e08c8ca526 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll
@@ -1,46 +1,46 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt -p loop-unroll -scev-cheap-expansion-budget=2 -S %s | FileCheck --check-prefix=BUDGET2 %s
; RUN: opt -p loop-unroll -scev-cheap-expansion-budget=3 -S %s | FileCheck --check-prefix=BUDGET3 %s
-define i32 @test_expansion_cost_2(i32 %start, i32 %end) {
+define i32 @test_expansion_cost_2(i32 %start, i32 %end) !prof !0 {
; BUDGET2-LABEL: define i32 @test_expansion_cost_2(
-; BUDGET2-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) {
+; BUDGET2-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) !prof [[PROF0:![0-9]+]] {
; BUDGET2-NEXT: [[ENTRY:.*]]:
; BUDGET2-NEXT: [[SUB:%.*]] = add i32 [[END]], -1
; BUDGET2-NEXT: br label %[[LOOP_HEADER:.*]]
; BUDGET2: [[LOOP_HEADER]]:
; BUDGET2-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; BUDGET2-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
-; BUDGET2-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
+; BUDGET2-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]], !prof [[PROF1:![0-9]+]]
; BUDGET2: [[THEN]]:
; BUDGET2-NEXT: br label %[[LOOP_LATCH]]
; BUDGET2: [[LOOP_LATCH]]:
; BUDGET2-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
; BUDGET2-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[END]]
-; BUDGET2-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
+; BUDGET2-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !prof [[PROF2:![0-9]+]]
; BUDGET2: [[EXIT]]:
; BUDGET2-NEXT: ret i32 0
;
; BUDGET3-LABEL: define i32 @test_expansion_cost_2(
-; BUDGET3-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) {
+; BUDGET3-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) !prof [[PROF0:![0-9]+]] {
; BUDGET3-NEXT: [[ENTRY:.*]]:
; BUDGET3-NEXT: [[SUB:%.*]] = add i32 [[END]], -1
; BUDGET3-NEXT: [[TMP0:%.*]] = sub i32 [[SUB]], [[START]]
; BUDGET3-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-; BUDGET3-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]]
+; BUDGET3-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]], !prof [[PROF1:![0-9]+]]
; BUDGET3: [[ENTRY_SPLIT]]:
; BUDGET3-NEXT: br label %[[LOOP_HEADER:.*]]
; BUDGET3: [[LOOP_HEADER]]:
; BUDGET3-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; BUDGET3-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
-; BUDGET3-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
+; BUDGET3-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]], !prof [[PROF2:![0-9]+]]
; BUDGET3: [[THEN]]:
; BUDGET3-NEXT: br label %[[LOOP_LATCH]]
; BUDGET3: [[LOOP_LATCH]]:
; BUDGET3-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
; BUDGET3-NEXT: [[TMP2:%.*]] = sub i32 [[END]], 1
; BUDGET3-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP2]]
-; BUDGET3-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
+; BUDGET3-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
; BUDGET3: [[EXIT_PEEL_BEGIN_LOOPEXIT]]:
; BUDGET3-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP_LATCH]] ]
; BUDGET3-NEXT: br label %[[EXIT_PEEL_BEGIN]]
@@ -49,13 +49,13 @@ define i32 @test_expansion_cost_2(i32 %start, i32 %end) {
; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL:.*]]
; BUDGET3: [[LOOP_HEADER_PEEL]]:
; BUDGET3-NEXT: [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]]
-; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]]
+; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]], !prof [[PROF2]]
; BUDGET3: [[THEN_PEEL]]:
; BUDGET3-NEXT: br label %[[LOOP_LATCH_PEEL]]
; BUDGET3: [[LOOP_LATCH_PEEL]]:
; BUDGET3-NEXT: [[IV_NEXT_PEEL:%.*]] = add nsw i32 [[TMP3]], 1
; BUDGET3-NEXT: [[EC_PEEL:%.*]] = icmp eq i32 [[IV_NEXT_PEEL]], [[END]]
-; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
+; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]], !prof [[PROF3]]
; BUDGET3: [[EXIT_PEEL_NEXT]]:
; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL_NEXT:.*]]
; BUDGET3: [[LOOP_HEADER_PEEL_NEXT]]:
@@ -70,7 +70,7 @@ entry:
loop.header:
%iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ]
%c = icmp eq i32 %iv, %sub
- br i1 %c, label %then, label %loop.latch
+ br i1 %c, label %then, label %loop.latch, !prof !1
then:
br label %loop.latch
@@ -78,12 +78,25 @@ then:
loop.latch:
%iv.next = add nsw i32 %iv, 1
%ec = icmp eq i32 %iv.next, %end
- br i1 %ec, label %exit, label %loop.header
+ br i1 %ec, label %exit, label %loop.header, !prof !2
exit:
ret i32 0
}
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 2, i32 3}
+!2 = !{!"branch_weights", i32 1, i32 10}
+;.
+; BUDGET2: [[PROF0]] = !{!"function_entry_count", i32 10}
+; BUDGET2: [[PROF1]] = !{!"branch_weights", i32 2, i32 3}
+; BUDGET2: [[PROF2]] = !{!"branch_weights", i32 1, i32 10}
;.
-; BUDGET3: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
-; BUDGET3: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
+; BUDGET3: [[PROF0]] = !{!"function_entry_count", i32 10}
+; BUDGET3: [[PROF1]] = !{!"branch_weights", i32 10, i32 1}
+; BUDGET3: [[PROF2]] = !{!"branch_weights", i32 2, i32 3}
+; BUDGET3: [[PROF3]] = !{!"branch_weights", i32 1, i32 10}
+; BUDGET3: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]}
+; BUDGET3: [[META5]] = !{!"llvm.loop.peeled.count", i32 1}
+; BUDGET3: [[META6]] = !{!"llvm.loop.estimated_trip_count", i32 10}
;.
|
0823bb2 to
1da3c16
Compare
1da3c16 to
35a769f
Compare
35a769f to
aeacb0b
Compare
aeacb0b to
1184128
Compare
| extractBranchWeights(*OrigLatchBr, Weights); | ||
| if (HasBranchWeights) { | ||
| // The probability of going into the loop or exiting should stay the | ||
| // same, but we may need to flip the weights. For BI, InsertTop |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Approximately the same?
The exit probability should change from 1 / LoopTripCount to 1 / (LoopTripCount - 1) after peeling, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.

Assigning weights when we peel the last iteration. The probability of going into the loop or exiting should stay the same. Looking at the BFI of before/after the peel, we notice that, indeed, the total count of the blocks corresponding to e.g. the header stays the same.
Issue #147390