diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index e1dcaa85a5780..d2dbe293e028a 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -54,6 +54,7 @@ using namespace llvm::SCEVPatternMatch; STATISTIC(NumPeeled, "Number of loops peeled"); STATISTIC(NumPeeledEnd, "Number of loops peeled from end"); +namespace llvm { static cl::opt<unsigned> UnrollPeelCount( "unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes")); @@ -87,6 +88,9 @@ static cl::opt<bool> EnablePeelingForIV( static const char *PeeledCountMetaData = "llvm.loop.peeled.count"; +extern cl::opt<bool> ProfcheckDisableMetadataFixes; +} // namespace llvm + // Check whether we are capable of peeling this loop. bool llvm::canPeel(const Loop *L) { // Make sure the loop is in simplified form @@ -1190,7 +1194,24 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, IRBuilder<> B(PreHeaderBR); Value *Cond = B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0)); - B.CreateCondBr(Cond, NewPreHeader, InsertTop); + auto *BI = B.CreateCondBr(Cond, NewPreHeader, InsertTop); + SmallVector<uint32_t> Weights; + auto *OrigLatchBr = Latch->getTerminator(); + auto HasBranchWeights = !ProfcheckDisableMetadataFixes && + extractBranchWeights(*OrigLatchBr, Weights); + if (HasBranchWeights) { + // The probability that the new guard skips the loop to execute just one + // iteration is the original loop's probability of exiting at the latch + // after any iteration. That should maintain the original loop body + // frequency. Upon arriving at the loop, due to the guard, the + // probability of reaching iteration i of the new loop is the + // probability of reaching iteration i+1 of the original loop. The + // probability of reaching the peeled iteration is 1, which is the + // probability of reaching iteration 0 of the original loop. 
+ if (L->getExitBlock() == OrigLatchBr->getSuccessor(0)) + std::swap(Weights[0], Weights[1]); + setBranchWeights(*BI, Weights, /*IsExpected=*/false); + } PreHeaderBR->eraseFromParent(); // PreHeader now dominates InsertTop. diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-bfi.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-bfi.ll new file mode 100644 index 0000000000000..43e2cd8dcd89c --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-bfi.ll @@ -0,0 +1,66 @@ +; Disable this test in profcheck because the first run would cause profcheck to fail. +; REQUIRES: !profcheck +; RUN: opt -p "print<block-freq>,loop-unroll,print<block-freq>" -scev-cheap-expansion-budget=3 -S %s -profcheck-disable-metadata-fixes 2>&1 | FileCheck %s --check-prefixes=COMMON,BAD +; RUN: opt -p "print<block-freq>,loop-unroll,print<block-freq>" -scev-cheap-expansion-budget=3 -S %s 2>&1 | FileCheck %s --check-prefixes=COMMON,GOOD + +define i32 @test_expansion_cost_2(i32 %start, i32 %end) !prof !0 { +entry: + %sub = add i32 %end, -1 + br label %loop.header + +loop.header: + %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ] + %c = icmp eq i32 %iv, %sub + br i1 %c, label %then, label %loop.latch, !prof !1 + +then: + br label %loop.latch + +loop.latch: + %iv.next = add nsw i32 %iv, 1 + %ec = icmp eq i32 %iv.next, %end + br i1 %ec, label %exit, label %loop.header, !prof !2 + +exit: + ret i32 0 +} + +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 2, i32 3} +!2 = !{!"branch_weights", i32 1, i32 50} + +; COMMON: block-frequency-info: test_expansion_cost_2 +; COMMON-NEXT: entry: float = 1.0 +; COMMON-NEXT: loop.header: float = 51.0 +; COMMON-NEXT: then: float = 20.4 +; COMMON-NEXT: loop.latch: float = 51.0 +; COMMON-NEXT: exit: float = 1.0 + +; COMMON: block-frequency-info: test_expansion_cost_2 +; GOOD-NEXT: entry: float = 1.0 +; GOOD-NEXT: entry.split: float = 0.98039 +; GOOD-NEXT: loop.header: float = 50.0 +; GOOD-NEXT: then: float = 20.0 +; GOOD-NEXT: loop.latch: float = 
50.0 +; GOOD-NEXT: exit.peel.begin.loopexit: float = 0.98039 +; GOOD-NEXT: exit.peel.begin: float = 1.0 +; GOOD-NEXT: loop.header.peel: float = 1.0 +; GOOD-NEXT: then.peel: float = 0.4 +; GOOD-NEXT: loop.latch.peel: float = 1.0 +; GOOD-NEXT: exit.peel.next: float = 1.0 +; GOOD-NEXT: loop.header.peel.next: float = 1.0 +; GOOD-NEXT: exit: float = 1.0 + +; BAD-NEXT: entry: float = 1.0 +; BAD-NEXT: entry.split: float = 0.625 +; BAD-NEXT: loop.header: float = 31.875 +; BAD-NEXT: then: float = 12.75 +; BAD-NEXT: loop.latch: float = 31.875 +; BAD-NEXT: exit.peel.begin.loopexit: float = 0.625 +; BAD-NEXT: exit.peel.begin: float = 1.0 +; BAD-NEXT: loop.header.peel: float = 1.0 +; BAD-NEXT: then.peel: float = 0.4 +; BAD-NEXT: loop.latch.peel: float = 1.0 +; BAD-NEXT: exit.peel.next: float = 1.0 +; BAD-NEXT: loop.header.peel.next: float = 1.0 +; BAD-NEXT: exit: float = 1.0 \ No newline at end of file diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll index f3910f9bfc399..e8f58f5c4debd 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll @@ -1,46 +1,46 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 ; RUN: opt -p loop-unroll -scev-cheap-expansion-budget=2 -S %s | FileCheck --check-prefix=BUDGET2 %s ; RUN: opt -p loop-unroll -scev-cheap-expansion-budget=3 -S %s | FileCheck --check-prefix=BUDGET3 %s -define i32 @test_expansion_cost_2(i32 %start, i32 %end) { +define i32 @test_expansion_cost_2(i32 %start, i32 %end) !prof !0 { ; BUDGET2-LABEL: define i32 @test_expansion_cost_2( -; BUDGET2-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) { +; BUDGET2-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) !prof [[PROF0:![0-9]+]] { 
; BUDGET2-NEXT: [[ENTRY:.*]]: ; BUDGET2-NEXT: [[SUB:%.*]] = add i32 [[END]], -1 ; BUDGET2-NEXT: br label %[[LOOP_HEADER:.*]] ; BUDGET2: [[LOOP_HEADER]]: ; BUDGET2-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; BUDGET2-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]] -; BUDGET2-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] +; BUDGET2-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]], !prof [[PROF1:![0-9]+]] ; BUDGET2: [[THEN]]: ; BUDGET2-NEXT: br label %[[LOOP_LATCH]] ; BUDGET2: [[LOOP_LATCH]]: ; BUDGET2-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; BUDGET2-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[END]] -; BUDGET2-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]] +; BUDGET2-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !prof [[PROF2:![0-9]+]], !llvm.loop [[LOOP3:![0-9]+]] ; BUDGET2: [[EXIT]]: ; BUDGET2-NEXT: ret i32 0 ; ; BUDGET3-LABEL: define i32 @test_expansion_cost_2( -; BUDGET3-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) { +; BUDGET3-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) !prof [[PROF0:![0-9]+]] { ; BUDGET3-NEXT: [[ENTRY:.*]]: ; BUDGET3-NEXT: [[SUB:%.*]] = add i32 [[END]], -1 ; BUDGET3-NEXT: [[TMP0:%.*]] = sub i32 [[SUB]], [[START]] ; BUDGET3-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0 -; BUDGET3-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]] +; BUDGET3-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]], !prof [[PROF1:![0-9]+]] ; BUDGET3: [[ENTRY_SPLIT]]: ; BUDGET3-NEXT: br label %[[LOOP_HEADER:.*]] ; BUDGET3: [[LOOP_HEADER]]: ; BUDGET3-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; BUDGET3-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]] -; BUDGET3-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] +; BUDGET3-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]], !prof [[PROF2:![0-9]+]] ; BUDGET3: [[THEN]]: ; BUDGET3-NEXT: br 
label %[[LOOP_LATCH]] ; BUDGET3: [[LOOP_LATCH]]: ; BUDGET3-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; BUDGET3-NEXT: [[TMP2:%.*]] = sub i32 [[END]], 1 ; BUDGET3-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP2]] -; BUDGET3-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]] +; BUDGET3-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]] ; BUDGET3: [[EXIT_PEEL_BEGIN_LOOPEXIT]]: ; BUDGET3-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP_LATCH]] ] ; BUDGET3-NEXT: br label %[[EXIT_PEEL_BEGIN]] @@ -49,13 +49,13 @@ define i32 @test_expansion_cost_2(i32 %start, i32 %end) { ; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL:.*]] ; BUDGET3: [[LOOP_HEADER_PEEL]]: ; BUDGET3-NEXT: [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]] -; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]] +; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]], !prof [[PROF2]] ; BUDGET3: [[THEN_PEEL]]: ; BUDGET3-NEXT: br label %[[LOOP_LATCH_PEEL]] ; BUDGET3: [[LOOP_LATCH_PEEL]]: ; BUDGET3-NEXT: [[IV_NEXT_PEEL:%.*]] = add nsw i32 [[TMP3]], 1 ; BUDGET3-NEXT: [[EC_PEEL:%.*]] = icmp eq i32 [[IV_NEXT_PEEL]], [[END]] -; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]] +; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]], !prof [[PROF3]] ; BUDGET3: [[EXIT_PEEL_NEXT]]: ; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL_NEXT:.*]] ; BUDGET3: [[LOOP_HEADER_PEEL_NEXT]]: @@ -70,7 +70,7 @@ entry: loop.header: %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ] %c = icmp eq i32 %iv, %sub - br i1 %c, label %then, label %loop.latch + br i1 %c, label %then, label %loop.latch, !prof !1 then: br label %loop.latch @@ -78,12 +78,29 @@ then: loop.latch: %iv.next = add nsw i32 %iv, 1 %ec = icmp eq i32 %iv.next, %end - br i1 %ec, label 
%exit, label %loop.header + br i1 %ec, label %exit, label %loop.header, !prof !2, !llvm.loop !3 exit: ret i32 0 } + +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 2, i32 3} +!2 = !{!"branch_weights", i32 1, i32 10} +!3 = distinct !{!3, !4} +!4 = !{!"llvm.loop.estimated_trip_count", i32 11} +;. +; BUDGET2: [[PROF0]] = !{!"function_entry_count", i32 10} +; BUDGET2: [[PROF1]] = !{!"branch_weights", i32 2, i32 3} +; BUDGET2: [[PROF2]] = !{!"branch_weights", i32 1, i32 10} +; BUDGET2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +; BUDGET2: [[META4]] = !{!"llvm.loop.estimated_trip_count", i32 11} ;. -; BUDGET3: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]} -; BUDGET3: [[META1]] = !{!"llvm.loop.peeled.count", i32 1} +; BUDGET3: [[PROF0]] = !{!"function_entry_count", i32 10} +; BUDGET3: [[PROF1]] = !{!"branch_weights", i32 10, i32 1} +; BUDGET3: [[PROF2]] = !{!"branch_weights", i32 2, i32 3} +; BUDGET3: [[PROF3]] = !{!"branch_weights", i32 1, i32 10} +; BUDGET3: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]} +; BUDGET3: [[META5]] = !{!"llvm.loop.peeled.count", i32 1} +; BUDGET3: [[META6]] = !{!"llvm.loop.estimated_trip_count", i32 10} ;. diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index 94cf8bc358514..e374b2c2798aa 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -49,6 +49,7 @@ config.excludes = ["Inputs", "CMakeLists.txt", "README.txt", "LICENSE.txt"] if config.enable_profcheck: + config.available_features.add("profcheck") # Exclude llvm-reduce tests for profcheck because we substitute the FileCheck # binary with a no-op command for profcheck, but llvm-reduce tests have RUN # commands of the form llvm-reduce --test FileCheck, which explode if we