diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index b18aceaa67d77..0192ce6a960ff 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -499,9 +499,8 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L); const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L); - unsigned EstimatedLoopInvocationWeight = 0; std::optional<unsigned> OriginalTripCount = - llvm::getLoopEstimatedTripCount(L, &EstimatedLoopInvocationWeight); + llvm::getLoopEstimatedTripCount(L); // Effectively "DCE" unrolled iterations that are beyond the max tripcount // and will never be executed. @@ -1132,10 +1131,38 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // We shouldn't try to use `L` anymore. L = nullptr; } else if (OriginalTripCount) { - // Update the trip count. Note that the remainder has already logic - // computing it in `UnrollRuntimeLoopRemainder`. - setLoopEstimatedTripCount(L, *OriginalTripCount / ULO.Count, - EstimatedLoopInvocationWeight); + // Update metadata for the loop's branch weights and estimated trip count: + // - If ULO.Runtime, UnrollRuntimeLoopRemainder sets the guard branch + // weights, latch branch weights, and estimated trip count of the + // remainder loop it creates. It also sets the branch weights for the + // unrolled loop guard it creates. The branch weights for the unrolled + // loop latch are adjusted below. FIXME: Actually handle ULO.Runtime. + // - Otherwise, if unrolled loop iteration latches become unconditional, + // branch weights are adjusted above. FIXME: Actually handle such + // unconditional latches. + // - Otherwise, the original loop's branch weights are correct for the + // unrolled loop, so do not adjust them. + // - In all cases, the unrolled loop's estimated trip count is set below. 
+ // + // As an example of the last case, consider what happens if the unroll count + // is 4 for a loop with an estimated trip count of 10 when we do not create + // a remainder loop and all iterations' latches remain conditional. Each + // unrolled iteration's latch still has the same probability of exiting the + // loop as it did when in the original loop, and thus it should still have + // the same branch weights. Each unrolled iteration's non-zero probability + // of exiting already appropriately reduces the probability of reaching the + // remaining iterations just as it did in the original loop. Trying to also + // adjust the branch weights of the final unrolled iteration's latch (i.e., + // the backedge for the unrolled loop as a whole) to reflect its new trip + // count of 3 will erroneously further reduce its block frequencies. + // However, in case an analysis later needs to estimate the trip count of + // the unrolled loop as a whole without considering the branch weights for + // each unrolled iteration's latch within it, we store the new trip count as + // separate metadata. + unsigned NewTripCount = *OriginalTripCount / ULO.Count; + if (!ULO.Runtime && *OriginalTripCount % ULO.Count) + NewTripCount += 1; + setLoopEstimatedTripCount(L, NewTripCount); } // LoopInfo should not be valid, confirm that. 
diff --git a/llvm/test/Transforms/LoopUnroll/peel-branch-weights-freq.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel.ll similarity index 100% rename from llvm/test/Transforms/LoopUnroll/peel-branch-weights-freq.ll rename to llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel.ll diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll new file mode 100644 index 0000000000000..cde9d46ee8421 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll @@ -0,0 +1,68 @@ +; Test branch weight metadata, estimated trip count metadata, and block +; frequencies after partial loop unrolling without -unroll-runtime. + +; RUN: opt < %s -S -passes='print<block-freq>' 2>&1 | \ +; RUN: FileCheck -check-prefix=CHECK %s + +; The -implicit-check-not options make sure that no additional labels or calls +; to @f show up. +; RUN: opt < %s -S -passes='loop-unroll,print<block-freq>' \ +; RUN: -unroll-count=4 2>&1 | \ +; RUN: FileCheck %s -check-prefix=CHECK-UR \ +; RUN: -implicit-check-not='{{^( *- )?[^ ;]*:}}' \ +; RUN: -implicit-check-not='call void @f' + +; CHECK: block-frequency-info: test +; CHECK: do.body: float = 10.0, + +; The sum should still be ~10. 
+; +; CHECK-UR: block-frequency-info: test +; CHECK-UR: - [[ENTRY:.*]]: +; CHECK-UR: - [[DO_BODY:.*]]: float = 2.9078, +; CHECK-UR: - [[DO_BODY_1:.*]]: float = 2.617, +; CHECK-UR: - [[DO_BODY_2:.*]]: float = 2.3553, +; CHECK-UR: - [[DO_BODY_3:.*]]: float = 2.1198, +; CHECK-UR: - [[DO_END:.*]]: + +declare void @f(i32) + +define void @test(i32 %n) { +; CHECK-UR-LABEL: define void @test(i32 %{{.*}}) { +; CHECK-UR: [[ENTRY]]: +; CHECK-UR: br label %[[DO_BODY]] +; CHECK-UR: [[DO_BODY]]: +; CHECK-UR: call void @f +; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY_1]], !prof ![[#PROF:]] +; CHECK-UR: [[DO_BODY_1]]: +; CHECK-UR: call void @f +; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY_2]], !prof ![[#PROF]] +; CHECK-UR: [[DO_BODY_2]]: +; CHECK-UR: call void @f +; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY_3]], !prof ![[#PROF]] +; CHECK-UR: [[DO_BODY_3]]: +; CHECK-UR: call void @f +; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY]], !prof ![[#PROF]], !llvm.loop ![[#LOOP_UR_LATCH:]] +; CHECK-UR: [[DO_END]]: +; CHECK-UR: ret void + +entry: + br label %do.body + +do.body: + %i = phi i32 [ 0, %entry ], [ %inc, %do.body ] + %inc = add i32 %i, 1 + call void @f(i32 %i) + %c = icmp sge i32 %inc, %n + br i1 %c, label %do.end, label %do.body, !prof !0 + +do.end: + ret void +} + +!0 = !{!"branch_weights", i32 1, i32 9} + +; CHECK-UR: ![[#PROF]] = !{!"branch_weights", i32 1, i32 9} +; CHECK-UR: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; CHECK-UR: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3} +; CHECK-UR: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"} diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll index 26171990a2592..db87143286f93 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll 
@@ -6,7 +6,10 @@ ; CHECK: br i1 [[COND1:%.*]], label %for.end.loopexit.unr-lcssa, label %for.body, !prof ![[#PROF:]], !llvm.loop ![[#LOOP:]] ; CHECK-LABEL: for.body.epil: ; CHECK: br i1 [[COND2:%.*]], label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !prof ![[#PROF2:]], !llvm.loop ![[#LOOP2:]] -; CHECK: ![[#PROF]] = !{!"branch_weights", i32 1, i32 2499} + +; FIXME: These branch weights are incorrect and should not be merged into main +; until PR #159163, which fixes them, lands. +; CHECK: ![[#PROF]] = !{!"branch_weights", i32 1, i32 9999} ; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 1, i32 1} define i3 @test(ptr %a, i3 %n) { diff --git a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll index 611ee5fb5807e..1cd70f1d1dfd3 100644 --- a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll +++ b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll @@ -60,5 +60,7 @@ loop.end: !1 = !{!"function_entry_count", i64 1} !2 = !{!"branch_weights", i32 1, i32 1000} -; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 124} +; FIXME: These branch weights are incorrect and should not be merged into main +; until PR #159163, which fixes them, lands. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1000} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 1}