From 92337a6c4584b4920dfdcf7e5f0792d50f1de19d Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 17 Oct 2025 16:43:02 -0700 Subject: [PATCH] [SLU][profcheck] Estimate branch weights in partial unswitch cases --- .../Transforms/Scalar/SimpleLoopUnswitch.cpp | 60 ++++++- .../nontrivial-unswitch-profile.ll | 89 ++++++++++ .../Transforms/SimpleLoopUnswitch/pr60736.ll | 11 +- .../simple-unswitch-profile.ll | 157 ++++++++++++++++++ 4 files changed, 305 insertions(+), 12 deletions(-) create mode 100644 llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll create mode 100644 llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 5af6c96c56a06..bb6c879f4d47e 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -81,6 +81,7 @@ STATISTIC( STATISTIC(NumInvariantConditionsInjected, "Number of invariant conditions injected and unswitched"); +namespace llvm { static cl::opt EnableNonTrivialUnswitch( "enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " @@ -131,11 +132,17 @@ static cl::opt InjectInvariantConditions( static cl::opt InjectInvariantConditionHotnesThreshold( "simple-loop-unswitch-inject-invariant-condition-hotness-threshold", - cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " - "unswitch on them to eliminate branches that are " - "not-taken 1/ times or less."), + cl::Hidden, + cl::desc("Only try to inject loop invariant conditions and " + "unswitch on them to eliminate branches that are " + "not-taken 1/ times or less."), cl::init(16)); +static cl::opt EstimateProfile("simple-loop-unswitch-estimate-profile", + cl::Hidden, cl::init(true)); +extern cl::opt ProfcheckDisableMetadataFixes; +} // namespace llvm + AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key; namespace { struct CompareDesc { @@ -268,13 +275,42 @@ static bool areLoopExitPHIsLoopInvariant(const Loop &L, llvm_unreachable("Basic blocks should never be empty!"); } -/// Copy a set of loop invariant values \p ToDuplicate and insert them at the +/// Copy a set of loop invariant values \p Invariants and insert them at the /// end of \p BB and conditionally branch on the copied condition. We only /// branch on a single value. +/// We attempt to estimate the profile of the resulting conditional branch from +/// \p ComputeProfFrom, which is the original conditional branch we're +/// unswitching. +/// When \p Direction is true, the \p Invariants form a disjunction, and the +/// branch conditioned on it exits the loop on the "true" case. When \p +/// Direction is false, the \p Invariants form a conjunction and the branch +/// exits on the "false" case. static void buildPartialUnswitchConditionalBranch( BasicBlock &BB, ArrayRef Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, - const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) { + const Instruction *I, AssumptionCache *AC, const DominatorTree &DT, + const BranchInst &ComputeProfFrom) { + + SmallVector BranchWeights; + bool HasBranchWeights = EstimateProfile && !ProfcheckDisableMetadataFixes && + extractBranchWeights(ComputeProfFrom, BranchWeights); + // If Direction is true, that means we had a disjunction and that the "true" + // case exits. The probability of the disjunction of the subset of terms is at + // most as high as the original one. So, if the probability is higher than the + // one we'd assign in absence of a profile (i.e. 0.5), we will use 0.5, + // but if it's lower, we will use the original probability. + // Conversely, if Direction is false, that means we had a conjunction, and the + // probability of exiting is captured in the second branch weight. That + // probability is a disjunction (of the negation of the original terms). The + // same reasoning applies as above. + // Issue #165649: should we expect BFI to conserve, and use that to calculate + // the branch weights? + if (HasBranchWeights && + static_cast(BranchWeights[Direction ? 0 : 1]) / + static_cast(sum_of(BranchWeights)) > + 0.5) + HasBranchWeights = false; + IRBuilder<> IRB(&BB); IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated()); @@ -287,8 +323,14 @@ static void buildPartialUnswitchConditionalBranch( Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants) : IRB.CreateAnd(FrozenInvariants); - IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, - Direction ? &NormalSucc : &UnswitchedSucc); + auto *BR = IRB.CreateCondBr( + Cond, Direction ? &UnswitchedSucc : &NormalSucc, + Direction ? &NormalSucc : &UnswitchedSucc, + HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof) + : nullptr); + if (!HasBranchWeights) + setExplicitlyUnknownBranchWeightsIfProfiled( + *BR, *BR->getParent()->getParent(), DEBUG_TYPE); } /// Copy a set of loop invariant values, and conditionally branch on them. @@ -658,7 +700,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, " condition!"); buildPartialUnswitchConditionalBranch( *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH, - FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT); + FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT, BI); } // Update the dominator tree with the added edge. @@ -2477,7 +2519,7 @@ static void unswitchNontrivialInvariants( else { buildPartialUnswitchConditionalBranch( *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, - FreezeLoopUnswitchCond, BI, &AC, DT); + FreezeLoopUnswitchCond, BI, &AC, DT, *BI); } DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll new file mode 100644 index 0000000000000..9cc417f6b874e --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll @@ -0,0 +1,89 @@ +; RUN: split-file %s %t +; RUN: cat %t/main.ll %t/probable-or.prof > %t/probable-or.ll +; RUN: cat %t/main.ll %t/probable-and.prof > %t/probable-and.ll +; RUN: opt -passes='loop(simple-loop-unswitch)' -S %t/probable-or.ll -o -| FileCheck %t/probable-or.prof +; RUN: opt -passes='loop(simple-loop-unswitch)' -S %t/probable-and.ll -o -| FileCheck %t/probable-and.prof + +;--- main.ll +declare i32 @a() +declare i32 @b() + +define i32 @or(ptr %ptr, i1 %cond) !prof !0 { +entry: + br label %loop_begin + +loop_begin: + %v1 = load i1, ptr %ptr + %cond_or = or i1 %v1, %cond + br i1 %cond_or, label %loop_a, label %loop_b, !prof !1 + +loop_a: + call i32 @a() + br label %latch + +loop_b: + call i32 @b() + br label %latch + +latch: + %v2 = load i1, ptr %ptr + br i1 %v2, label %loop_begin, label %loop_exit, !prof !2 + +loop_exit: + ret i32 0 +} + +define i32 @and(ptr %ptr, i1 %cond) !prof !0 { +entry: + br label %loop_begin + +loop_begin: + %v1 = load i1, ptr %ptr + %cond_and = and i1 %v1, %cond + br i1 %cond_and, label %loop_a, label %loop_b, !prof !1 + +loop_a: + call i32 @a() + br label %latch + +loop_b: + call i32 @b() + br label %latch + +latch: + %v2 = load i1, ptr %ptr + br i1 %v2, label %loop_begin, label %loop_exit, !prof !2 + +loop_exit: + ret i32 0 +} + +;--- probable-or.prof +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 1, i32 1000} +!2 = !{!"branch_weights", i32 5, i32 7} +; CHECK-LABEL: @or +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split, !prof !1 +; CHECK-LABEL: @and +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split, label %entry.split.us, !prof !3 +; CHECK: !1 = !{!"branch_weights", i32 1, i32 1000} +; CHECK: !3 = !{!"unknown", !"simple-loop-unswitch"} + +;--- probable-and.prof +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 1000, i32 1} +!2 = !{!"branch_weights", i32 5, i32 7} +; CHECK-LABEL: @or +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split, !prof !1 +; CHECK-LABEL: @and +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split, label %entry.split.us, !prof !3 +; CHECK: !1 = !{!"unknown", !"simple-loop-unswitch"} +; CHECK: !3 = !{!"branch_weights", i32 1000, i32 1} diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll b/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll index 0964c55d1dec6..3760be4b26f23 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -simple-loop-unswitch-inject-invariant-conditions=true -passes='loop(simple-loop-unswitch,loop-instsimplify)' -S | FileCheck %s define void @test() { @@ -7,7 +7,7 @@ define void @test() { ; CHECK-NEXT: [[TMP:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8 -; CHECK-NEXT: br i1 [[TMP]], label [[BB_SPLIT:%.*]], label [[BB3_SPLIT_US:%.*]] +; CHECK-NEXT: br i1 [[TMP]], label [[BB_SPLIT:%.*]], label [[BB3_SPLIT_US:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK: bb.split: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: @@ -19,7 +19,7 @@ define void @test() { ; CHECK-NEXT: [[TMP6_US:%.*]] = phi i32 [ poison, [[BB3_SPLIT_US]] ] ; CHECK-NEXT: [[TMP7_US:%.*]] = add nuw nsw i32 [[TMP6_US]], 2 ; CHECK-NEXT: [[TMP8_US:%.*]] = icmp ult i32 [[TMP7_US]], [[TMP2]] -; CHECK-NEXT: br i1 [[TMP8_US]], label [[BB9_US:%.*]], label [[BB16_SPLIT_US:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8_US]], label [[BB9_US:%.*]], label [[BB16_SPLIT_US:%.*]], !prof [[PROF0]] ; CHECK: bb9.us: ; CHECK-NEXT: br label [[BB17_SPLIT_US:%.*]] ; CHECK: bb16.split.us: @@ -96,3 +96,8 @@ declare i1 @llvm.experimental.widenable.condition() !0 = !{!"branch_weights", i32 1048576, i32 1} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite) } +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 1048576, i32 1} +;. diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll b/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll new file mode 100644 index 0000000000000..ec6baa5b3772f --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll @@ -0,0 +1,157 @@ +; RUN: split-file %s %t +; RUN: cat %t/main.ll %t/probable-or.prof > %t/probable-or.ll +; RUN: cat %t/main.ll %t/probable-and.prof > %t/probable-and.ll +; RUN: opt -passes='loop-mssa(simple-loop-unswitch)' -S %t/probable-or.ll -o - | FileCheck %t/probable-or.prof +; RUN: opt -passes='loop-mssa(simple-loop-unswitch)' -S %t/probable-and.ll -o - | FileCheck %t/probable-and.prof +; +; RUN: opt -passes='module(print),function(loop-mssa(simple-loop-unswitch)),module(print)' \ +; RUN: %t/probable-or.ll -disable-output -simple-loop-unswitch-estimate-profile=0 2>&1 | FileCheck %t/probable-or.prof --check-prefixes=PROFILE-COM,PROFILE-REF + +; RUN: opt -passes='module(print),function(loop-mssa(simple-loop-unswitch)),module(print)' \ +; RUN: %t/probable-or.ll -disable-output -simple-loop-unswitch-estimate-profile=1 2>&1 | FileCheck %t/probable-or.prof --check-prefixes=PROFILE-COM,PROFILE-CHK + +; RUN: opt -passes='module(print),function(loop-mssa(simple-loop-unswitch)),module(print)' \ +; RUN: %t/probable-and.ll -disable-output -simple-loop-unswitch-estimate-profile=0 2>&1 | FileCheck %t/probable-and.prof --check-prefixes=PROFILE-COM,PROFILE-REF + +; RUN: opt -passes='module(print),function(loop-mssa(simple-loop-unswitch)),module(print)' \ +; RUN: %t/probable-and.ll -disable-output -simple-loop-unswitch-estimate-profile=1 2>&1 | FileCheck %t/probable-and.prof --check-prefixes=PROFILE-COM,PROFILE-CHK + +;--- main.ll +declare void @some_func() noreturn + +define i32 @or(i1 %cond1, i32 %var1) !prof !0 { +entry: + br label %loop_begin + +loop_begin: + %var3 = phi i32 [%var1, %entry], [%var2, %do_something] + %cond2 = icmp eq i32 %var3, 10 + %cond.or = or i1 %cond1, %cond2 + br i1 %cond.or, label %loop_exit, label %do_something, !prof !1 + +do_something: + %var2 = add i32 %var3, 1 + call void @some_func() noreturn nounwind + br label %loop_begin + +loop_exit: + ret i32 0 +} + +define i32 @and(i1 %cond1, i32 %var1) !prof !0 { +entry: + br label %loop_begin + +loop_begin: + %var3 = phi i32 [%var1, %entry], [%var2, %do_something] + %cond2 = icmp eq i32 %var3, 10 + %cond.and = and i1 %cond1, %cond2 + br i1 %cond.and, label %do_something, label %loop_exit, !prof !1 + +do_something: + %var2 = add i32 %var3, 1 + call void @some_func() noreturn nounwind + br label %loop_begin + +loop_exit: + ret i32 0 +} + +;--- probable-or.prof +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 1, i32 1000} +; CHECK-LABEL: @or +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond1.fr = freeze i1 %cond1 +; CHECK-NEXT: br i1 %cond1.fr, label %loop_exit.split, label %entry.split, !prof !1 +; CHECK-LABEL: @and +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond1.fr = freeze i1 %cond1 +; CHECK-NEXT: br i1 %cond1.fr, label %entry.split, label %loop_exit.split, !prof !2 +; CHECK: !1 = !{!"branch_weights", i32 1, i32 1000} +; CHECK: !2 = !{!"unknown", !"simple-loop-unswitch"} + +; PROFILE-COM: Printing analysis results of BFI for function 'or': +; PROFILE-COM: block-frequency-info: or + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-COM: - loop_begin: {{.*}} count = 10010 + ; PROFILE-COM: - do_something: {{.*}} count = 10000 + ; PROFILE-COM: - loop_exit: {{.*}} count = 10 + +; PROFILE-COM: Printing analysis results of BFI for function 'and': +; PROFILE-COM: block-frequency-info: and + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-COM: - loop_begin: {{.*}} count = 10 + ; PROFILE-COM: - do_something: {{.*}} count = 0 + ; PROFILE-COM: - loop_exit: {{.*}} count = 10 + +; PROFILE-COM: Printing analysis results of BFI for function 'or': +; PROFILE-COM: block-frequency-info: or + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-REF: - entry.split: {{.*}} count = 5 + ; PROFILE-CHK: - entry.split: {{.*}} count = 10 + ; PROFILE-REF: - loop_begin: {{.*}} count = 5005 + ; PROFILE-CHK: - loop_begin: {{.*}} count = 10000 + ; PROFILE-REF: - do_something: {{.*}} count = 5000 + ; PROFILE-CHK: - do_something: {{.*}} count = 9990 + ; PROFILE-REF: - loop_exit: {{.*}} count = 5 + ; PROFILE-CHK: - loop_exit: {{.*}} count = 10 + ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10 + +; PROFILE-COM: Printing analysis results of BFI for function 'and': +; PROFILE-COM: block-frequency-info: and + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-COM: - entry.split: {{.*}} count = 5 + ; PROFILE-COM: - loop_begin: {{.*}} count = 5 + ; PROFILE-COM: - do_something: {{.*}} count = 0 + ; PROFILE-COM: - loop_exit: {{.*}} count = 5 + ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10 + +;--- probable-and.prof +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 1000, i32 1} +; CHECK-LABEL: @or +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond1.fr = freeze i1 %cond1 +; CHECK-NEXT: br i1 %cond1.fr, label %loop_exit.split, label %entry.split, !prof !1 +; CHECK-LABEL: @and +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond1.fr = freeze i1 %cond1 +; CHECK-NEXT: br i1 %cond1.fr, label %entry.split, label %loop_exit.split, !prof !2 +; CHECK: !1 = !{!"unknown", !"simple-loop-unswitch"} +; CHECK: !2 = !{!"branch_weights", i32 1000, i32 1} +; PROFILE-COM: Printing analysis results of BFI for function 'or': +; PROFILE-COM: block-frequency-info: or + ; PROFILE-COM: - entry: {{.*}}, count = 10 + ; PROFILE-COM: - loop_begin: {{.*}}, count = 10 + ; PROFILE-COM: - do_something: {{.*}}, count = 0 + ; PROFILE-COM: - loop_exit: {{.*}}, count = 10 + +; PROFILE-COM: Printing analysis results of BFI for function 'and': +; PROFILE-COM: block-frequency-info: and + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-COM: - loop_begin: {{.*}} count = 10010 + ; PROFILE-COM: - do_something: {{.*}} count = 10000 + ; PROFILE-COM: - loop_exit: {{.*}} count = 10 + +; PROFILE-COM: Printing analysis results of BFI for function 'or': +; PROFILE-COM: block-frequency-info: or + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-COM: - entry.split: {{.*}} count = 5 + ; PROFILE-COM: - loop_begin: {{.*}} count = 5 + ; PROFILE-COM: - do_something: {{.*}} count = 0 + ; PROFILE-COM: - loop_exit: {{.*}} count = 5 + ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10 + +; PROFILE-COM: Printing analysis results of BFI for function 'and': +; PROFILE-COM: block-frequency-info: and + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-REF: - entry.split: {{.*}} count = 5 + ; PROFILE-CHK: - entry.split: {{.*}} count = 10 + ; PROFILE-REF: - loop_begin: {{.*}} count = 5005 + ; PROFILE-CHK: - loop_begin: {{.*}} count = 10000 + ; PROFILE-REF: - do_something: {{.*}} count = 5000 + ; PROFILE-CHK: - do_something: {{.*}} count = 9990 + ; PROFILE-REF: - loop_exit: {{.*}} count = 5 + ; PROFILE-CHK: - loop_exit: {{.*}} count = 10 + ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10