-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[SimplifyCFG][profcheck] Synthesize profile for br (X == 0 | X == 1), T, F1 -> switch
#161549
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
mtrofin
merged 1 commit into
main
from
users/mtrofin/09-30-_simplifycfg_profcheck_synthesize_profile_for_br_x_0_x_1_t_f1_-_switch
Oct 4, 2025
Merged
[SimplifyCFG][profcheck] Synthesize profile for br (X == 0 | X == 1), T, F1 -> switch
#161549
mtrofin
merged 1 commit into
main
from
users/mtrofin/09-30-_simplifycfg_profcheck_synthesize_profile_for_br_x_0_x_1_t_f1_-_switch
Oct 4, 2025
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This was referenced Oct 1, 2025
Base automatically changed from
users/mtrofin/09-25-_simplifycfg_profcheck_fix_artificially-failing_preserve-branchweights.ll_
to
main
October 1, 2025 16:54
2d42032
to
0029ab3
Compare
@llvm/pr-subscribers-llvm-transforms Author: Mircea Trofin (mtrofin) ChangesFull diff: https://github.com/llvm/llvm-project/pull/161549.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 4d1f768e2177a..df436d0f68028 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5148,14 +5148,18 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
if (ExtraCase && Values.size() < 2)
return false;
- // TODO: Preserve branch weight metadata, similarly to how
- // foldValueComparisonIntoPredecessors preserves it.
+ SmallVector<uint32_t> BranchWeights;
+ const bool HasProfile = !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*BI, BranchWeights);
// Figure out which block is which destination.
BasicBlock *DefaultBB = BI->getSuccessor(1);
BasicBlock *EdgeBB = BI->getSuccessor(0);
- if (!TrueWhenEqual)
+ if (!TrueWhenEqual) {
std::swap(DefaultBB, EdgeBB);
+ if (HasProfile)
+ std::swap(BranchWeights[0], BranchWeights[1]);
+ }
BasicBlock *BB = BI->getParent();
@@ -5186,10 +5190,11 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
ExtraCase = Builder.CreateFreeze(ExtraCase);
- if (TrueWhenEqual)
- Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
- else
- Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
+ // We don't have any info about this condition.
+ auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
+ : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
+ setExplicitlyUnknownBranchWeightsIfProfiled(*Br, *NewBB->getParent(),
+ DEBUG_TYPE);
OldTI->eraseFromParent();
@@ -5216,6 +5221,17 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
// Create the new switch instruction now.
SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
+ if (HasProfile) {
+ // We know the weight of the default case. We don't know the weight of the
+ // other cases, but rather than completely loose profiling info, we split
+ // the remaining probability equally over them.
+ SmallVector<uint32_t> NewWeights(Values.size() + 1);
+ NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped if
+ // TrueWhenEqual.
+ for (auto &V : drop_begin(NewWeights))
+ V = BranchWeights[0] / Values.size();
+ setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
+ }
// Add all of the 'cases' to the switch instruction.
for (ConstantInt *Val : Values)
diff --git a/llvm/test/Transforms/SimplifyCFG/switch_create.ll b/llvm/test/Transforms/SimplifyCFG/switch_create.ll
index 18c4ade46162c..ef5aee68e268e 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch_create.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch_create.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -S -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -switch-range-to-icmp < %s | FileCheck %s
; RUN: opt -S -data-layout="p:32:32-p1:16:16" -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -switch-range-to-icmp < %s | FileCheck -check-prefix=CHECK -check-prefix=DL %s
@@ -6,12 +6,12 @@ declare void @foo1()
declare void @foo2()
-define void @test1(i32 %V) {
+define void @test1(i32 %V) !prof !0 {
; CHECK-LABEL: @test1(
; CHECK-NEXT: switch i32 [[V:%.*]], label [[F:%.*]] [
; CHECK-NEXT: i32 17, label [[T:%.*]]
; CHECK-NEXT: i32 4, label [[T]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: ], !prof [[PROF1:![0-9]+]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: T:
@@ -24,7 +24,7 @@ define void @test1(i32 %V) {
%C1 = icmp eq i32 %V, 4 ; <i1> [#uses=1]
%C2 = icmp eq i32 %V, 17 ; <i1> [#uses=1]
%CN = or i1 %C1, %C2 ; <i1> [#uses=1]
- br i1 %CN, label %T, label %F
+ br i1 %CN, label %T, label %F, !prof !1
T: ; preds = %0
call void @foo1( )
ret void
@@ -116,12 +116,12 @@ F: ; preds = %0
ret void
}
-define void @test2(i32 %V) {
+define void @test2(i32 %V) !prof !0 {
; CHECK-LABEL: @test2(
; CHECK-NEXT: switch i32 [[V:%.*]], label [[T:%.*]] [
; CHECK-NEXT: i32 17, label [[F:%.*]]
; CHECK-NEXT: i32 4, label [[F]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: ], !prof [[PROF2:![0-9]+]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: T:
@@ -134,7 +134,7 @@ define void @test2(i32 %V) {
%C1 = icmp ne i32 %V, 4 ; <i1> [#uses=1]
%C2 = icmp ne i32 %V, 17 ; <i1> [#uses=1]
%CN = and i1 %C1, %C2 ; <i1> [#uses=1]
- br i1 %CN, label %T, label %F
+ br i1 %CN, label %T, label %F, !prof !1
T: ; preds = %0
call void @foo1( )
ret void
@@ -1313,3 +1313,16 @@ if.then:
if.end:
ret void
}
+
+!0 = !{!"function_entry_count", i32 100}
+!1 = !{!"branch_weights", i32 6, i32 10}
+;.
+; DL: attributes #[[ATTR0:[0-9]+]] = { noredzone nounwind ssp }
+; DL: attributes #[[ATTR1:[0-9]+]] = { nounwind }
+; DL: attributes #[[ATTR2]] = { noredzone nounwind }
+; DL: attributes #[[ATTR3]] = { noredzone }
+;.
+; DL: [[META0:![0-9]+]] = !{!"function_entry_count", i32 100}
+; DL: [[PROF1]] = !{!"branch_weights", i32 10, i32 3, i32 3}
+; DL: [[PROF2]] = !{!"branch_weights", i32 6, i32 5, i32 5}
+;.
|
0029ab3
to
e2a3be6
Compare
This was referenced Oct 2, 2025
br (X == 0 | X == 1), T, F1 -> switch
e2a3be6
to
aaa7f1e
Compare
alanzhao1
approved these changes
Oct 3, 2025
…, T, F1 -> switch
aaa7f1e
to
e49a6f0
Compare
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/116/builds/19252 Here is the relevant piece of the build log for the reference
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
We cannot calculate the weights of the switch precisely, but we do know the probability of the default branch. We then split equally the remaining probability over the rest of the cases. If we did nothing, the static estimation could be considerably poorer.
Issue #147390