Skip to content

Commit 3949ad0

Browse files
committed
[SimplifyCFG] Propagate profile in simplifySwitchOfPowersOfTwo
1 parent 8d9cd5b commit 3949ad0

File tree

3 files changed

+48
-8
lines changed

3 files changed

+48
-8
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
#include <algorithm>
8181
#include <cassert>
8282
#include <climits>
83+
#include <cmath>
8384
#include <cstddef>
8485
#include <cstdint>
8586
#include <iterator>
@@ -7632,7 +7633,33 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
76327633
auto *DefaultCaseBB = SI->getDefaultDest();
76337634
BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
76347635
auto It = OrigBB->getTerminator()->getIterator();
7636+
SmallVector<uint32_t> Weights;
7637+
auto HasWeights =
7638+
!ProfcheckDisableMetadataFixes && extractBranchWeights(*SI, Weights);
76357639
auto *BI = BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
7640+
if (HasWeights && any_of(Weights, [](const auto &V) { return V != 0; })) {
7641+
// IsPow2 covers a subset of the cases in which we'd go to the default
7642+
// label. The other is those powers of 2 that don't appear in the case
7643+
// statement. We don't know the distribution of the values coming in, so
7644+
// the safest is to split 50-50 the original probability to `default`.
7645+
uint64_t OrigDenominator = sum_of(map_range(
7646+
Weights, [](const auto &V) { return static_cast<uint64_t>(V); }));
7647+
SmallVector<uint64_t> NewWeights(2);
7648+
NewWeights[1] = Weights[0] / 2;
7649+
NewWeights[0] = OrigDenominator - NewWeights[1];
7650+
setFittedBranchWeights(*BI, NewWeights, /*IsExpected=*/false);
7651+
7652+
// For the original switch, we reduce the weight of the default by the
7653+
// amount by which the previous branch contributes to getting to default,
7654+
// and then make sure the remaining weights have the same relative ratio
7655+
// wrt each other.
7656+
uint64_t CasesDenominator = OrigDenominator - Weights[0];
7657+
Weights[0] /= 2;
7658+
for (auto &W : drop_begin(Weights))
7659+
W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;
7660+
7661+
setBranchWeights(*SI, Weights, /*IsExpected=*/false);
7662+
}
76367663
// BI is handling the default case for SI, and so should share its DebugLoc.
76377664
BI->setDebugLoc(SI->getDebugLoc());
76387665
It->eraseFromParent();

llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
22
; RUN: opt -passes='simplifycfg<switch-to-lookup>' -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s
33

44
target triple = "x86_64-unknown-linux-gnu"
55

6+
;.
7+
; CHECK: @switch.table.switch_of_powers_two = private unnamed_addr constant [7 x i32] [i32 3, i32 poison, i32 poison, i32 2, i32 1, i32 0, i32 42], align 4
8+
; CHECK: @switch.table.switch_of_powers_two_default_reachable = private unnamed_addr constant [7 x i32] [i32 3, i32 5, i32 5, i32 2, i32 1, i32 0, i32 42], align 4
9+
; CHECK: @switch.table.switch_of_powers_two_default_reachable_multipreds = private unnamed_addr constant [7 x i32] [i32 3, i32 poison, i32 poison, i32 2, i32 1, i32 0, i32 42], align 4
10+
;.
611
define i32 @switch_of_powers_two(i32 %arg) {
712
; CHECK-LABEL: define i32 @switch_of_powers_two(
813
; CHECK-SAME: i32 [[ARG:%.*]]) {
@@ -35,17 +40,17 @@ return:
3540
ret i32 %phi
3641
}
3742

38-
define i32 @switch_of_powers_two_default_reachable(i32 %arg) {
43+
define i32 @switch_of_powers_two_default_reachable(i32 %arg) !prof !0 {
3944
; CHECK-LABEL: define i32 @switch_of_powers_two_default_reachable(
40-
; CHECK-SAME: i32 [[ARG:%.*]]) {
45+
; CHECK-SAME: i32 [[ARG:%.*]]) !prof [[PROF0:![0-9]+]] {
4146
; CHECK-NEXT: [[ENTRY:.*]]:
4247
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]])
4348
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1
44-
; CHECK-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[RETURN:.*]]
49+
; CHECK-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[RETURN:.*]], !prof [[PROF1:![0-9]+]]
4550
; CHECK: [[ENTRY_SPLIT]]:
4651
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true)
4752
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
48-
; CHECK-NEXT: br i1 [[TMP3]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]]
53+
; CHECK-NEXT: br i1 [[TMP3]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]], !prof [[PROF2:![0-9]+]]
4954
; CHECK: [[SWITCH_LOOKUP]]:
5055
; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64
5156
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two_default_reachable, i64 0, i64 [[TMP4]]
@@ -62,7 +67,7 @@ entry:
6267
i32 16, label %bb3
6368
i32 32, label %bb4
6469
i32 64, label %bb5
65-
]
70+
], !prof !1
6671

6772
default_case: br label %return
6873
bb1: br label %return
@@ -128,3 +133,13 @@ return:
128133
%phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [ %pn, %default_case ]
129134
ret i32 %phi
130135
}
136+
137+
!0 = !{!"function_entry_count", i32 10}
138+
!1 = !{!"branch_weights", i32 10, i32 5, i32 7, i32 11, i32 13, i32 17}
139+
;.
140+
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
141+
;.
142+
; CHECK: [[PROF0]] = !{!"function_entry_count", i32 10}
143+
; CHECK: [[PROF1]] = !{!"branch_weights", i32 58, i32 5}
144+
; CHECK: [[PROF2]] = !{!"branch_weights", i32 56, i32 5}
145+
;.

llvm/utils/profcheck-xfail.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1317,8 +1317,6 @@ Transforms/SimpleLoopUnswitch/pr60736.ll
13171317
Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll
13181318
Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
13191319
Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll
1320-
Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
1321-
Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
13221320
Transforms/StackProtector/cross-dso-cfi-stack-chk-fail.ll
13231321
Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
13241322
Transforms/StructurizeCFG/hoist-zerocost.ll

0 commit comments

Comments
 (0)