Skip to content

Commit a4ddaf2

Browse files
committed
[DFAJT][profcheck] Propagate select -> br profile metadata
1 parent 7e3080f commit a4ddaf2

File tree

4 files changed

+34
-14
lines changed

4 files changed

+34
-14
lines changed

llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -120,6 +120,9 @@ static cl::opt<unsigned>
120120
CostThreshold("dfa-cost-threshold",
121121
cl::desc("Maximum cost accepted for the transformation"),
122122
cl::Hidden, cl::init(50));
123+
124+
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
125+
123126
} // namespace llvm
124127

125128
static cl::opt<double> MaxClonedRate(
@@ -262,7 +265,11 @@ void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
262265

263266
// Insert the real conditional branch based on the original condition.
264267
StartBlockTerm->eraseFromParent();
265-
BranchInst::Create(EndBlock, NewBlock, SI->getCondition(), StartBlock);
268+
auto *BI =
269+
BranchInst::Create(EndBlock, NewBlock, SI->getCondition(), StartBlock);
270+
if (!ProfcheckDisableMetadataFixes)
271+
BI->setMetadata(LLVMContext::MD_prof,
272+
SI->getMetadata(LLVMContext::MD_prof));
266273
DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock},
267274
{DominatorTree::Insert, StartBlock, NewBlock}});
268275
} else {
@@ -297,7 +304,11 @@ void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
297304
// (Use)
298305
BranchInst::Create(EndBlock, NewBlockF);
299306
// Insert the real conditional branch based on the original condition.
300-
BranchInst::Create(EndBlock, NewBlockF, SI->getCondition(), NewBlockT);
307+
auto *BI =
308+
BranchInst::Create(EndBlock, NewBlockF, SI->getCondition(), NewBlockT);
309+
if (!ProfcheckDisableMetadataFixes)
310+
BI->setMetadata(LLVMContext::MD_prof,
311+
SI->getMetadata(LLVMContext::MD_prof));
301312
DTU->applyUpdates({{DominatorTree::Insert, NewBlockT, NewBlockF},
302313
{DominatorTree::Insert, NewBlockT, EndBlock},
303314
{DominatorTree::Insert, NewBlockF, EndBlock}});

llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,12 @@
11
; REQUIRES: asserts
22
; RUN: opt -S -passes=dfa-jump-threading -debug-only=dfa-jump-threading -disable-output %s 2>&1 | FileCheck %s
3+
; RUN: opt -S -passes=dfa-jump-threading -print-prof-data %s -o - | FileCheck %s --check-prefix=PROFILE
34

45
; This test checks that the analysis identifies all threadable paths in a
56
; simple CFG. A threadable path includes a list of basic blocks, the exit
67
; state, and the block that determines the next state.
78
; < path of BBs that form a cycle > [ state, determinator ]
8-
define i32 @test1(i32 %num) {
9+
define i32 @test1(i32 %num) !prof !0{
910
; CHECK: < case2 for.inc for.body > [ 1, for.inc ]
1011
; CHECK-NEXT: < for.inc for.body > [ 1, for.inc ]
1112
; CHECK-NEXT: < case1 for.inc for.body > [ 2, for.inc ]
@@ -25,8 +26,11 @@ case1:
2526
br label %for.inc
2627

2728
case2:
29+
; PROFILE-LABEL: @test1
30+
; PROFILE-LABEL: case2:
31+
; PROFILE: br i1 %cmp, label %for.inc.jt1, label %sel.si.unfold.false.jt2, !prof !1 ; !1 = !{!"branch_weights", i32 3, i32 5}
2832
%cmp = icmp eq i32 %count, 50
29-
%sel = select i1 %cmp, i32 1, i32 2
33+
%sel = select i1 %cmp, i32 1, i32 2, !prof !1
3034
br label %for.inc
3135

3236
for.inc:
@@ -182,7 +186,7 @@ bb66: ; preds = %bb59
182186
}
183187

184188
; Value %init is not predictable, but that is okay since it is the initial value of the switch.
185-
define i32 @initial.value.positive1(i32 %init) {
189+
define i32 @initial.value.positive1(i32 %init) !prof !0 {
186190
; CHECK: < loop.1.backedge loop.1 loop.2 loop.3 > [ 1, loop.1 ]
187191
; CHECK-NEXT: < case4 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 2, loop.1.backedge ]
188192
; CHECK-NEXT: < case2 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 4, loop.1.backedge ]
@@ -241,3 +245,6 @@ infloop.i:
241245
exit:
242246
ret i32 0
243247
}
248+
249+
!0 = !{!"function_entry_count", i32 10}
250+
!1 = !{!"branch_weights", i32 3, i32 5}

llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
22
; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s
33

44
; These tests check that the DFA jump threading transformation is applied
@@ -301,7 +301,7 @@ end:
301301
ret void
302302
}
303303

304-
define void @pr106083_invalidBBarg_fold(i1 %cmp1, i1 %cmp2, i1 %not, ptr %d) {
304+
define void @pr106083_invalidBBarg_fold(i1 %cmp1, i1 %cmp2, i1 %not, ptr %d) !prof !0 {
305305
; CHECK-LABEL: @pr106083_invalidBBarg_fold(
306306
; CHECK-NEXT: bb:
307307
; CHECK-NEXT: br label [[BB1:%.*]]
@@ -310,7 +310,7 @@ define void @pr106083_invalidBBarg_fold(i1 %cmp1, i1 %cmp2, i1 %not, ptr %d) {
310310
; CHECK-NEXT: br i1 [[NOT:%.*]], label [[BB7_JT0]], label [[BB2:%.*]]
311311
; CHECK: BB2:
312312
; CHECK-NEXT: store i16 0, ptr [[D:%.*]], align 2
313-
; CHECK-NEXT: br i1 [[CMP2:%.*]], label [[BB7:%.*]], label [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0:%.*]]
313+
; CHECK-NEXT: br i1 [[CMP2:%.*]], label [[BB7:%.*]], label [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0:%.*]], !prof [[PROF1:![0-9]+]]
314314
; CHECK: spec.select.si.unfold.false:
315315
; CHECK-NEXT: br label [[BB9]]
316316
; CHECK: spec.select.si.unfold.false.jt0:
@@ -357,7 +357,7 @@ BB1: ; preds = %BB1.backedge, %BB7,
357357

358358
BB2: ; preds = %BB1
359359
store i16 0, ptr %d, align 2
360-
%spec.select = select i1 %cmp2, i32 %sel, i32 0
360+
%spec.select = select i1 %cmp2, i32 %sel, i32 0, !prof !1
361361
br label %BB7
362362

363363
BB7: ; preds = %BB2, %BB1
@@ -444,3 +444,10 @@ select.unfold: ; preds = %bb1, %.loopexit6
444444
bb2: ; preds = %select.unfold
445445
unreachable
446446
}
447+
448+
!0 = !{!"function_entry_count", i32 10}
449+
!1 = !{!"branch_weights", i32 3, i32 5}
450+
;.
451+
; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
452+
; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 5}
453+
;.

llvm/utils/profcheck-xfail.txt

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -711,11 +711,6 @@ Transforms/CorrelatedValuePropagation/urem.ll
711711
Transforms/CrossDSOCFI/basic.ll
712712
Transforms/CrossDSOCFI/cfi_functions.ll
713713
Transforms/CrossDSOCFI/thumb.ll
714-
Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll
715-
Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll
716-
Transforms/DFAJumpThreading/dfa-unfold-select.ll
717-
Transforms/DFAJumpThreading/max-path-length.ll
718-
Transforms/DFAJumpThreading/negative.ll
719714
Transforms/ExpandFp/AMDGPU/frem-inf.ll
720715
Transforms/ExpandFp/AMDGPU/frem.ll
721716
Transforms/ExpandLargeDivRem/X86/sdiv129.ll

0 commit comments

Comments
 (0)