Skip to content

Commit e403ca4

Browse files
committed
[LIR][profcheck] Reuse the loop's exit condition profile
1 parent 2e48d8d commit e403ca4

File tree

2 files changed

+106
-4
lines changed

2 files changed

+106
-4
lines changed

llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
#include "llvm/IR/Module.h"
7373
#include "llvm/IR/PassManager.h"
7474
#include "llvm/IR/PatternMatch.h"
75+
#include "llvm/IR/ProfDataUtils.h"
7576
#include "llvm/IR/Type.h"
7677
#include "llvm/IR/User.h"
7778
#include "llvm/IR/Value.h"
@@ -105,6 +106,7 @@ STATISTIC(
105106
STATISTIC(NumShiftUntilZero,
106107
"Number of uncountable loops recognized as 'shift until zero' idiom");
107108

109+
namespace llvm {
108110
bool DisableLIRP::All;
109111
static cl::opt<bool, true>
110112
DisableLIRPAll("disable-" DEBUG_TYPE "-all",
@@ -163,6 +165,10 @@ static cl::opt<bool> ForceMemsetPatternIntrinsic(
163165
cl::desc("Use memset.pattern intrinsic whenever possible"), cl::init(false),
164166
cl::Hidden);
165167

168+
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
169+
170+
} // namespace llvm
171+
166172
namespace {
167173

168174
class LoopIdiomRecognize {
@@ -3199,7 +3205,21 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
31993205
// The loop trip count check.
32003206
auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
32013207
CurLoop->getName() + ".ivcheck");
3202-
Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
3208+
SmallVector<uint32_t> BranchWeights;
3209+
const bool HasBranchWeights =
3210+
!ProfcheckDisableMetadataFixes &&
3211+
extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
3212+
3213+
auto *BI = Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
3214+
if (HasBranchWeights) {
3215+
if (SuccessorBB == LoopHeaderBB->getTerminator()->getSuccessor(1))
3216+
std::swap(BranchWeights[0], BranchWeights[1]);
3217+
// We're not changing the loop profile, so we can reuse the original loop's
3218+
// profile.
3219+
setBranchWeights(*BI, BranchWeights,
3220+
/*IsExpected=*/false);
3221+
}
3222+
32033223
LoopHeaderBB->getTerminator()->eraseFromParent();
32043224

32053225
// Populate the IV PHI.
@@ -3368,10 +3388,10 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, ScalarEvolution *SE,
33683388
/// %start = <...>
33693389
/// %extraoffset = <...>
33703390
/// <...>
3371-
/// br label %for.cond
3391+
/// br label %loop
33723392
///
33733393
/// loop:
3374-
/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
3394+
/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
33753395
/// %nbits = add nsw i8 %iv, %extraoffset
33763396
/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
33773397
/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
@@ -3533,7 +3553,19 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
35333553

35343554
// The loop terminator.
35353555
Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
3536-
Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
3556+
SmallVector<uint32_t> BranchWeights;
3557+
const bool HasBranchWeights =
3558+
!ProfcheckDisableMetadataFixes &&
3559+
extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
3560+
3561+
auto *BI = Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
3562+
if (HasBranchWeights) {
3563+
if (InvertedCond)
3564+
std::swap(BranchWeights[0], BranchWeights[1]);
3565+
// We're not changing the loop profile, so we can reuse the original loop's
3566+
// profile.
3567+
setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false);
3568+
}
35373569
LoopHeaderBB->getTerminator()->eraseFromParent();
35383570

35393571
// Populate the IV PHI.
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
; RUN: opt -passes="module(print<block-freq>),function(loop(loop-idiom)),module(print<block-freq>)" -mtriple=x86_64 -mcpu=core-avx2 %s -disable-output 2>&1 | FileCheck --check-prefix=PROFILE %s
2+
3+
declare void @escape_inner(i8, i8, i8, i1, i8)
4+
declare void @escape_outer(i8, i8, i8, i1, i8)
5+
6+
declare i8 @gen.i8()
7+
8+
; Most basic pattern; Note that iff the shift amount is offset, said offsetting
9+
; must not cause an overflow, but `add nsw` is fine.
10+
define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress {
11+
entry:
12+
br label %loop
13+
14+
loop:
15+
%iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
16+
%nbits = add nsw i8 %iv, %extraoffset
17+
%val.shifted = ashr i8 %val, %nbits
18+
%val.shifted.iszero = icmp eq i8 %val.shifted, 0
19+
%iv.next = add i8 %iv, 1
20+
21+
call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)
22+
23+
br i1 %val.shifted.iszero, label %end, label %loop, !prof !{!"branch_weights", i32 1, i32 1000 }
24+
25+
end:
26+
%iv.res = phi i8 [ %iv, %loop ]
27+
%nbits.res = phi i8 [ %nbits, %loop ]
28+
%val.shifted.res = phi i8 [ %val.shifted, %loop ]
29+
%val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
30+
%iv.next.res = phi i8 [ %iv.next, %loop ]
31+
32+
call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
33+
34+
ret i8 %iv.res
35+
}
36+
37+
define i32 @p1(i32 %x, i32 %bit) {
38+
entry:
39+
%bitmask = shl i32 1, %bit
40+
br label %loop
41+
42+
loop:
43+
%x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
44+
%x.curr.bitmasked = and i32 %x.curr, %bitmask
45+
%x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
46+
%x.next = shl i32 %x.curr, 1
47+
br i1 %x.curr.isbitunset, label %loop, label %end, !prof !{!"branch_weights", i32 500, i32 1 }
48+
49+
end:
50+
ret i32 %x.curr
51+
}
52+
53+
;
54+
; PROFILE: Printing analysis results of BFI for function 'p0':
55+
; PROFILE: block-frequency-info: p0
56+
; PROFILE: - entry: float = 1.0,
57+
; PROFILE: - loop: float = 1001.0,
58+
; PROFILE: - end: float = 1.0,
59+
; PROFILE: block-frequency-info: p1
60+
; PROFILE: - entry: float = 1.0,
61+
; PROFILE: - loop: float = 501.0,
62+
; PROFILE: - end: float = 1.0,
63+
; PROFILE: block-frequency-info: p0
64+
; PROFILE: - entry: float = 1.0,
65+
; PROFILE: - loop: float = 1001.0,
66+
; PROFILE: - end: float = 1.0,
67+
; PROFILE: block-frequency-info: p1
68+
; PROFILE: - entry: float = 1.0,
69+
; PROFILE: - loop: float = 501.0,
70+
; PROFILE: - end: float = 1.0,

0 commit comments

Comments
 (0)