-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[LIR][profcheck] Reuse the loop's exit condition profile #164523
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LIR][profcheck] Reuse the loop's exit condition profile #164523
Conversation
|
@llvm/pr-subscribers-llvm-transforms Author: Mircea Trofin (mtrofin) ChangesThe Full diff: https://github.com/llvm/llvm-project/pull/164523.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 019536ca91ae0..9070d252ae09f 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -72,6 +72,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -105,6 +106,7 @@ STATISTIC(
STATISTIC(NumShiftUntilZero,
"Number of uncountable loops recognized as 'shift until zero' idiom");
+namespace llvm {
bool DisableLIRP::All;
static cl::opt<bool, true>
DisableLIRPAll("disable-" DEBUG_TYPE "-all",
@@ -163,6 +165,10 @@ static cl::opt<bool> ForceMemsetPatternIntrinsic(
cl::desc("Use memset.pattern intrinsic whenever possible"), cl::init(false),
cl::Hidden);
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
namespace {
class LoopIdiomRecognize {
@@ -3199,7 +3205,21 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
// The loop trip count check.
auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
CurLoop->getName() + ".ivcheck");
- Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+ SmallVector<uint32_t> BranchWeights;
+ const bool HasBranchWeights =
+ !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+ auto *BI = Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+ if (HasBranchWeights) {
+ if (SuccessorBB == LoopHeaderBB->getTerminator()->getSuccessor(1))
+ std::swap(BranchWeights[0], BranchWeights[1]);
+ // We're not changing the loop profile, so we can reuse the original loop's
+ // profile.
+ setBranchWeights(*BI, BranchWeights,
+ /*IsExpected=*/false);
+ }
+
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
@@ -3368,10 +3388,10 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, ScalarEvolution *SE,
/// %start = <...>
/// %extraoffset = <...>
/// <...>
-/// br label %for.cond
+/// br label %loop
///
/// loop:
-/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
+/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
/// %nbits = add nsw i8 %iv, %extraoffset
/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
@@ -3533,7 +3553,19 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
// The loop terminator.
Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
- Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+ SmallVector<uint32_t> BranchWeights;
+ const bool HasBranchWeights =
+ !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+ auto *BI = Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+ if (HasBranchWeights) {
+ if (InvertedCond)
+ std::swap(BranchWeights[0], BranchWeights[1]);
+ // We're not changing the loop profile, so we can reuse the original loop's
+ // profile.
+ setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false);
+ }
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
diff --git a/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
new file mode 100644
index 0000000000000..d01bb748d9422
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
@@ -0,0 +1,70 @@
+; RUN: opt -passes="module(print<block-freq>),function(loop(loop-idiom)),module(print<block-freq>)" -mtriple=x86_64 -mcpu=core-avx2 %s -disable-output 2>&1 | FileCheck --check-prefix=PROFILE %s
+
+declare void @escape_inner(i8, i8, i8, i1, i8)
+declare void @escape_outer(i8, i8, i8, i1, i8)
+
+declare i8 @gen.i8()
+
+; Most basic pattern; Note that iff the shift amount is offset, said offsetting
+; must not cause an overflow, but `add nsw` is fine.
+define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+ %nbits = add nsw i8 %iv, %extraoffset
+ %val.shifted = ashr i8 %val, %nbits
+ %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+ %iv.next = add i8 %iv, 1
+
+ call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)
+
+ br i1 %val.shifted.iszero, label %end, label %loop, !prof !{!"branch_weights", i32 1, i32 1000 }
+
+end:
+ %iv.res = phi i8 [ %iv, %loop ]
+ %nbits.res = phi i8 [ %nbits, %loop ]
+ %val.shifted.res = phi i8 [ %val.shifted, %loop ]
+ %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
+ %iv.next.res = phi i8 [ %iv.next, %loop ]
+
+ call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
+
+ ret i8 %iv.res
+}
+
+define i32 @p1(i32 %x, i32 %bit) {
+entry:
+ %bitmask = shl i32 1, %bit
+ br label %loop
+
+loop:
+ %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
+ %x.curr.bitmasked = and i32 %x.curr, %bitmask
+ %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
+ %x.next = shl i32 %x.curr, 1
+ br i1 %x.curr.isbitunset, label %loop, label %end, !prof !{!"branch_weights", i32 500, i32 1 }
+
+end:
+ ret i32 %x.curr
+}
+
+;
+; PROFILE: Printing analysis results of BFI for function 'p0':
+; PROFILE: block-frequency-info: p0
+; PROFILE: - entry: float = 1.0,
+; PROFILE: - loop: float = 1001.0,
+; PROFILE: - end: float = 1.0,
+; PROFILE: block-frequency-info: p1
+; PROFILE: - entry: float = 1.0,
+; PROFILE: - loop: float = 501.0,
+; PROFILE: - end: float = 1.0,
+; PROFILE: block-frequency-info: p0
+; PROFILE: - entry: float = 1.0,
+; PROFILE: - loop: float = 1001.0,
+; PROFILE: - end: float = 1.0,
+; PROFILE: block-frequency-info: p1
+; PROFILE: - entry: float = 1.0,
+; PROFILE: - loop: float = 501.0,
+; PROFILE: - end: float = 1.0,
|
|
@jdenny-ornl - I think I should also propagate the |
7b55a08 to
577971f
Compare
72e8716 to
1bd2ba5
Compare
577971f to
e745aaf
Compare
aafb258 to
6486fed
Compare
e745aaf to
452c2f5
Compare
452c2f5 to
ac02f48
Compare
f9110cf to
a731249
Compare
9d2017c to
19c883d
Compare
a731249 to
23cbeab
Compare
19c883d to
a5d8823
Compare
23cbeab to
648356d
Compare
e403ca4 to
d2a0486
Compare
7a7a7f2 to
3cbf2c9
Compare
d2a0486 to
3620cd8
Compare
3cbf2c9 to
b1dfb82
Compare
0037b3e to
c5282b7
Compare
7142f46 to
057f252
Compare
b095044 to
369240c
Compare
2e01505 to
9ddf571
Compare
369240c to
6cdaeb8
Compare
9ddf571 to
1f30253
Compare
6cdaeb8 to
71a68e9
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
1f30253 to
af190a9
Compare
71a68e9 to
a4e7fae
Compare
af190a9 to
0e19741
Compare
a4e7fae to
d41e0be
Compare
0e19741 to
e19f34d
Compare
d41e0be to
38069a3
Compare
38069a3 to
075b8ff
Compare
|
@jdenny-ornl I'll merge this, and can follow up with the |

The idioms are described in https://reviews.llvm.org/D102116 and https://reviews.llvm.org/D92754. In both cases, the way the loop is expressed changes, without changing its iteration count, which means we can reuse the original loop's branch probabilities.
Issue #147390