-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[SimplifyCFG][PGO] Reuse existing setBranchWeights
#160629
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SimplifyCFG][PGO] Reuse existing setBranchWeights
#160629
Conversation
@llvm/pr-subscribers-pgo @llvm/pr-subscribers-llvm-transforms Author: Mircea Trofin (mtrofin) ChangesThe Full diff: https://github.com/llvm/llvm-project/pull/160629.diff 2 Files Affected:
diff --git a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll
index 0624f72d7a142..622dc7295644f 100644
--- a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll
+++ b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll
@@ -858,8 +858,8 @@ define void @or_icmps_empty_metadata(i32 %x, i32 %y, ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[MORE_RARE:%.*]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]], !prof [[PROF25:![0-9]+]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[MORE_RARE:%.*]], !prof [[PROF25]]
; CHECK: more_rare:
; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@@ -956,8 +956,8 @@ define void @and_icmps_not_that_harmful(i32 %x, i32 %y, ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 [[EXPENSIVE]], i1 false, !prof [[PROF25:![0-9]+]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF25]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 [[EXPENSIVE]], i1 false, !prof [[PROF26:![0-9]+]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF26]]
; CHECK: false:
; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@@ -988,8 +988,8 @@ define void @and_icmps_not_that_harmful_inverted(i32 %x, i32 %y, ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 [[EXPENSIVE]], i1 false, !prof [[PROF25]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF25]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 [[EXPENSIVE]], i1 false, !prof [[PROF26]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF26]]
; CHECK: false:
; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@@ -1019,8 +1019,8 @@ define void @and_icmps_useful(i32 %x, i32 %y, ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 [[EXPENSIVE]], i1 false, !prof [[PROF26:![0-9]+]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF26]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 [[EXPENSIVE]], i1 false, !prof [[PROF27:![0-9]+]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF27]]
; CHECK: false:
; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@@ -1050,8 +1050,8 @@ define void @and_icmps_useful_inverted(i32 %x, i32 %y, ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sle i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 [[EXPENSIVE]], i1 false, !prof [[PROF26]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF26]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 [[EXPENSIVE]], i1 false, !prof [[PROF27]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF27]]
; CHECK: false:
; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@@ -1093,7 +1093,7 @@ exit:
!16 = !{!"branch_weights", i32 1, i32 99}
!17 = !{!"branch_weights", i32 98, i32 1}
!18 = !{!"branch_weights", i32 1, i32 98}
-!19 = !{!"branch_weights", i32 0, i32 0}
+!19 = !{!"branch_weights", i32 5, i32 7}
!20 = !{}
; .
@@ -1129,6 +1129,7 @@ exit:
; CHECK: [[PROF22]] = !{!"branch_weights", i32 197, i32 1}
; CHECK: [[PROF23]] = !{!"branch_weights", i32 100, i32 98}
; CHECK: [[PROF24]] = !{!"branch_weights", i32 101, i32 99}
-; CHECK: [[PROF25]] = !{!"branch_weights", i32 1, i32 197}
-; CHECK: [[PROF26]] = !{!"branch_weights", i32 99, i32 101}
+; CHECK: [[PROF25]] = !{!"branch_weights", i32 17, i32 7}
+; CHECK: [[PROF26]] = !{!"branch_weights", i32 1, i32 197}
+; CHECK: [[PROF27]] = !{!"branch_weights", i32 99, i32 101}
;.
diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt
index 98c6d84950ff7..dc80fea5559e3 100644
--- a/llvm/utils/profcheck-xfail.txt
+++ b/llvm/utils/profcheck-xfail.txt
@@ -1856,7 +1856,6 @@ Transforms/SimplifyCFG/merge-cond-stores.ll
Transforms/SimplifyCFG/multiple-phis.ll
Transforms/SimplifyCFG/PhiBlockMerge.ll
Transforms/SimplifyCFG/pr48641.ll
-Transforms/SimplifyCFG/preserve-branchweights.ll
Transforms/SimplifyCFG/preserve-store-alignment.ll
Transforms/SimplifyCFG/rangereduce.ll
Transforms/SimplifyCFG/RISCV/select-trunc-i64.ll
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The test says:
; Don't crash processing degenerate metadata.
define void @or_icmps_empty_metadata(i32 %x, i32 %y, ptr %p) {
So I believe the intention here is specifically to test this degenerate situation of 0, 0 branch weights.
Possibly these should be rejected by the verifier? If that were the case, there would be no need to test them here.
Yup, and there's another test that I've been looking at that is about all-zero branch weights. I think a different fix is necessary than the one here. I'll mark this 'draft' for now. |
1fc13fb
to
e5a8d3f
Compare
preserve-branchweights.ll
setBranchWeights
llvm/include/llvm/IR/ProfDataUtils.h
Outdated
@@ -147,6 +147,12 @@ LLVM_ABI bool extractProfTotalWeight(const Instruction &I, | |||
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights, | |||
bool IsExpected); | |||
|
|||
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef<uint64_t> Weights, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a comment for the new method -- indicate weight fitting is done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe setAndFitBranchWeights()
? That way no one is surprised, or can choose the wrong overload? Maybe not a huge deal once its documented though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
something like that - ptal
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
In MisExpect, we rely on the presence of the branch weight to tell us if checking/diagnostics should occur at all. Generally, we just check the annotated weight from the expect intrinsic against the real branch weight and report directly if they don't match up. So if you marked it expected, but the weights were zero, we issue a diagnostic, but if the weights aren't there, we assume it wasn't instrumented. That's part of its design, and we need weights to be attached to check frontend instrumentation at all. For middle-end instrumentation, I think we're OK mostly. The expect intrinsics are lowered earlier, and we check the branch weights as they're added to the IR. Given that you're dropping those weights from instructions, though, I assume we'll miss some level of diagnostics after this patch in ThinLTO, since I recall something in the PGO pipeline runs a second time under ThinLTO. I think for SampleProf, but maybe both modes. Do you know if there's a way to avoid dropping those in the ThinLTO case? |
should we always propagate IsExpected maybe? |
Oh, nevermind. I realized the double checking was why we created teh IR extension Expected in the first place, and that shouldn't happen anymore. I think we're good, w.r.t. ThinLTO. Can you double check the frontend instr would still have weights though? I'm pretty sure we still need weights to exist once we hit the LowerExpectIntrinsicPass, or we can't report on it anymore. |
e5a8d3f
to
c8a3312
Compare
Spoke offline. TLDR; Frontends use createBranchWeights, and don't reset them (I checked clang and rustc), so we should be OK for MisExpect. |
92c69e4
to
6197c26
Compare
6197c26
to
7512b7c
Compare
; IR-LABEL: define void @func5_zero_branch_weight | ||
; IR: entry: | ||
; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC5_0:![0-9]+]] | ||
; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@MatzeB is this OK? If the all-zero weights are interesting for loop rotate, I can add a flag to setBranchWeights
to that effect.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think it matters as far as LoopRotate
is concerned. But what's the point of doing extra checking for all zero in the setter function and possibly removing the annotation there. Wouldn't it be simpler and more obvious if the functions just sets the values as given and we use a separate function if you want to remove the weights?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It comes from the code that I'm moving from SimplifyCFG. @david-xl believes there is some difference when leaving things as 0 vs removing, but we couldn't observe it in practice. I added the flag to allow us to experiment more. My take is that "0" really means "it's in a cold subgraph and happens to have been set" (by, e.g., iFDO), so leaving as 0 or removing should be equivalent.
I could flip the flag the other way, add an extra parameter to setBranchWeights
on whether to propagate or not 0s, and have SimplifyCFG use that parameter. I'd still flip the flag so we can see if we ever hit an issue, and if not, we can remove the flag, remove the parameter, and propagate things as given.
wdyt?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think stricly speaking branch weights (0, 0) is equivalent to (1,1) or (2,2), no? While this feels like it has some "unreachable" code vibe to it, I don't think that is used by anything / influences anything? And for all I know we make no effort to normalize (2,2) to (1,1) either, so may just as well keep (0,0) around too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think stricly speaking branch weights (0, 0) is equivalent to (1,1) or (2,2), no?
Well, only problem is that p(true) = branch_weight(true)/branch_weight(true)+branch_weight(false)
so 0/0
lol
I think we're in agreement about bias towards keeping what's passed around. I'd prefer the staged approach though, at minimum, easier for integration teams. Any pushback?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Mostly wanted to hear opinions/discussion, but I guess we can land as-is for now...
7512b7c
to
ccef0cc
Compare
|
||
if (AttachProfToDirectCall) | ||
setBranchWeights(NewInst, {static_cast<uint32_t>(Count)}, | ||
setBranchWeights(NewInst, ArrayRef<uint32_t>{static_cast<uint32_t>(Count)}, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this cast is unnecessary, now that you are no longer overloading setBranchWeights
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also, I think the right thing here is to fit, not to cast. @mingmingl-llvm.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a nice catch. AFAIK branch weights on direct calls are used by SampleFDO (not InstrFDO), and fitting it (using fitWeights
) rather than truncating it should improve the profile quality.
ccef0cc
to
c814f4a
Compare
c814f4a
to
6bfe29d
Compare
/// Push the weights right to fit in uint32_t. | ||
static SmallVector<uint32_t> fitWeights(ArrayRef<uint64_t> Weights) { | ||
SmallVector<uint32_t> Ret; | ||
Ret.reserve(Weights.size()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: SmallVector
has a constructor that accepts a size_t
, so this can be combined with the previous line.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
that resize
s. I wanted reserve
, so I can use push_back
void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights, | ||
bool IsExpected) { | ||
bool IsExpected, bool ElideAllZero) { | ||
if ((ElideAllZeroBranchWeights && ElideAllZero) && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should be || or && ? With &&, the zeros can not be elided (and default behavior is changed).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ElideAllZeroBranchWeights is true by default. It's only false when profcheck is enabled.
Then, the SimplifyCFG callers, which were the place where zero-elision was happening, call the new API with ElideAllZero = true
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should the default parameter to be true instead of 'false'?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should the default parameter to be true instead of 'false'?
no, because the behavior elsewhere was to not elide. Only SimplifyCFG had an eliding variant (I mean, there could be other passes with local variants, and we'll address accordingly)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I could rename the compiler flag to ElideAllZeroBranchWeightsIfRequested
for clarity.
LGTM |
The main difference between SimplifyCFG's `setBranchWeights` and the ProfDataUtils' is that the former doesn't propagate all-zero weights. That seems like a sensible thing to do, so updated the latter accordingly, and added a flag to control the behavior. Also moved to ProfDataUtils the logic fitting 64-bit weights to 32-bit. As side-effect, this fixes some profcheck failures.
The main difference between SimplifyCFG's
setBranchWeights
and the ProfDataUtils' is that the former doesn't propagate all-zero weights. That seems like a sensible thing to do, so updated the latter accordingly, and added a flag to control the behavior.Also moved to ProfDataUtils the logic fitting 64-bit weights to 32-bit.
As side-effect, this fixes some profcheck failures.