-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[PILC][profcheck] Bias branch weights when optimizing sqrt #167742
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[PILC][profcheck] Bias branch weights when optimizing sqrt #167742
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-llvm-transforms Author: Mircea Trofin (mtrofin) ChangesFull diff: https://github.com/llvm/llvm-project/pull/167742.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 2b50ccdc2eeb4..bcedc1ab2a3ca 100644
--- a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
@@ -27,6 +28,10 @@
using namespace llvm;
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
+
#define DEBUG_TYPE "partially-inline-libcalls"
DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform",
@@ -94,7 +99,14 @@ static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
: Builder.CreateFCmpOGE(Call->getOperand(0),
ConstantFP::get(Ty, 0.0));
CurrBBTerm->setCondition(FCmp);
-
+ if (!ProfcheckDisableMetadataFixes &&
+ CurrBBTerm->getFunction()->getEntryCount()) {
+ // Presume the quick path - where we don't call the library call - is the
+ // frequent one
+ MDBuilder MDB(CurrBBTerm->getContext());
+ CurrBBTerm->setMetadata(LLVMContext::MD_prof,
+ MDB.createLikelyBranchWeights());
+ }
// Add phi operands.
Phi->addIncoming(Call, &CurrBB);
Phi->addIncoming(LibCall, LibCallBB);
diff --git a/llvm/test/Transforms/PartiallyInlineLibCalls/X86/good-prototype.ll b/llvm/test/Transforms/PartiallyInlineLibCalls/X86/good-prototype.ll
index 5719753aa2da0..4914726f1fcba 100644
--- a/llvm/test/Transforms/PartiallyInlineLibCalls/X86/good-prototype.ll
+++ b/llvm/test/Transforms/PartiallyInlineLibCalls/X86/good-prototype.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -S -passes=partially-inline-libcalls -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
-define float @f(float %val) {
+define float @f(float %val) !prof !0 {
; CHECK-LABEL: @f(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES:%.*]] = tail call float @sqrtf(float [[VAL:%.*]]) #[[ATTR0:[0-9]+]]
; CHECK-NEXT: [[TMP0:%.*]] = fcmp oge float [[VAL]], 0.000000e+00
-; CHECK-NEXT: br i1 [[TMP0]], label [[ENTRY_SPLIT:%.*]], label [[CALL_SQRT:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[ENTRY_SPLIT:%.*]], label [[CALL_SQRT:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK: call.sqrt:
; CHECK-NEXT: [[TMP1:%.*]] = tail call float @sqrtf(float [[VAL]])
; CHECK-NEXT: br label [[ENTRY_SPLIT]]
@@ -19,11 +19,11 @@ entry:
ret float %res
}
-define float @f_writeonly(float %val) {
+define float @f_writeonly(float %val) !prof !0 {
; CHECK-LABEL: @f_writeonly(
; CHECK-NEXT: [[RES:%.*]] = tail call float @sqrtf(float [[VAL:%.*]]) #[[ATTR0]]
; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge float [[VAL]], 0.000000e+00
-; CHECK-NEXT: br i1 [[TMP1]], label [[DOTSPLIT:%.*]], label [[CALL_SQRT:%.*]]
+; CHECK-NEXT: br i1 [[TMP1]], label [[DOTSPLIT:%.*]], label [[CALL_SQRT:%.*]], !prof [[PROF1]]
; CHECK: call.sqrt:
; CHECK-NEXT: [[TMP2:%.*]] = tail call float @sqrtf(float [[VAL]]) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: br label [[DOTSPLIT]]
@@ -45,8 +45,13 @@ define float @f_readonly(float %val) {
}
declare float @sqrtf(float)
+
+!0 = !{!"function_entry_count", i32 10}
;.
; CHECK: attributes #[[ATTR0]] = { memory(none) }
; CHECK: attributes #[[ATTR1]] = { memory(write) }
; CHECK: attributes #[[ATTR2]] = { memory(read) }
;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1048575, i32 1}
+;.
|
boomanaiden154
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Biasing against the native square root instruction returning NaN seems reasonable enough to me.
Prefacing PR #167742 (stacked above this), noticed that running UTC made some changes unrelated to the aforementioned PR. Factoring them out here.
f8a042d to
af22175
Compare
…nch_weights_when_optimizing_sqrt

Biasing towards the native
sqrt not returning NaN.Issue #147390