-
Notifications
You must be signed in to change notification settings - Fork 15.1k
ExpandFp: Require RuntimeLibcallsInfo analysis #165197
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-analysis Author: Matt Arsenault (arsenm) ChangesNot sure I'm doing the new pass manager handling correctly. I do Full diff: https://github.com/llvm/llvm-project/pull/165197.diff 5 Files Affected:
diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index f44eb227133ae..9386ffe7791a3 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/RuntimeLibcallInfo.h"
#include "llvm/Analysis/SimplifyQuery.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -1092,6 +1093,8 @@ class ExpandFpLegacyPass : public FunctionPass {
auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
AssumptionCache *AC = nullptr;
+ const RTLIB::RuntimeLibcallsInfo *Libcalls =
+ &getAnalysis<RuntimeLibraryInfoWrapper>().getRTLCI(*F.getParent());
if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone())
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -1104,6 +1107,7 @@ class ExpandFpLegacyPass : public FunctionPass {
AU.addRequired<AssumptionCacheTracker>();
AU.addPreserved<AAResultsWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<RuntimeLibraryInfoWrapper>();
}
};
} // namespace
@@ -1126,6 +1130,15 @@ PreservedAnalyses ExpandFpPass::run(Function &F, FunctionAnalysisManager &FAM) {
AssumptionCache *AC = nullptr;
if (OptLevel != CodeGenOptLevel::None)
AC = &FAM.getResult<AssumptionAnalysis>(F);
+
+ auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+ const RTLIB::RuntimeLibcallsInfo *Libcalls =
+ MAMProxy.getCachedResult<RuntimeLibraryAnalysis>(*F.getParent());
+ if (!Libcalls) {
+ F.getContext().emitError("'runtime-libcall-info' analysis required");
+ return PreservedAnalyses::all();
+ }
+
return runImpl(F, TLI, AC) ? PreservedAnalyses::none()
: PreservedAnalyses::all();
}
@@ -1133,6 +1146,7 @@ PreservedAnalyses ExpandFpPass::run(Function &F, FunctionAnalysisManager &FAM) {
char ExpandFpLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(ExpandFpLegacyPass, "expand-fp",
"Expand certain fp instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(RuntimeLibraryInfoWrapper)
INITIALIZE_PASS_END(ExpandFpLegacyPass, "expand-fp", "Expand fp", false, false)
FunctionPass *llvm::createExpandFpPass(CodeGenOptLevel OptLevel) {
diff --git a/llvm/test/Transforms/ExpandFp/AMDGPU/frem-inf.ll b/llvm/test/Transforms/ExpandFp/AMDGPU/frem-inf.ll
index f70f0d25f172d..4d302f63e1f0b 100644
--- a/llvm/test/Transforms/ExpandFp/AMDGPU/frem-inf.ll
+++ b/llvm/test/Transforms/ExpandFp/AMDGPU/frem-inf.ll
@@ -1,5 +1,5 @@
-; RUN: opt -mtriple=amdgcn -passes="expand-fp<O0>" %s -S -o - | FileCheck --check-prefixes CHECK %s
-; RUN: opt -mtriple=amdgcn -passes="expand-fp<O1>" %s -S -o - | FileCheck --check-prefixes CHECK,OPT1 %s
+; RUN: opt -mtriple=amdgcn -passes="require<runtime-libcall-info>,expand-fp<O0>" %s -S -o - | FileCheck --check-prefixes CHECK %s
+; RUN: opt -mtriple=amdgcn -passes="require<runtime-libcall-info>,expand-fp<O1>" %s -S -o - | FileCheck --check-prefixes CHECK,OPT1 %s
; Check the handling of potentially infinite numerators in the frem
; expansion at different optimization levels and with different
diff --git a/llvm/test/Transforms/ExpandFp/AMDGPU/frem.ll b/llvm/test/Transforms/ExpandFp/AMDGPU/frem.ll
index 4c0f9db147c96..56ccfb6bf454c 100644
--- a/llvm/test/Transforms/ExpandFp/AMDGPU/frem.ll
+++ b/llvm/test/Transforms/ExpandFp/AMDGPU/frem.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -mtriple=amdgcn -passes="expand-fp<O1>" %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=amdgcn -passes="require<runtime-libcall-info>,expand-fp<O1>" %s -S -o - | FileCheck %s
define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
; CHECK-LABEL: define amdgpu_kernel void @frem_f16(
diff --git a/llvm/test/Transforms/ExpandFp/AMDGPU/missing-analysis.ll b/llvm/test/Transforms/ExpandFp/AMDGPU/missing-analysis.ll
new file mode 100644
index 0000000000000..5cad68e66d3ee
--- /dev/null
+++ b/llvm/test/Transforms/ExpandFp/AMDGPU/missing-analysis.ll
@@ -0,0 +1,6 @@
+; RUN: not opt -mtriple=amdgcn -passes=expand-fp -disable-output %s 2>&1 | FileCheck %s
+
+; CHECK: 'runtime-libcall-info' analysis required
+define void @empty() {
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandFp/AMDGPU/pass-parameters.ll b/llvm/test/Transforms/ExpandFp/AMDGPU/pass-parameters.ll
index 03cafd4ff1160..b3ee0a94ed348 100644
--- a/llvm/test/Transforms/ExpandFp/AMDGPU/pass-parameters.ll
+++ b/llvm/test/Transforms/ExpandFp/AMDGPU/pass-parameters.ll
@@ -1,7 +1,7 @@
-; RUN: opt -mtriple=amdgcn -passes="expand-fp<O0>" %s -S -o /dev/null
-; RUN: opt -mtriple=amdgcn -passes="expand-fp<O1>" %s -S -o /dev/null
-; RUN: opt -mtriple=amdgcn -passes="expand-fp<O2>" %s -S -o /dev/null
-; RUN: opt -mtriple=amdgcn -passes="expand-fp<O3>" %s -S -o /dev/null
+; RUN: opt -mtriple=amdgcn -passes="require<runtime-libcall-info>,expand-fp<O0>" -disable-output %s
+; RUN: opt -mtriple=amdgcn -passes="require<runtime-libcall-info>,expand-fp<O1>" -disable-output %s
+; RUN: opt -mtriple=amdgcn -passes="require<runtime-libcall-info>,expand-fp<O2>" -disable-output %s
+; RUN: opt -mtriple=amdgcn -passes="require<runtime-libcall-info>,expand-fp<O3>" -disable-output %s
; RUN: not opt -mtriple=amdgcn -passes="expand-fp<O4>" %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=TOO-LARGE %s
; TOO-LARGE: {{.*}}invalid optimization level for expand-fp pass: 4
|
| const RTLIB::RuntimeLibcallsInfo *Libcalls = | ||
| MAMProxy.getCachedResult<RuntimeLibraryAnalysis>(*F.getParent()); | ||
| if (!Libcalls) { | ||
| F.getContext().emitError("'runtime-libcall-info' analysis required"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this pattern widely used? Should this just be an assert instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You cannot assert Libcalls here. If nothing runs the analysis before this point, the result will be null. That's why the explicit require in the pass pipeline in the tests is necessary.
|
|
||
| auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); | ||
| const RTLIB::RuntimeLibcallsInfo *Libcalls = | ||
| MAMProxy.getCachedResult<RuntimeLibraryAnalysis>(*F.getParent()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@arsenm In the PR description, you wrote:
Not sure I'm doing the new pass manager handling correctly. I do
not like needing to manually check if the cached module pass is
available and manually erroring in every pass.
I am not super familiar with the details of this, but I had a look at the implementation and the documentation. The documentation on ModuleAnalysisManagerFunctionProxy aka OuterAnalysisManagerProxy<ModuleAnalysisManager, Function> in include/llvm/IR/PassManager.h states that:
This proxy only exposes the const interface of the outer analysis manager,
to indicate that you cannot cause an outer analysis to run from within an
inner pass. Instead, you must rely on the getCachedResult API.
This is due to keeping potential future concurrency in mind.
Hence it seems to me that you cannot/should not request the computation of the analysis information from here and you need to do it in this way.
aeubanks
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if the analysis is primarily intended to be used from function passes, it should be a function analysis, like TLI
e.g.
llvm-project/llvm/tools/opt/NewPMDriver.cpp
Line 476 in 22079e3
| FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); }); |
Most of the uses will be function passes, but this is fundamentally a module level concept and should be a module analysis |
c60d7b2 to
57b2c82
Compare
5ea7deb to
14f5831
Compare
if every function in a module will always have the same set of available libcalls then this way of making it a module analysis is ok at the cost of usability of the module analysis in function passes, but imo this is something that a function cares about, not a module. don't feel too strongly about this though |
Yes, the set of RuntimeLibcalls should be consistent through a module, and only invalidated on link
Separately I am going to introduce a new LibcallLoweringInfo, which will need to become another analysis which will be function level (though it's really Module + subtarget, so I'm questioning if it should really be a function analysis as well) |
14f5831 to
7fd144c
Compare
57b2c82 to
0158564
Compare
7fd144c to
3f57ae9
Compare
4da4e58 to
3626798
Compare
3f57ae9 to
01bf0d3
Compare
3626798 to
833269e
Compare
01bf0d3 to
c4f4a6b
Compare
833269e to
bab2e01
Compare
c4f4a6b to
70f2855
Compare
Currently RuntimeLibcallsInfo is a hardcoded list based on the triple. In the future the available libcall set should be dynamically modifiable with module flags. Note this isn't really used yet. TargetLowering is still constructing its own copy, and untangling that to use this requires several more steps.
Not sure I'm doing the new pass manager handling correctly. I do not like needing to manually check if the cached module pass is available and manually erroring in every pass.
bab2e01 to
85d64c1
Compare
70f2855 to
6d5974e
Compare

Not sure I'm doing the new pass manager handling correctly. I do
not like needing to manually check if the cached module pass is
available and manually erroring in every pass.