[mlir][Target] Support Fatbin target for static nvptxcompiler #118044
Conversation
@llvm/pr-subscribers-mlir-llvm @llvm/pr-subscribers-mlir

Author: Zichen Lu (MikaOvO)

Changes: In `lib/Target/LLVM/NVVM/Target.cpp`, `NVPTXSerializer` compiles PTX to a binary via two different flows, selected by `MLIR_ENABLE_NVPTXCOMPILER`. When MLIR is built with `-DMLIR_ENABLE_NVPTXCOMPILER=ON`, that flow does not check whether the target is `gpu::CompilationTarget::Fatbin` and compiles the PTX directly to a cubin, which is inconsistent with the other flow. I have tested this locally; the two flows return the same fatbin result when given the same `GpuModule`.

Full diff: https://github.com/llvm/llvm-project/pull/118044.diff

2 Files Affected:
- mlir/lib/Target/LLVM/CMakeLists.txt
- mlir/lib/Target/LLVM/NVVM/Target.cpp
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index 422f7e5fa7caec..4843377f1797fa 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -66,12 +66,14 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
set(MLIR_CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_ROOT})
endif()
- # Add the `nvptxcompiler` library.
+ # Add the `nvptxcompiler` library and the `nvfatbin` library.
if(MLIR_ENABLE_NVPTXCOMPILER)
# Find the `nvptxcompiler` library.
# TODO: Bump the MLIR CMake version to 3.25 and use `CUDA::nvptxcompiler_static`.
find_library(MLIR_NVPTXCOMPILER_LIB_PATH nvptxcompiler_static
PATHS ${CUDAToolkit_LIBRARY_DIR} NO_DEFAULT_PATH)
+ find_library(MLIR_NVFATBIN_LIB_PATH nvfatbin_static
+ PATHS ${CUDAToolkit_LIBRARY_DIR} NO_DEFAULT_PATH)
# Fail if `nvptxcompiler_static` couldn't be found.
if(MLIR_NVPTXCOMPILER_LIB_PATH STREQUAL "MLIR_NVPTXCOMPILER_LIB_PATH-NOTFOUND")
@@ -79,14 +81,24 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
"Requested using the `nvptxcompiler` library backend but it couldn't be found.")
endif()
+ # Fail if `nvfatbin_static` couldn't be found.
+ if(MLIR_NVFATBIN_LIB_PATH STREQUAL "MLIR_NVFATBIN_LIB_PATH-NOTFOUND")
+ message(FATAL_ERROR
+ "Requested using the `nvfatbin` library backend but it couldn't be found.")
+ endif()
+
add_library(MLIR_NVPTXCOMPILER_LIB STATIC IMPORTED GLOBAL)
+ add_library(MLIR_NVFATBIN_LIB STATIC IMPORTED GLOBAL)
# Downstream projects can modify this path and use it in CMake. For example:
# add_library(MLIR_NVPTXCOMPILER_LIB STATIC IMPORTED GLOBAL)
# set_property(TARGET MLIR_NVPTXCOMPILER_LIB PROPERTY IMPORTED_LOCATION ${...})
# where `...` is to be replaced with the path to the library.
set_property(TARGET MLIR_NVPTXCOMPILER_LIB PROPERTY IMPORTED_LOCATION ${MLIR_NVPTXCOMPILER_LIB_PATH})
- # Link against `nvptxcompiler_static`. TODO: use `CUDA::nvptxcompiler_static`.
+ set_property(TARGET MLIR_NVFATBIN_LIB PROPERTY IMPORTED_LOCATION ${MLIR_NVFATBIN_LIB_PATH})
+ # Link against `nvptxcompiler_static` and `nvfatbin_static`.
+ # TODO: use `CUDA::nvptxcompiler_static` and `CUDA::nvfatbin_static`.
target_link_libraries(MLIRNVVMTarget PRIVATE MLIR_NVPTXCOMPILER_LIB)
+ target_link_libraries(MLIRNVVMTarget PRIVATE MLIR_NVFATBIN_LIB)
target_include_directories(obj.MLIRNVVMTarget PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
endif()
else()
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
index bca26e3a0e84a9..1c5aa24dffc9d0 100644
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -473,6 +473,18 @@ NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
} \
} while (false)
+#include <nvFatbin.h>
+
+#define RETURN_ON_NVFATBIN_ERROR(expr) \
+ do { \
+ auto result = (expr); \
+ if (result != nvFatbinResult::NVFATBIN_SUCCESS) { \
+ emitError(loc) << llvm::Twine(#expr).concat(" failed with error: ") \
+ << nvFatbinGetErrorString(result); \
+ return std::nullopt; \
+ } \
+ } while (false)
+
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
Location loc = getOperation().getLoc();
@@ -486,6 +498,11 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
targetOptions.tokenizeCmdOptions();
cmdOpts.second.append(
{"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});
+ bool useFatbin32 = false;
+ for (const char *option : cmdOpts.second) {
+ if (StringRef(option) == "-32")
+ useFatbin32 = true;
+ }
// Create the compiler handle.
RETURN_ON_NVPTXCOMPILER_ERROR(
@@ -538,6 +555,30 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
});
#undef DEBUG_TYPE
RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
+
+ if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin) {
+ const char *cubinOpts[1] = {"-64"};
+ if (useFatbin32) {
+ cubinOpts[0] = {"-32"};
+ }
+ nvFatbinHandle handle;
+
+ auto chip = getTarget().getChip();
+ chip.consume_front("sm_");
+
+ RETURN_ON_NVFATBIN_ERROR(nvFatbinCreate(&handle, cubinOpts, 1));
+ RETURN_ON_NVFATBIN_ERROR(nvFatbinAddCubin(
+ handle, binary.data(), binary.size(), chip.data(), nullptr));
+ RETURN_ON_NVFATBIN_ERROR(nvFatbinAddPTX(
+ handle, ptxCode.data(), ptxCode.size(), chip.data(), nullptr, nullptr));
+
+ size_t fatbinSize;
+ RETURN_ON_NVFATBIN_ERROR(nvFatbinSize(handle, &fatbinSize));
+ SmallVector<char, 0> fatbin(fatbinSize, 0);
+ RETURN_ON_NVFATBIN_ERROR(nvFatbinGet(handle, (void *)fatbin.data()));
+ RETURN_ON_NVFATBIN_ERROR(nvFatbinDestroy(&handle));
+ return fatbin;
+ }
return binary;
}
#endif // MLIR_ENABLE_NVPTXCOMPILER
Should we add a …?
Dinistro left a comment:
Dropped a few drive-by nits.
✅ With the latest revision this PR passed the C/C++ code formatter.
Hi, I added `MLIR_ENABLE_NVPTXCOMPILER_NVFATBIN`. Could you please help review this and provide suggestions on whether this macro is needed?
What happens in this case: `MLIR_ENABLE_NVPTXCOMPILER` is enabled but `nvfatbin_static` is not available? How do you preserve consistency when `MLIR_ENABLE_NVPTXCOMPILER_NVFATBIN` is not set?
Isn't libnvptx_compiler.a distributed alongside libnvfatbin_static.a? If yes, then which use case is this addressing?
My concern is that downstream projects using a custom, partially extracted CUDA Toolkit (one without libnvfatbin_static.a extracted) would fail to build after this MR if there were no `MLIR_ENABLE_NVPTXCOMPILER_NVFATBIN`. Of course, that option does not preserve consistency when it is not enabled.
I don't see this as a problem? Seems like a legitimate break and they should update their flow.
That makes it something we should not support then, IMO.
You are right. I think there are two approaches here: keep `MLIR_ENABLE_NVPTXCOMPILER_NVFATBIN` as a separate option, or drop it and require `nvfatbin_static` whenever `MLIR_ENABLE_NVPTXCOMPILER` is enabled. Which one is better? Thanks.
Either is fine, but I would take the simplest approach unless there is an actual identified need.
I have fixed this by removing `MLIR_ENABLE_NVPTXCOMPILER_NVFATBIN`.
Background
In `lib/Target/LLVM/NVVM/Target.cpp`, `NVPTXSerializer` compiles PTX to a binary via two different flows, selected by `MLIR_ENABLE_NVPTXCOMPILER`. When MLIR is built with `-DMLIR_ENABLE_NVPTXCOMPILER=ON`, that flow does not check whether the target is `gpu::CompilationTarget::Fatbin` and compiles the PTX directly to a cubin, which is inconsistent with the other flow.

Implementation

Use the static `nvfatbin` library to pack the generated cubin (and its PTX) into a fatbin when the compilation target is `Fatbin`.

I have tested this locally; the two flows return the same fatbin result when given the same `GpuModule`.
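For reference, the nvFatbin call sequence that the new code path relies on can also be exercised on its own. The sketch below is illustrative only: it mirrors the create/add/size/get/destroy sequence from the patch, assumes a CUDA Toolkit that ships `nvFatbin.h` and `nvfatbin_static`, and the helper name `packFatbin`, its parameters, and the `check` lambda are hypothetical names that are not part of this PR.

```cpp
#include <nvFatbin.h>

#include <cstdio>
#include <optional>
#include <string>
#include <vector>

// Packs one cubin plus its PTX, both built for `arch` (the chip name without
// the "sm_" prefix, e.g. "80"), into a fatbin. Mirrors the sequence used in
// NVPTXSerializer::compileToBinaryNVPTX; returns std::nullopt on any error.
// Link against nvfatbin_static (or nvfatbin) to build this translation unit.
std::optional<std::vector<char>>
packFatbin(const std::vector<char> &cubin, const std::string &ptx,
           const char *arch, bool use32Bit = false) {
  // Same cubin options as the patch: "-32" when 32-bit output was requested,
  // otherwise "-64".
  const char *opts[1] = {use32Bit ? "-32" : "-64"};

  auto check = [](nvFatbinResult result, const char *what) {
    if (result == nvFatbinResult::NVFATBIN_SUCCESS)
      return true;
    std::fprintf(stderr, "%s failed with error: %s\n", what,
                 nvFatbinGetErrorString(result));
    return false;
  };

  nvFatbinHandle handle;
  if (!check(nvFatbinCreate(&handle, opts, 1), "nvFatbinCreate"))
    return std::nullopt;

  // Add the compiled cubin and the original PTX for the same architecture.
  if (!check(nvFatbinAddCubin(handle, cubin.data(), cubin.size(), arch,
                              nullptr),
             "nvFatbinAddCubin") ||
      !check(nvFatbinAddPTX(handle, ptx.data(), ptx.size(), arch, nullptr,
                            nullptr),
             "nvFatbinAddPTX")) {
    (void)nvFatbinDestroy(&handle);
    return std::nullopt;
  }

  // Query the size, copy the fatbin out, and release the handle.
  size_t fatbinSize = 0;
  if (!check(nvFatbinSize(handle, &fatbinSize), "nvFatbinSize")) {
    (void)nvFatbinDestroy(&handle);
    return std::nullopt;
  }
  std::vector<char> fatbin(fatbinSize, 0);
  if (!check(nvFatbinGet(handle, fatbin.data()), "nvFatbinGet")) {
    (void)nvFatbinDestroy(&handle);
    return std::nullopt;
  }
  if (!check(nvFatbinDestroy(&handle), "nvFatbinDestroy"))
    return std::nullopt;
  return fatbin;
}
```

As in the patch, the cubin options passed to `nvFatbinCreate` are `-32` or `-64`, and the architecture string is the chip name with the `sm_` prefix stripped (the `chip.consume_front("sm_")` call in `Target.cpp`).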