diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index 543e917b528b3..4be147d02d579 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -119,6 +119,45 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
   )
 endif()
 
+
+function(embed_binary_to_src file output_file symbol)
+  file(READ ${file} filedata HEX)
+  # Convert hex data for C compatibility
+  string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata ${filedata})
+  # Write data to output file
+  file(WRITE ${output_file} "const char ${symbol}[] = {${filedata}};\nconst int ${symbol}_size = sizeof(${symbol});\n")
+endfunction()
+
+set(MLIR_NVVM_EMBED_LIBDEVICE 0 CACHE BOOL "Embed CUDA libdevice.bc in the binary at build time instead of looking it up at runtime")
+if (MLIR_NVVM_EMBED_LIBDEVICE)
+  if (NOT MLIR_NVVM_LIBDEVICE_PATH)
+    if(CUDAToolkit_FOUND)
+      find_file(MLIR_NVVM_LIBDEVICE_PATH libdevice.10.bc
+                PATHS ${CUDAToolkit_LIBRARY_ROOT}
+                PATH_SUFFIXES "nvvm/libdevice" NO_DEFAULT_PATH REQUIRED)
+    else()
+      message(FATAL_ERROR
+        "Requested embedding libdevice in the binary but the CUDA toolkit couldn't be found.")
+    endif()
+  endif()
+
+  embed_binary_to_src(${MLIR_NVVM_LIBDEVICE_PATH} ${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c _mlir_embedded_libdevice)
+  add_mlir_library(MLIRNVVMLibdevice
+    ${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c
+  )
+  target_link_libraries(MLIRNVVMTarget PRIVATE MLIRNVVMLibdevice)
+  target_compile_definitions(obj.MLIRNVVMTarget
+    PRIVATE
+    MLIR_NVVM_EMBED_LIBDEVICE=1
+  )
+else()
+  target_compile_definitions(obj.MLIRNVVMTarget
+    PRIVATE
+    MLIR_NVVM_EMBED_LIBDEVICE=0
+  )
+endif()
+
+
 if (MLIR_ENABLE_ROCM_CONVERSIONS)
   set(AMDGPU_LIBS
     AMDGPUAsmParser
@@ -169,3 +208,4 @@ if(MLIR_ENABLE_ROCM_CONVERSIONS)
     __DEFAULT_ROCM_PATH__="${DEFAULT_ROCM_PATH}"
   )
 endif()
+
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
index a9f7806b10f40..86ff848d6c6c2 100644
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -16,6 +16,10 @@
 #include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+#include "mlir/IR/BuiltinAttributeInterfaces.h"
+#include "mlir/IR/BuiltinDialect.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/DialectResourceBlobManager.h"
 #include "mlir/Target/LLVM/NVVM/Utils.h"
 #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
@@ -33,6 +37,7 @@
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/raw_ostream.h"
 
+#include <cstdint>
 #include <cstdlib>
 
 using namespace mlir;
@@ -42,6 +47,9 @@ using namespace mlir::NVVM;
 #define __DEFAULT_CUDATOOLKIT_PATH__ ""
 #endif
 
+extern "C" const char _mlir_embedded_libdevice[];
+extern "C" const unsigned _mlir_embedded_libdevice_size;
+
 namespace {
 // Implementation of the `TargetAttrInterface` model.
 class NVVMTargetAttrImpl
@@ -130,6 +138,33 @@ ArrayRef<Attribute> SerializeGPUModuleBase::getLibrariesToLink() const {
 
 // Try to append `libdevice` from a CUDA toolkit installation.
 LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
+#if MLIR_NVVM_EMBED_LIBDEVICE
+  // If libdevice is embedded in the binary, we don't look it up on the
+  // filesystem.
+  MLIRContext *ctx = target.getContext();
+  auto type =
+      RankedTensorType::get(ArrayRef<int64_t>{_mlir_embedded_libdevice_size},
+                            IntegerType::get(ctx, 8));
+  auto resourceManager = DenseResourceElementsHandle::getManagerInterface(ctx);
+
+  // Look up whether we already loaded the resource; otherwise create it.
+  DialectResourceBlobManager::BlobEntry *blob =
+      resourceManager.getBlobManager().lookup("_mlir_embedded_libdevice");
+  if (blob) {
+    librariesToLink.push_back(DenseResourceElementsAttr::get(
+        type, DenseResourceElementsHandle(
+                  blob, ctx->getLoadedDialect<BuiltinDialect>())));
+    return success();
+  }
+
+  // Allocate a resource using one of the UnmanagedAsmResourceBlob methods to
+  // wrap the embedded data.
+  auto unmanagedBlob = UnmanagedAsmResourceBlob::allocateInferAlign(
+      ArrayRef<char>{_mlir_embedded_libdevice, _mlir_embedded_libdevice_size});
+  librariesToLink.push_back(DenseResourceElementsAttr::get(
+      type, resourceManager.insert("_mlir_embedded_libdevice",
+                                   std::move(unmanagedBlob))));
+#else
   StringRef pathRef = getToolkitPath();
   if (!pathRef.empty()) {
     SmallVector<char, 256> path;
@@ -149,6 +184,7 @@ LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
     }
     librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef));
   }
+#endif
   return success();
 }
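
For reference, the `embed_binary_to_src` helper above simply hex-dumps `libdevice.10.bc` into a C translation unit. When the build is configured with `-DMLIR_NVVM_EMBED_LIBDEVICE=1` (optionally pointing `MLIR_NVVM_LIBDEVICE_PATH` at a specific bitcode file), the generated `libdevice_embedded.c` has roughly the shape sketched below; the byte values shown are illustrative placeholders, not the real libdevice contents:

```c
/* Sketch of the generated libdevice_embedded.c (placeholder bytes only). */
const char _mlir_embedded_libdevice[] = {
    0x42, 0x43, 0xc0, 0xde, /* ... remaining bytes of libdevice.10.bc ... */
};
const int _mlir_embedded_libdevice_size = sizeof(_mlir_embedded_libdevice);
```

On the C++ side, `appendStandardLibs()` wraps this array in an `UnmanagedAsmResourceBlob`, so the embedded bitcode is referenced in place rather than copied, registers it once under the `_mlir_embedded_libdevice` resource key, and reuses the cached blob entry on later serializations.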