diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td index 0c8a0c7a677ab..bfb407b3d7907 100644 --- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td @@ -113,7 +113,9 @@ def GpuModuleToBinaryPass Option<"compilationTarget", "format", "std::string", [{"fatbin"}], "The target representation of the compilation process.">, Option<"elfSection", "section", "std::string", [{""}], - "ELF section where binary is to be located."> + "ELF section where binary is to be located.">, + Option<"dumpIntermediates", "dump-intermediates", "std::string", [{""}], + "Directory to dump intermediate artifacts (LLVM IR, device assembly)."> ]; } diff --git a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp index 95d5cadbd4e1a..6eef8ba69617c 100644 --- a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp @@ -17,6 +17,9 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/ToolOutputFile.h" using namespace mlir; using namespace mlir::gpu; @@ -26,6 +29,26 @@ namespace mlir { #include "mlir/Dialect/GPU/Transforms/Passes.h.inc" } // namespace mlir +static LogicalResult +dumpToFile(Operation *op, StringRef dumpDir, const llvm::Twine &filename, + function_ref writeContent) { + if (dumpDir.empty()) + return success(); + + llvm::SmallString<128> path(dumpDir); + llvm::sys::path::append(path, filename); + + std::error_code ec; + llvm::ToolOutputFile output(path, ec, llvm::sys::fs::OF_None); + if (ec) + return op->emitError() << "Failed to create file '" << path + << "': " << ec.message(); + + writeContent(output.os()); + output.keep(); + return success(); +} + namespace { class GpuModuleToBinaryPass : public impl::GpuModuleToBinaryPassBase { @@ -64,11 +87,53 @@ void GpuModuleToBinaryPass::runOnOperation() { SmallVector librariesToLink; for (const std::string &path : linkFiles) librariesToLink.push_back(StringAttr::get(&getContext(), path)); + + Operation *op = getOperation(); + + // Create dump directory if specified. + if (!dumpIntermediates.empty()) { + if (std::error_code ec = + llvm::sys::fs::create_directories(dumpIntermediates)) { + op->emitError() << "Failed to create dump directory '" + << dumpIntermediates << "': " << ec.message(); + return signalPassFailure(); + } + } + + // Create callbacks for dumping intermediate artifacts if requested. + auto initialIRCallback = [&](llvm::Module &mod) { + if (failed( + dumpToFile(op, dumpIntermediates, mod.getName() + ".initial.ll", + [&](llvm::raw_ostream &os) { mod.print(os, nullptr); }))) + signalPassFailure(); + }; + + auto linkedIRCallback = [&](llvm::Module &mod) { + if (failed( + dumpToFile(op, dumpIntermediates, mod.getName() + ".linked.ll", + [&](llvm::raw_ostream &os) { mod.print(os, nullptr); }))) + signalPassFailure(); + }; + + auto optimizedIRCallback = [&](llvm::Module &mod) { + if (failed( + dumpToFile(op, dumpIntermediates, mod.getName() + ".opt.ll", + [&](llvm::raw_ostream &os) { mod.print(os, nullptr); }))) + signalPassFailure(); + }; + + auto isaCallback = [&](StringRef isa) { + if (failed(dumpToFile(op, dumpIntermediates, "kernel.isa", + [&](llvm::raw_ostream &os) { os << isa; }))) + signalPassFailure(); + }; + TargetOptions targetOptions(toolkitPath, librariesToLink, cmdOptions, - elfSection, *targetFormat, lazyTableBuilder); + elfSection, *targetFormat, lazyTableBuilder, + initialIRCallback, linkedIRCallback, + optimizedIRCallback, isaCallback); if (failed(transformGpuModulesToBinaries( - getOperation(), OffloadingLLVMTranslationAttrInterface(nullptr), - targetOptions))) + op, OffloadingLLVMTranslationAttrInterface(nullptr), targetOptions))) return signalPassFailure(); } diff --git a/mlir/lib/Target/LLVM/ModuleToObject.cpp b/mlir/lib/Target/LLVM/ModuleToObject.cpp index 4098ccc548dc1..d0a295a5abf50 100644 --- a/mlir/lib/Target/LLVM/ModuleToObject.cpp +++ b/mlir/lib/Target/LLVM/ModuleToObject.cpp @@ -143,7 +143,13 @@ LogicalResult ModuleToObject::loadBitcodeFilesFromList( std::unique_ptr ModuleToObject::translateToLLVMIR(llvm::LLVMContext &llvmContext) { - return translateModuleToLLVMIR(&getOperation(), llvmContext); + Operation &op = getOperation(); + StringRef name = "LLVMDialectModule"; + // Try to get nicer name from the operation. + if (auto symOp = dyn_cast(op); + symOp && symOp.getNameAttr()) + name = symOp.getNameAttr().getValue(); + return translateModuleToLLVMIR(&op, llvmContext, name); } LogicalResult diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp index f813f8db8fc94..6b3cbbddcea08 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp @@ -95,7 +95,11 @@ SerializeGPUModuleBase::SerializeGPUModuleBase( Operation &module, ROCDLTargetAttr target, const gpu::TargetOptions &targetOptions) : ModuleToObject(module, target.getTriple(), target.getChip(), - target.getFeatures(), target.getO()), + target.getFeatures(), target.getO(), + targetOptions.getInitialLlvmIRCallback(), + targetOptions.getLinkedLlvmIRCallback(), + targetOptions.getOptimizedLlvmIRCallback(), + targetOptions.getISACallback()), target(target), toolkitPath(targetOptions.getToolkitPath()), librariesToLink(targetOptions.getLibrariesToLink()) { @@ -428,6 +432,10 @@ std::optional> SerializeGPUModuleBase::moduleToObjectImpl( getOperation().emitError() << "failed translating the module to ISA"; return std::nullopt; } + + if (isaCallback) + isaCallback(serializedISA.value()); + #define DEBUG_TYPE "serialize-to-isa" LLVM_DEBUG({ llvm::dbgs() << "ISA for module: " diff --git a/mlir/test/Dialect/GPU/module-to-binary-nvvm-intermediates.mlir b/mlir/test/Dialect/GPU/module-to-binary-nvvm-intermediates.mlir new file mode 100644 index 0000000000000..249860952eff0 --- /dev/null +++ b/mlir/test/Dialect/GPU/module-to-binary-nvvm-intermediates.mlir @@ -0,0 +1,17 @@ +// REQUIRES: host-supports-nvptx +// RUN: rm -rf %t +// RUN: mlir-opt %s --gpu-module-to-binary='format=isa dump-intermediates=%t' | FileCheck %s +// RUN: test -f %t/kernel_module.initial.ll +// RUN: test -f %t/kernel_module.linked.ll +// RUN: test -f %t/kernel_module.opt.ll +// RUN: test -f %t/kernel.isa + +module attributes {gpu.container_module} { + // CHECK-LABEL: gpu.binary @kernel_module + + gpu.module @kernel_module [#nvvm.target] { + llvm.func @kernel(%arg0: f32) { + llvm.return + } + } +} diff --git a/mlir/test/Dialect/GPU/module-to-binary-rocdl-intermediates.mlir b/mlir/test/Dialect/GPU/module-to-binary-rocdl-intermediates.mlir new file mode 100644 index 0000000000000..5c01e86d9ee7e --- /dev/null +++ b/mlir/test/Dialect/GPU/module-to-binary-rocdl-intermediates.mlir @@ -0,0 +1,17 @@ +// REQUIRES: host-supports-amdgpu +// RUN: rm -rf %t +// RUN: mlir-opt %s --gpu-module-to-binary='format=isa dump-intermediates=%t' | FileCheck %s +// RUN: test -f %t/kernel_module.initial.ll +// RUN: test -f %t/kernel_module.linked.ll +// RUN: test -f %t/kernel_module.opt.ll +// RUN: test -f %t/kernel.isa + +module attributes {gpu.container_module} { + // CHECK-LABEL: gpu.binary @kernel_module + + gpu.module @kernel_module [#rocdl.target] { + llvm.func @kernel(%arg0: f32) { + llvm.return + } + } +}