Skip to content

Commit beed7ae

Browse files
committed
[mlir][gpu] gpu-module-to-binary: add option to dump intermediate files
Add an option to specify a directory into which intermediate files are dumped during GPU binary generation, for debugging. Also fix a ROCDL lowering bug where callbacks weren't propagated.
1 parent 24b87b8 commit beed7ae

File tree

6 files changed

+113
-4
lines changed

6 files changed

+113
-4
lines changed

mlir/include/mlir/Dialect/GPU/Transforms/Passes.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,9 @@ def GpuModuleToBinaryPass
113113
Option<"compilationTarget", "format", "std::string", [{"fatbin"}],
114114
"The target representation of the compilation process.">,
115115
Option<"elfSection", "section", "std::string", [{""}],
116-
"ELF section where binary is to be located.">
116+
"ELF section where binary is to be located.">,
117+
Option<"dumpIntermediates", "dump-intermediates", "std::string", [{""}],
118+
"Directory to dump intermediate artifacts (LLVM IR, device assembly).">
117119
];
118120
}
119121

mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@
1717

1818
#include "llvm/ADT/STLExtras.h"
1919
#include "llvm/ADT/StringSwitch.h"
20+
#include "llvm/Support/Debug.h"
21+
#include "llvm/Support/FileSystem.h"
22+
#include "llvm/Support/Path.h"
23+
#include "llvm/Support/ToolOutputFile.h"
24+
25+
#define DEBUG_TYPE "gpu-module-to-binary"
2026

2127
using namespace mlir;
2228
using namespace mlir::gpu;
@@ -26,6 +32,27 @@ namespace mlir {
2632
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
2733
} // namespace mlir
2834

35+
/// Writes one intermediate artifact to `<dumpDir>/<filename>`.
///
/// `writeContent` is invoked with the opened output stream and is expected to
/// emit the artifact's contents. Dumping is a best-effort debugging aid: when
/// `dumpDir` is empty nothing happens, and a failure to open or write the file
/// is only reported through LLVM_DEBUG output rather than surfaced to the
/// caller.
static void dumpToFile(StringRef dumpDir, const llvm::Twine &filename,
                       function_ref<void(llvm::raw_ostream &)> writeContent) {
  // An empty directory means dumping was not requested.
  if (dumpDir.empty())
    return;

  llvm::SmallString<128> path(dumpDir);
  llvm::sys::path::append(path, filename);

  std::error_code ec;
  llvm::ToolOutputFile output(path, ec, llvm::sys::fs::OF_None);
  if (ec) {
    LLVM_DEBUG(llvm::dbgs() << "Failed to create file '" << path
                            << "': " << ec.message() << "\n");
    return;
  }

  llvm::raw_fd_ostream &os = output.os();
  writeContent(os);

  // Flush so that buffered write errors become visible. A raw_fd_ostream that
  // is destroyed with its error flag still set calls report_fatal_error, which
  // would turn a failed debug dump into a compiler crash; handle and clear the
  // error instead. Because keep() is not called on this path, ToolOutputFile
  // removes the partially written file on destruction.
  os.flush();
  if (os.has_error()) {
    LLVM_DEBUG(llvm::dbgs() << "Failed to write file '" << path
                            << "': " << os.error().message() << "\n");
    os.clear_error();
    return;
  }

  output.keep();
  LLVM_DEBUG(llvm::dbgs() << "Dumped intermediate to: " << path << "\n");
}
55+
2956
namespace {
3057
class GpuModuleToBinaryPass
3158
: public impl::GpuModuleToBinaryPassBase<GpuModuleToBinaryPass> {
@@ -64,8 +91,42 @@ void GpuModuleToBinaryPass::runOnOperation() {
6491
SmallVector<Attribute> librariesToLink;
6592
for (const std::string &path : linkFiles)
6693
librariesToLink.push_back(StringAttr::get(&getContext(), path));
94+
95+
// Create dump directory if specified
96+
if (!dumpIntermediates.empty()) {
97+
if (std::error_code ec =
98+
llvm::sys::fs::create_directories(dumpIntermediates)) {
99+
getOperation()->emitError() << "Failed to create dump directory '"
100+
<< dumpIntermediates << "': " << ec.message();
101+
return signalPassFailure();
102+
}
103+
}
104+
105+
// Create callbacks for dumping intermediate artifacts if requested
106+
auto initialIRCallback = [&](llvm::Module &module) {
107+
dumpToFile(dumpIntermediates, module.getName() + ".initial.ll",
108+
[&](llvm::raw_ostream &os) { module.print(os, nullptr); });
109+
};
110+
111+
auto linkedIRCallback = [&](llvm::Module &module) {
112+
dumpToFile(dumpIntermediates, module.getName() + ".linked.ll",
113+
[&](llvm::raw_ostream &os) { module.print(os, nullptr); });
114+
};
115+
116+
auto optimizedIRCallback = [&](llvm::Module &module) {
117+
dumpToFile(dumpIntermediates, module.getName() + ".opt.ll",
118+
[&](llvm::raw_ostream &os) { module.print(os, nullptr); });
119+
};
120+
121+
auto isaCallback = [&](StringRef isa) {
122+
dumpToFile(dumpIntermediates, "kernel.isa",
123+
[&](llvm::raw_ostream &os) { os << isa; });
124+
};
125+
67126
TargetOptions targetOptions(toolkitPath, librariesToLink, cmdOptions,
68-
elfSection, *targetFormat, lazyTableBuilder);
127+
elfSection, *targetFormat, lazyTableBuilder,
128+
initialIRCallback, linkedIRCallback,
129+
optimizedIRCallback, isaCallback);
69130
if (failed(transformGpuModulesToBinaries(
70131
getOperation(), OffloadingLLVMTranslationAttrInterface(nullptr),
71132
targetOptions)))

mlir/lib/Target/LLVM/ModuleToObject.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,11 @@ LogicalResult ModuleToObject::loadBitcodeFilesFromList(
143143

144144
/// Translates the operation held by this object into an LLVM IR module
/// created in `llvmContext`. The name handed to the translation is the
/// operation's `sym_name` string attribute when it has one, and
/// "LLVMDialectModule" otherwise.
std::unique_ptr<llvm::Module>
ModuleToObject::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
  Operation &moduleOp = getOperation();

  // Start from the generic fallback and override it with the symbol name, if
  // the operation carries one.
  StringRef moduleName = "LLVMDialectModule";
  if (auto symName = moduleOp.getAttrOfType<StringAttr>("sym_name"))
    moduleName = symName.getValue();

  return translateModuleToLLVMIR(&moduleOp, llvmContext, moduleName);
}
148152

149153
LogicalResult

mlir/lib/Target/LLVM/ROCDL/Target.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,11 @@ SerializeGPUModuleBase::SerializeGPUModuleBase(
9595
Operation &module, ROCDLTargetAttr target,
9696
const gpu::TargetOptions &targetOptions)
9797
: ModuleToObject(module, target.getTriple(), target.getChip(),
98-
target.getFeatures(), target.getO()),
98+
target.getFeatures(), target.getO(),
99+
targetOptions.getInitialLlvmIRCallback(),
100+
targetOptions.getLinkedLlvmIRCallback(),
101+
targetOptions.getOptimizedLlvmIRCallback(),
102+
targetOptions.getISACallback()),
99103
target(target), toolkitPath(targetOptions.getToolkitPath()),
100104
librariesToLink(targetOptions.getLibrariesToLink()) {
101105

@@ -428,6 +432,10 @@ std::optional<SmallVector<char, 0>> SerializeGPUModuleBase::moduleToObjectImpl(
428432
getOperation().emitError() << "failed translating the module to ISA";
429433
return std::nullopt;
430434
}
435+
436+
if (isaCallback)
437+
isaCallback(serializedISA.value());
438+
431439
#define DEBUG_TYPE "serialize-to-isa"
432440
LLVM_DEBUG({
433441
llvm::dbgs() << "ISA for module: "
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// REQUIRES: host-supports-nvptx
2+
// RUN: rm -rf %t || true
3+
// RUN: mlir-opt %s --gpu-module-to-binary='format=isa dump-intermediates=%t' | FileCheck %s
4+
// RUN: test -f %t/kernel_module.initial.ll
5+
// RUN: test -f %t/kernel_module.linked.ll
6+
// RUN: test -f %t/kernel_module.opt.ll
7+
// RUN: test -f %t/kernel.isa
8+
9+
module attributes {gpu.container_module} {
10+
// CHECK-LABEL: gpu.binary @kernel_module
11+
12+
gpu.module @kernel_module [#nvvm.target<chip = "sm_70">] {
13+
llvm.func @kernel(%arg0: f32) {
14+
llvm.return
15+
}
16+
}
17+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// REQUIRES: host-supports-amdgpu
2+
// RUN: rm -rf %t || true
3+
// RUN: mlir-opt %s --gpu-module-to-binary='format=isa dump-intermediates=%t' | FileCheck %s
4+
// RUN: test -f %t/kernel_module.initial.ll
5+
// RUN: test -f %t/kernel_module.linked.ll
6+
// RUN: test -f %t/kernel_module.opt.ll
7+
// RUN: test -f %t/kernel.isa
8+
9+
module attributes {gpu.container_module} {
10+
// CHECK-LABEL: gpu.binary @kernel_module
11+
12+
gpu.module @kernel_module [#rocdl.target<chip = "gfx942">] {
13+
llvm.func @kernel(%arg0: f32) {
14+
llvm.return
15+
}
16+
}
17+
}

0 commit comments

Comments
 (0)