From 92fb60ffadd636c2c96fa3acbda530b10f7a796b Mon Sep 17 00:00:00 2001 From: Guray Ozen Date: Thu, 14 Nov 2024 11:16:19 +0100 Subject: [PATCH] [MLIR][NVVM] Add `dumpISA` and `dumpMachineISA` Flags Currently, dumping virtual and machine-level ISA is restricted to debug builds, making it unavailable in release builds. However, the ability to view these ISAs can be valuable even in release builds or with production compilers. For instance, `nvcc` provides similar functionality, making it a useful reference. This PR introduces `dumpISA` and `dumpMachineISA` flags to the `GpuModuleToBinaryPass`. Additionally, it adds `dump-ptx` and `dump-sass` flags to the `GPUToNVVMPipelineOptions`. --- .../Dialect/GPU/IR/CompilationInterfaces.h | 18 ++++++++++-- .../mlir/Dialect/GPU/Pipelines/Passes.h | 6 ++++ .../mlir/Dialect/GPU/Transforms/Passes.td | 10 ++++++- mlir/include/mlir/Target/LLVM/NVVM/Utils.h | 2 +- mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 16 ++++++++--- .../GPU/Pipelines/GPUToNVVMPipeline.cpp | 2 ++ .../Dialect/GPU/Transforms/ModuleToBinary.cpp | 2 +- mlir/lib/Target/LLVM/NVVM/Target.cpp | 28 +++++++++---------- mlir/lib/Target/LLVM/ROCDL/Target.cpp | 7 ++--- mlir/test/Integration/GPU/CUDA/dump-ptx.mlir | 2 +- mlir/test/Integration/GPU/CUDA/dump-sass.mlir | 2 +- 11 files changed, 66 insertions(+), 29 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h b/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h index 6d7cb5ca7a7f8..88111aed2a0a6 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h +++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h @@ -52,7 +52,8 @@ class TargetOptions { StringRef toolkitPath = {}, ArrayRef linkFiles = {}, StringRef cmdOptions = {}, CompilationTarget compilationTarget = getDefaultCompilationTarget(), - function_ref getSymbolTableCallback = {}); + function_ref getSymbolTableCallback = {}, + bool dumpISA = false, bool dumpMachineISA = false); /// Returns the typeID. 
TypeID getTypeID() const; @@ -66,6 +67,12 @@ class TargetOptions { /// Returns the command line options. StringRef getCmdOptions() const; + /// Returns true if the generated ISA should be dumped. + bool getDumpISA() const; + + /// Returns true if the generated machine-level ISA should be dumped. + bool getDumpMachineISA() const; + /// Returns a tokenization of the command line options. std::pair> tokenizeCmdOptions() const; @@ -90,7 +97,8 @@ class TargetOptions { TypeID typeID, StringRef toolkitPath = {}, ArrayRef linkFiles = {}, StringRef cmdOptions = {}, CompilationTarget compilationTarget = getDefaultCompilationTarget(), - function_ref getSymbolTableCallback = {}); + function_ref getSymbolTableCallback = {}, + bool dumpISA = false, bool dumpMachineISA = false); /// Path to the target toolkit. std::string toolkitPath; @@ -102,6 +110,12 @@ class TargetOptions { /// process. std::string cmdOptions; + /// An optional flag to dump generated ISA. + bool dumpISA = false; + + /// An optional flag to dump generated and disassembled machine ISA. + bool dumpMachineISA = false; + /// Compilation process target format. 
CompilationTarget compilationTarget; diff --git a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h index caa0901bb4943..485eac6bad5b8 100644 --- a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h @@ -53,6 +53,12 @@ struct GPUToNVVMPipelineOptions "Whether to use the bareptr calling convention on the host (warning " "this should be false until the GPU layering is fixed)"), llvm::cl::init(false)}; + PassOptions::Option dumpPtx{ + *this, "dump-ptx", llvm::cl::desc("Dumps PTX code to the error output"), + llvm::cl::init(false)}; + PassOptions::Option dumpSass{ + *this, "dump-sass", llvm::cl::desc("Dumps SASS code to the error output"), + llvm::cl::init(false)}; }; //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td index 4a9ddafdd177d..f7cd9dd8c15bf 100644 --- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td @@ -95,7 +95,15 @@ def GpuModuleToBinaryPass Option<"cmdOptions", "opts", "std::string", [{""}], "Command line options to pass to the tools.">, Option<"compilationTarget", "format", "std::string", [{"fatbin"}], - "The target representation of the compilation process."> + "The target representation of the compilation process.">, + Option<"dumpISA", "dump-isa", "bool", + /*default=*/"false", + "Dumps generated ISA to the error output.">, + Option<"dumpMachineISA", "dump-machine-isa", "bool", + /*default=*/"false", + "Dumps the generated machine-level ISA to the error output. 
" + "If the generated ISA is virtual, it instead dumps the " + "machine-level equivalent."> ]; } diff --git a/mlir/include/mlir/Target/LLVM/NVVM/Utils.h b/mlir/include/mlir/Target/LLVM/NVVM/Utils.h index 65ae8a6bdb4ad..6926acaa3e337 100644 --- a/mlir/include/mlir/Target/LLVM/NVVM/Utils.h +++ b/mlir/include/mlir/Target/LLVM/NVVM/Utils.h @@ -54,7 +54,7 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { LogicalResult appendStandardLibs(); /// Loads the bitcode files in `fileList`. - virtual std::optional>> + std::optional>> loadBitcodeFiles(llvm::Module &module) override; protected: diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index 956877497d933..3407cb9ec3cba 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -2302,18 +2302,26 @@ KernelMetadataAttr KernelTableAttr::lookup(StringAttr key) const { TargetOptions::TargetOptions( StringRef toolkitPath, ArrayRef linkFiles, StringRef cmdOptions, CompilationTarget compilationTarget, - function_ref getSymbolTableCallback) + function_ref getSymbolTableCallback, bool dumpISA, + bool dumpMachineISA) : TargetOptions(TypeID::get(), toolkitPath, linkFiles, - cmdOptions, compilationTarget, getSymbolTableCallback) {} + cmdOptions, compilationTarget, getSymbolTableCallback, + dumpISA, dumpMachineISA) {} TargetOptions::TargetOptions( TypeID typeID, StringRef toolkitPath, ArrayRef linkFiles, StringRef cmdOptions, CompilationTarget compilationTarget, - function_ref getSymbolTableCallback) + function_ref getSymbolTableCallback, bool dumpISA, + bool dumpMachineISA) : toolkitPath(toolkitPath.str()), linkFiles(linkFiles), - cmdOptions(cmdOptions.str()), compilationTarget(compilationTarget), + cmdOptions(cmdOptions.str()), dumpISA(dumpISA), + dumpMachineISA(dumpMachineISA), compilationTarget(compilationTarget), getSymbolTableCallback(getSymbolTableCallback), typeID(typeID) {} +bool TargetOptions::getDumpISA() const { return dumpISA; } 
+ +bool TargetOptions::getDumpMachineISA() const { return dumpMachineISA; } + TypeID TargetOptions::getTypeID() const { return typeID; } StringRef TargetOptions::getToolkitPath() const { return toolkitPath; } diff --git a/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp b/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp index fb440756e0c1d..8d32be95f5769 100644 --- a/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp +++ b/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp @@ -95,6 +95,8 @@ void buildHostPostPipeline(OpPassManager &pm, GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions; gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat; + gpuModuleToBinaryPassOptions.dumpISA = options.dumpPtx; + gpuModuleToBinaryPassOptions.dumpMachineISA = options.dumpSass; pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions)); pm.addPass(createConvertMathToLLVMPass()); pm.addPass(createCanonicalizerPass()); diff --git a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp index 86a3b4780e88c..a9538407888cb 100644 --- a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp @@ -70,7 +70,7 @@ void GpuModuleToBinaryPass::runOnOperation() { }; TargetOptions targetOptions(toolkitPath, linkFiles, cmdOptions, *targetFormat, - lazyTableBuilder); + lazyTableBuilder, dumpISA, dumpMachineISA); if (failed(transformGpuModulesToBinaries( getOperation(), OffloadingLLVMTranslationAttrInterface(nullptr), targetOptions))) diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp index 69602af8563aa..b5aff6be272ff 100644 --- a/mlir/lib/Target/LLVM/NVVM/Target.cpp +++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp @@ -292,8 +292,8 @@ NVPTXSerializer::compileToBinary(const std::string &ptxCode) { return std::nullopt; TmpFile cubinFile; if (createFatbin) { - Twine cubinFilename = ptxFile->first + ".cubin"; - cubinFile = 
TmpFile(cubinFilename.str(), llvm::FileRemover(cubinFilename)); + std::string cubinFilename = (ptxFile->first + ".cubin").str(); + cubinFile = TmpFile(cubinFilename, llvm::FileRemover(cubinFilename)); } else { cubinFile.first = binaryFile->first; } @@ -402,8 +402,8 @@ NVPTXSerializer::compileToBinary(const std::string &ptxCode) { /*MemoryLimit=*/0, /*ErrMsg=*/&message)) return emitLogError("`ptxas`"); -#define DEBUG_TYPE "dump-sass" - LLVM_DEBUG({ + + if (targetOptions.getDumpMachineISA()) { std::optional nvdisasm = findTool("nvdisasm"); SmallVector nvdisasmArgs( {StringRef("nvdisasm"), StringRef(cubinFile.first)}); @@ -417,11 +417,10 @@ NVPTXSerializer::compileToBinary(const std::string &ptxCode) { llvm::ErrorOr> logBuffer = llvm::MemoryBuffer::getFile(logFile->first); if (logBuffer && !(*logBuffer)->getBuffer().empty()) { - llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n"; - llvm::dbgs().flush(); + llvm::errs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n"; + llvm::errs().flush(); } - }); -#undef DEBUG_TYPE + } // Invoke `fatbin`. message.clear(); @@ -572,12 +571,13 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) { getOperation().emitError() << "Failed translating the module to ISA."; return std::nullopt; } -#define DEBUG_TYPE "serialize-to-isa" - LLVM_DEBUG({ - llvm::dbgs() << "PTX for module: " << getOperation().getNameAttr() << "\n"; - llvm::dbgs() << *serializedISA << "\n"; - llvm::dbgs().flush(); - }); + if (targetOptions.getDumpISA()) { + llvm::errs() << "// Generated PTX for module: " + << getOperation().getNameAttr() << "\n"; + llvm::errs() << *serializedISA << "\n"; + llvm::errs().flush(); + } + #undef DEBUG_TYPE // Return PTX if the compilation target is `assembly`. 
diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp index 227b45133b57e..6761479e88da6 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp @@ -430,13 +430,12 @@ std::optional> SerializeGPUModuleBase::moduleToObjectImpl( getOperation().emitError() << "failed translating the module to ISA"; return std::nullopt; } -#define DEBUG_TYPE "serialize-to-isa" - LLVM_DEBUG({ + if (targetOptions.getDumpISA()) { llvm::dbgs() << "ISA for module: " << cast(getOperation()).getNameAttr() << "\n" << *serializedISA << "\n"; - }); -#undef DEBUG_TYPE + } + // Return ISA assembly code if the compilation target is assembly. if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly) return SmallVector(serializedISA->begin(), serializedISA->end()); diff --git a/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir b/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir index 0cc5d8645bb36..c511596d49c27 100644 --- a/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir +++ b/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline -debug-only=serialize-to-isa \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="dump-ptx=true" \ // RUN: 2>&1 | FileCheck %s // CHECK: Generated by LLVM NVPTX Back-End diff --git a/mlir/test/Integration/GPU/CUDA/dump-sass.mlir b/mlir/test/Integration/GPU/CUDA/dump-sass.mlir index d32f5efc29d58..2b204644c90c1 100644 --- a/mlir/test/Integration/GPU/CUDA/dump-sass.mlir +++ b/mlir/test/Integration/GPU/CUDA/dump-sass.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline -debug-only=dump-sass \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="dump-sass=true" \ // RUN: 2>&1 | FileCheck %s // CHECK: MOV