diff --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h b/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h index 6d7cb5ca7a7f8..88111aed2a0a6 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h +++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h @@ -52,7 +52,8 @@ class TargetOptions { StringRef toolkitPath = {}, ArrayRef linkFiles = {}, StringRef cmdOptions = {}, CompilationTarget compilationTarget = getDefaultCompilationTarget(), - function_ref getSymbolTableCallback = {}); + function_ref getSymbolTableCallback = {}, + bool dumpISA = false, bool dumpMachineISA = false); /// Returns the typeID. TypeID getTypeID() const; @@ -66,6 +67,12 @@ class TargetOptions { /// Returns the command line options. StringRef getCmdOptions() const; + /// Returns whether the generated ISA should be dumped. + bool getDumpISA() const; + + /// Returns whether the generated (disassembled) machine ISA should be dumped. + bool getDumpMachineISA() const; + /// Returns a tokenization of the command line options. std::pair> tokenizeCmdOptions() const; @@ -90,7 +97,8 @@ class TargetOptions { TypeID typeID, StringRef toolkitPath = {}, ArrayRef linkFiles = {}, StringRef cmdOptions = {}, CompilationTarget compilationTarget = getDefaultCompilationTarget(), - function_ref getSymbolTableCallback = {}); + function_ref getSymbolTableCallback = {}, + bool dumpISA = false, bool dumpMachineISA = false); /// Path to the target toolkit. std::string toolkitPath; @@ -102,6 +110,12 @@ class TargetOptions { /// process. std::string cmdOptions; + /// An optional flag to dump generated ISA. + bool dumpISA = false; + + /// An optional flag to dump generated and disassembled machine ISA. + bool dumpMachineISA = false; + /// Compilation process target format. 
CompilationTarget compilationTarget; diff --git a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h index caa0901bb4943..485eac6bad5b8 100644 --- a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h @@ -53,6 +53,12 @@ struct GPUToNVVMPipelineOptions "Whether to use the bareptr calling convention on the host (warning " "this should be false until the GPU layering is fixed)"), llvm::cl::init(false)}; + PassOptions::Option dumpPtx{ + *this, "dump-ptx", llvm::cl::desc("Dumps PTX code to the error output"), + llvm::cl::init(false)}; + PassOptions::Option dumpSass{ + *this, "dump-sass", llvm::cl::desc("Dumps SASS code to the error output"), + llvm::cl::init(false)}; }; //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td index 4a9ddafdd177d..f7cd9dd8c15bf 100644 --- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td @@ -95,7 +95,15 @@ def GpuModuleToBinaryPass Option<"cmdOptions", "opts", "std::string", [{""}], "Command line options to pass to the tools.">, Option<"compilationTarget", "format", "std::string", [{"fatbin"}], - "The target representation of the compilation process."> + "The target representation of the compilation process.">, + Option<"dumpISA", "dump-isa", "bool", + /*default=*/"false", + "Dumps generated ISA to the error output.">, + Option<"dumpMachineISA", "dump-machine-isa", "bool", + /*default=*/"false", + "Dumps the generated machine-level ISA to the error output. 
" + "If the generated ISA is virtual, it instead dumps the " + "machine-level equivalent."> ]; } diff --git a/mlir/include/mlir/Target/LLVM/NVVM/Utils.h b/mlir/include/mlir/Target/LLVM/NVVM/Utils.h index 65ae8a6bdb4ad..6926acaa3e337 100644 --- a/mlir/include/mlir/Target/LLVM/NVVM/Utils.h +++ b/mlir/include/mlir/Target/LLVM/NVVM/Utils.h @@ -54,7 +54,7 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { LogicalResult appendStandardLibs(); /// Loads the bitcode files in `fileList`. - virtual std::optional>> + std::optional>> loadBitcodeFiles(llvm::Module &module) override; protected: diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index 956877497d933..3407cb9ec3cba 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -2302,18 +2302,26 @@ KernelMetadataAttr KernelTableAttr::lookup(StringAttr key) const { TargetOptions::TargetOptions( StringRef toolkitPath, ArrayRef linkFiles, StringRef cmdOptions, CompilationTarget compilationTarget, - function_ref getSymbolTableCallback) + function_ref getSymbolTableCallback, bool dumpISA, + bool dumpMachineISA) : TargetOptions(TypeID::get(), toolkitPath, linkFiles, - cmdOptions, compilationTarget, getSymbolTableCallback) {} + cmdOptions, compilationTarget, getSymbolTableCallback, + dumpISA, dumpMachineISA) {} TargetOptions::TargetOptions( TypeID typeID, StringRef toolkitPath, ArrayRef linkFiles, StringRef cmdOptions, CompilationTarget compilationTarget, - function_ref getSymbolTableCallback) + function_ref getSymbolTableCallback, bool dumpISA, + bool dumpMachineISA) : toolkitPath(toolkitPath.str()), linkFiles(linkFiles), - cmdOptions(cmdOptions.str()), compilationTarget(compilationTarget), + cmdOptions(cmdOptions.str()), dumpISA(dumpISA), + dumpMachineISA(dumpMachineISA), compilationTarget(compilationTarget), getSymbolTableCallback(getSymbolTableCallback), typeID(typeID) {} +bool TargetOptions::getDumpISA() const { return dumpISA; } 
+ +bool TargetOptions::getDumpMachineISA() const { return dumpMachineISA; } + TypeID TargetOptions::getTypeID() const { return typeID; } StringRef TargetOptions::getToolkitPath() const { return toolkitPath; } diff --git a/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp b/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp index fb440756e0c1d..8d32be95f5769 100644 --- a/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp +++ b/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp @@ -95,6 +95,8 @@ void buildHostPostPipeline(OpPassManager &pm, GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions; gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat; + gpuModuleToBinaryPassOptions.dumpISA = options.dumpPtx; + gpuModuleToBinaryPassOptions.dumpMachineISA = options.dumpSass; pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions)); pm.addPass(createConvertMathToLLVMPass()); pm.addPass(createCanonicalizerPass()); diff --git a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp index 86a3b4780e88c..a9538407888cb 100644 --- a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp @@ -70,7 +70,7 @@ void GpuModuleToBinaryPass::runOnOperation() { }; TargetOptions targetOptions(toolkitPath, linkFiles, cmdOptions, *targetFormat, - lazyTableBuilder); + lazyTableBuilder, dumpISA, dumpMachineISA); if (failed(transformGpuModulesToBinaries( getOperation(), OffloadingLLVMTranslationAttrInterface(nullptr), targetOptions))) diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp index 69602af8563aa..b5aff6be272ff 100644 --- a/mlir/lib/Target/LLVM/NVVM/Target.cpp +++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp @@ -292,8 +292,8 @@ NVPTXSerializer::compileToBinary(const std::string &ptxCode) { return std::nullopt; TmpFile cubinFile; if (createFatbin) { - Twine cubinFilename = ptxFile->first + ".cubin"; - cubinFile = 
TmpFile(cubinFilename.str(), llvm::FileRemover(cubinFilename)); + std::string cubinFilename = (ptxFile->first + ".cubin").str(); + cubinFile = TmpFile(cubinFilename, llvm::FileRemover(cubinFilename)); } else { cubinFile.first = binaryFile->first; } @@ -402,8 +402,8 @@ NVPTXSerializer::compileToBinary(const std::string &ptxCode) { /*MemoryLimit=*/0, /*ErrMsg=*/&message)) return emitLogError("`ptxas`"); -#define DEBUG_TYPE "dump-sass" - LLVM_DEBUG({ + + if (targetOptions.getDumpMachineISA()) { std::optional nvdisasm = findTool("nvdisasm"); SmallVector nvdisasmArgs( {StringRef("nvdisasm"), StringRef(cubinFile.first)}); @@ -417,11 +417,10 @@ NVPTXSerializer::compileToBinary(const std::string &ptxCode) { llvm::ErrorOr> logBuffer = llvm::MemoryBuffer::getFile(logFile->first); if (logBuffer && !(*logBuffer)->getBuffer().empty()) { - llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n"; - llvm::dbgs().flush(); + llvm::errs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n"; + llvm::errs().flush(); } - }); -#undef DEBUG_TYPE + } // Invoke `fatbin`. message.clear(); @@ -572,12 +571,11 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) { getOperation().emitError() << "Failed translating the module to ISA."; return std::nullopt; } -#define DEBUG_TYPE "serialize-to-isa" - LLVM_DEBUG({ - llvm::dbgs() << "PTX for module: " << getOperation().getNameAttr() << "\n"; - llvm::dbgs() << *serializedISA << "\n"; - llvm::dbgs().flush(); - }); + if (targetOptions.getDumpISA()) { + llvm::errs() << "// Generated PTX for module: " + << getOperation().getNameAttr() << "\n"; + llvm::errs() << *serializedISA << "\n"; + llvm::errs().flush(); + } -#undef DEBUG_TYPE // Return PTX if the compilation target is `assembly`. 
diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp index 227b45133b57e..6761479e88da6 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp @@ -430,13 +430,12 @@ std::optional> SerializeGPUModuleBase::moduleToObjectImpl( getOperation().emitError() << "failed translating the module to ISA"; return std::nullopt; } -#define DEBUG_TYPE "serialize-to-isa" - LLVM_DEBUG({ + if (targetOptions.getDumpISA()) { - llvm::dbgs() << "ISA for module: " + llvm::errs() << "ISA for module: " << cast(getOperation()).getNameAttr() << "\n" << *serializedISA << "\n"; - }); -#undef DEBUG_TYPE + } + // Return ISA assembly code if the compilation target is assembly. if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly) return SmallVector(serializedISA->begin(), serializedISA->end()); diff --git a/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir b/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir index 0cc5d8645bb36..c511596d49c27 100644 --- a/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir +++ b/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline -debug-only=serialize-to-isa \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="dump-ptx=true" \ // RUN: 2>&1 | FileCheck %s // CHECK: Generated by LLVM NVPTX Back-End diff --git a/mlir/test/Integration/GPU/CUDA/dump-sass.mlir b/mlir/test/Integration/GPU/CUDA/dump-sass.mlir index d32f5efc29d58..2b204644c90c1 100644 --- a/mlir/test/Integration/GPU/CUDA/dump-sass.mlir +++ b/mlir/test/Integration/GPU/CUDA/dump-sass.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline -debug-only=dump-sass \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="dump-sass=true" \ // RUN: 2>&1 | FileCheck %s // CHECK: MOV