From c03637be0750aff5b0711a9a3fac2d7b5e13f32a Mon Sep 17 00:00:00 2001 From: Pablo Antonio Martinez Date: Mon, 8 Sep 2025 02:34:00 -0500 Subject: [PATCH 1/4] [mlir][gpu] GPUToROCDL: Add C++ argument to populate allowedDialects The `convert-gpu-to-rocdl` pass provides the option `allowed-dialects`, which allows users to control which dialects can be used to populate conversions. This PR adds a C++ argument to createLowerGpuOpsToROCDLOpsPass, so that this option can also be controlled programmatically when creating the pass. --- .../Conversion/GPUToROCDL/GPUToROCDLPass.h | 6 ++++- .../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 23 ++++++++++++------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h b/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h index 291b809071ce9..a6099bde2a70e 100644 --- a/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h +++ b/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h @@ -10,6 +10,8 @@ #include "mlir/Conversion/GPUToROCDL/Runtimes.h" #include "mlir/Conversion/LLVMCommon/LoweringOptions.h" +#include "llvm/ADT/DenseSet.h" +#include #include namespace mlir { @@ -50,7 +52,9 @@ createLowerGpuOpsToROCDLOpsPass( const std::string &chipset = "gfx900", unsigned indexBitwidth = kDeriveIndexBitwidthFromDataLayout, bool useBarePtrCallConv = false, - gpu::amd::Runtime runtime = gpu::amd::Runtime::Unknown); + gpu::amd::Runtime runtime = gpu::amd::Runtime::Unknown, + const std::optional> &allowedDialects = + std::nullopt); } // namespace mlir diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index 807d1f52ee69b..965089df0303e 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -288,9 +288,10 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern { struct LowerGpuOpsToROCDLOpsPass final : public 
impl::ConvertGpuOpsToROCDLOpsBase { LowerGpuOpsToROCDLOpsPass() = default; - LowerGpuOpsToROCDLOpsPass(const std::string &chipset, unsigned indexBitwidth, - bool useBarePtrCallConv, - gpu::amd::Runtime runtime) { + LowerGpuOpsToROCDLOpsPass( + const std::string &chipset, unsigned indexBitwidth, + bool useBarePtrCallConv, gpu::amd::Runtime runtime, + std::optional> allowedDialects) { if (this->chipset.getNumOccurrences() == 0) this->chipset = chipset; if (this->indexBitwidth.getNumOccurrences() == 0) @@ -299,6 +300,12 @@ struct LowerGpuOpsToROCDLOpsPass final this->useBarePtrCallConv = useBarePtrCallConv; if (this->runtime.getNumOccurrences() == 0) this->runtime = runtime; + if (this->allowedDialects.getNumOccurrences() == 0 && + allowedDialects.has_value()) { + for (auto &str : allowedDialects.value()) { + this->allowedDialects.push_back(str.str()); + } + } } void getDependentDialects(DialectRegistry &registry) const override { @@ -501,10 +508,10 @@ void mlir::populateGpuToROCDLConversionPatterns( } std::unique_ptr> -mlir::createLowerGpuOpsToROCDLOpsPass(const std::string &chipset, - unsigned indexBitwidth, - bool useBarePtrCallConv, - gpu::amd::Runtime runtime) { +mlir::createLowerGpuOpsToROCDLOpsPass( + const std::string &chipset, unsigned indexBitwidth, bool useBarePtrCallConv, + gpu::amd::Runtime runtime, + const std::optional> &allowedDialects) { return std::make_unique( - chipset, indexBitwidth, useBarePtrCallConv, runtime); + chipset, indexBitwidth, useBarePtrCallConv, runtime, allowedDialects); } From 5572996277e7531060470689863f7fd2cbca5ef0 Mon Sep 17 00:00:00 2001 From: Pablo Antonio Martinez Date: Mon, 8 Sep 2025 11:06:16 -0500 Subject: [PATCH 2/4] [Refactor] Force users to use create* function autogenerated from tablegen and delete handwritten version --- .../mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h | 13 +------------ mlir/include/mlir/Conversion/Passes.td | 1 - .../Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 12 ++---------- 3 files changed, 3 
insertions(+), 23 deletions(-) diff --git a/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h b/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h index a6099bde2a70e..81405e7fa425a 100644 --- a/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h +++ b/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h @@ -10,6 +10,7 @@ #include "mlir/Conversion/GPUToROCDL/Runtimes.h" #include "mlir/Conversion/LLVMCommon/LoweringOptions.h" +#include "mlir/Pass/Pass.h" #include "llvm/ADT/DenseSet.h" #include #include @@ -44,18 +45,6 @@ void populateGpuToROCDLConversionPatterns(const LLVMTypeConverter &converter, /// Configure target to convert from the GPU dialect to ROCDL. void configureGpuToROCDLConversionLegality(ConversionTarget &target); -/// Creates a pass that lowers GPU dialect operations to ROCDL counterparts. The -/// index bitwidth used for the lowering of the device side index computations -/// is configurable. -std::unique_ptr> -createLowerGpuOpsToROCDLOpsPass( - const std::string &chipset = "gfx900", - unsigned indexBitwidth = kDeriveIndexBitwidthFromDataLayout, - bool useBarePtrCallConv = false, - gpu::amd::Runtime runtime = gpu::amd::Runtime::Unknown, - const std::optional> &allowedDialects = - std::nullopt); - } // namespace mlir #endif // MLIR_CONVERSION_GPUTOROCDL_GPUTOROCDLPASS_H_ diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 44dc1bc923a6b..1a37d057776e2 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -624,7 +624,6 @@ def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> { def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> { let summary = "Generate ROCDL operations for gpu operations"; - let constructor = "mlir::createLowerGpuOpsToROCDLOpsPass()"; let dependentDialects = [ "ROCDL::ROCDLDialect", "amdgpu::AMDGPUDialect", diff --git 
a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index 965089df0303e..ad0571027b0d0 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -36,7 +36,6 @@ #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/IR/BuiltinAttributes.h" -#include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -288,6 +287,8 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern { struct LowerGpuOpsToROCDLOpsPass final : public impl::ConvertGpuOpsToROCDLOpsBase { LowerGpuOpsToROCDLOpsPass() = default; + LowerGpuOpsToROCDLOpsPass(ConvertGpuOpsToROCDLOpsOptions options) + : ConvertGpuOpsToROCDLOpsBase(options) {} LowerGpuOpsToROCDLOpsPass( const std::string &chipset, unsigned indexBitwidth, bool useBarePtrCallConv, gpu::amd::Runtime runtime, @@ -506,12 +507,3 @@ void mlir::populateGpuToROCDLConversionPatterns( populateMathToROCDLConversionPatterns(converter, patterns); } - -std::unique_ptr> -mlir::createLowerGpuOpsToROCDLOpsPass( - const std::string &chipset, unsigned indexBitwidth, bool useBarePtrCallConv, - gpu::amd::Runtime runtime, - const std::optional> &allowedDialects) { - return std::make_unique( - chipset, indexBitwidth, useBarePtrCallConv, runtime, allowedDialects); -} From 5dbb951e6594a3db648131cfe7fcf0b9a41bf02e Mon Sep 17 00:00:00 2001 From: Pablo Antonio Martinez Date: Tue, 9 Sep 2025 02:00:28 -0500 Subject: [PATCH 3/4] Cleanup GPUToROCDLPass header --- mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h b/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h index 81405e7fa425a..220da0ad3c08f 100644 --- a/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h 
+++ b/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h @@ -10,12 +10,10 @@ #include "mlir/Conversion/GPUToROCDL/Runtimes.h" #include "mlir/Conversion/LLVMCommon/LoweringOptions.h" -#include "mlir/Pass/Pass.h" -#include "llvm/ADT/DenseSet.h" -#include #include namespace mlir { +class Pass; class LLVMTypeConverter; class ConversionTarget; class RewritePatternSet; From a118652ed3b48d94dd09edc73dad465a0a1c15c6 Mon Sep 17 00:00:00 2001 From: Pablo Antonio Martinez Date: Tue, 9 Sep 2025 02:32:14 -0500 Subject: [PATCH 4/4] Delete constructors and use the ones from base --- .../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 23 +------------------ 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index ad0571027b0d0..bbfa3d17bc7e6 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -286,28 +286,7 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern { // code. 
struct LowerGpuOpsToROCDLOpsPass final : public impl::ConvertGpuOpsToROCDLOpsBase { - LowerGpuOpsToROCDLOpsPass() = default; - LowerGpuOpsToROCDLOpsPass(ConvertGpuOpsToROCDLOpsOptions options) - : ConvertGpuOpsToROCDLOpsBase(options) {} - LowerGpuOpsToROCDLOpsPass( - const std::string &chipset, unsigned indexBitwidth, - bool useBarePtrCallConv, gpu::amd::Runtime runtime, - std::optional> allowedDialects) { - if (this->chipset.getNumOccurrences() == 0) - this->chipset = chipset; - if (this->indexBitwidth.getNumOccurrences() == 0) - this->indexBitwidth = indexBitwidth; - if (this->useBarePtrCallConv.getNumOccurrences() == 0) - this->useBarePtrCallConv = useBarePtrCallConv; - if (this->runtime.getNumOccurrences() == 0) - this->runtime = runtime; - if (this->allowedDialects.getNumOccurrences() == 0 && - allowedDialects.has_value()) { - for (auto &str : allowedDialects.value()) { - this->allowedDialects.push_back(str.str()); - } - } - } + using Base::Base; void getDependentDialects(DialectRegistry &registry) const override { Base::getDependentDialects(registry);