From cebb8a744fc83086db95af6a67916bfa54568d79 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Sun, 26 Jan 2025 02:54:12 +0100 Subject: [PATCH 1/7] [mlir][gpu] GPUToROCDL/NVVM: use generic llvm conversion interface instead of hardcoded conversions. --- .../GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 37 ++++++++++++------- .../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 30 ++++++++------- 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 11363a0d60ebf..669e2651e63fe 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -11,19 +11,14 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" - -#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" -#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" #include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h" -#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" +#include "mlir/Conversion/ConvertToLLVM/ToLLVMPass.h" #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" #include "mlir/Conversion/GPUToNVVM/GPUToNVVM.h" +#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include "mlir/Conversion/LLVMCommon/LoweringOptions.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" -#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" -#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/Func/IR/FuncOps.h" @@ -346,6 +341,11 @@ struct LowerGpuOpsToNVVMOpsPass : public impl::ConvertGpuOpsToNVVMOpsBase { using Base::Base; + void getDependentDialects(DialectRegistry ®istry) const override final { + Base::getDependentDialects(registry); + 
registerConvertToLLVMDependentDialectLoading(registry); + } + void runOnOperation() override { gpu::GPUModuleOp m = getOperation(); @@ -376,17 +376,24 @@ struct LowerGpuOpsToNVVMOpsPass LLVMTypeConverter converter(m.getContext(), options); configureGpuToNVVMTypeConverter(converter); RewritePatternSet llvmPatterns(m.getContext()); + LLVMConversionTarget target(getContext()); + + for (Dialect *dialect : getContext().getLoadedDialects()) { + if (isa(dialect)) + continue; + + auto iface = dyn_cast(dialect); + if (!iface) + continue; + + iface->populateConvertToLLVMConversionPatterns(target, converter, + llvmPatterns); + } - arith::populateArithToLLVMConversionPatterns(converter, llvmPatterns); - cf::populateControlFlowToLLVMConversionPatterns(converter, llvmPatterns); - populateFuncToLLVMConversionPatterns(converter, llvmPatterns); - populateFinalizeMemRefToLLVMConversionPatterns(converter, llvmPatterns); populateGpuToNVVMConversionPatterns(converter, llvmPatterns); populateGpuWMMAToNVVMConversionPatterns(converter, llvmPatterns); - populateVectorToLLVMConversionPatterns(converter, llvmPatterns); if (this->hasRedux) populateGpuSubgroupReduceOpLoweringPattern(converter, llvmPatterns); - LLVMConversionTarget target(getContext()); configureGpuToNVVMConversionLegality(target); if (failed(applyPartialConversion(m, target, std::move(llvmPatterns)))) signalPassFailure(); @@ -472,8 +479,10 @@ void mlir::populateGpuToNVVMConversionPatterns( using gpu::index_lowering::IndexKind; using gpu::index_lowering::IntrType; populateWithGenerated(patterns); + + // Set higher benefit, so patterns will run before generic LLVM lowering. 
patterns.add( - converter); + converter, /*benefit*/ 10); patterns.add< gpu::index_lowering::OpLowering>( diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index afebded1c3ea4..2c281e580754e 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" #include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h" #include "mlir/Dialect/Arith/Transforms/Passes.h" #include "mlir/Pass/Pass.h" @@ -19,8 +18,8 @@ #include "mlir/Transforms/Passes.h" #include "mlir/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.h" -#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" -#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" +#include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h" +#include "mlir/Conversion/ConvertToLLVM/ToLLVMPass.h" #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include "mlir/Conversion/LLVMCommon/LoweringOptions.h" @@ -28,8 +27,6 @@ #include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Conversion/MathToLLVM/MathToLLVM.h" #include "mlir/Conversion/MathToROCDL/MathToROCDL.h" -#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" -#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" @@ -218,6 +215,11 @@ struct LowerGpuOpsToROCDLOpsPass this->runtime = runtime; } + void getDependentDialects(DialectRegistry ®istry) const override final { + Base::getDependentDialects(registry); + registerConvertToLLVMDependentDialectLoading(registry); + } + void runOnOperation() override { gpu::GPUModuleOp m = getOperation(); MLIRContext *ctx = 
m.getContext(); @@ -289,18 +291,20 @@ struct LowerGpuOpsToROCDLOpsPass }); RewritePatternSet llvmPatterns(ctx); + LLVMConversionTarget target(getContext()); + + for (Dialect *dialect : ctx->getLoadedDialects()) { + auto iface = dyn_cast(dialect); + if (!iface) + continue; + + iface->populateConvertToLLVMConversionPatterns(target, converter, + llvmPatterns); + } - mlir::arith::populateArithToLLVMConversionPatterns(converter, llvmPatterns); populateAMDGPUToROCDLConversionPatterns(converter, llvmPatterns, *maybeChipset); - populateVectorToLLVMConversionPatterns(converter, llvmPatterns); - populateMathToLLVMConversionPatterns(converter, llvmPatterns); - cf::populateControlFlowToLLVMConversionPatterns(converter, llvmPatterns); - cf::populateAssertToLLVMConversionPattern(converter, llvmPatterns); - populateFuncToLLVMConversionPatterns(converter, llvmPatterns); - populateFinalizeMemRefToLLVMConversionPatterns(converter, llvmPatterns); populateGpuToROCDLConversionPatterns(converter, llvmPatterns, runtime); - LLVMConversionTarget target(getContext()); configureGpuToROCDLConversionLegality(target); if (failed(applyPartialConversion(m, target, std::move(llvmPatterns)))) signalPassFailure(); From ac698cf8a4dd6af1d20d54b6f369847ebf6ce4b2 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Sat, 8 Feb 2025 17:58:57 +0100 Subject: [PATCH 2/7] gpu-to-rocd filter-dialects --- mlir/include/mlir/Conversion/Passes.td | 20 +++++++----- .../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 32 +++++++++++++++---- .../Conversion/GPUToROCDL/gpu-to-rocdl.mlir | 1 + 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index ff79a1226c047..63952c5a79b9b 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -578,20 +578,24 @@ def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> { /*default=*/"\"gfx000\"", "Chipset that these operations will run 
on">, Option<"indexBitwidth", "index-bitwidth", "unsigned", - /*default=kDeriveIndexBitwidthFromDataLayout*/"0", + /*default=kDeriveIndexBitwidthFromDataLayout*/ "0", "Bitwidth of the index type, 0 to use size of machine word">, Option<"useBarePtrCallConv", "use-bare-ptr-memref-call-conv", "bool", /*default=*/"false", "Replace memref arguments in GPU functions with bare pointers." "All memrefs must have static shape">, Option<"runtime", "runtime", "::mlir::gpu::amd::Runtime", - "::mlir::gpu::amd::Runtime::Unknown", - "Runtime code will be run on (default is Unknown, can also use HIP or OpenCl)", - [{::llvm::cl::values( - clEnumValN(::mlir::gpu::amd::Runtime::Unknown, "unknown", "Unknown (default)"), - clEnumValN(::mlir::gpu::amd::Runtime::HIP, "HIP", "HIP"), - clEnumValN(::mlir::gpu::amd::Runtime::OpenCL, "OpenCL", "OpenCL") - )}]> + "::mlir::gpu::amd::Runtime::Unknown", + "Runtime code will be run on (default is Unknown, can also use HIP " + "or OpenCl)", + [{::llvm::cl::values( + clEnumValN(::mlir::gpu::amd::Runtime::Unknown, "unknown", + "Unknown (default)"), + clEnumValN(::mlir::gpu::amd::Runtime::HIP, "HIP", "HIP"), + clEnumValN(::mlir::gpu::amd::Runtime::OpenCL, "OpenCL", + "OpenCL"))}]>, + ListOption<"filterDialects", "filter-dialects", "std::string", + "Run conversion patterns of only the specified dialects">, ]; } diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index 2c281e580754e..48f24b3fb9549 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -293,13 +293,33 @@ struct LowerGpuOpsToROCDLOpsPass RewritePatternSet llvmPatterns(ctx); LLVMConversionTarget target(getContext()); - for (Dialect *dialect : ctx->getLoadedDialects()) { - auto iface = dyn_cast(dialect); - if (!iface) - continue; + if (!filterDialects.empty()) { + for (StringRef dialectName : filterDialects) { + Dialect *dialect = 
ctx->getLoadedDialect(dialectName); + // Dialect may not be loaded if it wasn't used in source module, ignore. + if (!dialect) + continue; + + auto *iface = dyn_cast(dialect); + if (!iface) { + m.emitError() + << "dialect does not implement ConvertToLLVMPatternInterface: " + << dialectName << "\n"; + return signalPassFailure(); + } - iface->populateConvertToLLVMConversionPatterns(target, converter, - llvmPatterns); + iface->populateConvertToLLVMConversionPatterns(target, converter, + llvmPatterns); + } + } else { + for (Dialect *dialect : ctx->getLoadedDialects()) { + auto iface = dyn_cast(dialect); + if (!iface) + continue; + + iface->populateConvertToLLVMConversionPatterns(target, converter, + llvmPatterns); + } } populateAMDGPUToROCDLConversionPatterns(converter, llvmPatterns, diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir index 11b9fa5e33f10..4e59578d078a9 100644 --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -1,4 +1,5 @@ // RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file | FileCheck %s +// RUN: mlir-opt %s -convert-gpu-to-rocdl='filter-dialects=func,arith,math' -split-input-file | FileCheck %s // RUN: mlir-opt %s -convert-gpu-to-rocdl='index-bitwidth=32' -split-input-file | FileCheck --check-prefix=CHECK32 %s // CHECK-LABEL: @test_module From 2a0327ebcc8a49ffee8d082c67f1b90e4780ee5a Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Sat, 8 Feb 2025 18:22:00 +0100 Subject: [PATCH 3/7] gpu-to-nvvm filter-dialects --- mlir/include/mlir/Conversion/Passes.td | 6 ++- .../GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 37 +++++++++++++++---- .../Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 1 + 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 63952c5a79b9b..1873d95eed88f 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ 
b/mlir/include/mlir/Conversion/Passes.td @@ -550,14 +550,16 @@ def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> { ]; let options = [ Option<"indexBitwidth", "index-bitwidth", "unsigned", - /*default=kDeriveIndexBitwidthFromDataLayout*/"0", + /*default=kDeriveIndexBitwidthFromDataLayout*/ "0", "Bitwidth of the index type, 0 to use size of machine word">, Option<"hasRedux", "has-redux", "bool", /*default=*/"false", "Target gpu supports redux">, Option<"useBarePtrCallConv", "use-bare-ptr-memref-call-conv", "bool", /*default=*/"false", "Replace memref arguments in GPU functions with bare pointers. " - "All memrefs must have static shape."> + "All memrefs must have static shape.">, + ListOption<"filterDialects", "filter-dialects", "std::string", + "Run conversion patterns of only the specified dialects">, ]; } diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 669e2651e63fe..e03335a9f696c 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -378,16 +378,36 @@ struct LowerGpuOpsToNVVMOpsPass RewritePatternSet llvmPatterns(m.getContext()); LLVMConversionTarget target(getContext()); - for (Dialect *dialect : getContext().getLoadedDialects()) { - if (isa(dialect)) - continue; + if (!filterDialects.empty()) { + for (StringRef dialectName : filterDialects) { + Dialect *dialect = getContext().getLoadedDialect(dialectName); + // Dialect may not be loaded if it wasn't used in source module, ignore. 
+ if (!dialect) + continue; + + auto *iface = dyn_cast(dialect); + if (!iface) { + m.emitError() + << "dialect does not implement ConvertToLLVMPatternInterface: " + << dialectName << "\n"; + return signalPassFailure(); + } - auto iface = dyn_cast(dialect); - if (!iface) - continue; + iface->populateConvertToLLVMConversionPatterns(target, converter, + llvmPatterns); + } + } else { + for (Dialect *dialect : getContext().getLoadedDialects()) { + if (isa(dialect)) // Need custom math lowering + continue; - iface->populateConvertToLLVMConversionPatterns(target, converter, - llvmPatterns); + auto iface = dyn_cast(dialect); + if (!iface) + continue; + + iface->populateConvertToLLVMConversionPatterns(target, converter, + llvmPatterns); + } } populateGpuToNVVMConversionPatterns(converter, llvmPatterns); @@ -404,6 +424,7 @@ struct LowerGpuOpsToNVVMOpsPass void mlir::configureGpuToNVVMConversionLegality(ConversionTarget &target) { target.addIllegalOp(); + target.addIllegalOp(); target.addLegalDialect<::mlir::LLVM::LLVMDialect>(); target.addLegalDialect<::mlir::NVVM::NVVMDialect>(); target.addIllegalDialect(); diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index de2a4ff2079e2..e917ae46dfc24 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -1,4 +1,5 @@ // RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1' -split-input-file | FileCheck %s +// RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1 filter-dialects=func,arith,cf' -split-input-file | FileCheck %s // RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1 use-bare-ptr-memref-call-conv=1' -split-input-file | FileCheck %s --check-prefix=CHECK-BARE // RUN: mlir-opt %s -transform-interpreter | FileCheck %s From a0e8c1f92890c0b3d7bf7e6426a70e2f8d10b5dd Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Sat, 8 Feb 2025 23:37:23 +0100 Subject: [PATCH 4/7] rename option --- 
mlir/include/mlir/Conversion/Passes.td | 6 +++--- mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 4 ++-- mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 4 ++-- mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 2 +- mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 1873d95eed88f..438b7512773c8 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -558,7 +558,7 @@ def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> { /*default=*/"false", "Replace memref arguments in GPU functions with bare pointers. " "All memrefs must have static shape.">, - ListOption<"filterDialects", "filter-dialects", "std::string", + ListOption<"allowedDialects", "allowed-dialects", "std::string", "Run conversion patterns of only the specified dialects">, ]; } @@ -589,14 +589,14 @@ def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> { Option<"runtime", "runtime", "::mlir::gpu::amd::Runtime", "::mlir::gpu::amd::Runtime::Unknown", "Runtime code will be run on (default is Unknown, can also use HIP " - "or OpenCl)", + "or OpenCL)", [{::llvm::cl::values( clEnumValN(::mlir::gpu::amd::Runtime::Unknown, "unknown", "Unknown (default)"), clEnumValN(::mlir::gpu::amd::Runtime::HIP, "HIP", "HIP"), clEnumValN(::mlir::gpu::amd::Runtime::OpenCL, "OpenCL", "OpenCL"))}]>, - ListOption<"filterDialects", "filter-dialects", "std::string", + ListOption<"allowedDialects", "allowed-dialects", "std::string", "Run conversion patterns of only the specified dialects">, ]; } diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index e03335a9f696c..7557016987312 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ 
b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -378,8 +378,8 @@ struct LowerGpuOpsToNVVMOpsPass RewritePatternSet llvmPatterns(m.getContext()); LLVMConversionTarget target(getContext()); - if (!filterDialects.empty()) { - for (StringRef dialectName : filterDialects) { + if (!allowedDialects.empty()) { + for (StringRef dialectName : allowedDialects) { Dialect *dialect = getContext().getLoadedDialect(dialectName); // Dialect may not be loaded if it wasn't used in source module, ignore. if (!dialect) diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index 48f24b3fb9549..43c60609b393f 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -293,8 +293,8 @@ struct LowerGpuOpsToROCDLOpsPass RewritePatternSet llvmPatterns(ctx); LLVMConversionTarget target(getContext()); - if (!filterDialects.empty()) { - for (StringRef dialectName : filterDialects) { + if (!allowedDialects.empty()) { + for (StringRef dialectName : allowedDialects) { Dialect *dialect = ctx->getLoadedDialect(dialectName); // Dialect may not be loaded if it wasn't used in source module, ignore. 
if (!dialect) diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index e917ae46dfc24..9f74e0c7947e6 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1' -split-input-file | FileCheck %s -// RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1 filter-dialects=func,arith,cf' -split-input-file | FileCheck %s +// RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1 allowed-dialects=func,arith,cf' -split-input-file | FileCheck %s // RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1 use-bare-ptr-memref-call-conv=1' -split-input-file | FileCheck %s --check-prefix=CHECK-BARE // RUN: mlir-opt %s -transform-interpreter | FileCheck %s diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir index 4e59578d078a9..e23ab16ccd94b 100644 --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file | FileCheck %s -// RUN: mlir-opt %s -convert-gpu-to-rocdl='filter-dialects=func,arith,math' -split-input-file | FileCheck %s +// RUN: mlir-opt %s -convert-gpu-to-rocdl='allowed-dialects=func,arith,math' -split-input-file | FileCheck %s // RUN: mlir-opt %s -convert-gpu-to-rocdl='index-bitwidth=32' -split-input-file | FileCheck --check-prefix=CHECK32 %s // CHECK-LABEL: @test_module From f11c4c06538c6d4e98d8d405ad64f7eb2084b01c Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Sat, 8 Feb 2025 23:50:41 +0100 Subject: [PATCH 5/7] fixes --- mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 9 +++++---- mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp 
b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 7557016987312..9bf7315166383 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -337,11 +337,11 @@ struct AssertOpToAssertfailLowering /// /// This pass only handles device code and is not meant to be run on GPU host /// code. -struct LowerGpuOpsToNVVMOpsPass +struct LowerGpuOpsToNVVMOpsPass final : public impl::ConvertGpuOpsToNVVMOpsBase { using Base::Base; - void getDependentDialects(DialectRegistry ®istry) const override final { + void getDependentDialects(DialectRegistry ®istry) const override { Base::getDependentDialects(registry); registerConvertToLLVMDependentDialectLoading(registry); } @@ -389,7 +389,7 @@ struct LowerGpuOpsToNVVMOpsPass if (!iface) { m.emitError() << "dialect does not implement ConvertToLLVMPatternInterface: " - << dialectName << "\n"; + << dialectName; return signalPassFailure(); } @@ -398,7 +398,8 @@ struct LowerGpuOpsToNVVMOpsPass } } else { for (Dialect *dialect : getContext().getLoadedDialects()) { - if (isa(dialect)) // Need custom math lowering + // Skip math patterns as nvvm needs custom math lowering. + if (isa(dialect)) continue; auto iface = dyn_cast(dialect); diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index 43c60609b393f..fb1631879265e 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -199,7 +199,7 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern { // // This pass only handles device code and is not meant to be run on GPU host // code. 
-struct LowerGpuOpsToROCDLOpsPass +struct LowerGpuOpsToROCDLOpsPass final : public impl::ConvertGpuOpsToROCDLOpsBase { LowerGpuOpsToROCDLOpsPass() = default; LowerGpuOpsToROCDLOpsPass(const std::string &chipset, unsigned indexBitwidth, @@ -215,7 +215,7 @@ struct LowerGpuOpsToROCDLOpsPass this->runtime = runtime; } - void getDependentDialects(DialectRegistry ®istry) const override final { + void getDependentDialects(DialectRegistry ®istry) const override { Base::getDependentDialects(registry); registerConvertToLLVMDependentDialectLoading(registry); } @@ -304,7 +304,7 @@ struct LowerGpuOpsToROCDLOpsPass if (!iface) { m.emitError() << "dialect does not implement ConvertToLLVMPatternInterface: " - << dialectName << "\n"; + << dialectName; return signalPassFailure(); } From d655d8b7dbcf94f391db60db3f32babe161c22cb Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Sun, 9 Feb 2025 01:07:24 +0100 Subject: [PATCH 6/7] refac dialect filtering --- .../GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 46 +++++++++---------- .../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 40 ++++++++-------- 2 files changed, 39 insertions(+), 47 deletions(-) diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 9bf7315166383..35330f870e6ae 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -378,37 +378,33 @@ struct LowerGpuOpsToNVVMOpsPass final RewritePatternSet llvmPatterns(m.getContext()); LLVMConversionTarget target(getContext()); - if (!allowedDialects.empty()) { - for (StringRef dialectName : allowedDialects) { - Dialect *dialect = getContext().getLoadedDialect(dialectName); - // Dialect may not be loaded if it wasn't used in source module, ignore. 
- if (!dialect) - continue; - - auto *iface = dyn_cast(dialect); - if (!iface) { + llvm::SmallDenseSet allowedDialectsSet(allowedDialects.begin(), + allowedDialects.end()); + for (Dialect *dialect : getContext().getLoadedDialects()) { + // Skip math patterns as nvvm needs custom math lowering. + if (isa(dialect)) + continue; + + bool allowed = allowedDialectsSet.contains(dialect->getNamespace()); + // Empty `allowedDialectsSet` means all dialects are allowed. + if (!allowedDialectsSet.empty() && !allowed) + continue; + + auto iface = dyn_cast(dialect); + if (!iface) { + // Error out if dialect was explicitly specified but doesn't implement + // conversion interface. + if (allowed) { m.emitError() << "dialect does not implement ConvertToLLVMPatternInterface: " - << dialectName; + << dialect->getNamespace(); return signalPassFailure(); } - - iface->populateConvertToLLVMConversionPatterns(target, converter, - llvmPatterns); + continue; } - } else { - for (Dialect *dialect : getContext().getLoadedDialects()) { - // Skip math patterns as nvvm needs custom math lowering. 
- if (isa(dialect)) - continue; - auto iface = dyn_cast(dialect); - if (!iface) - continue; - - iface->populateConvertToLLVMConversionPatterns(target, converter, - llvmPatterns); - } + iface->populateConvertToLLVMConversionPatterns(target, converter, + llvmPatterns); } populateGpuToNVVMConversionPatterns(converter, llvmPatterns); diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index fb1631879265e..4891dab3aa1d0 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -293,33 +293,29 @@ struct LowerGpuOpsToROCDLOpsPass final RewritePatternSet llvmPatterns(ctx); LLVMConversionTarget target(getContext()); - if (!allowedDialects.empty()) { - for (StringRef dialectName : allowedDialects) { - Dialect *dialect = ctx->getLoadedDialect(dialectName); - // Dialect may not be loaded if it wasn't used in source module, ignore. - if (!dialect) - continue; - - auto *iface = dyn_cast(dialect); - if (!iface) { + llvm::SmallDenseSet allowedDialectsSet(allowedDialects.begin(), + allowedDialects.end()); + for (Dialect *dialect : ctx->getLoadedDialects()) { + bool allowed = allowedDialectsSet.contains(dialect->getNamespace()); + // Empty `allowedDialectsSet` means all dialects are allowed. + if (!allowedDialectsSet.empty() && !allowed) + continue; + + auto iface = dyn_cast(dialect); + if (!iface) { + // Error out if dialect was explicitly specified but doesn't implement + // conversion interface. 
+ if (allowed) { m.emitError() << "dialect does not implement ConvertToLLVMPatternInterface: " - << dialectName; + << dialect->getNamespace(); return signalPassFailure(); } - - iface->populateConvertToLLVMConversionPatterns(target, converter, - llvmPatterns); - } - } else { - for (Dialect *dialect : ctx->getLoadedDialects()) { - auto iface = dyn_cast(dialect); - if (!iface) - continue; - - iface->populateConvertToLLVMConversionPatterns(target, converter, - llvmPatterns); + continue; } + + iface->populateConvertToLLVMConversionPatterns(target, converter, + llvmPatterns); } populateAMDGPUToROCDLConversionPatterns(converter, llvmPatterns, From bcd2d22307c62b650bc157b1664b953b5e82f700 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Sun, 9 Feb 2025 16:13:31 +0100 Subject: [PATCH 7/7] Test invalid dialect error --- .../GPUToNVVM/gpu-to-nvvm-invalid-dialect.mlir | 10 ++++++++++ .../GPUToROCDL/gpu-to-rocdl-invalid-dialect.mlir | 10 ++++++++++ 2 files changed, 20 insertions(+) create mode 100644 mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-invalid-dialect.mlir create mode 100644 mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-invalid-dialect.mlir diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-invalid-dialect.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-invalid-dialect.mlir new file mode 100644 index 0000000000000..a293191e11567 --- /dev/null +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-invalid-dialect.mlir @@ -0,0 +1,10 @@ +// RUN: mlir-opt %s -convert-gpu-to-nvvm='allowed-dialects=test' -verify-diagnostics + +// expected-error @+1 {{dialect does not implement ConvertToLLVMPatternInterface: test}} +gpu.module @test_module_1 { + func.func @test(%0 : index) -> index { + %1 = test.increment %0 : index + func.return %1 : index + } +} + diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-invalid-dialect.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-invalid-dialect.mlir new file mode 100644 index 0000000000000..117f7692669de --- /dev/null +++ 
b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-invalid-dialect.mlir @@ -0,0 +1,10 @@ +// RUN: mlir-opt %s -convert-gpu-to-rocdl='allowed-dialects=test' -verify-diagnostics + +// expected-error @+1 {{dialect does not implement ConvertToLLVMPatternInterface: test}} +gpu.module @test_module_1 { + func.func @test(%0 : index) -> index { + %1 = test.increment %0 : index + func.return %1 : index + } +} +