diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h index aaef91f31ab9c..5cc65082a7e56 100644 --- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h @@ -35,25 +35,6 @@ class FuncOp; #define GEN_PASS_DECL #include "mlir/Dialect/GPU/Transforms/Passes.h.inc" -/// Pass that moves ops which are likely an index computation into gpu.launch -/// body. -std::unique_ptr createGpuLauchSinkIndexComputationsPass(); - -/// Replaces `gpu.launch` with `gpu.launch_func` by moving the region into -/// a separate kernel function. -std::unique_ptr> -createGpuKernelOutliningPass(StringRef dataLayoutStr = StringRef()); - -/// Rewrites a function region so that GPU ops execute asynchronously. -std::unique_ptr> createGpuAsyncRegionPass(); - -/// Maps the parallel loops found in the given function to workgroups. The first -/// loop encountered will be mapped to the global workgroup and the second loop -/// encountered to the local workgroup. Within each mapping, the first three -/// dimensions are mapped to x/y/z hardware ids and all following dimensions are -/// mapped to sequential loops. -std::unique_ptr> createGpuMapParallelLoopsPass(); - /// Collect a set of patterns to rewrite GlobalIdOp op within the GPU dialect. void populateGpuGlobalIdPatterns(RewritePatternSet &patterns); @@ -110,9 +91,6 @@ LogicalResult transformGpuModulesToBinaries( /// Collect a set of patterns to decompose memrefs ops. void populateGpuDecomposeMemrefsPatterns(RewritePatternSet &patterns); -/// Pass decomposes memref ops inside `gpu.launch` body. -std::unique_ptr createGpuDecomposeMemrefsPass(); - /// Erase barriers that do not enforce conflicting memory side effects. void populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns); diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td index faf4c9ddbc7a7..03b1272095d64 100644 --- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td @@ -11,29 +11,35 @@ include "mlir/Pass/PassBase.td" -def GpuLaunchSinkIndexComputations : Pass<"gpu-launch-sink-index-computations"> { +def GpuLaunchSinkIndexComputationsPass + : Pass<"gpu-launch-sink-index-computations"> { let summary = "Sink index computations into gpu.launch body"; - let constructor = "mlir::createGpuLauchSinkIndexComputationsPass()"; let dependentDialects = ["mlir::gpu::GPUDialect"]; } -def GpuKernelOutlining : Pass<"gpu-kernel-outlining", "ModuleOp"> { +def GpuKernelOutliningPass : Pass<"gpu-kernel-outlining", "ModuleOp"> { let summary = "Outline gpu.launch bodies to kernel functions"; - let constructor = "mlir::createGpuKernelOutliningPass()"; let dependentDialects = ["mlir::DLTIDialect", "cf::ControlFlowDialect"]; + let options = [Option<"dataLayoutStr", "data-layout-str", "std::string", + /*default=*/"", + "String description of the data layout">]; } def GpuAsyncRegionPass : Pass<"gpu-async-region", "func::FuncOp"> { let summary = "Make GPU ops async"; - let constructor = "mlir::createGpuAsyncRegionPass()"; let dependentDialects = ["async::AsyncDialect"]; } def GpuMapParallelLoopsPass : Pass<"gpu-map-parallel-loops", "mlir::func::FuncOp"> { let summary = "Greedily maps loops to GPU hardware dimensions."; - let constructor = "mlir::createGpuMapParallelLoopsPass()"; - let description = "Greedily maps loops to GPU hardware dimensions."; + let description = [{ + Maps the parallel loops found in the given function to workgroups. The first + loop encountered will be mapped to the global workgroup and the second loop + encountered to the local workgroup. Within each mapping, the first three + dimensions are mapped to x/y/z hardware ids and all following dimensions are + mapped to sequential loops. + }]; let dependentDialects = ["mlir::gpu::GPUDialect"]; } @@ -66,7 +72,6 @@ def GpuDecomposeMemrefsPass : Pass<"gpu-decompose-memrefs"> { and sizes/strides for dynamically-sized memrefs are not available inside `gpu.launch`. }]; - let constructor = "mlir::createGpuDecomposeMemrefsPass()"; let dependentDialects = [ "mlir::gpu::GPUDialect", "mlir::memref::MemRefDialect", "mlir::affine::AffineDialect" diff --git a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp index 41a5e39e55064..99a91ecd5642c 100644 --- a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp @@ -347,7 +347,3 @@ void GpuAsyncRegionPass::runOnOperation() { // Makes each !gpu.async.token returned from async.execute op have single use. getOperation().getRegion().walk(SingleTokenUseCallback()); } - -std::unique_ptr> mlir::createGpuAsyncRegionPass() { - return std::make_unique(); -} diff --git a/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp b/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp index 2afdeff3a7be1..a64dc7f74a19c 100644 --- a/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp @@ -238,7 +238,3 @@ void mlir::populateGpuDecomposeMemrefsPatterns(RewritePatternSet &patterns) { patterns.insert( patterns.getContext()); } - -std::unique_ptr mlir::createGpuDecomposeMemrefsPass() { - return std::make_unique(); -} diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp index a6a36848b5635..f5b5e3709d8e9 100644 --- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp @@ -30,8 +30,8 @@ #include namespace mlir { -#define GEN_PASS_DEF_GPULAUNCHSINKINDEXCOMPUTATIONS -#define GEN_PASS_DEF_GPUKERNELOUTLINING +#define GEN_PASS_DEF_GPULAUNCHSINKINDEXCOMPUTATIONSPASS +#define GEN_PASS_DEF_GPUKERNELOUTLININGPASS #include "mlir/Dialect/GPU/Transforms/Passes.h.inc" } // namespace mlir @@ -302,7 +302,7 @@ namespace { /// Pass that moves ops which are likely an index computation into gpu.launch /// body. class GpuLaunchSinkIndexComputationsPass - : public impl::GpuLaunchSinkIndexComputationsBase< + : public impl::GpuLaunchSinkIndexComputationsPassBase< GpuLaunchSinkIndexComputationsPass> { public: void runOnOperation() override { @@ -329,17 +329,9 @@ class GpuLaunchSinkIndexComputationsPass /// a separate pass. The external functions can then be annotated with the /// symbol of the cubin accessor function. class GpuKernelOutliningPass - : public impl::GpuKernelOutliningBase { + : public impl::GpuKernelOutliningPassBase { public: - GpuKernelOutliningPass(StringRef dlStr) { - if (!dlStr.empty() && !dataLayoutStr.hasValue()) - dataLayoutStr = dlStr.str(); - } - - GpuKernelOutliningPass(const GpuKernelOutliningPass &other) - : GpuKernelOutliningBase(other), dataLayoutSpec(other.dataLayoutSpec) { - dataLayoutStr = other.dataLayoutStr.getValue(); - } + using Base::Base; LogicalResult initialize(MLIRContext *context) override { // Initialize the data layout specification from the data layout string. @@ -457,21 +449,7 @@ class GpuKernelOutliningPass return kernelModule; } - Option dataLayoutStr{ - *this, "data-layout-str", - llvm::cl::desc("String containing the data layout specification to be " - "attached to the GPU kernel module")}; - DataLayoutSpecInterface dataLayoutSpec; }; } // namespace - -std::unique_ptr mlir::createGpuLauchSinkIndexComputationsPass() { - return std::make_unique(); -} - -std::unique_ptr> -mlir::createGpuKernelOutliningPass(StringRef dataLayoutStr) { - return std::make_unique(dataLayoutStr); -} diff --git a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp index 9d398998dd63b..a098e721303a8 100644 --- a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp @@ -146,8 +146,3 @@ struct GpuMapParallelLoopsPass } // namespace } // namespace gpu } // namespace mlir - -std::unique_ptr> -mlir::createGpuMapParallelLoopsPass() { - return std::make_unique(); -}