Add optional attributes kernelFunc and kernelModule to specify the kernel function name or kernel module name.

wangzpgi · wangzpgi · commit 52ad728bef87 · 2024-12-05T10:37:00.000-08:00
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -804,7 +804,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
                Optional<Index>:$clusterSizeY,
                Optional<Index>:$clusterSizeZ,
                Optional<I32>:$dynamicSharedMemorySize,
-               OptionalAttr<SymbolRefAttr>:$outlineModule)>,
+               OptionalAttr<SymbolRefAttr>:$kernelFunc,
+               OptionalAttr<SymbolRefAttr>:$kernelModule)>,
     Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
   let summary = "GPU kernel launch operation";
 
@@ -838,9 +839,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
     -   a variadic number of Workgroup memory attributions.
     -   a variadic number of Private memory attributions.
 
-    The `outline_module` attribute is optional and specifies a module in which 
-    the kernel should be outlined. When this attribute is present, the kernel is
-    outlined into the specified module instead of the default behavior.
+    The `kernelFunc` and `kernelModule` attributes are optional and specifies the kernel name and a module in whichthe kernel should be outlined. 
+
 
     Syntax:
 
@@ -1036,9 +1036,14 @@ def GPU_LaunchOp : GPU_Op<"launch", [
       return "workgroup_attributions";
     }
 
-    /// Checks if the outline_module attribute is present.
-    bool hasOutlineModule() {
-      return getOutlineModule().has_value();
+    /// Checks if the kernel func name attribute is present.
+    bool hasKernelFuncName() {
+      return getKernelFunc().has_value();
+    }
+
+    /// Checks if the kernel module name attribute is present.
+    bool hasKernelModuleName() {
+      return getKernelModule().has_value();
     }
   }];
 
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -365,14 +365,14 @@ class GpuKernelOutliningPass
       auto funcWalkResult = func.walk([&](gpu::LaunchOp op) {
         SetVector<Value> operands;
         std::string kernelFnName;
-        if (auto outlineModuleAttr = op->getAttrOfType<SymbolRefAttr>("outline_module")) {
-          kernelFnName = outlineModuleAttr.getRootReference().str();
-          llvm::errs() << "outlined module name = " << kernelFnName << "\n";
+        if (op.hasKernelFuncName()) {
+          kernelFnName = op->getAttrOfType<mlir::SymbolRefAttr>("kernelFunc").getRootReference().str();
+          llvm::errs() << "use provided kernel func name = " << kernelFnName << "\n";
         } else {
           kernelFnName =
               Twine(op->getParentOfType<SymbolOpInterface>().getName(), "_kernel")
                   .str();
-          llvm::errs() << "original module name = " << kernelFnName << "\n";
+          llvm::errs() << "use default kernel func name = " << kernelFnName << "\n";
         }
 
         gpu::GPUFuncOp outlinedFunc =
@@ -381,7 +381,7 @@ class GpuKernelOutliningPass
         // Create nested module and insert outlinedFunc. The module will
         // originally get the same name as the function, but may be renamed on
         // insertion into the parent module.
-        auto kernelModule = createKernelModule(outlinedFunc, symbolTable);
+        auto kernelModule = createKernelModule(op, outlinedFunc, symbolTable);
         symbolTable.insert(kernelModule, insertPt);
 
         // Potentially changes signature, pulling in constants.
@@ -402,16 +402,34 @@ class GpuKernelOutliningPass
 
 private:
   /// Returns a gpu.module containing kernelFunc and all callees (recursive).
-  gpu::GPUModuleOp createKernelModule(gpu::GPUFuncOp kernelFunc,
+  gpu::GPUModuleOp createKernelModule(gpu::LaunchOp gpuLaunchOp, gpu::GPUFuncOp kernelFunc,
                                       const SymbolTable &parentSymbolTable) {
     // TODO: This code cannot use an OpBuilder because it must be inserted into
     // a SymbolTable by the caller. SymbolTable needs to be refactored to
     // prevent manual building of Ops with symbols in code using SymbolTables
     // and then this needs to use the OpBuilder.
     auto *context = getOperation().getContext();
     OpBuilder builder(context);
-    auto kernelModule = builder.create<gpu::GPUModuleOp>(kernelFunc.getLoc(),
-                                                         kernelFunc.getName());
+    std::string kernelModuleName;
+    if (gpuLaunchOp.hasKernelModuleName()) {
+      kernelModuleName = gpuLaunchOp->getAttrOfType<mlir::SymbolRefAttr>("kernelModule").getRootReference().str();
+      llvm::errs() << "use provided kernel module name = " << kernelModuleName << "\n";
+    } else {
+      kernelModuleName = kernelFunc.getName();
+      llvm::errs() << "use default kernel module name = " << kernelModuleName << "\n";
+    }
+
+    gpu::GPUModuleOp kernelModule;
+    // Check if the module already exists in the symbol table
+    if (auto existingModule = parentSymbolTable.lookup<gpu::GPUModuleOp>(kernelModuleName)) {
+      llvm::errs() << "Reusing existing kernel module: " << kernelModuleName << "\n";
+      kernelModule = existingModule;
+    } else {
+      // If not found, create a new GPU module
+      llvm::errs() << "Creating new kernel module: " << kernelModuleName << "\n";
+      kernelModule = builder.create<gpu::GPUModuleOp>(kernelFunc.getLoc(),
+                                                           kernelModuleName);
+    }
 
     // If a valid data layout spec was provided, attach it to the kernel module.
     // Otherwise, the default data layout will be used.
@@ -439,6 +457,8 @@ class GpuKernelOutliningPass
       }
     }
 
+    //llvm::errs() << "kernelModule:\n" << kernelModule << "\n";
+
     return kernelModule;
   }