[mlir][gpu][RFC] Add a source language enum attribute to gpu.func and gpu.launch ops

clementval · clementval · commit cc965f760e21 · 2025-03-26T13:56:40.000-07:00
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -351,6 +351,23 @@ def GPU_OptionalDimSizeHintAttr : ConfinedAttr<OptionalAttr<DenseI32ArrayAttr>,
   [AttrConstraint<Or<[IsNullAttr.predicate, DenseArrayCount<3>.predicate]>,
     "with 3 elements (if present)">]>;
 
+// Source language of the kernel in a gpu.func or gpu.launch operation.
+def GPU_KernelSourceLangOpenACC : I32EnumAttrCase<"OpenACC", 0, "openacc">;
+def GPU_KernelSourceLangOpenMP : I32EnumAttrCase<"OpenMP", 1, "openmp">;
+def GPU_KernelSourceLangCUDAFortran
+    : I32EnumAttrCase<"CUDAFortran", 2, "cuda_fortran">;
+
+def GPU_KernelSourceLang
+    : I32EnumAttr<"KernelSourceLang", "Source language of a kernel",
+                  [GPU_KernelSourceLangOpenACC, GPU_KernelSourceLangOpenMP,
+                   GPU_KernelSourceLangCUDAFortran,
+]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::gpu";
+}
+def GPU_KernelSourceLangAttr
+    : EnumAttr<GPU_Dialect, GPU_KernelSourceLang, "kernel_source_lang">;
+
 def GPU_GPUFuncOp : GPU_Op<"func", [
     HasParent<"GPUModuleOp">, AutomaticAllocationScope, FunctionOpInterface,
     IsolatedFromAbove, AffineScope
@@ -426,12 +443,13 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
   }];
 
   let arguments = (ins TypeAttrOf<FunctionType>:$function_type,
-                       OptionalAttr<DictArrayAttr>:$arg_attrs,
-                       OptionalAttr<DictArrayAttr>:$res_attrs,
-                       OptionalAttr<DictArrayAttr>:$workgroup_attrib_attrs,
-                       OptionalAttr<DictArrayAttr>:$private_attrib_attrs,
-                       GPU_OptionalDimSizeHintAttr:$known_block_size,
-                       GPU_OptionalDimSizeHintAttr:$known_grid_size);
+      OptionalAttr<DictArrayAttr>:$arg_attrs,
+      OptionalAttr<DictArrayAttr>:$res_attrs,
+      OptionalAttr<DictArrayAttr>:$workgroup_attrib_attrs,
+      OptionalAttr<DictArrayAttr>:$private_attrib_attrs,
+      GPU_OptionalDimSizeHintAttr:$known_block_size,
+      GPU_OptionalDimSizeHintAttr:$known_grid_size,
+      OptionalAttr<GPU_KernelSourceLangAttr>:$kernel_source_lang);
   let regions = (region AnyRegion:$body);
 
   let skipDefaultBuilders = 1;
@@ -793,20 +811,21 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
   let hasVerifier = 1;
 }
 
-def GPU_LaunchOp : GPU_Op<"launch", [
-      AffineScope, AutomaticAllocationScope, AttrSizedOperandSegments,
-      DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
-      GPU_AsyncOpInterface, RecursiveMemoryEffects]>,
-    Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
-               Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
-               Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
-               Optional<Index>:$clusterSizeX,
-               Optional<Index>:$clusterSizeY,
-               Optional<Index>:$clusterSizeZ,
-               Optional<I32>:$dynamicSharedMemorySize,
-               OptionalAttr<SymbolRefAttr>:$kernelFunc,
-               OptionalAttr<SymbolRefAttr>:$kernelModule)>,
-    Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
+def GPU_LaunchOp
+    : GPU_Op<"launch", [AffineScope, AutomaticAllocationScope,
+                        AttrSizedOperandSegments,
+                        DeclareOpInterfaceMethods<
+                            InferIntRangeInterface, ["inferResultRanges"]>,
+                        GPU_AsyncOpInterface, RecursiveMemoryEffects]>,
+      Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
+          Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
+          Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
+          Optional<Index>:$clusterSizeX, Optional<Index>:$clusterSizeY,
+          Optional<Index>:$clusterSizeZ, Optional<I32>:$dynamicSharedMemorySize,
+          OptionalAttr<SymbolRefAttr>:$kernelFunc,
+          OptionalAttr<SymbolRefAttr>:$kernelModule,
+          OptionalAttr<GPU_KernelSourceLangAttr>:$kernelSourceLang)>,
+      Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
   let summary = "GPU kernel launch operation";
 
   let description = [{
@@ -840,7 +859,10 @@ def GPU_LaunchOp : GPU_Op<"launch", [
     -   a variadic number of Private memory attributions.
 
     The `kernelFunc` and `kernelModule` attributes are optional and specifies
-    the kernel name and a module in which the kernel should be outlined. 
+    the kernel name and a module in which the kernel should be outlined.
+
+    The optional `kernelSourceLang` attribute can be set to specify the source
+    language of the kernel.
 
     Syntax:
 
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -202,6 +202,7 @@ static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
       TypeRange(ValueRange(launchOp.getPrivateAttributions())));
   outlinedFunc->setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
                         builder.getUnitAttr());
+  outlinedFunc.setKernelSourceLangAttr(launchOp.getKernelSourceLangAttr());
 
   // If we can infer bounds on the grid and/or block sizes from the arguments
   // to the launch op, propagate them to the generated kernel. This is safe
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
@@ -14,6 +14,13 @@ module attributes {gpu.container_module} {
       // CHECK: gpu.terminator
       gpu.terminator
     }
+    // CHECK: gpu.launch
+    gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
+               threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) {
+      // CHECK: gpu.terminator
+      gpu.terminator
+    // CHECK: } {kernelSourceLang = #gpu<kernel_source_lang openmp>}
+    } {kernelSourceLang = #gpu<kernel_source_lang openmp>}
     return
   }
 
@@ -279,6 +286,12 @@ module attributes {gpu.container_module} {
     gpu.func @empty_attribution(%arg0: f32) workgroup() private() {
       gpu.return
     }
+
+    // CHECK-LABEL: gpu.func @source_lang(%{{.*}}: f32) kernel attributes {kernel_source_lang = #gpu<kernel_source_lang openacc>}
+    // CHECK: {
+    gpu.func @source_lang(%arg0: f32) kernel attributes {kernel_source_lang = #gpu<kernel_source_lang openacc>} {
+      gpu.return
+    }
   }
 
   gpu.module @explicit_attributions {
diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
@@ -630,3 +630,28 @@ func.func @testNoAttributes() {
   }
   return
 }
+
+// -----
+
+// Verify that kernelSourceLang on gpu.launch is propagated to the outlined gpu.func.
+
+// CHECK-LABEL: func.func @testKernelFuncOnly()
+// CHECK: gpu.launch_func  @testKernelFuncOnly_kernel::@testKernelFuncOnly_kernel
+
+// CHECK: gpu.module @testKernelFuncOnly_kernel
+// CHECK: gpu.func @testKernelFuncOnly_kernel() kernel attributes {kernel_source_lang = #gpu<kernel_source_lang openacc>
+func.func @testKernelFuncOnly() {
+  %gDimX = arith.constant 8 : index
+  %gDimY = arith.constant 12 : index
+  %gDimZ = arith.constant 16 : index
+  %bDimX = arith.constant 32 : index
+  %bDimY = arith.constant 16 : index
+  %bDimZ = arith.constant 8 : index
+
+  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
+             threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
+    "some_op"(%bx, %tx) : (index, index) -> ()
+    gpu.terminator
+  } {kernelSourceLang = #gpu<kernel_source_lang openacc>}
+  return
+}

Original file line number	Diff line number	Diff line change
`@@ -14,6 +14,13 @@ module attributes {gpu.container_module} {`
`14`	`14`	`// CHECK: gpu.terminator`
`15`	`15`	`gpu.terminator`
`16`	`16`	`}`
	`17`	`+ // CHECK: gpu.launch`
	`18`	`+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)`
	`19`	`+ threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) {`
	`20`	`+ // CHECK: gpu.terminator`
	`21`	`+ gpu.terminator`
	`22`	`+ // CHECK: } {kernelSourceLang = #gpu<kernel_source_lang openmp>}`
	`23`	`+ } {kernelSourceLang = #gpu<kernel_source_lang openmp>}`
`17`	`24`	`return`
`18`	`25`	`}`
`19`	`26`
`@@ -279,6 +286,12 @@ module attributes {gpu.container_module} {`
`279`	`286`	`gpu.func @empty_attribution(%arg0: f32) workgroup() private() {`
`280`	`287`	`gpu.return`
`281`	`288`	`}`
	`289`	`+`
	`290`	`+ // CHECK-LABEL: gpu.func @source_lang(%{{.*}}: f32) kernel attributes {kernel_source_lang = #gpu<kernel_source_lang openacc>}`
	`291`	`+ // CHECK: {`
	`292`	`+ gpu.func @source_lang(%arg0: f32) kernel attributes {kernel_source_lang = #gpu<kernel_source_lang openacc>} {`
	`293`	`+ gpu.return`
	`294`	`+ }`
`282`	`295`	`}`
`283`	`296`
`284`	`297`	`gpu.module @explicit_attributions {`