llvm · clementval · Mar 26, 2025 · grypp · Mar 26, 2025 · clementval
@@ -351,6 +351,23 @@ def GPU_OptionalDimSizeHintAttr : ConfinedAttr<OptionalAttr<DenseI32ArrayAttr>,
   [AttrConstraint<Or<[IsNullAttr.predicate, DenseArrayCount<3>.predicate]>,
     "with 3 elements (if present)">]>;
 
+// Source language of the kernel in a gpu.func or gpu.launch operation.
+def GPU_KernelSourceLangOpenACC : I32EnumAttrCase<"OpenACC", 0, "openacc">;
+def GPU_KernelSourceLangOpenMP : I32EnumAttrCase<"OpenMP", 1, "openmp">;
+def GPU_KernelSourceLangCUDAFortran
+    : I32EnumAttrCase<"CUDAFortran", 2, "cuda_fortran">;
+
+def GPU_KernelSourceLang
+    : I32EnumAttr<"KernelSourceLang", "Source language of a kernel",
+                  [GPU_KernelSourceLangOpenACC, GPU_KernelSourceLangOpenMP,
+                   GPU_KernelSourceLangCUDAFortran,
+]> {
+  let genSpecializedAttr = 0; // Attribute is defined via EnumAttr below.
+  let cppNamespace = "::mlir::gpu";
+}
+def GPU_KernelSourceLangAttr
+    : EnumAttr<GPU_Dialect, GPU_KernelSourceLang, "kernel_source_lang">;
+
 def GPU_GPUFuncOp : GPU_Op<"func", [
     HasParent<"GPUModuleOp">, AutomaticAllocationScope, FunctionOpInterface,
     IsolatedFromAbove, AffineScope
@@ -426,12 +443,13 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
   }];
 
   let arguments = (ins TypeAttrOf<FunctionType>:$function_type,
-                       OptionalAttr<DictArrayAttr>:$arg_attrs,
-                       OptionalAttr<DictArrayAttr>:$res_attrs,
-                       OptionalAttr<DictArrayAttr>:$workgroup_attrib_attrs,
-                       OptionalAttr<DictArrayAttr>:$private_attrib_attrs,
-                       GPU_OptionalDimSizeHintAttr:$known_block_size,
-                       GPU_OptionalDimSizeHintAttr:$known_grid_size);
+      OptionalAttr<DictArrayAttr>:$arg_attrs,
+      OptionalAttr<DictArrayAttr>:$res_attrs,
+      OptionalAttr<DictArrayAttr>:$workgroup_attrib_attrs,
+      OptionalAttr<DictArrayAttr>:$private_attrib_attrs,
+      GPU_OptionalDimSizeHintAttr:$known_block_size,
+      GPU_OptionalDimSizeHintAttr:$known_grid_size,
+      OptionalAttr<GPU_KernelSourceLangAttr>:$kernel_source_lang);
   let regions = (region AnyRegion:$body);
 
   let skipDefaultBuilders = 1;
@@ -793,20 +811,21 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
   let hasVerifier = 1;
 }
 
-def GPU_LaunchOp : GPU_Op<"launch", [
-      AffineScope, AutomaticAllocationScope, AttrSizedOperandSegments,
-      DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
-      GPU_AsyncOpInterface, RecursiveMemoryEffects]>,
-    Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
-               Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
-               Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
-               Optional<Index>:$clusterSizeX,
-               Optional<Index>:$clusterSizeY,
-               Optional<Index>:$clusterSizeZ,
-               Optional<I32>:$dynamicSharedMemorySize,
-               OptionalAttr<SymbolRefAttr>:$kernelFunc,
-               OptionalAttr<SymbolRefAttr>:$kernelModule)>,
-    Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
+def GPU_LaunchOp
+    : GPU_Op<"launch", [AffineScope, AutomaticAllocationScope,
+                        AttrSizedOperandSegments,
+                        DeclareOpInterfaceMethods<
+                            InferIntRangeInterface, ["inferResultRanges"]>,
+                        GPU_AsyncOpInterface, RecursiveMemoryEffects]>,
+      Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
+          Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
+          Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
+          Optional<Index>:$clusterSizeX, Optional<Index>:$clusterSizeY,
+          Optional<Index>:$clusterSizeZ, Optional<I32>:$dynamicSharedMemorySize,
+          OptionalAttr<SymbolRefAttr>:$kernelFunc,
+          OptionalAttr<SymbolRefAttr>:$kernelModule,
+          OptionalAttr<GPU_KernelSourceLangAttr>:$kernelSourceLang)>,
+      Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
   let summary = "GPU kernel launch operation";
 
   let description = [{
@@ -840,7 +859,10 @@ def GPU_LaunchOp : GPU_Op<"launch", [
     -   a variadic number of Private memory attributions.
 
     The `kernelFunc` and `kernelModule` attributes are optional and specifies
-    the kernel name and a module in which the kernel should be outlined. 
+    the kernel name and a module in which the kernel should be outlined.
+
+    The optional `kernelSourceLang` attribute specifies the source language of
+    the kernel.
 
     Syntax:
 

@@ -202,6 +202,7 @@ static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
       TypeRange(ValueRange(launchOp.getPrivateAttributions())));
   outlinedFunc->setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
                         builder.getUnitAttr());
+  outlinedFunc.setKernelSourceLangAttr(launchOp.getKernelSourceLangAttr());
 
   // If we can infer bounds on the grid and/or block sizes from the arguments
   // to the launch op, propagate them to the generated kernel. This is safe

@@ -14,6 +14,13 @@ module attributes {gpu.container_module} {
       // CHECK: gpu.terminator
       gpu.terminator
     }
+    // CHECK: gpu.launch
+    gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
+               threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) {
+      // CHECK: gpu.terminator
+      gpu.terminator
+    // CHECK: } {kernelSourceLang = #gpu<kernel_source_lang openmp>}
+    } {kernelSourceLang = #gpu<kernel_source_lang openmp>}
     return
   }
 
@@ -279,6 +286,12 @@ module attributes {gpu.container_module} {
     gpu.func @empty_attribution(%arg0: f32) workgroup() private() {
       gpu.return
     }
+
+    // CHECK-LABEL: gpu.func @source_lang(%{{.*}}: f32) kernel attributes {kernel_source_lang = #gpu<kernel_source_lang openacc>}
+    // CHECK: {
+    gpu.func @source_lang(%arg0: f32) kernel attributes {kernel_source_lang = #gpu<kernel_source_lang openacc>} {
+      gpu.return
+    }
   }
 
   gpu.module @explicit_attributions {

@@ -630,3 +630,28 @@ func.func @testNoAttributes() {
   }
   return
 }
+
+// -----
+
+// Check that gpu.launch's kernelSourceLang is copied to the outlined gpu.func.
+
+// CHECK-LABEL: func.func @testKernelFuncOnly()
+// CHECK: gpu.launch_func  @testKernelFuncOnly_kernel::@testKernelFuncOnly_kernel
+
+// CHECK: gpu.module @testKernelFuncOnly_kernel
+// CHECK: gpu.func @testKernelFuncOnly_kernel() kernel attributes {kernel_source_lang = #gpu<kernel_source_lang openacc>
+func.func @testKernelFuncOnly() {
+  %gDimX = arith.constant 8 : index
+  %gDimY = arith.constant 12 : index
+  %gDimZ = arith.constant 16 : index
+  %bDimX = arith.constant 32 : index
+  %bDimY = arith.constant 16 : index
+  %bDimZ = arith.constant 8 : index
+
+  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
+             threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
+    "some_op"(%bx, %tx) : (index, index) -> ()
+    gpu.terminator
+  } {kernelSourceLang = #gpu<kernel_source_lang openacc>}
+  return
+}