Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 43 additions & 21 deletions mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,23 @@ def GPU_OptionalDimSizeHintAttr : ConfinedAttr<OptionalAttr<DenseI32ArrayAttr>,
[AttrConstraint<Or<[IsNullAttr.predicate, DenseArrayCount<3>.predicate]>,
"with 3 elements (if present)">]>;

// Source language of the gpu.func or gpu.launch_func operations.
// Individual cases of the kernel source-language enum. Each case lists its
// symbol name, its 32-bit integer value, and the keyword string that appears
// in the textual attribute form (e.g. `#gpu<kernel_source_lang openacc>`).
def GPU_KernelSourceLangOpenACC : I32EnumAttrCase<"OpenACC", 0, "openacc">;
def GPU_KernelSourceLangOpenMP : I32EnumAttrCase<"OpenMP", 1, "openmp">;
def GPU_KernelSourceLangCUDAFortran
: I32EnumAttrCase<"CUDAFortran", 2, "cuda_fortran">;

def GPU_KernelSourceLang
: I32EnumAttr<"KernelSourceLang", "Source language of a kernel",
[GPU_KernelSourceLangOpenACC, GPU_KernelSourceLangOpenMP,
GPU_KernelSourceLangCUDAFortran,
]> {
Comment on lines +355 to +364
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having these attributes won't scale. GPU dialect has many users

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The attribute is meant to be optional and the enum can be populated with any needed source language.

What would you suggest otherwise? Keep the attribute downstream?

let genSpecializedAttr = 0;
let cppNamespace = "::mlir::gpu";
}
// GPU dialect attribute wrapping the KernelSourceLang enum. It uses the
// `kernel_source_lang` mnemonic, so it is written as, for example,
// `#gpu<kernel_source_lang openmp>`.
def GPU_KernelSourceLangAttr
: EnumAttr<GPU_Dialect, GPU_KernelSourceLang, "kernel_source_lang">;

def GPU_GPUFuncOp : GPU_Op<"func", [
HasParent<"GPUModuleOp">, AutomaticAllocationScope, FunctionOpInterface,
IsolatedFromAbove, AffineScope
Expand Down Expand Up @@ -426,12 +443,13 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
}];

let arguments = (ins TypeAttrOf<FunctionType>:$function_type,
OptionalAttr<DictArrayAttr>:$arg_attrs,
OptionalAttr<DictArrayAttr>:$res_attrs,
OptionalAttr<DictArrayAttr>:$workgroup_attrib_attrs,
OptionalAttr<DictArrayAttr>:$private_attrib_attrs,
GPU_OptionalDimSizeHintAttr:$known_block_size,
GPU_OptionalDimSizeHintAttr:$known_grid_size);
OptionalAttr<DictArrayAttr>:$arg_attrs,
OptionalAttr<DictArrayAttr>:$res_attrs,
OptionalAttr<DictArrayAttr>:$workgroup_attrib_attrs,
OptionalAttr<DictArrayAttr>:$private_attrib_attrs,
GPU_OptionalDimSizeHintAttr:$known_block_size,
GPU_OptionalDimSizeHintAttr:$known_grid_size,
OptionalAttr<GPU_KernelSourceLangAttr>:$kernel_source_lang);
let regions = (region AnyRegion:$body);

let skipDefaultBuilders = 1;
Expand Down Expand Up @@ -793,20 +811,21 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
let hasVerifier = 1;
}

def GPU_LaunchOp : GPU_Op<"launch", [
AffineScope, AutomaticAllocationScope, AttrSizedOperandSegments,
DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
GPU_AsyncOpInterface, RecursiveMemoryEffects]>,
Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
Optional<Index>:$clusterSizeX,
Optional<Index>:$clusterSizeY,
Optional<Index>:$clusterSizeZ,
Optional<I32>:$dynamicSharedMemorySize,
OptionalAttr<SymbolRefAttr>:$kernelFunc,
OptionalAttr<SymbolRefAttr>:$kernelModule)>,
Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
def GPU_LaunchOp
: GPU_Op<"launch", [AffineScope, AutomaticAllocationScope,
AttrSizedOperandSegments,
DeclareOpInterfaceMethods<
InferIntRangeInterface, ["inferResultRanges"]>,
GPU_AsyncOpInterface, RecursiveMemoryEffects]>,
Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
Optional<Index>:$clusterSizeX, Optional<Index>:$clusterSizeY,
Optional<Index>:$clusterSizeZ, Optional<I32>:$dynamicSharedMemorySize,
OptionalAttr<SymbolRefAttr>:$kernelFunc,
OptionalAttr<SymbolRefAttr>:$kernelModule,
OptionalAttr<GPU_KernelSourceLangAttr>:$kernelSourceLang)>,
Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
let summary = "GPU kernel launch operation";

let description = [{
Expand Down Expand Up @@ -840,7 +859,10 @@ def GPU_LaunchOp : GPU_Op<"launch", [
- a variadic number of Private memory attributions.

The `kernelFunc` and `kernelModule` attributes are optional and specify
the kernel name and a module in which the kernel should be outlined.
the kernel name and a module in which the kernel should be outlined.

The optional `kernelSourceLang` attribute can be set to specify the source
language of the kernel.

Syntax:

Expand Down
1 change: 1 addition & 0 deletions mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
TypeRange(ValueRange(launchOp.getPrivateAttributions())));
outlinedFunc->setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
builder.getUnitAttr());
outlinedFunc.setKernelSourceLangAttr(launchOp.getKernelSourceLangAttr());

// If we can infer bounds on the grid and/or block sizes from the arguments
// to the launch op, propagate them to the generated kernel. This is safe
Expand Down
13 changes: 13 additions & 0 deletions mlir/test/Dialect/GPU/ops.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ module attributes {gpu.container_module} {
// CHECK: gpu.terminator
gpu.terminator
}
// CHECK: gpu.launch
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) {
// CHECK: gpu.terminator
gpu.terminator
// CHECK: } {kernelSourceLang = #gpu<kernel_source_lang openmp>}
} {kernelSourceLang = #gpu<kernel_source_lang openmp>}
return
}

Expand Down Expand Up @@ -279,6 +286,12 @@ module attributes {gpu.container_module} {
gpu.func @empty_attribution(%arg0: f32) workgroup() private() {
gpu.return
}

// CHECK-LABEL: gpu.func @source_lang(%{{.*}}: f32) kernel attributes {kernel_source_lang = #gpu<kernel_source_lang openacc>}
// CHECK: {
gpu.func @source_lang(%arg0: f32) kernel attributes {kernel_source_lang = #gpu<kernel_source_lang openacc>} {
gpu.return
}
}

gpu.module @explicit_attributions {
Expand Down
25 changes: 25 additions & 0 deletions mlir/test/Dialect/GPU/outlining.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -630,3 +630,28 @@ func.func @testNoAttributes() {
}
return
}

// -----

// This test checks that the kernelSourceLang attribute is propagated to the outlined gpu.func.

// CHECK-LABEL: func.func @testKernelFuncOnly()
// CHECK: gpu.launch_func @testKernelFuncOnly_kernel::@testKernelFuncOnly_kernel

// CHECK: gpu.module @testKernelFuncOnly_kernel
// CHECK: gpu.func @testKernelFuncOnly_kernel() kernel attributes {kernel_source_lang = #gpu<kernel_source_lang openacc>
func.func @testKernelFuncOnly() {
%gDimX = arith.constant 8 : index
%gDimY = arith.constant 12 : index
%gDimZ = arith.constant 16 : index
%bDimX = arith.constant 32 : index
%bDimY = arith.constant 16 : index
%bDimZ = arith.constant 8 : index

gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
"some_op"(%bx, %tx) : (index, index) -> ()
gpu.terminator
} {kernelSourceLang = #gpu<kernel_source_lang openacc>}
return
}