Skip to content

Commit 26ffca0

Browse files
[mlir][gpu] Add AffineScope to gpu.func op. (#118010)
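This PR addresses the problem reported in #117721. To implement the thread-to-data mapping relationship efficiently, it adds the AffineScope trait to gpu.func (for expressing data or thread layouts), so that values defined at the top level of a GPU function, such as the result of memref.dim in the new test, can be used as symbols in affine bounds.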
1 parent cb4f22c commit 26ffca0

File tree

2 files changed

+27
-1
lines changed

2 files changed

+27
-1
lines changed

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ def GPU_OptionalDimSizeHintAttr : ConfinedAttr<OptionalAttr<DenseI32ArrayAttr>,
353353

354354
def GPU_GPUFuncOp : GPU_Op<"func", [
355355
HasParent<"GPUModuleOp">, AutomaticAllocationScope, FunctionOpInterface,
356-
IsolatedFromAbove
356+
IsolatedFromAbove, AffineScope
357357
]> {
358358
let summary = "Function executable on a GPU";
359359

mlir/test/Dialect/Affine/ops.mlir

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,3 +298,29 @@ func.func @linearize_mixed(%index0: index, %index1: index, %index2: index, %basi
298298
%1 = affine.linearize_index disjoint [%index0, %index1, %index2] by (2, %basis1, 3) : index
299299
return %1 : index
300300
}
301+
302+
// -----
303+
304+
#map = affine_map<()[s0] -> (s0)>
305+
306+
// CHECK-LABEL: @gpu_affine_for
307+
308+
module attributes {gpu.container_module} {
309+
gpu.module @gpu {
310+
gpu.func @gpu_affine_for(%arg0: memref<?x?xf32>) kernel {
311+
%c3 = arith.constant 1 : index
312+
%dim = memref.dim %arg0, %c3 : memref<?x?xf32>
313+
%c0 = arith.constant 0 : index
314+
affine.for %arg3 = %c0 to #map()[%dim] step 32 {
315+
}
316+
gpu.return
317+
}
318+
}
319+
}
320+
// CHECK-SAME: (%[[VAL_0:.*]]: memref<?x?xf32>) kernel {
321+
// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
322+
// CHECK: %[[VAL_2:.*]] = memref.dim %[[VAL_0]], %[[VAL_1]] : memref<?x?xf32>
323+
// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
324+
// CHECK: affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 {
325+
// CHECK: }
326+
// CHECK: gpu.return

0 commit comments

Comments
 (0)