From ca23030b07c98158908c8958373d5fa009e4935e Mon Sep 17 00:00:00 2001
From: linuxlonelyeagle <2020382038@qq.com>
Date: Tue, 24 Dec 2024 23:58:40 +0800
Subject: [PATCH 1/3] Add AffineScope trait to gpu.launch.

---
 mlir/include/mlir/Dialect/GPU/IR/GPUOps.td |  2 +-
 mlir/test/Dialect/Affine/ops.mlir          | 29 ++++++++
 mlir/test/Dialect/GPU/transform-gpu.mlir   | 80 +++++++++++-----------
 3 files changed, 70 insertions(+), 41 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 42a017db300af..7de184560e616 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -796,7 +796,7 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
 def GPU_LaunchOp : GPU_Op<"launch", [
       AutomaticAllocationScope, AttrSizedOperandSegments, GPU_AsyncOpInterface,
       DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
-      RecursiveMemoryEffects]>,
+      RecursiveMemoryEffects, AffineScope]>,
     Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
                Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
                Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
diff --git a/mlir/test/Dialect/Affine/ops.mlir b/mlir/test/Dialect/Affine/ops.mlir
index c6bfb688db1c1..28cbccaf6131b 100644
--- a/mlir/test/Dialect/Affine/ops.mlir
+++ b/mlir/test/Dialect/Affine/ops.mlir
@@ -324,3 +324,32 @@ module attributes {gpu.container_module} {
 // CHECK: affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 {
 // CHECK: }
 // CHECK: gpu.return
+
+// -----
+
+// CHECK-LABEL: @gpu_launch_affine
+
+module {
+  func.func @gpu_launch_affine() {
+    %c1 = arith.constant 1 : index
+    gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %c1)
+               threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
+      %thread_id_x = gpu.thread_id x
+      %c128 = arith.constant 128 : index
+      affine.for %arg12 = %thread_id_x to %c128 step 8 {
+      }
+      gpu.terminator
+    }
+    return
+  }
+}
+
+// CHECK-NEXT: %[[VAL_0:.*]] = arith.constant 1 : index
+// CHECK-NEXT: gpu.launch blocks(%[[VAL_1:.*]], %[[VAL_2:.*]], %[[VAL_3:.*]]) in (%[[VAL_4:.*]] = %[[VAL_0]], %[[VAL_5:.*]] = %[[VAL_0]], %[[VAL_6:.*]] = %[[VAL_0]]) threads(%[[VAL_7:.*]], %[[VAL_8:.*]], %[[VAL_9:.*]]) in (%[[VAL_10:.*]] = %[[VAL_0]], %[[VAL_11:.*]] = %[[VAL_0]], %[[VAL_12:.*]] = %[[VAL_0]]) {
+// CHECK-NEXT: %[[VAL_13:.*]] = gpu.thread_id x
+// CHECK-NEXT: %[[VAL_14:.*]] = arith.constant 128 : index
+// CHECK-NEXT: affine.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step 8 {
+// CHECK-NEXT: }
+// CHECK-NEXT: gpu.terminator
+// CHECK-NEXT: }
+// CHECK-NEXT: return
diff --git a/mlir/test/Dialect/GPU/transform-gpu.mlir b/mlir/test/Dialect/GPU/transform-gpu.mlir
index 72572c6a38de1..b90f703861f63 100644
--- a/mlir/test/Dialect/GPU/transform-gpu.mlir
+++ b/mlir/test/Dialect/GPU/transform-gpu.mlir
@@ -43,7 +43,7 @@ module attributes {transform.with_named_sequence} {
 !type = memref<2 x 32 x f32>
 !type1d = memref<32 x f32>
 
-// CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 floordiv 128)>
+// CHECK-DAG: #[[$MAP:.*]] = affine_map<()[s0] -> (s0 floordiv 128)>
 
 // CHECK-LABEL: func.func @warpgroup_3d(
 // CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -61,7 +61,7 @@ func.func @warpgroup_3d(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream
 // CHECK: gpu.launch
 // CHECK: %[[TIDX:.*]] = gpu.thread_id x
 // CHECK: %[[TIDY:.*]] = gpu.thread_id y
-// CHECK-DAG: %[[WG:.*]] = affine.apply #[[$MAP]](%[[TIDX]])
+// CHECK-DAG: %[[WG:.*]] = affine.apply #[[$MAP]]()[%[[TIDX]]]
 // CHECK-DAG: %[[CMPX:.*]] = arith.cmpi ult, %[[TIDX]], %[[C384]] : index
 // CHECK-DAG: %[[CMPY:.*]] = arith.cmpi ult, %[[TIDY]], %[[C1]] : index
 // CHECK: %[[COND:.*]] = arith.andi %[[CMPX]], %[[CMPY]] : i1
@@ -95,7 +95,7 @@ module attributes {transform.with_named_sequence} {
 !type = memref<2 x 32 x f32>
 !type1d = memref<32 x f32>
 
-// CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 floordiv 16)>
+// CHECK-DAG: #[[$MAP:.*]] = affine_map<()[s0] -> (s0 floordiv 16)>
 
 // CHECK-LABEL: func.func @warp_3d(
 // CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -114,7 +114,7 @@ func.func @warp_3d(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream : !g
 // CHECK: gpu.launch
 // CHECK: %[[TIDX:.*]] = gpu.thread_id x
 // CHECK: %[[TIDY:.*]] = gpu.thread_id y
-// CHECK-DAG: %[[W:.*]] = affine.apply #[[$MAP]](%[[TIDX]])
+// CHECK-DAG: %[[W:.*]] = affine.apply #[[$MAP]]()[%[[TIDX]]]
 // CHECK-DAG: %[[CMPX:.*]] = arith.cmpi ult, %[[TIDX]], %[[C32]] : index
 // CHECK-DAG: %[[CMPY:.*]] = arith.cmpi ult, %[[TIDY]], %[[C3]] : index
 // CHECK: %[[COND:.*]] = arith.andi %[[CMPX]], %[[CMPY]] : i1
@@ -354,9 +354,9 @@ module attributes {transform.with_named_sequence} {
 !type = memref<2 x 32 x f32>
 !type1d = memref<32 x f32>
 
-// CHECK-DAG: #[[$MAPWGLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 32 + d2 * 256)>
-// CHECK-DAG: #[[$MAPWGX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 32) floordiv 128) mod 2)>
-// CHECK-DAG: #[[$MAPWGY:.*]] = affine_map<(d0, d1, d2) -> (d2 + ((d0 + d1 * 32) floordiv 128) floordiv 2)>
+// CHECK-DAG: #[[$MAPWGLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 256)>
+// CHECK-DAG: #[[$MAPWGX:.*]] = affine_map<()[s0, s1] -> (((s0 + s1 * 32) floordiv 128) mod 2)>
+// CHECK-DAG: #[[$MAPWGY:.*]] = affine_map<()[s0, s1, s2] -> (s2 + ((s0 + s1 * 32) floordiv 128) floordiv 2)>
 
 // CHECK-LABEL: func.func @warpgroup_linear(
 // CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -376,9 +376,9 @@ func.func @warpgroup_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %st
 // CHECK-DAG: %[[TIDX:.*]] = gpu.thread_id x
 // CHECK-DAG: %[[TIDY:.*]] = gpu.thread_id y
 // CHECK-DAG: %[[TIDZ:.*]] = gpu.thread_id z
-// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWGLIN]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
-// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWGX]](%[[TIDX]], %[[TIDY]])
-// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWGY]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
+// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWGLIN]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
+// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWGX]]()[%[[TIDX]], %[[TIDY]]]
+// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWGY]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
 // CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[WIDLIN]], %[[C768]] : index
 // CHECK: scf.if %[[CMPLIN]]
 // CHECK: memref.load %[[ARGX]][%[[WIDX]], %[[WIDY]]]
@@ -410,9 +410,9 @@ module attributes {transform.with_named_sequence} {
 !type = memref<2 x 32 x f32>
 !type1d = memref<32 x f32>
 
-// CHECK-DAG: #[[$MAPWLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 32 + d2 * 256)>
-// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1, d2) -> ((d1 + d2 * 8 + d0 floordiv 32) mod 2)>
-// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1, d2) -> ((d1 + d2 * 8 + d0 floordiv 32) floordiv 2)>
+// CHECK-DAG: #[[$MAPWLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 256)>
+// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<()[s0, s1, s2] -> ((s1 + s2 * 8 + s0 floordiv 32) mod 2)>
+// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<()[s0, s1, s2] -> ((s1 + s2 * 8 + s0 floordiv 32) floordiv 2)>
 
 // CHECK-LABEL: func.func @warp_linear(
 // CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -432,9 +432,9 @@ func.func @warp_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream
 // CHECK-DAG: %[[TIDX:.*]] = gpu.thread_id x
 // CHECK-DAG: %[[TIDY:.*]] = gpu.thread_id y
 // CHECK-DAG: %[[TIDZ:.*]] = gpu.thread_id z
-// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWLIN]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
-// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
-// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
+// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWLIN]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
+// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
+// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
 // CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[WIDLIN]], %[[C192]] : index
 // CHECK: scf.if %[[CMPLIN]]
 // CHECK: memref.load %[[ARGX]][%[[WIDX]], %[[WIDY]]]
@@ -466,12 +466,12 @@ module attributes {transform.with_named_sequence} {
 !type = memref<2 x 32 x f32>
 !type1d = memref<32 x f32>
 
-// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 18) floordiv 32) mod 3)>
-// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1) -> ((((d0 + d1 * 18) floordiv 32) mod 6) floordiv 3)>
+// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<()[s0, s1] -> (((s0 + s1 * 18) floordiv 32) mod 3)>
+// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<()[s0, s1] -> ((((s0 + s1 * 18) floordiv 32) mod 6) floordiv 3)>
 
-// CHECK-DAG: #[[$MAPLIN:.*]] = affine_map<(d0, d1) -> (d0 + d1 * 18)>
-// CHECK-DAG: #[[$MAPLX:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 18) mod 10)>
-// CHECK-DAG: #[[$MAPLY:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 18) floordiv 10)>
+// CHECK-DAG: #[[$MAPLIN:.*]] = affine_map<()[s0, s1] -> (s0 + s1 * 18)>
+// CHECK-DAG: #[[$MAPLX:.*]] = affine_map<()[s0, s1] -> ((s0 + s1 * 18) mod 10)>
+// CHECK-DAG: #[[$MAPLY:.*]] = affine_map<()[s0, s1] -> ((s0 + s1 * 18) floordiv 10)>
 
 // CHECK-LABEL: func.func @map_multi_level_linear(
 func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream : !gpu.async.token) -> !type {
@@ -504,9 +504,9 @@ func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f3
       memref.store %6, %y[%i, %j] : !type
     } { mapping = [#gpu.thread, #gpu.thread]}
 
-    // CHECK-DAG: %[[LIN:.*]] = affine.apply #[[$MAPLIN]](%[[TIDX]], %[[TIDY]])
-    // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]](%[[TIDX]], %[[TIDY]])
-    // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]])
+    // CHECK-DAG: %[[LIN:.*]] = affine.apply #[[$MAPLIN]]()[%[[TIDX]], %[[TIDY]]]
+    // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]]()[%[[TIDX]], %[[TIDY]]]
+    // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]]()[%[[TIDX]], %[[TIDY]]]
     // CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[LIN]], %[[C192]] : index
     // CHECK: scf.if %[[CMPLIN]]
     scf.forall (%i, %j, %k) in (%c3, %c2, %c1) {
@@ -515,8 +515,8 @@ func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f3
       memref.store %8, %y[%i, %j] : !type
     } {mapping = [#gpu.warp, #gpu.warp, #gpu.warp] }
 
-    // CHECK-DAG: %[[LIDX:.*]] = affine.apply #[[$MAPLX]](%[[TIDX]], %[[TIDY]])
-    // CHECK-DAG: %[[LIDY:.*]] = affine.apply #[[$MAPLY]](%[[TIDX]], %[[TIDY]])
+    // CHECK-DAG: %[[LIDX:.*]] = affine.apply #[[$MAPLX]]()[%[[TIDX]], %[[TIDY]]]
+    // CHECK-DAG: %[[LIDY:.*]] = affine.apply #[[$MAPLY]]()[%[[TIDX]], %[[TIDY]]]
     // CHECK-DAG: %[[COND:.*]] = arith.cmpi ult, %[[LIN]], %[[C20]] : index
    // CHECK: scf.if %[[COND]]
    // CHECK: memref.load %{{.*}}[%[[LIDX]]] : memref<32xf32>
@@ -545,9 +545,9 @@ module attributes {transform.with_named_sequence} {
 !type = memref<2 x 32 x f32>
 !type1d = memref<32 x f32>
 
-// CHECK-DAG: #[[$MAPBLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 12 + d2 * 108)>
-// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<(d0, d1, d2) -> ((d0 + d1 * 12 + d2 * 108) mod 7)>
-// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<(d0, d1, d2) -> ((d0 + d1 * 12 + d2 * 108) floordiv 7)>
+// CHECK-DAG: #[[$MAPBLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 12 + s2 * 108)>
+// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<()[s0, s1, s2] -> ((s0 + s1 * 12 + s2 * 108) mod 7)>
+// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<()[s0, s1, s2] -> ((s0 + s1 * 12 + s2 * 108) floordiv 7)>
 
 // CHECK-LABEL: func.func @block_linear_existing_launch(
 // CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -566,9 +566,9 @@ func.func @block_linear_existing_launch(
 // CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
 // CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y
 // CHECK-DAG: %[[BIDZ:.*]] = gpu.block_id z
-// CHECK-DAG: %[[BIDLIN:.*]] = affine.apply #[[$MAPBLIN]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
-// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
-// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
+// CHECK-DAG: %[[BIDLIN:.*]] = affine.apply #[[$MAPBLIN]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
+// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
+// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
 // CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[BIDLIN]], %[[C63]] : index
 // CHECK: scf.if %[[CMPLIN]]
 // CHECK: memref.load %[[ARGX]][%[[BLX]], %[[BLY]]]
@@ -600,8 +600,8 @@ module attributes {transform.with_named_sequence} {
 !type = memref<2 x 32 x f32>
 !type1d = memref<32 x f32>
 
-// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<(d0) -> (d0 mod 7)>
-// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<(d0, d1, d2) -> (d1 + d2 * 9 + d0 floordiv 7)>
+// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<()[s0] -> (s0 mod 7)>
+// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * 9 + s0 floordiv 7)>
 
 // CHECK-LABEL: func.func @block_linear_generate_launch(
 // CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -620,8 +620,8 @@ func.func @block_linear_generate_launch(
 // CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
 // CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y
 // CHECK-DAG: %[[BIDZ:.*]] = gpu.block_id z
-// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]](%[[BIDX]])
-// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
+// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]]()[%[[BIDX]]]
+// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
 // CHECK: memref.load %[[ARGX]][%[[BLX]], %[[BLY]]]
 // CHECK: memref.load %[[ARGY]][%[[BLX]], %[[BLY]]]
   scf.forall (%i, %j) in (%c7, %c9) {
@@ -647,8 +647,8 @@ module attributes {transform.with_named_sequence} {
 #map = affine_map<(d0) -> (d0 * 128)>
 #map1 = affine_map<(d0) -> (d0 * 32)>
 
-// CHECK-DAG: #[[$MAPB:.*]] = affine_map<(d0) -> (d0 * 128)>
-// CHECK-DAG: #[[$MAPW:.*]] = affine_map<(d0, d1, d2) -> (d2 * 32 + ((d0 + d1 * 4) floordiv 32) * 32)>
+// CHECK-DAG: #[[$MAPB:.*]] = affine_map<()[s0] -> (s0 * 128)>
+// CHECK-DAG: #[[$MAPW:.*]] = affine_map<()[s0, s1, s2] -> (s2 * 32 + ((s0 + s1 * 4) floordiv 32) * 32)>
 
 // CHECK-LABEL: func.func @simple_fill(
 func.func @simple_fill(%arg0: memref<128xf32>) -> memref<128xf32> {
@@ -660,14 +660,14 @@ func.func @simple_fill(%arg0: memref<128xf32>) -> memref<128xf32> {
 // CHECK: gpu.launch
   scf.forall (%arg1) in (1) {
 // CHECK: %[[BIDX:.*]] = gpu.block_id x
-// CHECK: %[[BLX:.*]] = affine.apply #[[$MAPB]](%[[BIDX]])
+// CHECK: %[[BLX:.*]] = affine.apply #[[$MAPB]]()[%[[BIDX]]]
     %0 = affine.apply #map(%arg1)
     %subview = memref.subview %arg0[%0] [128] [1] : memref<128xf32> to memref<128xf32, strided<[1], offset: ?>>
     scf.forall (%arg2) in (4) {
 // CHECK: %[[TIDX:.*]] = gpu.thread_id x
 // CHECK: %[[TIDY:.*]] = gpu.thread_id y
 // CHECK: %[[TIDZ:.*]] = gpu.thread_id z
-// CHECK: %[[THX:.*]] = affine.apply #[[$MAPW]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
+// CHECK: %[[THX:.*]] = affine.apply #[[$MAPW]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
 // CHECK-NOT: scf.if
 // CHECK: memref.subview %{{.*}}[%[[THX]]]
       %1 = affine.apply #map1(%arg2)

From 989e3d6406d0600efd392aecf645698acbcb3ef3 Mon Sep 17 00:00:00 2001
From: linuxlonelyeagle <2020382038@qq.com>
Date: Sat, 18 Jan 2025 19:43:31 +0800
Subject: [PATCH 2/3] Rebase onto main.

---
 mlir/test/Dialect/Affine/ops.mlir | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/mlir/test/Dialect/Affine/ops.mlir b/mlir/test/Dialect/Affine/ops.mlir
index 1632b52764b9a..42bad527b451a 100644
--- a/mlir/test/Dialect/Affine/ops.mlir
+++ b/mlir/test/Dialect/Affine/ops.mlir
@@ -317,13 +317,13 @@ module attributes {gpu.container_module} {
     }
   }
 }
-// CHECK-SAME: (%[[VAL_0:.*]]: memref) kernel {
-// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_2:.*]] = memref.dim %[[VAL_0]], %[[VAL_1]] : memref
-// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
-// CHECK: affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 {
-// CHECK: }
-// CHECK: gpu.return
+// CHECK-SAME: (%[[VAL_0:.*]]: memref) kernel {
+// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_2:.*]] = memref.dim %[[VAL_0]], %[[VAL_1]] : memref
+// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK: affine.for %{{.*}} = %[[VAL_3]] to %[[VAL_2]] step 32 {
+// CHECK: }
+// CHECK: gpu.return
 
 // -----
 
@@ -344,15 +344,15 @@ module {
   }
 }
 
-// CHECK-NEXT: %[[VAL_0:.*]] = arith.constant 1 : index
-// CHECK-NEXT: gpu.launch blocks(%[[VAL_1:.*]], %[[VAL_2:.*]], %[[VAL_3:.*]]) in (%[[VAL_4:.*]] = %[[VAL_0]], %[[VAL_5:.*]] = %[[VAL_0]], %[[VAL_6:.*]] = %[[VAL_0]]) threads(%[[VAL_7:.*]], %[[VAL_8:.*]], %[[VAL_9:.*]]) in (%[[VAL_10:.*]] = %[[VAL_0]], %[[VAL_11:.*]] = %[[VAL_0]], %[[VAL_12:.*]] = %[[VAL_0]]) {
-// CHECK-NEXT: %[[VAL_13:.*]] = gpu.thread_id x
-// CHECK-NEXT: %[[VAL_14:.*]] = arith.constant 128 : index
-// CHECK-NEXT: affine.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step 8 {
-// CHECK-NEXT: }
-// CHECK-NEXT: gpu.terminator
+// CHECK-NEXT: %[[VAL_0:.*]] = arith.constant 1 : index
+// CHECK-NEXT: gpu.launch blocks(%[[VAL_1:.*]], %[[VAL_2:.*]], %[[VAL_3:.*]]) in (%[[VAL_4:.*]] = %[[VAL_0]], %[[VAL_5:.*]] = %[[VAL_0]], %[[VAL_6:.*]] = %[[VAL_0]]) threads(%[[VAL_7:.*]], %[[VAL_8:.*]], %[[VAL_9:.*]]) in (%[[VAL_10:.*]] = %[[VAL_0]], %[[VAL_11:.*]] = %[[VAL_0]], %[[VAL_12:.*]] = %[[VAL_0]]) {
+// CHECK-NEXT: %[[VAL_13:.*]] = gpu.thread_id x
+// CHECK-NEXT: %[[VAL_14:.*]] = arith.constant 128 : index
+// CHECK-NEXT: affine.for %{{.*}} = %[[VAL_13]] to %[[VAL_14]] step 8 {
 // CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: return
+// CHECK-NEXT: gpu.terminator
+// CHECK-NEXT: }
+// CHECK-NEXT: return
 
 // -----
 
From 12e2e4a7e553b37ec2c656001182e728b62ed385 Mon Sep 17 00:00:00 2001
From: linuxlonelyeagle <2020382038@qq.com>
Date: Tue, 4 Feb 2025 11:35:37 +0800
Subject: [PATCH 3/3] Update the test and the op definition.

---
 mlir/include/mlir/Dialect/GPU/IR/GPUOps.td |  4 +--
 mlir/test/Dialect/Affine/ops.mlir          | 41 ++++------------------
 2 files changed, 8 insertions(+), 37 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 66062af958890..2b1ce573effd0 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -794,9 +794,9 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
 }
 
 def GPU_LaunchOp : GPU_Op<"launch", [
-      AutomaticAllocationScope, AttrSizedOperandSegments, GPU_AsyncOpInterface,
+      AffineScope, AutomaticAllocationScope, AttrSizedOperandSegments,
       DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
-      RecursiveMemoryEffects, AffineScope]>,
+      GPU_AsyncOpInterface, RecursiveMemoryEffects]>,
     Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
                Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
                Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
diff --git a/mlir/test/Dialect/Affine/ops.mlir b/mlir/test/Dialect/Affine/ops.mlir
index 42bad527b451a..233c18c82831c 100644
--- a/mlir/test/Dialect/Affine/ops.mlir
+++ b/mlir/test/Dialect/Affine/ops.mlir
@@ -301,34 +301,11 @@ func.func @linearize_mixed(%index0: index, %index1: index, %index2: index, %basi
 
 // -----
 
-#map = affine_map<()[s0] -> (s0)>
-
-// CHECK-LABEL: @gpu_affine_for
-
-module attributes {gpu.container_module} {
-  gpu.module @gpu {
-    gpu.func @gpu_affine_for(%arg0: memref) kernel {
-      %c3 = arith.constant 1 : index
-      %dim = memref.dim %arg0, %c3 : memref
-      %c0 = arith.constant 0 : index
-      affine.for %arg3 = %c0 to #map()[%dim] step 32 {
-      }
-      gpu.return
-    }
-  }
-}
-// CHECK-SAME: (%[[VAL_0:.*]]: memref) kernel {
-// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_2:.*]] = memref.dim %[[VAL_0]], %[[VAL_1]] : memref
-// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
-// CHECK: affine.for %{{.*}} = %[[VAL_3]] to %[[VAL_2]] step 32 {
-// CHECK: }
-// CHECK: gpu.return
-
-// -----
-
 // CHECK-LABEL: @gpu_launch_affine
 
+// Test that `gpu.thread_id` is a valid affine symbol here: it is defined at the
+// top level of `gpu.launch`, which now carries the AffineScope trait.
+
 module {
   func.func @gpu_launch_affine() {
     %c1 = arith.constant 1 : index
@@ -344,15 +321,9 @@ module {
   }
 }
 
-// CHECK-NEXT: %[[VAL_0:.*]] = arith.constant 1 : index
-// CHECK-NEXT: gpu.launch blocks(%[[VAL_1:.*]], %[[VAL_2:.*]], %[[VAL_3:.*]]) in (%[[VAL_4:.*]] = %[[VAL_0]], %[[VAL_5:.*]] = %[[VAL_0]], %[[VAL_6:.*]] = %[[VAL_0]]) threads(%[[VAL_7:.*]], %[[VAL_8:.*]], %[[VAL_9:.*]]) in (%[[VAL_10:.*]] = %[[VAL_0]], %[[VAL_11:.*]] = %[[VAL_0]], %[[VAL_12:.*]] = %[[VAL_0]]) {
-// CHECK-NEXT: %[[VAL_13:.*]] = gpu.thread_id x
-// CHECK-NEXT: %[[VAL_14:.*]] = arith.constant 128 : index
-// CHECK-NEXT: affine.for %{{.*}} = %[[VAL_13]] to %[[VAL_14]] step 8 {
-// CHECK-NEXT: }
-// CHECK-NEXT: gpu.terminator
-// CHECK-NEXT: }
-// CHECK-NEXT: return
+// CHECK: %[[THREAD_ID:.*]] = gpu.thread_id x
+// CHECK: %[[VAL:.*]] = arith.constant 128 : index
+// CHECK: affine.for %{{.*}} = %[[THREAD_ID]] to %[[VAL]] step 8 {
 
 // -----