|
2 | 2 |
|
3 | 3 | from mlir.ir import * |
4 | 4 | import mlir.ir as ir |
5 | | -import mlir.dialects.gpu as gpu |
| 5 | +from mlir.dialects import gpu, func, arith, math |
| 6 | +from mlir.extras import types as T |
6 | 7 | import mlir.dialects.gpu.passes |
7 | 8 | from mlir.passmanager import * |
8 | 9 |
|
@@ -157,3 +158,99 @@ def builder(func: gpu.GPUFuncOp) -> None: |
157 | 158 | # CHECK: %[[VAL_0:.*]] = gpu.global_id x |
158 | 159 | # CHECK: gpu.return |
159 | 160 | # CHECK: } |
| 161 | + |
# CHECK-LABEL: testGPULaunchFuncOp
@run
def testGPULaunchFuncOp():
    """Build a gpu.module with a kernel plus a host func that launches it
    asynchronously, and verify the printed IR."""
    module = Module.create()

    # A module containing gpu.module ops must carry this unit attribute.
    module.operation.attributes["gpu.container_module"] = UnitAttr.get()
    with InsertionPoint(module.body):
        gpu_module = gpu.GPUModuleOp("gpu_module")
        body_block = gpu_module.bodyRegion.blocks.append()

    with InsertionPoint(body_block):
        # Empty kernel: no arguments, no results, just a gpu.return.
        gpu_func = gpu.GPUFuncOp(
            FunctionType.get([], []),
            "kernel",
            body_builder=lambda entry: gpu.return_([]),
            kernel=True,
        )

    with InsertionPoint(module.body):
        host = func.FuncOp(type=FunctionType.get([], []), name="host")

    with InsertionPoint(host.add_entry_block()):
        one = arith.constant(T.index(), 1)
        # 1x1x1 grid and block dimensions.
        gx, gy, gz = one, one, one
        bx, by, bz = one, one, one
        kernel_sym = SymbolRefAttr.get([gpu_module.sym_name.value, gpu_func.name.value])
        async_token_ty = Type.parse("!gpu.async.token")
        # Chain: wait -> launch_func -> wait, all on the async token.
        dep = gpu.wait(async_token=async_token_ty, async_dependencies=[])
        launch_token = gpu.launch_func(
            async_token=async_token_ty,
            async_dependencies=[dep],
            kernel=kernel_sym,
            grid_size_x=gx,
            grid_size_y=gy,
            grid_size_z=gz,
            block_size_x=bx,
            block_size_y=by,
            block_size_z=bz,
            kernel_operands=[],
        )
        gpu.wait(async_token=None, async_dependencies=[launch_token])
        func.ReturnOp([])

    print(module)

    # CHECK-LABEL: gpu.module @gpu_module {
    # CHECK: gpu.func @kernel() kernel {
    # CHECK: gpu.return
    # CHECK: }
    # CHECK: }

    # CHECK-LABEL: func.func @host() {
    # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
    # CHECK: %[[WAIT_0:.*]] = gpu.wait async
    # CHECK: %[[LAUNCH_FUNC_0:.*]] = gpu.launch_func async {{\[}}%[[WAIT_0]]] @gpu_module::@kernel blocks in (%[[CONSTANT_0]], %[[CONSTANT_0]], %[[CONSTANT_0]]) threads in (%[[CONSTANT_0]], %[[CONSTANT_0]], %[[CONSTANT_0]])
    # CHECK: gpu.wait {{\[}}%[[LAUNCH_FUNC_0]]]
    # CHECK: return
    # CHECK: }
| 220 | + |
| 221 | + |
# CHECK-LABEL: testGPULaunchOp
@run
def testGPULaunchOp():
    """Build a host func containing an inline gpu.launch region that prints
    its f32 argument, and verify the printed IR."""
    module = Module.create()

    with InsertionPoint(module.body):
        host = func.FuncOp(type=FunctionType.get([T.f32()], []), name="gpu_printf")

    entry = host.add_entry_block()
    with InsertionPoint(entry):
        one = arith.constant(T.index(), 1)

        # 1x1x1 grid and 1x1x1 block, no async token.
        launch_op = gpu.launch(None, [], one, one, one, one, one, one)
        region_block = launch_op.regions[0].blocks.append()
        # The launch body block takes 12 index arguments:
        # block ids, thread ids, grid sizes, and block sizes (x/y/z each).
        index_ty = T.index()
        unknown_loc = Location.unknown()
        for _ in range(12):
            region_block.add_argument(index_ty, unknown_loc)

    with InsertionPoint(region_block):
        gpu.printf("%f", [entry.arguments[0]])
        gpu.terminator()

    with InsertionPoint(entry):
        func.ReturnOp([])

    print(module)

    # CHECK-LABEL: func.func @gpu_printf(
    # CHECK-SAME: %[[ARG0:.*]]: f32) {
    # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
    # CHECK: gpu.launch blocks(%[[VAL_0:.*]], %[[VAL_1:.*]], %[[VAL_2:.*]]) in (%[[VAL_3:.*]] = %[[CONSTANT_0]], %[[VAL_4:.*]] = %[[CONSTANT_0]], %[[VAL_5:.*]] = %[[CONSTANT_0]]) threads(%[[VAL_6:.*]], %[[VAL_7:.*]], %[[VAL_8:.*]]) in (%[[VAL_9:.*]] = %[[CONSTANT_0]], %[[VAL_10:.*]] = %[[CONSTANT_0]], %[[VAL_11:.*]] = %[[CONSTANT_0]]) {
    # CHECK: gpu.printf "%[[VAL_12:.*]]", %[[ARG0]] : f32
    # CHECK: gpu.terminator
    # CHECK: }
    # CHECK: return
    # CHECK: }
0 commit comments