[mlir][python] Add Pythonic wrappers for gpu ops #163883
@@ -2,7 +2,8 @@
 from mlir.ir import *
-import mlir.dialects.gpu as gpu
+import mlir.ir as ir
+from mlir.dialects import gpu, func, arith, math
+from mlir.extras import types as T
 import mlir.dialects.gpu.passes
 from mlir.passmanager import *
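The tests below use this file's existing @run decorator, whose definition sits outside the diff hunks. A typical MLIR Python test harness of this shape looks roughly like the following sketch (an assumption about the surrounding file, not code shown in this PR):

    # Assumed shape of the @run harness used below: print the test name
    # (matched by the CHECK-LABEL lines) and execute the body under a fresh
    # Context and Location.
    def run(f):
        print("\nTEST:", f.__name__)
        with Context(), Location.unknown():
            f()
        return f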
@@ -157,3 +158,96 @@ def builder(func: gpu.GPUFuncOp) -> None:
 # CHECK: %[[VAL_0:.*]] = gpu.global_id x
 # CHECK: gpu.return
 # CHECK: }
+
+
+# CHECK-LABEL: testGPULaunchFuncOp
+@run
+def testGPULaunchFuncOp():
+    module = Module.create()
+
+    module.operation.attributes["gpu.container_module"] = UnitAttr.get()
+    with InsertionPoint(module.body):
+        gpu_module = gpu.GPUModuleOp("gpu_module")
+        block = gpu_module.bodyRegion.blocks.append()
+
+    with InsertionPoint(block):
+        gpu_func = gpu.GPUFuncOp(
+            FunctionType.get([], []),
+            "kernel",
+            body_builder=lambda func: gpu.return_([]),
+            kernel=True,
+        )
+
+    with InsertionPoint(module.body):
+        host = func.FuncOp(type=FunctionType.get([], []), name="host")
+
+    with InsertionPoint(host.add_entry_block()):
+        c1 = arith.constant(T.index(), 1)
+        grid_sizes = (1, 1, 1)
+        block_sizes = (1, 1, 1)
+        token = gpu.wait()
+        token = gpu.launch_func(
+            async_dependencies=[token],
+            kernel=[gpu_module.sym_name.value, gpu_func.name.value],
+            grid_size=grid_sizes,
+            block_size=block_sizes,
+            kernel_operands=[],
+        )
+        gpu.wait(async_dependencies=[token])
+        func.ReturnOp([])
+
+    print(module)
+
+# CHECK-LABEL: gpu.module @gpu_module {
+# CHECK: gpu.func @kernel() kernel {
+# CHECK: gpu.return
+# CHECK: }
+# CHECK: }
+
+# CHECK-LABEL: func.func @host() {
+# CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
+# CHECK: %[[WAIT_0:.*]] = gpu.wait async
+# CHECK: %[[CONSTANT_1:.*]] = arith.constant 1 : index
+# CHECK: %[[CONSTANT_2:.*]] = arith.constant 1 : index
+# CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : index
+# CHECK: %[[CONSTANT_4:.*]] = arith.constant 1 : index
+# CHECK: %[[CONSTANT_5:.*]] = arith.constant 1 : index
+# CHECK: %[[CONSTANT_6:.*]] = arith.constant 1 : index
+# CHECK: %[[LAUNCH_FUNC_0:.*]] = gpu.launch_func async {{\[}}%[[WAIT_0]]] @gpu_module::@kernel blocks in (%[[CONSTANT_1]], %[[CONSTANT_2]], %[[CONSTANT_3]]) threads in (%[[CONSTANT_4]], %[[CONSTANT_5]], %[[CONSTANT_6]])
+# CHECK: %[[WAIT_1:.*]] = gpu.wait async {{\[}}%[[LAUNCH_FUNC_0]]]
+# CHECK: return
+# CHECK: }

Review thread on the gpu.launch_func call:

- you can just grab this https://github.com/makslevental/mlir-python-extras/blob/main/mlir/extras/dialects/ext/gpu.py#L339-L379 (which supports exactly what you're saying - 3-tuples), put it in
- That all looks great to me, but are we worried about forcing folks to update their code if it depends on the Python bindings?
- what do you mean? oh you're saying since this meaningfully changes the signature of both these existing APIs (
- Right - anyone using either API will need to update their code. That's annoying! But maybe worth it. Just making sure 😄
- The Python APIs aren't stable (i.e. we make no stability guarantees). So basically this same "breakage" occurs whenever we add one of these nicer builders. Also there's a simple "migration path": people can just import the generated original APIs directly from
- Since users always have an easy way to get their old bindings back, I feel better about adding the Python-extras builders. I'll pull in the builder you linked, and I'll be ready to merge if I get an OK from Mehdi or Guray. Thanks!
- I agree with @makslevental about the stability guarantees. MLIR is more progressive compared to LLVM — there's no API stability guarantee as long as you can migrate to something better.
- Good to know 😃
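As a concrete illustration of the "migration path" mentioned in the thread (the comment above is truncated before naming the module, so the module path below is an assumption based on MLIR's _<dialect>_ops_gen naming convention for generated op bindings):

    # Hypothetical migration sketch: code that depends on the old builder
    # signatures can bypass the new Pythonic wrappers by importing the
    # tablegen-generated op classes directly (module name assumed).
    from mlir.dialects._gpu_ops_gen import LaunchFuncOp, LaunchOp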
+
+
+# CHECK-LABEL: testGPULaunchOp
+@run
+def testGPULaunchOp():
+    module = Module.create()
+
+    with InsertionPoint(module.body):
+        host = func.FuncOp(type=FunctionType.get([T.f32()], []), name="gpu_printf")
+
+    entry_block = host.add_entry_block()
+    with InsertionPoint(entry_block):
+        c1 = arith.constant(T.index(), 1)
+        grid_sizes = (c1, c1, c1)
+        block_sizes = (c1, c1, c1)
+
+        launch = gpu.launch(grid_sizes, block_sizes)
+
+        op = launch(lambda *args: gpu.printf("%f", args[0]))
+
+    with InsertionPoint(entry_block):
+        func.ReturnOp([])
+
+    print(module)
+
+# CHECK-LABEL: func.func @gpu_printf(
+# CHECK-SAME: %[[ARG0:.*]]: f32) {
+# CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
+# CHECK: gpu.launch blocks(%[[VAL_0:.*]], %[[VAL_1:.*]], %[[VAL_2:.*]]) in (%[[VAL_3:.*]] = %[[CONSTANT_0]], %[[VAL_4:.*]] = %[[CONSTANT_0]], %[[VAL_5:.*]] = %[[CONSTANT_0]]) threads(%[[VAL_6:.*]], %[[VAL_7:.*]], %[[VAL_8:.*]]) in (%[[VAL_9:.*]] = %[[CONSTANT_0]], %[[VAL_10:.*]] = %[[CONSTANT_0]], %[[VAL_11:.*]] = %[[CONSTANT_0]]) {
+# CHECK: gpu.printf "%[[VAL_12:.*]]", %[[VAL_0]] : index
+# CHECK: gpu.terminator
+# CHECK: }
+# CHECK: return
+# CHECK: }
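One inference from the CHECK lines of testGPULaunchFuncOp versus testGPULaunchOp (not stated explicitly in the diff): the wrappers appear to accept either Python ints or index Values for the grid and block sizes, materializing ints as arith.constant index ops, which is why @host contains six extra constants. A hedged sketch of such a coercion, with a hypothetical helper name:

    # Hypothetical coercion helper (name is illustrative, not from the PR):
    # pass index Values through unchanged, materialize Python ints as
    # arith.constant ops of index type.
    def _as_index_value(x):
        if isinstance(x, int):
            return arith.constant(T.index(), x)
        return x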
Trailing review comment (on the lambda body above):

- ok lol this is a good use of *args
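For context on the *args remark (gpu dialect background, not stated in the thread): the gpu.launch body region carries at least twelve index block arguments (block ids, thread ids, grid sizes, and block sizes, each along x/y/z), so a lambda *args body avoids spelling them all out. Assuming the wrapper forwards all region arguments to the body callback, an explicit equivalent would look roughly like:

    # Explicit equivalent of the lambda above (parameter names illustrative;
    # args[0] is the x block id, matching %[[VAL_0]] in the CHECK lines).
    def body(bx, by, bz, tx, ty, tz, gsx, gsy, gsz, bsx, bsy, bsz):
        gpu.printf("%f", bx)

    op = launch(body)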