From d355389ce7855f57830bb09dda9c3af6074e9357 Mon Sep 17 00:00:00 2001 From: Srinivasa Ravi Date: Mon, 27 Jan 2025 14:29:15 +0530 Subject: [PATCH] [MLIR][NVVM] Add support for griddepcontrol Ops Adds `griddepcontrol.wait` and `griddepcontrol.launch.dependents` MLIR Ops to generate griddepcontrol instructions. `griddepcontrol` - Allows dependent and prerequisite grids as defined by the runtime to control execution in the following ways: - `griddepcontrol.wait` - causes the executing thread to wait until all prerequisite grids in flight have completed and all the memory operations from the prerequisite grids are performed and made visible to the current grid. - `griddepcontrol.launch.dependents` - signals that specific dependents the runtime system designated to react to this instruction can be scheduled as soon as all other CTAs in the grid issue the same instruction or have completed. PTX Spec Reference: https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-griddepcontrol --- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 27 +++++++++++++++++++++ mlir/test/Dialect/LLVMIR/nvvm.mlir | 13 ++++++++++ mlir/test/Target/LLVMIR/nvvmir.mlir | 16 ++++++++++++ 3 files changed, 56 insertions(+) diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 8c8e44a054a62..11143151ddd85 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -2512,6 +2512,33 @@ def NVVM_WgmmaMmaAsyncOp : NVVM_Op<"wgmma.mma_async", }]; } +//===----------------------------------------------------------------------===// +// NVVM Griddepcontrol Ops +//===----------------------------------------------------------------------===// + +def NVVM_GriddepcontrolWaitOp : NVVM_IntrOp<"griddepcontrol.wait", [], 0> { + let assemblyFormat = "attr-dict"; + + let description = [{ + Causes the executing thread to wait until all prerequisite grids in flight + have completed and all the memory operations from the prerequisite grids + are performed and made visible to the current grid. + [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-griddepcontrol) + }]; +} + +def NVVM_GriddepcontrolLaunchDependentsOp + : NVVM_IntrOp<"griddepcontrol.launch.dependents", [], 0> { + let assemblyFormat = "attr-dict"; + + let description = [{ + Signals that specific dependents the runtime system designated to react to + this instruction can be scheduled as soon as all other CTAs in the grid + issue the same instruction or have completed. + [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-griddepcontrol) + }]; +} + def NVVM_Exit : NVVM_Op<"exit"> { let summary = "Exit Op"; let description = [{ diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir index 4c3b6648a41c0..7d1efdfa44150 100644 --- a/mlir/test/Dialect/LLVMIR/nvvm.mlir +++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir @@ -509,6 +509,19 @@ func.func @wgmma_wait_group_sync_aligned() { return } +func.func @griddepcontrol_wait() { + // CHECK: nvvm.griddepcontrol.wait + nvvm.griddepcontrol.wait + return +} + +func.func @griddepcontrol_launch_dependents() +{ + // CHECK: nvvm.griddepcontrol.launch.dependents + nvvm.griddepcontrol.launch.dependents + return +} + // ----- // Just check these don't emit errors. diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index 7dad9a403def0..99a71748b0a16 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -757,3 +757,19 @@ llvm.func @nvvm_wgmma_wait_group_aligned() { nvvm.wgmma.wait.group.sync.aligned 20 llvm.return } + +// ----- +// CHECK-LABEL: @nvvm_griddepcontrol_wait +llvm.func @nvvm_griddepcontrol_wait() { + // CHECK: call void @llvm.nvvm.griddepcontrol.wait() + nvvm.griddepcontrol.wait + llvm.return +} + +// ----- +// CHECK-LABEL: @nvvm_griddepcontrol_launch_dependents +llvm.func @nvvm_griddepcontrol_launch_dependents() { + // CHECK: call void @llvm.nvvm.griddepcontrol.launch.dependents() + nvvm.griddepcontrol.launch.dependents + llvm.return +}