From c6779ecb7306dc92940c6962a537454588ee2c03 Mon Sep 17 00:00:00 2001 From: Kolya Panchenko Date: Wed, 6 Aug 2025 14:09:21 -0400 Subject: [PATCH 1/3] [mlir] Added `Convergent` trait that matches LLVM's semantics LLVM provides `convergent` function attribute that says call to it must not be made control-dependent on any new condition. For example, that attribute disables jump threading that otherwise can lead to runtime errors or dead lock. See https://llvm.org/docs/ConvergentOperations.html for more details. It appears that MLIR does not provide a trait for this even though some operations, such as `nvvm.barrier0` is convergent due it lowering to `llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_all`. The patch adds `Convergent` trait to `ControlFlowInterface` (IMO, that's appropriate place for this trait) and adds that trait to some NVVM operations that are lowered to convergent LLVM Intrinsic. --- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 37 ++++++++++++------- .../mlir/Interfaces/ControlFlowInterfaces.h | 6 ++- .../mlir/Interfaces/ControlFlowInterfaces.td | 4 ++ mlir/test/lib/Dialect/Test/TestOps.td | 5 +++ 4 files changed, 37 insertions(+), 15 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 30df3b739e5ca..e95328398fe0c 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -14,12 +14,13 @@ #define NVVMIR_OPS include "mlir/IR/EnumAttr.td" +include "mlir/Interfaces/ControlFlowInterfaces.td" +include "mlir/Interfaces/InferIntRangeInterface.td" +include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td" include "mlir/Dialect/LLVMIR/LLVMOpBase.td" include "mlir/Dialect/LLVMIR/NVVMRequiresSMTraits.td" -include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.td" -include "mlir/Interfaces/InferIntRangeInterface.td" include "mlir/Dialect/LLVMIR/LLVMTypes.td" def LLVM_PointerGeneric : LLVM_PointerInAddressSpace<0>; @@ -561,7 +562,7 @@ def NVVM_MBarrierTestWaitSharedOp : NVVM_Op<"mbarrier.test.wait.shared">, // NVVM synchronization op definitions //===----------------------------------------------------------------------===// -def NVVM_Barrier0Op : NVVM_Op<"barrier0"> { +def NVVM_Barrier0Op : NVVM_Op<"barrier0", [Convergent]> { let assemblyFormat = "attr-dict"; string llvmBuilder = [{ createIntrinsicCall( @@ -570,8 +571,9 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> { }]; } -def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> { - let arguments = (ins +def NVVM_BarrierOp : NVVM_Op<"barrier", + [Convergent, AttrSizedOperandSegments]> { + let arguments = (ins Optional:$barrierId, Optional:$numberOfThreads); string llvmBuilder = [{ @@ -598,7 +600,7 @@ def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> { ]; } -def NVVM_BarrierArriveOp : NVVM_PTXBuilder_Op<"barrier.arrive"> +def NVVM_BarrierArriveOp : NVVM_PTXBuilder_Op<"barrier.arrive", [Convergent]> { let arguments = (ins Optional:$barrierId, I32:$numberOfThreads); @@ -624,7 +626,7 @@ def NVVM_BarrierArriveOp : NVVM_PTXBuilder_Op<"barrier.arrive"> }]; } -def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> { +def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive", [Convergent]> { let arguments = (ins OptionalAttr:$aligned); let summary = "Cluster Barrier Arrive Op"; @@ -647,7 +649,8 @@ def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> { let assemblyFormat = "attr-dict"; } -def NVVM_ClusterArriveRelaxedOp : NVVM_Op<"cluster.arrive.relaxed", [NVVMRequiresSM<90>]> { +def NVVM_ClusterArriveRelaxedOp : NVVM_Op<"cluster.arrive.relaxed", + [Convergent, NVVMRequiresSM<90>]> { let arguments = (ins OptionalAttr:$aligned); let summary = "Cluster Barrier Relaxed Arrive Op"; @@ -673,7 +676,8 @@ def NVVM_ClusterArriveRelaxedOp : NVVM_Op<"cluster.arrive.relaxed", [NVVMRequire let assemblyFormat = "attr-dict"; } -def NVVM_ClusterWaitOp : NVVM_Op<"cluster.wait", [NVVMRequiresSM<90>]> { +def NVVM_ClusterWaitOp : NVVM_Op<"cluster.wait", + [Convergent, NVVMRequiresSM<90>]> { let arguments = (ins OptionalAttr:$aligned); let summary = "Cluster Barrier Wait Op"; @@ -1054,7 +1058,8 @@ def NVVM_CpAsyncWaitGroupOp : NVVM_Op<"cp.async.wait.group">, let assemblyFormat = "$n attr-dict"; } -def NVVM_CpAsyncMBarrierArriveOp : NVVM_Op<"cp.async.mbarrier.arrive"> { +def NVVM_CpAsyncMBarrierArriveOp : NVVM_Op<"cp.async.mbarrier.arrive", + [Convergent]> { let summary = "NVVM Dialect Op for cp.async.mbarrier.arrive"; let description = [{ The `cp.async.mbarrier.arrive` Op makes the mbarrier object track @@ -1079,7 +1084,8 @@ def NVVM_CpAsyncMBarrierArriveOp : NVVM_Op<"cp.async.mbarrier.arrive"> { }]; } -def NVVM_CpAsyncMBarrierArriveSharedOp : NVVM_Op<"cp.async.mbarrier.arrive.shared"> { +def NVVM_CpAsyncMBarrierArriveSharedOp : NVVM_Op<"cp.async.mbarrier.arrive.shared", + [Convergent]> { let summary = "NVVM Dialect Op for cp.async.mbarrier.arrive.shared"; let description = [{ The `cp.async.mbarrier.arrive.shared` Op makes the mbarrier object @@ -2806,7 +2812,8 @@ def NVVM_CpAsyncBulkSharedCTAToGlobalOp : // NVVM Wgmma Ops //===----------------------------------------------------------------------===// -def NVVM_WgmmaFenceAlignedOp : NVVM_Op<"wgmma.fence.aligned", [NVVMRequiresSMa<[90]>]> { +def NVVM_WgmmaFenceAlignedOp : NVVM_Op<"wgmma.fence.aligned", + [Convergent, NVVMRequiresSMa<[90]>]> { let arguments = (ins); let description = [{ Enforce an ordering of register accesses between warpgroup level matrix @@ -2820,7 +2827,8 @@ def NVVM_WgmmaFenceAlignedOp : NVVM_Op<"wgmma.fence.aligned", [NVVMRequiresSMa<[ }]; } -def NVVM_WgmmaGroupSyncAlignedOp : NVVM_Op<"wgmma.commit.group.sync.aligned", [NVVMRequiresSMa<[90]>]> { +def NVVM_WgmmaGroupSyncAlignedOp : NVVM_Op<"wgmma.commit.group.sync.aligned", + [Convergent, NVVMRequiresSMa<[90]>]> { let assemblyFormat = "attr-dict"; let description = [{ Commits all prior uncommitted warpgroup level matrix multiplication operations. @@ -2832,7 +2840,8 @@ def NVVM_WgmmaGroupSyncAlignedOp : NVVM_Op<"wgmma.commit.group.sync.aligned", [N }]; } -def NVVM_WgmmaWaitGroupSyncOp : NVVM_Op<"wgmma.wait.group.sync.aligned", [NVVMRequiresSMa<[90]>]> { +def NVVM_WgmmaWaitGroupSyncOp : NVVM_Op<"wgmma.wait.group.sync.aligned", + [Convergent, NVVMRequiresSMa<[90]>]> { let arguments = (ins I64Attr:$group); let assemblyFormat = "attr-dict $group"; let description = [{ diff --git a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h index d63800c12d132..750a9f86e49d7 100644 --- a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h +++ b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h @@ -337,8 +337,12 @@ struct ReturnLike : public TraitBase { return success(); } }; -} // namespace OpTrait +// The Operation may not be made control-dependent on any additional values. +// See https://llvm.org/docs/ConvergentOperations.html for more details. +template +struct Convergent : public TraitBase {}; +} // namespace OpTrait } // namespace mlir //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td index b8d08cc553caa..6eb2f9002d7cf 100644 --- a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td +++ b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td @@ -511,4 +511,8 @@ def ReturnLike : TraitList<[ > ]>; +// Use to inject an implementation of getSpeculatability. Users should not use +// this directly. +def Convergent : NativeOpTrait<"Convergent">; + #endif // MLIR_INTERFACES_CONTROLFLOWINTERFACES diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 2eaad552a7a3a..ad14666a1a2cc 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -2113,6 +2113,11 @@ def TestTypeChangerOp : TEST_Op<"type_changer">, def TestValidOp : TEST_Op<"valid", [Terminator]>, Arguments<(ins Variadic)>; +def TestConvergentOp : TEST_Op<"convergent", [Convergent]> { + let arguments = (ins AnyType); + let results = (outs AnyType); +} + def TestMergeBlocksOp : TEST_Op<"merge_blocks"> { let summary = "merge_blocks operation"; let description = [{ From 058f9aefa7c2d9e0943b9d7ee7afadf04f09788c Mon Sep 17 00:00:00 2001 From: Kolya Panchenko Date: Wed, 6 Aug 2025 16:30:49 -0400 Subject: [PATCH 2/3] Fixed `NVVM_PTXBuilder_Op` and removed bad comment --- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 7 ++++--- mlir/include/mlir/Interfaces/ControlFlowInterfaces.td | 3 +-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index e95328398fe0c..031a0b9772ec3 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -106,9 +106,10 @@ class NVVM_Op traits = []> : } /// Base class that defines BasicPtxBuilderOpInterface. -class NVVM_PTXBuilder_Op traits = [DeclareOpInterfaceMethods]> : - LLVM_OpBase { +class NVVM_PTXBuilder_Op traits = []> : + LLVM_OpBase])> { } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td index 6eb2f9002d7cf..6545d72aeb61a 100644 --- a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td +++ b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td @@ -511,8 +511,7 @@ def ReturnLike : TraitList<[ > ]>; -// Use to inject an implementation of getSpeculatability. Users should not use -// this directly. +// Op is "convergent". def Convergent : NativeOpTrait<"Convergent">; #endif // MLIR_INTERFACES_CONTROLFLOWINTERFACES From cb8f0a2be4f7b1167108f2d9d40c9f1f4247f19d Mon Sep 17 00:00:00 2001 From: Kolya Panchenko Date: Fri, 8 Aug 2025 13:23:33 -0400 Subject: [PATCH 3/3] Only mark `nvvm.barrier0` as convergent --- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 37 ++++++++------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 031a0b9772ec3..2fcb199e65673 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -106,10 +106,9 @@ class NVVM_Op traits = []> : } /// Base class that defines BasicPtxBuilderOpInterface. -class NVVM_PTXBuilder_Op traits = []> : - LLVM_OpBase])> { +class NVVM_PTXBuilder_Op traits = [DeclareOpInterfaceMethods]> : + LLVM_OpBase { } //===----------------------------------------------------------------------===// @@ -572,9 +571,8 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0", [Convergent]> { }]; } -def NVVM_BarrierOp : NVVM_Op<"barrier", - [Convergent, AttrSizedOperandSegments]> { - let arguments = (ins +def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> { + let arguments = (ins Optional:$barrierId, Optional:$numberOfThreads); string llvmBuilder = [{ @@ -601,7 +599,7 @@ def NVVM_BarrierOp : NVVM_Op<"barrier", ]; } -def NVVM_BarrierArriveOp : NVVM_PTXBuilder_Op<"barrier.arrive", [Convergent]> +def NVVM_BarrierArriveOp : NVVM_PTXBuilder_Op<"barrier.arrive"> { let arguments = (ins Optional:$barrierId, I32:$numberOfThreads); @@ -627,7 +625,7 @@ def NVVM_BarrierArriveOp : NVVM_PTXBuilder_Op<"barrier.arrive", [Convergent]> }]; } -def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive", [Convergent]> { +def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> { let arguments = (ins OptionalAttr:$aligned); let summary = "Cluster Barrier Arrive Op"; @@ -650,8 +648,7 @@ def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive", [Convergent]> { let assemblyFormat = "attr-dict"; } -def NVVM_ClusterArriveRelaxedOp : NVVM_Op<"cluster.arrive.relaxed", - [Convergent, NVVMRequiresSM<90>]> { +def NVVM_ClusterArriveRelaxedOp : NVVM_Op<"cluster.arrive.relaxed", [NVVMRequiresSM<90>]> { let arguments = (ins OptionalAttr:$aligned); let summary = "Cluster Barrier Relaxed Arrive Op"; @@ -677,8 +674,7 @@ def NVVM_ClusterArriveRelaxedOp : NVVM_Op<"cluster.arrive.relaxed", let assemblyFormat = "attr-dict"; } -def NVVM_ClusterWaitOp : NVVM_Op<"cluster.wait", - [Convergent, NVVMRequiresSM<90>]> { +def NVVM_ClusterWaitOp : NVVM_Op<"cluster.wait", [NVVMRequiresSM<90>]> { let arguments = (ins OptionalAttr:$aligned); let summary = "Cluster Barrier Wait Op"; @@ -1059,8 +1055,7 @@ def NVVM_CpAsyncWaitGroupOp : NVVM_Op<"cp.async.wait.group">, let assemblyFormat = "$n attr-dict"; } -def NVVM_CpAsyncMBarrierArriveOp : NVVM_Op<"cp.async.mbarrier.arrive", - [Convergent]> { +def NVVM_CpAsyncMBarrierArriveOp : NVVM_Op<"cp.async.mbarrier.arrive"> { let summary = "NVVM Dialect Op for cp.async.mbarrier.arrive"; let description = [{ The `cp.async.mbarrier.arrive` Op makes the mbarrier object track @@ -1085,8 +1080,7 @@ def NVVM_CpAsyncMBarrierArriveOp : NVVM_Op<"cp.async.mbarrier.arrive", }]; } -def NVVM_CpAsyncMBarrierArriveSharedOp : NVVM_Op<"cp.async.mbarrier.arrive.shared", - [Convergent]> { +def NVVM_CpAsyncMBarrierArriveSharedOp : NVVM_Op<"cp.async.mbarrier.arrive.shared"> { let summary = "NVVM Dialect Op for cp.async.mbarrier.arrive.shared"; let description = [{ The `cp.async.mbarrier.arrive.shared` Op makes the mbarrier object @@ -2813,8 +2807,7 @@ def NVVM_CpAsyncBulkSharedCTAToGlobalOp : // NVVM Wgmma Ops //===----------------------------------------------------------------------===// -def NVVM_WgmmaFenceAlignedOp : NVVM_Op<"wgmma.fence.aligned", - [Convergent, NVVMRequiresSMa<[90]>]> { +def NVVM_WgmmaFenceAlignedOp : NVVM_Op<"wgmma.fence.aligned", [NVVMRequiresSMa<[90]>]> { let arguments = (ins); let description = [{ Enforce an ordering of register accesses between warpgroup level matrix @@ -2828,8 +2821,7 @@ def NVVM_WgmmaFenceAlignedOp : NVVM_Op<"wgmma.fence.aligned", }]; } -def NVVM_WgmmaGroupSyncAlignedOp : NVVM_Op<"wgmma.commit.group.sync.aligned", - [Convergent, NVVMRequiresSMa<[90]>]> { +def NVVM_WgmmaGroupSyncAlignedOp : NVVM_Op<"wgmma.commit.group.sync.aligned", [NVVMRequiresSMa<[90]>]> { let assemblyFormat = "attr-dict"; let description = [{ Commits all prior uncommitted warpgroup level matrix multiplication operations. @@ -2841,8 +2833,7 @@ def NVVM_WgmmaGroupSyncAlignedOp : NVVM_Op<"wgmma.commit.group.sync.aligned", }]; } -def NVVM_WgmmaWaitGroupSyncOp : NVVM_Op<"wgmma.wait.group.sync.aligned", - [Convergent, NVVMRequiresSMa<[90]>]> { +def NVVM_WgmmaWaitGroupSyncOp : NVVM_Op<"wgmma.wait.group.sync.aligned", [NVVMRequiresSMa<[90]>]> { let arguments = (ins I64Attr:$group); let assemblyFormat = "attr-dict $group"; let description = [{