[mlir][NVVM] Add support for barrier0-reduction operation #167036

(modified) mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td (+49)
(modified) mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp (+14)
(modified) mlir/test/Target/LLVMIR/nvvmir.mlir (-7)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 1cc5b74a3cb67..0921272b538bc 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -977,6 +977,55 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
   }];
 }
 
+// Reduction kinds accepted by the predicated barrier0 operation.
+def Barrier0PredPopc : I32EnumAttrCase<"POPC", 0, "popc">;
+def Barrier0PredAnd : I32EnumAttrCase<"AND", 1, "and">;
+def Barrier0PredOr : I32EnumAttrCase<"OR", 2, "or">;
+
+def Barrier0Pred
+    : I32EnumAttr<"Barrier0Pred", "NVVM barrier0 predicate",
+                  [Barrier0PredPopc, Barrier0PredAnd, Barrier0PredOr]> {
+  let genSpecializedAttr = 0; // Attr class is declared by Barrier0PredAttr.
+  let cppNamespace = "::mlir::NVVM";
+}
+def Barrier0PredAttr : EnumAttr<NVVM_Dialect, Barrier0Pred, "barrier0_pred"> {
+  let assemblyFormat = "`<` $value `>`"; // Printed as e.g. `<and>`.
+}
+
+def NVVM_Barrier0PredOp : NVVM_Op<"barrier0.pred">,
+                          Arguments<(ins Barrier0PredAttr:$pred, I32:$value)>,
+                          Results<(outs I32:$res)> {
+  let summary = "CTA Barrier Synchronization with predicate (Barrier ID 0)";
+  let description = [{
+    The `nvvm.barrier0.pred` operation performs barrier synchronization and
+    communication within a CTA (Cooperative Thread Array) using barrier ID 0,
+    and additionally reduces the `$value` predicate across all threads of the
+    block according to `$pred`. The result of the reduction is returned.
+
+    `popc` is identical to `nvvm.barrier0` with the additional feature that it
+    evaluates predicate for all threads of the block and returns the number of
+    threads for which predicate evaluates to non-zero.
+
+    `and` is identical to `nvvm.barrier0` with the additional feature that it
+    evaluates predicate for all threads of the block and returns non-zero if
+    and only if predicate evaluates to non-zero for all of them.
+
+    `or` is identical to `nvvm.barrier0` with the additional feature that it
+    evaluates predicate for all threads of the block and returns non-zero if and
+    only if predicate evaluates to non-zero for any of them.
+
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar)
+  }];
+
+  // `$value` and `$pred` are mandatory, so the format uses no optional groups.
+  let assemblyFormat =
+      "$value `:` type($value) $pred attr-dict `->` type($res)";
+  string llvmBuilder = [{
+      $res = createIntrinsicCall(
+          builder, getBarrier0IntrinsicID($pred), {$value});
+  }];
+}
+
 def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
   let summary = "CTA Barrier Synchronization Op";
   let description = [{
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index cecff51e637a5..f23758dbb5439 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -291,6 +291,20 @@ static unsigned getUnidirectionalFenceProxyID(NVVM::ProxyKind fromProxy,
   llvm_unreachable("Unsupported proxy kinds");
 }
 
+static unsigned getBarrier0IntrinsicID(std::optional<NVVM::Barrier0Pred> pred) {
+  if (!pred.has_value()) // No predicate: use the non-reducing barrier.
+    return llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_all;
+  switch (pred.value()) { // Every enum case is listed; no `default`.
+  case NVVM::Barrier0Pred::POPC:
+    return llvm::Intrinsic::nvvm_barrier0_popc;
+  case NVVM::Barrier0Pred::AND:
+    return llvm::Intrinsic::nvvm_barrier0_and;
+  case NVVM::Barrier0Pred::OR:
+    return llvm::Intrinsic::nvvm_barrier0_or;
+  }
+  llvm_unreachable("Unknown predicate for barrier0");
+}
+
 static unsigned getMembarIntrinsicID(NVVM::MemScopeKind scope) {
   switch (scope) {
   case NVVM::MemScopeKind::CTA:
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 1ec55408e97a5..9929882a033de 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -166,13 +166,6 @@ llvm.func @nvvm_rcp(%0: f32) -> f32 {
   llvm.return %1 : f32
 }
 
-// CHECK-LABEL: @llvm_nvvm_barrier0
-llvm.func @llvm_nvvm_barrier0() {
-  // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
-  nvvm.barrier0
-  llvm.return
-}
-
 // CHECK-LABEL: @llvm_nvvm_barrier(
 // CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]])
 llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32) {

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp

mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

mlir/test/Target/LLVMIR/nvvm/barrier.mlir

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

mlir/test/Target/LLVMIR/nvvm/barrier.mlir

durga4github

LGTM, assuming we rename the reduction-operand as reductionPredicate

Co-authored-by: Guray Ozen <[email protected]>

Add support for the `nvvm.barrier0.[popc|and|or]` operations. They are added as a separate operation since `Barrier0Op` has no result (see https://docs.nvidia.com/cuda/nvvm-ir-spec/#barrier-and-memory-fence ). This will be used in the CUDA Fortran lowering ( https://github.com/llvm/llvm-project/blob/49f55f4991227f3c7a2b8161bbf45c74b7023944/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp#L1081 ) and could later be used for CUDA C/C++ with CIR ( https://github.com/llvm/llvm-project/blob/49f55f4991227f3c7a2b8161bbf45c74b7023944/clang/lib/Headers/__clang_cuda_device_functions.h#L524 ). --------- Co-authored-by: Guray Ozen <[email protected]>

Simplify the lowering by using the barrier op from NVVM updated in #167036

clementval requested review from durga4github and grypp November 7, 2025 22:49

llvmbot added mlir:llvm mlir labels Nov 7, 2025

grypp reviewed Nov 8, 2025

View reviewed changes

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td Outdated Show resolved Hide resolved

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td Outdated Show resolved Hide resolved

grypp reviewed Nov 8, 2025

View reviewed changes

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td Outdated Show resolved Hide resolved

durga4github reviewed Nov 9, 2025

View reviewed changes

mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp Outdated Show resolved Hide resolved

grypp reviewed Nov 10, 2025

View reviewed changes

durga4github reviewed Nov 10, 2025

View reviewed changes

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td Outdated Show resolved Hide resolved

durga4github reviewed Nov 10, 2025

View reviewed changes

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td Outdated Show resolved Hide resolved

durga4github changed the title ~~[mlir][NVVM] Add support for barrier0 operation with predicate~~ [mlir][NVVM] Add support for barrier0-reduction operation Nov 10, 2025

durga4github reviewed Nov 10, 2025

View reviewed changes

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td Show resolved Hide resolved

durga4github reviewed Nov 10, 2025

View reviewed changes

mlir/test/Target/LLVMIR/nvvm/barrier.mlir Show resolved Hide resolved

durga4github approved these changes Nov 10, 2025

View reviewed changes

clementval force-pushed the nvvm_barrier0_pred branch from a0bfaa8 to 6a10b51 Compare November 12, 2025 22:18

clementval and others added 10 commits November 12, 2025 14:24

[mlir][NVVM] Add support for barrier0 operation with predicate

e274e04

Fix assembly format

6f99dc4

Add test file

79c888c

Remove optional from getBarrier0IntrinsicID

4586c0f

Merge with nvvm.barrier

b8c1066

Add doc

bfc1bd1

Apply suggestion

4771142

Co-authored-by: Guray Ozen <[email protected]>

Rename reductionPredicate to redcutionOperand

cfd79ea

Add getIntrinsicIDAndArgs function

6a1a71b

Rename to reductionPredicate

485ae20

clementval force-pushed the nvvm_barrier0_pred branch from 6a10b51 to 485ae20 Compare November 12, 2025 22:25

Remove default that trigger Werror

88a786f

clementval merged commit bdf3f24 into llvm:main Nov 12, 2025
10 checks passed

clementval deleted the nvvm_barrier0_pred branch November 12, 2025 23:00

clementval mentioned this pull request Nov 13, 2025

[flang][cuda][NFC] Use NVVM barrier op with reduction #167940

Merged

clementval added a commit that referenced this pull request Nov 13, 2025

[flang][cuda][NFC] Use NVVM barrier op with reduction (#167940)

606a0c2

Simplify the lowering by using the barrier op from NVVM updated in #167036

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[mlir][NVVM] Add support for barrier0-reduction operation #167036

[mlir][NVVM] Add support for barrier0-reduction operation #167036

Uh oh!

clementval commented Nov 7, 2025

Uh oh!

llvmbot commented Nov 7, 2025 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

durga4github left a comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[mlir][NVVM] Add support for barrier0-reduction operation #167036

[mlir][NVVM] Add support for barrier0-reduction operation #167036

Uh oh!

Conversation

clementval commented Nov 7, 2025

Uh oh!

llvmbot commented Nov 7, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

durga4github left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

llvmbot commented Nov 7, 2025 •

edited

Loading