Skip to content

Commit 46ef57a

Browse files
authored
[MLIR][NVVM] Add mbarrier.try_wait Op (#170285)
This patch adds an Op for the mbarrier.try_wait operation, which lowers to the corresponding intrinsics. The Op supports an optional time limit, a state or phase operand, and relaxed memory semantics, completing the feature set for this operation up to Blackwell. Unlike the existing `nvvm.mbarrier.try_wait.parity` Op, this Op does not provide a _blocking_ implementation. We intend to add looping around it at the NVGPU level in a subsequent PR (and deprecate the inline-asm based Op here). Lit tests are added to verify the lowering to the intrinsics. Signed-off-by: Durgadoss R <[email protected]>
1 parent cb5ccab commit 46ef57a

File tree

4 files changed

+258
-0
lines changed

4 files changed

+258
-0
lines changed

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,6 +1130,47 @@ def NVVM_MBarrierTestWaitOp : NVVM_Op<"mbarrier.test.wait"> {
11301130
}];
11311131
}
11321132

1133+
// Op definition for `nvvm.mbarrier.try_wait`. It produces an i1 result and is
// translated to an LLVM intrinsic call through getIntrinsicIDAndArgs().
def NVVM_MBarrierTryWaitOp : NVVM_Op<"mbarrier.try_wait"> {
1134+
let summary = "MBarrier try wait on state or phase with an optional timelimit";
1135+
let description = [{
1136+
The `nvvm.mbarrier.try_wait` operation checks whether the specified
1137+
*mbarrier object* at `addr` has completed the given phase. Note that
1138+
unlike the `nvvm.mbarrier.test.wait` operation, the try_wait operation
1139+
is a potentially-blocking one. If the phase is not yet complete, the
1140+
calling thread may be suspended. A suspended thread resumes execution
1141+
once the phase completes or when a system-defined timeout occurs.
1142+
Optionally, the `ticks` operand can be used to provide a custom timeout
1143+
(in nanoseconds), overriding the system-defined one. The semantics of
1144+
this operation and its operands are otherwise similar to those of the
1145+
`nvvm.mbarrier.test.wait` Op.
1146+
1147+
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-mbarrier-test-wait-try-wait)
1148+
}];
1149+
1150+
let results = (outs I1:$res);
1151+
// `addr` may be a generic or a shared-space pointer. `stateOrPhase` is either
// an i64 or an i32 value; the translation picks the `parity` intrinsics when
// it is i32. `ticks` is optional; `scope` defaults to CTA and `relaxed` to
// false.
let arguments = (ins
1152+
AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr,
1153+
AnyTypeOf<[I64, I32]>:$stateOrPhase,
1154+
Optional<I32>:$ticks,
1155+
DefaultValuedAttr<MemScopeKindAttr, "MemScopeKind::CTA">:$scope,
1156+
DefaultValuedAttr<BoolAttr, "false">:$relaxed);
1157+
1158+
let assemblyFormat = "$addr `,` $stateOrPhase (`,` $ticks^)? attr-dict `:` type(operands) `->` type($res)";
1159+
let hasVerifier = 1;
1160+
1161+
// Declares the static helper that the translation snippet below calls to
// obtain the intrinsic ID together with its argument list.
let extraClassDeclaration = [{
1162+
static mlir::NVVM::IDArgPair
1163+
getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
1164+
llvm::IRBuilderBase& builder);
1165+
}];
1166+
1167+
string llvmBuilder = [{
1168+
auto [id, args] = NVVM::MBarrierTryWaitOp::getIntrinsicIDAndArgs(
1169+
*op, moduleTranslation, builder);
1170+
$res = createIntrinsicCall(builder, id, args);
1171+
}];
1172+
}
1173+
11331174
//===----------------------------------------------------------------------===//
11341175
// NVVM synchronization op definitions
11351176
//===----------------------------------------------------------------------===//

mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,10 @@ LogicalResult MBarrierTestWaitOp::verify() {
314314
return verifyMBarrierArriveLikeOp(getOperation(), getAddr(), getScope());
315315
}
316316

317+
// Verifier for nvvm.mbarrier.try_wait: forwards the op, its address operand
// and its scope attribute to the verifyMBarrierArriveLikeOp helper and
// returns that helper's result.
LogicalResult MBarrierTryWaitOp::verify() {
318+
return verifyMBarrierArriveLikeOp(getOperation(), getAddr(), getScope());
319+
}
320+
317321
LogicalResult ConvertFloatToTF32Op::verify() {
318322
using RndMode = NVVM::FPRoundingMode;
319323
switch (getRnd()) {
@@ -2752,6 +2756,56 @@ mlir::NVVM::IDArgPair MBarrierTestWaitOp::getIntrinsicIDAndArgs(
27522756
return {id, {mbar, input}};
27532757
}
27542758

2759+
// Picks the LLVM intrinsic for an nvvm.mbarrier.try_wait op and collects the
// translated operands for the call. The returned pair holds the intrinsic ID
// and the argument list {mbarrier pointer, state-or-phase value[, ticks]}.
mlir::NVVM::IDArgPair MBarrierTryWaitOp::getIntrinsicIDAndArgs(
2760+
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
2761+
auto thisOp = cast<NVVM::MBarrierTryWaitOp>(op);
2762+
// An i32 state-or-phase operand selects the `parity` intrinsic variants.
bool isPhaseParity = thisOp.getStateOrPhase().getType().isInteger(32);
2763+
bool isClusterScope = thisOp.getScope() == NVVM::MemScopeKind::CLUSTER;
2764+
// The optional `ticks` operand selects the time-limit (`tl`) variants.
bool hasTicks = static_cast<bool>(thisOp.getTicks());
2765+
// Index into the intrinsic tables below:
// bit-0: isPhaseParity
2766+
// bit-1: isClusterScope (clear selects the CTA-scope variants)
2767+
// bit-2: hasTicks
2768+
size_t index = ((hasTicks ? 1 : 0) << 2) | ((isClusterScope ? 1 : 0) << 1) |
2769+
(isPhaseParity ? 1 : 0);
2770+
2771+
// clang-format off
2772+
// Default-semantics variants; the entry order must match the bit layout of
// `index` described above.
static constexpr llvm::Intrinsic::ID IDs[] = {
2773+
llvm::Intrinsic::nvvm_mbarrier_try_wait_scope_cta_space_cta,
2774+
llvm::Intrinsic::nvvm_mbarrier_try_wait_parity_scope_cta_space_cta,
2775+
llvm::Intrinsic::nvvm_mbarrier_try_wait_scope_cluster_space_cta,
2776+
llvm::Intrinsic::nvvm_mbarrier_try_wait_parity_scope_cluster_space_cta,
2777+
llvm::Intrinsic::nvvm_mbarrier_try_wait_tl_scope_cta_space_cta,
2778+
llvm::Intrinsic::nvvm_mbarrier_try_wait_parity_tl_scope_cta_space_cta,
2779+
llvm::Intrinsic::nvvm_mbarrier_try_wait_tl_scope_cluster_space_cta,
2780+
llvm::Intrinsic::nvvm_mbarrier_try_wait_parity_tl_scope_cluster_space_cta};
2781+
// `relaxed` variants, laid out in the same order as IDs[].
static constexpr llvm::Intrinsic::ID relaxedIDs[] = {
2782+
llvm::Intrinsic::nvvm_mbarrier_try_wait_relaxed_scope_cta_space_cta,
2783+
llvm::Intrinsic::nvvm_mbarrier_try_wait_parity_relaxed_scope_cta_space_cta,
2784+
llvm::Intrinsic::nvvm_mbarrier_try_wait_relaxed_scope_cluster_space_cta,
2785+
llvm::Intrinsic::nvvm_mbarrier_try_wait_parity_relaxed_scope_cluster_space_cta,
2786+
llvm::Intrinsic::nvvm_mbarrier_try_wait_tl_relaxed_scope_cta_space_cta,
2787+
llvm::Intrinsic::nvvm_mbarrier_try_wait_parity_tl_relaxed_scope_cta_space_cta,
2788+
llvm::Intrinsic::nvvm_mbarrier_try_wait_tl_relaxed_scope_cluster_space_cta,
2789+
llvm::Intrinsic::nvvm_mbarrier_try_wait_parity_tl_relaxed_scope_cluster_space_cta};
2790+
// clang-format on
2791+
auto id = thisOp.getRelaxed() ? relaxedIDs[index] : IDs[index];
2792+
2793+
// Look up the translated mbarrier pointer; when the op's address is a
// generic-space pointer, cast it to the shared address space first.
2794+
llvm::Value *mbar = mt.lookupValue(thisOp.getAddr());
2795+
bool needCast = isPtrInGenericSpace(thisOp.getAddr());
2796+
if (needCast)
2797+
mbar = castPtrToAddrSpace(builder, mbar, NVVMMemorySpace::Shared);
2798+
2799+
// Fill the intrinsic args: pointer, state-or-phase, then ticks if present.
2800+
llvm::SmallVector<llvm::Value *> args;
2801+
args.push_back(mbar);
2802+
args.push_back(mt.lookupValue(thisOp.getStateOrPhase()));
2803+
if (hasTicks)
2804+
args.push_back(mt.lookupValue(thisOp.getTicks()));
2805+
2806+
return {id, std::move(args)};
2807+
}
2808+
27552809
mlir::NVVM::IDArgPair CpAsyncMBarrierArriveOp::getIntrinsicIDAndArgs(
27562810
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
27572811
auto thisOp = cast<NVVM::CpAsyncMBarrierArriveOp>(op);

mlir/test/Target/LLVMIR/nvvm/mbar_invalid.mlir

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,3 +120,19 @@ llvm.func @mbarrier_test_wait(%barrier: !llvm.ptr<3>, %phase: i32) {
120120
llvm.return
121121
}
122122

123+
// -----
124+
125+
// Negative test: try_wait with a `sys` memory scope must be diagnosed.
llvm.func @mbarrier_try_wait(%barrier: !llvm.ptr<3>, %phase: i32) {
126+
// expected-error @below {{mbarrier scope must be either CTA or Cluster}}
127+
%1 = nvvm.mbarrier.try_wait %barrier, %phase {scope = #nvvm.mem_scope<sys>} : !llvm.ptr<3>, i32 -> i1
128+
llvm.return
129+
}
130+
131+
// -----
132+
133+
// Negative test: try_wait with a time limit and a `gpu` memory scope must be
// diagnosed.
llvm.func @mbarrier_try_wait_with_timelimit(%barrier: !llvm.ptr<3>, %phase: i32, %ticks: i32) {
134+
// expected-error @below {{mbarrier scope must be either CTA or Cluster}}
135+
%1 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks {scope = #nvvm.mem_scope<gpu>} : !llvm.ptr<3>, i32, i32 -> i1
136+
llvm.return
137+
}
138+
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
2+
3+
// State-based (i64) try_wait on a generic pointer, no time limit. Covers the
// default and cluster scopes, each with and without `relaxed`; every call is
// preceded by an addrspacecast of the pointer to addrspace(3).
llvm.func @mbarrier_try_wait_state(%barrier: !llvm.ptr, %state : i64) {
4+
// CHECK-LABEL: define void @mbarrier_try_wait_state(ptr %0, i64 %1) {
5+
// CHECK-NEXT: %3 = addrspacecast ptr %0 to ptr addrspace(3)
6+
// CHECK-NEXT: %4 = call i1 @llvm.nvvm.mbarrier.try.wait.scope.cta.space.cta(ptr addrspace(3) %3, i64 %1)
7+
// CHECK-NEXT: %5 = addrspacecast ptr %0 to ptr addrspace(3)
8+
// CHECK-NEXT: %6 = call i1 @llvm.nvvm.mbarrier.try.wait.scope.cluster.space.cta(ptr addrspace(3) %5, i64 %1)
9+
// CHECK-NEXT: %7 = addrspacecast ptr %0 to ptr addrspace(3)
10+
// CHECK-NEXT: %8 = call i1 @llvm.nvvm.mbarrier.try.wait.relaxed.scope.cta.space.cta(ptr addrspace(3) %7, i64 %1)
11+
// CHECK-NEXT: %9 = addrspacecast ptr %0 to ptr addrspace(3)
12+
// CHECK-NEXT: %10 = call i1 @llvm.nvvm.mbarrier.try.wait.relaxed.scope.cluster.space.cta(ptr addrspace(3) %9, i64 %1)
13+
// CHECK-NEXT: ret void
14+
// CHECK-NEXT: }
15+
%0 = nvvm.mbarrier.try_wait %barrier, %state : !llvm.ptr, i64 -> i1
16+
%1 = nvvm.mbarrier.try_wait %barrier, %state {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr, i64 -> i1
17+
18+
%2 = nvvm.mbarrier.try_wait %barrier, %state {relaxed = true} : !llvm.ptr, i64 -> i1
19+
%3 = nvvm.mbarrier.try_wait %barrier, %state {relaxed = true, scope = #nvvm.mem_scope<cluster>} : !llvm.ptr, i64 -> i1
20+
21+
llvm.return
22+
}
23+
24+
// State-based (i64) try_wait on a generic pointer with a `ticks` operand.
// Covers the default and cluster scopes, each with and without `relaxed`; the
// `tl` intrinsic variants receive the ticks value as a third argument.
llvm.func @mbarrier_try_wait_state_with_timelimit(%barrier: !llvm.ptr, %state : i64, %ticks : i32) {
25+
// CHECK-LABEL: define void @mbarrier_try_wait_state_with_timelimit(ptr %0, i64 %1, i32 %2) {
26+
// CHECK-NEXT: %4 = addrspacecast ptr %0 to ptr addrspace(3)
27+
// CHECK-NEXT: %5 = call i1 @llvm.nvvm.mbarrier.try.wait.tl.scope.cta.space.cta(ptr addrspace(3) %4, i64 %1, i32 %2)
28+
// CHECK-NEXT: %6 = addrspacecast ptr %0 to ptr addrspace(3)
29+
// CHECK-NEXT: %7 = call i1 @llvm.nvvm.mbarrier.try.wait.tl.scope.cluster.space.cta(ptr addrspace(3) %6, i64 %1, i32 %2)
30+
// CHECK-NEXT: %8 = addrspacecast ptr %0 to ptr addrspace(3)
31+
// CHECK-NEXT: %9 = call i1 @llvm.nvvm.mbarrier.try.wait.tl.relaxed.scope.cta.space.cta(ptr addrspace(3) %8, i64 %1, i32 %2)
32+
// CHECK-NEXT: %10 = addrspacecast ptr %0 to ptr addrspace(3)
33+
// CHECK-NEXT: %11 = call i1 @llvm.nvvm.mbarrier.try.wait.tl.relaxed.scope.cluster.space.cta(ptr addrspace(3) %10, i64 %1, i32 %2)
34+
// CHECK-NEXT: ret void
35+
// CHECK-NEXT: }
36+
%0 = nvvm.mbarrier.try_wait %barrier, %state, %ticks : !llvm.ptr, i64, i32 -> i1
37+
%1 = nvvm.mbarrier.try_wait %barrier, %state, %ticks {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr, i64, i32 -> i1
38+
39+
%2 = nvvm.mbarrier.try_wait %barrier, %state, %ticks {relaxed = true} : !llvm.ptr, i64, i32 -> i1
40+
%3 = nvvm.mbarrier.try_wait %barrier, %state, %ticks {relaxed = true, scope = #nvvm.mem_scope<cluster>} : !llvm.ptr, i64, i32 -> i1
41+
42+
llvm.return
43+
}
44+
45+
// State-based (i64) try_wait on a shared-space pointer, no time limit. Covers
// the default and cluster scopes, each with and without `relaxed`; the
// pointer is passed to the intrinsic directly, without an addrspacecast.
llvm.func @mbarrier_try_wait_shared_state(%barrier: !llvm.ptr<3>, %state : i64) {
46+
// CHECK-LABEL: define void @mbarrier_try_wait_shared_state(ptr addrspace(3) %0, i64 %1) {
47+
// CHECK-NEXT: %3 = call i1 @llvm.nvvm.mbarrier.try.wait.scope.cta.space.cta(ptr addrspace(3) %0, i64 %1)
48+
// CHECK-NEXT: %4 = call i1 @llvm.nvvm.mbarrier.try.wait.scope.cluster.space.cta(ptr addrspace(3) %0, i64 %1)
49+
// CHECK-NEXT: %5 = call i1 @llvm.nvvm.mbarrier.try.wait.relaxed.scope.cta.space.cta(ptr addrspace(3) %0, i64 %1)
50+
// CHECK-NEXT: %6 = call i1 @llvm.nvvm.mbarrier.try.wait.relaxed.scope.cluster.space.cta(ptr addrspace(3) %0, i64 %1)
51+
// CHECK-NEXT: ret void
52+
// CHECK-NEXT: }
53+
%0 = nvvm.mbarrier.try_wait %barrier, %state : !llvm.ptr<3>, i64 -> i1
54+
%1 = nvvm.mbarrier.try_wait %barrier, %state {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr<3>, i64 -> i1
55+
56+
%2 = nvvm.mbarrier.try_wait %barrier, %state {relaxed = true} : !llvm.ptr<3>, i64 -> i1
57+
%3 = nvvm.mbarrier.try_wait %barrier, %state {relaxed = true, scope = #nvvm.mem_scope<cluster>} : !llvm.ptr<3>, i64 -> i1
58+
llvm.return
59+
}
60+
61+
// State-based (i64) try_wait on a shared-space pointer with a `ticks`
// operand. Covers the default and cluster scopes, each with and without
// `relaxed`; no addrspacecast is emitted for the pointer.
llvm.func @mbarrier_try_wait_shared_state_with_timelimit(%barrier: !llvm.ptr<3>, %state : i64, %ticks : i32) {
62+
// CHECK-LABEL: define void @mbarrier_try_wait_shared_state_with_timelimit(ptr addrspace(3) %0, i64 %1, i32 %2) {
63+
// CHECK-NEXT: %4 = call i1 @llvm.nvvm.mbarrier.try.wait.tl.scope.cta.space.cta(ptr addrspace(3) %0, i64 %1, i32 %2)
64+
// CHECK-NEXT: %5 = call i1 @llvm.nvvm.mbarrier.try.wait.tl.scope.cluster.space.cta(ptr addrspace(3) %0, i64 %1, i32 %2)
65+
// CHECK-NEXT: %6 = call i1 @llvm.nvvm.mbarrier.try.wait.tl.relaxed.scope.cta.space.cta(ptr addrspace(3) %0, i64 %1, i32 %2)
66+
// CHECK-NEXT: %7 = call i1 @llvm.nvvm.mbarrier.try.wait.tl.relaxed.scope.cluster.space.cta(ptr addrspace(3) %0, i64 %1, i32 %2)
67+
// CHECK-NEXT: ret void
68+
// CHECK-NEXT: }
69+
%0 = nvvm.mbarrier.try_wait %barrier, %state, %ticks : !llvm.ptr<3>, i64, i32 -> i1
70+
%1 = nvvm.mbarrier.try_wait %barrier, %state, %ticks {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr<3>, i64, i32 -> i1
71+
72+
%2 = nvvm.mbarrier.try_wait %barrier, %state, %ticks {relaxed = true} : !llvm.ptr<3>, i64, i32 -> i1
73+
%3 = nvvm.mbarrier.try_wait %barrier, %state, %ticks {relaxed = true, scope = #nvvm.mem_scope<cluster>} : !llvm.ptr<3>, i64, i32 -> i1
74+
llvm.return
75+
}
76+
77+
// Phase-based (i32) try_wait on a generic pointer, no time limit. Covers the
// default and cluster scopes, each with and without `relaxed`; the `parity`
// intrinsic variants are selected and the pointer is cast to addrspace(3).
llvm.func @mbarrier_try_wait_phase(%barrier: !llvm.ptr, %phase : i32) {
78+
// CHECK-LABEL: define void @mbarrier_try_wait_phase(ptr %0, i32 %1) {
79+
// CHECK-NEXT: %3 = addrspacecast ptr %0 to ptr addrspace(3)
80+
// CHECK-NEXT: %4 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.scope.cta.space.cta(ptr addrspace(3) %3, i32 %1)
81+
// CHECK-NEXT: %5 = addrspacecast ptr %0 to ptr addrspace(3)
82+
// CHECK-NEXT: %6 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.scope.cluster.space.cta(ptr addrspace(3) %5, i32 %1)
83+
// CHECK-NEXT: %7 = addrspacecast ptr %0 to ptr addrspace(3)
84+
// CHECK-NEXT: %8 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.relaxed.scope.cta.space.cta(ptr addrspace(3) %7, i32 %1)
85+
// CHECK-NEXT: %9 = addrspacecast ptr %0 to ptr addrspace(3)
86+
// CHECK-NEXT: %10 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.relaxed.scope.cluster.space.cta(ptr addrspace(3) %9, i32 %1)
87+
// CHECK-NEXT: ret void
88+
// CHECK-NEXT: }
89+
%0 = nvvm.mbarrier.try_wait %barrier, %phase : !llvm.ptr, i32 -> i1
90+
%1 = nvvm.mbarrier.try_wait %barrier, %phase {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr, i32 -> i1
91+
92+
%2 = nvvm.mbarrier.try_wait %barrier, %phase {relaxed = true} : !llvm.ptr, i32 -> i1
93+
%3 = nvvm.mbarrier.try_wait %barrier, %phase {relaxed = true, scope = #nvvm.mem_scope<cluster>} : !llvm.ptr, i32 -> i1
94+
llvm.return
95+
}
96+
97+
// Phase-based (i32) try_wait on a generic pointer with a `ticks` operand.
// Covers the default and cluster scopes, each with and without `relaxed`; the
// `parity.tl` intrinsic variants are selected.
llvm.func @mbarrier_try_wait_phase_with_timelimit(%barrier: !llvm.ptr, %phase : i32, %ticks : i32) {
98+
// CHECK-LABEL: define void @mbarrier_try_wait_phase_with_timelimit(ptr %0, i32 %1, i32 %2) {
99+
// CHECK-NEXT: %4 = addrspacecast ptr %0 to ptr addrspace(3)
100+
// CHECK-NEXT: %5 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.scope.cta.space.cta(ptr addrspace(3) %4, i32 %1, i32 %2)
101+
// CHECK-NEXT: %6 = addrspacecast ptr %0 to ptr addrspace(3)
102+
// CHECK-NEXT: %7 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.scope.cluster.space.cta(ptr addrspace(3) %6, i32 %1, i32 %2)
103+
// CHECK-NEXT: %8 = addrspacecast ptr %0 to ptr addrspace(3)
104+
// CHECK-NEXT: %9 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.relaxed.scope.cta.space.cta(ptr addrspace(3) %8, i32 %1, i32 %2)
105+
// CHECK-NEXT: %10 = addrspacecast ptr %0 to ptr addrspace(3)
106+
// CHECK-NEXT: %11 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.relaxed.scope.cluster.space.cta(ptr addrspace(3) %10, i32 %1, i32 %2)
107+
// CHECK-NEXT: ret void
108+
// CHECK-NEXT: }
109+
%0 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks : !llvm.ptr, i32, i32 -> i1
110+
%1 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr, i32, i32 -> i1
111+
112+
%2 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks {relaxed = true} : !llvm.ptr, i32, i32 -> i1
113+
%3 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks {relaxed = true, scope = #nvvm.mem_scope<cluster>} : !llvm.ptr, i32, i32 -> i1
114+
llvm.return
115+
}
116+
117+
// Phase-based (i32) try_wait on a shared-space pointer, no time limit. Covers
// the default and cluster scopes, each with and without `relaxed`; the
// `parity` variants are selected and no addrspacecast is emitted.
llvm.func @mbarrier_try_wait_shared_phase(%barrier: !llvm.ptr<3>, %phase : i32) {
118+
// CHECK-LABEL: define void @mbarrier_try_wait_shared_phase(ptr addrspace(3) %0, i32 %1) {
119+
// CHECK-NEXT: %3 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.scope.cta.space.cta(ptr addrspace(3) %0, i32 %1)
120+
// CHECK-NEXT: %4 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.scope.cluster.space.cta(ptr addrspace(3) %0, i32 %1)
121+
// CHECK-NEXT: %5 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.relaxed.scope.cta.space.cta(ptr addrspace(3) %0, i32 %1)
122+
// CHECK-NEXT: %6 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.relaxed.scope.cluster.space.cta(ptr addrspace(3) %0, i32 %1)
123+
// CHECK-NEXT: ret void
124+
// CHECK-NEXT: }
125+
%0 = nvvm.mbarrier.try_wait %barrier, %phase : !llvm.ptr<3>, i32 -> i1
126+
%1 = nvvm.mbarrier.try_wait %barrier, %phase {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr<3>, i32 -> i1
127+
128+
%2 = nvvm.mbarrier.try_wait %barrier, %phase {relaxed = true} : !llvm.ptr<3>, i32 -> i1
129+
%3 = nvvm.mbarrier.try_wait %barrier, %phase {relaxed = true, scope = #nvvm.mem_scope<cluster>} : !llvm.ptr<3>, i32 -> i1
130+
llvm.return
131+
}
132+
133+
// Phase-based (i32) try_wait on a shared-space pointer with a `ticks`
// operand. Covers the default and cluster scopes, each with and without
// `relaxed`; the `parity.tl` variants are selected with no addrspacecast.
llvm.func @mbarrier_try_wait_shared_phase_with_timelimit(%barrier: !llvm.ptr<3>, %phase : i32, %ticks : i32) {
134+
// CHECK-LABEL: define void @mbarrier_try_wait_shared_phase_with_timelimit(ptr addrspace(3) %0, i32 %1, i32 %2) {
135+
// CHECK-NEXT: %4 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.scope.cta.space.cta(ptr addrspace(3) %0, i32 %1, i32 %2)
136+
// CHECK-NEXT: %5 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.scope.cluster.space.cta(ptr addrspace(3) %0, i32 %1, i32 %2)
137+
// CHECK-NEXT: %6 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.relaxed.scope.cta.space.cta(ptr addrspace(3) %0, i32 %1, i32 %2)
138+
// CHECK-NEXT: %7 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.relaxed.scope.cluster.space.cta(ptr addrspace(3) %0, i32 %1, i32 %2)
139+
// CHECK-NEXT: ret void
140+
// CHECK-NEXT: }
141+
%0 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks : !llvm.ptr<3>, i32, i32 -> i1
142+
%1 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr<3>, i32, i32 -> i1
143+
144+
%2 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks {relaxed = true} : !llvm.ptr<3>, i32, i32 -> i1
145+
%3 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks {relaxed = true, scope = #nvvm.mem_scope<cluster>} : !llvm.ptr<3>, i32, i32 -> i1
146+
llvm.return
147+
}

0 commit comments

Comments
 (0)