Skip to content

Commit 3e14134

Browse files
authored
[Gluon] Add thread_barrier and {hopper,blackwell}.fence_async_shared (#7152)
1 parent 4390874 commit 3e14134

File tree

4 files changed

+35
-4
lines changed

4 files changed

+35
-4
lines changed

python/test/gluon/test_frontend.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -908,3 +908,20 @@ def test_zeros():
908908

909909
# CHECK: arith.constant dense<7> : tensor<8x8xi16, [[BLOCKED2D]]>
910910
ttgl.full_like(a, 7, shape=[8, 8], dtype=ttgl.int16, layout=layout_2d)
911+
912+
913+
# Filecheck-based frontend test: the `# CHECK:` directives below are matched
# against the IR produced when the jitted kernel is traced.
@filecheck_test
@gluon.jit
def test_barrier():
    # thread_barrier lowers to a gpu.barrier op (asserted by the directive).
    # CHECK: gpu.barrier
    ttgl.thread_barrier()
918+
919+
920+
# Filecheck-based frontend test for the shared-memory async fence builtin.
@filecheck_test
@gluon.jit
def test_fence_async_shared():
    # Default call: fence scoped to the CTA (bCluster = false).
    # CHECK: ttng.fence_async_shared {bCluster = false}
    blackwell.fence_async_shared()

    # cluster=True widens the fence scope to the cluster (bCluster = true).
    # CHECK-NEXT: ttng.fence_async_shared {bCluster = true}
    blackwell.fence_async_shared(cluster=True)

python/triton/experimental/gluon/language/_core.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@
9191
"tensor",
9292
"tuple",
9393
"tuple_type",
94+
"thread_barrier",
9495
"arange",
9596
"full",
9697
"convert_layout",
@@ -313,3 +314,8 @@ def warp_specialize(args, default_partition, worker_partitions, worker_num_warps
313314
worker_num_regs = [_unwrap_if_constexpr(r) for r in worker_num_regs]
314315
return _semantic.warp_specialize(args, default_partition, worker_partitions, worker_num_warps, #
315316
worker_num_regs, _generator)
317+
318+
319+
@builtin
def thread_barrier(_semantic=None):
    """Synchronize all threads in the program.

    Thin builtin wrapper that delegates to the semantic layer's
    ``debug_barrier``; lowers to ``gpu.barrier`` (see the frontend
    filecheck test for this builtin).
    """
    return _semantic.debug_barrier()

python/triton/experimental/gluon/language/nvidia/blackwell/__init__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,19 @@
66
from triton.experimental.gluon.language._core import builtin, base_type, base_value, _unwrap_if_constexpr
77

88
from . import tma
9-
from ..hopper import mbarrier
9+
from ..hopper import mbarrier, fence_async_shared
1010

1111
if TYPE_CHECKING:
1212
from triton._C.libtriton.gluon_ir import GluonOpBuilder
1313
from triton._C.libtriton import gluon_ir as ir
1414
from ..._semantic import GluonSemantic
1515

1616
__all__ = [
17-
"TensorMemoryLayout",
18-
"tensor_memory_descriptor",
1917
"allocate_tensor_memory",
18+
"fence_async_shared",
2019
"mbarrier",
20+
"tensor_memory_descriptor",
21+
"TensorMemoryLayout",
2122
"tma",
2223
]
2324

python/triton/experimental/gluon/language/nvidia/hopper/__init__.py (filename missing from the scrape; inferred from the `fence_async_shared` definition re-exported by blackwell via `from ..hopper import mbarrier, fence_async_shared`)

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,11 @@
11
from . import mbarrier
22
from . import tma
3+
from ... import _core
34

4-
__all__ = ["mbarrier", "tma"]
5+
__all__ = ["fence_async_shared", "mbarrier", "tma"]
6+
7+
8+
@_core.builtin
def fence_async_shared(cluster=False, _semantic=None):
    """Emit a fence ordering asynchronous shared-memory accesses.

    Args:
        cluster: when True, the fence is emitted with cluster scope
            (``bCluster = true``); defaults to CTA scope. May be a
            ``constexpr`` — it is unwrapped before use.
    """
    scope_is_cluster = _core._unwrap_if_constexpr(cluster)
    _semantic.builder.create_fence_async_shared(scope_is_cluster)

0 commit comments

Comments
 (0)