@@ -8,7 +8,7 @@
 from triton.experimental import gluon
 from triton.experimental.gluon import language as ttgl
 from triton.experimental.gluon.language.nvidia import blackwell
-from triton.experimental.gluon.language.nvidia.blackwell import mbarrier, tma
+from triton.experimental.gluon.language.nvidia.blackwell import mbarrier, tma, TensorMemoryLayout
 from triton._filecheck import filecheck_test, run_parser
 import triton.language as tl
 from triton._internal_testing import is_cuda
@@ -123,7 +123,7 @@ def test_tensor_memory(fresh_knobs):
     knobs.compilation.disable_line_info = True
 
     layout = ttgl.BlockedLayout(size_per_thread=[1, 64], threads_per_warp=[32, 1], warps_per_cta=[4, 1], order=[0, 1])
-    tmem_layout = ttgl.nvidia.blackwell.TensorMemoryLayout(block=[128, 128], unpacked=True)
+    tmem_layout = TensorMemoryLayout(block=[128, 128], unpacked=True)
     h = tensor_memory_kernel.warmup(layout, tmem_layout, num_warps=4, grid=(1, ))
     expecttest.assert_expected_inline(
         anonymize_ir(h.asm["source"]), """\
@@ -400,7 +400,7 @@ def test_tcgen05_mma(fresh_knobs):
     knobs.compilation.disable_line_info = True
 
     nvmma_layout = ttgl.NVMMASharedLayout(swizzle_byte_width=128, element_bitwidth=16, rank=2)
-    acc_layout = blackwell.TensorMemoryLayout([128, 128], unpacked=True)
+    acc_layout = TensorMemoryLayout([128, 128], unpacked=True)
 
     h = tcgen05_mma_kernel.warmup(nvmma_layout, acc_layout, grid=(1, ))
     expecttest.assert_expected_inline(
@@ -553,7 +553,7 @@ def kernel():
 
 @gluon.jit
 def tmem_subslice_kernel():
-    layout: ttgl.constexpr = ttgl.nvidia.blackwell.TensorMemoryLayout(block=[128, 128], unpacked=True)
+    layout: ttgl.constexpr = TensorMemoryLayout(block=[128, 128], unpacked=True)
     tmem = ttgl.nvidia.blackwell.allocate_tensor_memory(ttgl.int32, [2, 256, 256], layout)
     tmem.subslice(0)
 
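For context, a minimal usage sketch (not part of this diff) of what the direct `TensorMemoryLayout` import enables; the kernel name `tmem_layout_example_kernel` is hypothetical, and the calls simply mirror the ones already exercised in the tests above:

```python
# Hypothetical sketch: with TensorMemoryLayout imported directly from the
# blackwell module, kernels no longer need the ttgl.nvidia.blackwell.* spelling.
from triton.experimental import gluon
from triton.experimental.gluon import language as ttgl
from triton.experimental.gluon.language.nvidia.blackwell import TensorMemoryLayout


@gluon.jit
def tmem_layout_example_kernel():
    # Same layout/allocation pattern as tmem_subslice_kernel above, written
    # against the shorter import path.
    layout: ttgl.constexpr = TensorMemoryLayout(block=[128, 128], unpacked=True)
    tmem = ttgl.nvidia.blackwell.allocate_tensor_memory(ttgl.int32, [2, 256, 256], layout)
    tmem.subslice(0)
```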