Skip to content

Commit 425f6ee

Browse files
agron911 and meta-codesync[bot]
authored and committed
[Cherry-pick] [Frontend][Test] Make gluon/test_frontend.py runnable locally (#8006) (#624)
Summary: Cherry-picked from upstream OAI repository. Original Commit: d441626 Original Author: Jeff Niu Original Date: 2025-08-29 05:59:56 -0700 Original commit message: ``` [Frontend][Test] Make `gluon/test_frontend.py` runnable locally (#8006) ``` This PR was automatically cherry-picked from the upstream triton-lang/triton repository. Pull Request resolved: #624 Reviewed By: dshi7 Differential Revision: D86112807 Pulled By: agron911 fbshipit-source-id: 1ce60cdc592ec7d57bd660ef36ed50ce15fa8635
1 parent d14b0a0 commit 425f6ee

File tree

2 files changed

+13
-5
lines changed

2 files changed

+13
-5
lines changed

python/test/gluon/test_frontend.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import expecttest
2-
import torch
32
import pytest
43
import re
54

@@ -128,7 +127,7 @@ def kernel(src_layout: ttgl.constexpr, dst_layout: ttgl.constexpr):
128127
with pytest.raises(CompilationError) as e:
129128
src_layout: ttgl.constexpr = ttgl.AutoLayout()
130129
dst_layout: ttgl.constexpr = ttgl.BlockedLayout([2], [32], [4], [0])
131-
kernel.warmup(src_layout, dst_layout, grid=(1, ))
130+
run_parser(kernel, *make_args(src_layout, dst_layout), target=target)
132131

133132
assert "layout conversion from AutoLayout()" in str(e.value.__cause__)
134133
assert "to BlockedLayout(size_per_thread=[2]" in str(e.value.__cause__)
@@ -699,7 +698,7 @@ def async_tma_kernel(input_desc, XBLOCK: ttgl.constexpr):
699698

700699
@pytest.mark.parametrize("target", [HOPPER_TARGET, BLACKWELL_TARGET])
701700
def test_async_tma(target):
702-
input = torch.randn((1024, 1024), device="cuda", dtype=torch.float16)
701+
input = MockTensor(ttgl.float16, (1024, 1024))
703702
XBLOCK = 128
704703
shared_layout = ttgl.NVMMASharedLayout(swizzle_byte_width=128, element_bitwidth=16, rank=2)
705704
input_desc = TensorDescriptor.from_tensor(input, [XBLOCK, XBLOCK], shared_layout)
@@ -758,7 +757,7 @@ def async_tma_blackwell_kernel(input_desc, XBLOCK: ttgl.constexpr):
758757

759758

760759
def test_async_tma_blackwell():
761-
input = torch.randn((1024, 1024), device="cuda", dtype=torch.float16)
760+
input = MockTensor(ttgl.float16, (1024, 1024))
762761
XBLOCK = 128
763762
shared_layout = ttgl.NVMMASharedLayout(swizzle_byte_width=128, element_bitwidth=16, rank=2)
764763
input_desc = TensorDescriptor.from_tensor(input, [1, XBLOCK], shared_layout)

python/triton/runtime/jit.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -968,8 +968,17 @@ def wrap_dtype(arg):
968968
return MockTensor(arg)
969969
return arg
970970

971-
def __init__(self, dtype):
971+
def __init__(self, dtype, shape=None):
972+
if shape is None:
973+
shape = [1]
972974
self.dtype = dtype
975+
self.shape = shape
976+
977+
def stride(self):
978+
strides = [1]
979+
for size in self.shape[1:]:
980+
strides.append(strides[-1] * size)
981+
return tuple(reversed(strides))
973982

974983
@staticmethod
975984
def data_ptr():

0 commit comments

Comments (0)