Skip to content

Commit 1a9734c

Browse files
authored
Fix JIT specialization data (de)serialization for tuples and constexprs (triton-lang#8639)
As part of using the `triton.knobs.runtime.jit_cache_hook`, the `JITFunction` class performs JSON serialization on the specialization data. The serialized specialization data is then expected to be used as part of the `preload()` function, where it will be deserialized and used to compile the Triton kernel. However, this process fails to account for the following cases:

- When part of the Triton Python signature is a Python tuple, the serialization process will transform it into a list (because JSON serializes tuples as lists); the deserialization process does not transform it back into a tuple, leading to a parsing failure when `ast_to_ttir()` is invoked.
- When the constants contain a `tl.constexpr` value, the serialization process will raise an error, because `tl.constexpr` is not serializable.

This PR addresses both of these issues by:

- Applying the reverse transformation in the deserialization, from lists to tuples, for signatures. We can do this unconditionally because lists are not accepted as part of the signature of a Triton kernel.
- Adding a special case for `constexpr` for constants in the specialization data, so that it can be serialized and deserialized without losing its type.
- Adding a test that is the exact same as `test_passing_nested_tuple_with_constexpr`, but with the JIT hook set up, so that we can verify that the serialization/deserialization round-trip works as intended.
1 parent 1070cd5 commit 1a9734c

File tree

2 files changed

+61
-12
lines changed

2 files changed

+61
-12
lines changed

python/test/unit/language/test_tuple.py

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -256,17 +256,45 @@ def m_to_the_n(X, shape: tl.constexpr, strides, m_n):
256256
torch.testing.assert_close(x, expected_x, rtol=0, atol=0)
257257

258258

259+
@triton.jit
260+
def _nested_tuple_kernel(x):
261+
# This creates a new scope, which will force a copy of liveins. It's
262+
# important for this to happen as it forces IR flattening/unflattening,
263+
# which relies on the types being correct for the roundtrip to succeed.
264+
for _ in range(1):
265+
tl.static_assert(x[1][0] == 2)
266+
267+
259268
def test_passing_nested_tuple_with_constexpr(device):
269+
_nested_tuple_kernel[(1, )](((1, ), (tl.constexpr(2), )))
260270

261-
@triton.jit
262-
def test(x):
263-
# This creates a new scope, which will force a copy of liveins. It's
264-
# important for this to happen as it forces IR flattening/unflattening,
265-
# which relies on the types being correct for the roundtrip to succeed.
266-
for _ in range(1):
267-
tl.static_assert(x[1][0] == 2)
268-
269-
test[(1, )](((1, ), (tl.constexpr(2), )))
271+
272+
def test_passing_nested_tuple_with_constexpr_and_jit_hook(device, fresh_knobs):
273+
# get the serialized specialization data
274+
specialization_data = None
275+
276+
def cache_hook(*args, **kwargs):
277+
nonlocal specialization_data
278+
specialization_data = kwargs["compile"]["specialization_data"]
279+
280+
fresh_knobs.runtime.jit_cache_hook = cache_hook
281+
282+
device = getattr(torch, device).current_device()
283+
284+
# Clear the existing cache for this device to ensure that the hook is called;
285+
# This is needed because the kernel is shared between multiple tests and may
286+
# already have been compiled for this device.
287+
_nested_tuple_kernel.device_caches[device][0].clear()
288+
289+
warmup_run = _nested_tuple_kernel.warmup(((1, ), (tl.constexpr(2), )), grid=(1, ))
290+
assert warmup_run is not None
291+
292+
assert specialization_data is not None
293+
294+
preload_run = _nested_tuple_kernel.preload(specialization_data)
295+
assert preload_run is not None
296+
297+
assert warmup_run.hash == preload_run.hash
270298

271299

272300
def test_passing_tuple_to_make_tensor_descriptor(device, with_allocator):

python/triton/runtime/jit.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,12 @@ def __getitem__(self, grid) -> T:
366366

367367

368368
def serialize_specialization_data(name, signature, constants, attrs, options, key):
369-
constants = {key: str(value) if value.__class__.__name__ == "dtype" else value for key, value in constants.items()}
369+
constants = {
370+
key: str(value) if value.__class__.__name__ == "dtype" else
371+
{"constexpr": value.value} if value.__class__.__name__ == "constexpr" else value
372+
for key, value in constants.items()
373+
}
374+
370375
import json
371376
obj = {
372377
'name': name, 'signature': signature, 'constant_keys': [list(x) for x in constants.keys()], 'constant_vals':
@@ -557,6 +562,18 @@ def compute_cache_key(kernel_key_cache, specialization, options):
557562
return cache_key
558563

559564

565+
def convert_to_tuple_if_list(item):
566+
# If the incoming item is a list, recursively iterate through it to convert all lists therein into tuples
567+
if not isinstance(item, list):
568+
return item
569+
570+
# The value must be a list at this point
571+
for i, nested_value in enumerate(item):
572+
item[i] = convert_to_tuple_if_list(nested_value)
573+
574+
return tuple(item)
575+
576+
560577
class JITFunction(JITCallable, KernelInterface[T]):
561578

562579
def is_gluon(self):
@@ -759,13 +776,17 @@ def preload(self, specialization_data):
759776
constant_keys = map(tuple, deserialized_obj['constant_keys'])
760777
constant_vals = deserialized_obj['constant_vals']
761778
constexprs = {
762-
key: tl.dtype(value) if tl.dtype.is_dtype(value) else value
779+
key:
780+
tl.dtype(value) if tl.dtype.is_dtype(value) else
781+
tl.constexpr(value['constexpr']) if isinstance(value, dict) and 'constexpr' in value else value
763782
for key, value in zip(constant_keys, constant_vals)
764783
}
765784
attrs_keys = map(tuple, deserialized_obj['attrs_keys'])
766785
attrs_vals = deserialized_obj['attrs_vals']
767786
attrs = dict(zip(attrs_keys, attrs_vals))
768-
signature = dict(deserialized_obj['signature'].items())
787+
# JSON serializes tuples as lists, so they need to be converted back;
788+
# This can be done unconditionally, since lists are not accepted in Triton kernel signatures.
789+
signature = {key: convert_to_tuple_if_list(value) for key, value in deserialized_obj['signature'].items()}
769790
options = {
770791
key: tuple(value) if isinstance(value, list) else value
771792
for key, value in deserialized_obj['options'].items()

0 commit comments

Comments (0)