intel
diff --git a/benchmarks/triton_kernels_benchmark/benchmark_testing.py b/benchmarks/triton_kernels_benchmark/benchmark_testing.py
Lines changed: 1 addition & 0 deletions
diff --git a/python/test/unit/language/test_core.py b/python/test/unit/language/test_core.py
Lines changed: 1 addition & 1 deletion
diff --git a/python/test/unit/language/test_tuple.py b/python/test/unit/language/test_tuple.py
Lines changed: 2 additions & 1 deletion
diff --git a/python/test/unit/runtime/test_bindings.py b/python/test/unit/runtime/test_bindings.py
Lines changed: 0 additions & 1 deletion
diff --git a/python/test/unit/runtime/test_cache.py b/python/test/unit/runtime/test_cache.py
Lines changed: 1 addition & 1 deletion
diff --git a/python/test/unit/runtime/test_subproc.py b/python/test/unit/runtime/test_subproc.py
Lines changed: 9 additions & 14 deletions
@@ -68,6 +68,7 @@ def do_bench_elapsed_time(fn, n_warmup=25, n_repeat=100, grad_to_none=None, quan
         fn()
     end_event.record()
     synchronize()
+    # wait_on_sycl_queue?
     estimate_ms = start_event.elapsed_time(end_event) / 5
 
     # The cache is also maintained in `triton_do_bench` function,
 
@@ -4519,7 +4519,7 @@ def test_value_specialization(value: int, value_type: str, device) -> None:
 
     def repr(specialization):
         ty = specialization.signature["value1"]
-        cst = '_'.join([k for k, v in specialization.constants.items() if v == 1])
+        cst = '_'.join([k for k, v in specialization.constants.items() if isinstance(k, str) and v == 1])
         return f"kernel_{ty}_{cst}"
 
     @triton.jit(repr=repr)
 
@@ -81,6 +81,7 @@ def _tuple_fn0(Ptr, cst2: tl.constexpr, tuple1):
 def _tuple_serialize(Ptr, N1, tuple1, cst1: tl.constexpr, val1, tuple2):
     tl.static_assert(N1 is None)
     tl.static_assert(tuple1[1][1] is None)
+    tl.static_assert(tuple1[1][3] == 4)
     tl.store(Ptr + 0, tl.load(tuple1[0]))
     tl.store(Ptr + 1, tuple1[1][0])
     tl.store(Ptr + 2, tl.load(tuple1[1][2]))
@@ -95,6 +96,6 @@ def test_serialize(device="xpu"):
     y0 = torch.tensor([10], dtype=torch.int32, device=device)
     z = torch.empty((10, ), dtype=torch.int32, device=device)
     # we want to check that JIT specialization propagates to tuples:
-    _tuple_serialize[(1, )](z, None, (x0, (1, None, x1)), 20, 1, (y0, ))
+    _tuple_serialize[(1, )](z, None, (x0, (1, None, x1, tl.constexpr(4))), 20, 1, (y0, ))
     ref = torch.tensor([8, 1, 12, 21, 10, 15, -1, 8, 1, 12], device=device)
     assert torch.equal(z, ref)
@@ -68,7 +68,6 @@ def walk_fn(op):
         constexprs={kernel.arg_names[i]: arg
                     for i, arg in enumerate(args)
                     if not isinstance(arg, torch.Tensor)},
-        attrs=backend.get_attrs_descriptor(kernel.params, args),
     )
 
     context = triton._C.libtriton.ir.context()
 
@@ -563,7 +563,7 @@ def kernel_add(a):
 
     def cache_hook(*args, **kwargs):
         nonlocal pointer_range_32
-        pointer_range_32 = kwargs["compile"]["configs"][0].pointer_range_32
+        pointer_range_32 = [k for k, v in kwargs["compile"]["configs"][0].items() if ['tt.pointer_range', 32] in v]
 
     JITFunction.cache_hook = cache_hook
     # In warmup we assume that the pointer range is 32 bits
 
@@ -3,14 +3,13 @@
 
 import triton
 import triton.language as tl
-from triton.backends.compiler import AttrsDescriptor
 from triton.compiler import ASTSource
 
 target = triton.runtime.driver.active.get_current_target()
 start_method = 'fork' if 'fork' in multiprocessing.get_all_start_methods() else 'spawn'
 
 
-def compile_fn(attrs):
+def compile_fn():
 
     @triton.jit
     def kernel_sub(a, b, o, N: tl.constexpr):
@@ -21,21 +20,19 @@ def kernel_sub(a, b, o, N: tl.constexpr):
         fn=kernel_sub,
         constexprs={'N': 32},
         signature={'a': "*fp32", 'b': "*fp32", 'o': "*fp32", 'N': 'constexpr'},
-        attrs=attrs,
     )
     triton.compile(src=src, target=target)
 
 
 def test_compile_in_subproc() -> None:
-    config = AttrsDescriptor.from_hints({i: 16 for i in range(4)})
     mp_ctx = multiprocessing.get_context(start_method)
-    proc = mp_ctx.Process(target=compile_fn, args=(config, ))
+    proc = mp_ctx.Process(target=compile_fn)
     proc.start()
     proc.join()
     assert proc.exitcode == 0
 
 
-def compile_fn_dot(attrs):
+def compile_fn_dot():
 
     @triton.jit
     def kernel_dot(Z):
@@ -44,28 +41,27 @@ def kernel_dot(Z):
         z = tl.dot(z, z)
         tl.store(Z + offs, z)
 
-    src = ASTSource(fn=kernel_dot, signature={'Z': "*fp32"}, attrs=attrs, constexprs={})
+    src = ASTSource(fn=kernel_dot, signature={'Z': "*fp32"})
     triton.compile(src=src, target=target)
 
 
 def test_compile_in_forked_subproc(fresh_triton_cache) -> None:
-    config = AttrsDescriptor.from_hints({0: 16})
     mp_ctx = multiprocessing.get_context(start_method)
-    proc = mp_ctx.Process(target=compile_fn_dot, args=(config, ))
+    proc = mp_ctx.Process(target=compile_fn_dot)
     proc.start()
     proc.join()
     assert proc.exitcode == 0
 
 
-def compile_empty_kernel_with_gc(attrs):
+def compile_empty_kernel_with_gc():
 
     @triton.jit
     def empty_kernel():
         pass
 
     import gc
     gc.collect()
-    src = ASTSource(fn=empty_kernel, signature={}, attrs=attrs, constexprs={})
+    src = ASTSource(fn=empty_kernel, signature={})
     triton.compile(src=src, target=target)
 
 
@@ -88,13 +84,12 @@ def test_compile_in_forked_subproc_with_forced_gc(fresh_triton_cache) -> None:
     gc.disable()
 
     # stage 1.p
-    config = AttrsDescriptor.from_hints({0: 16})
-    compile_empty_kernel_with_gc(config)
+    compile_empty_kernel_with_gc()
 
     # stage 2.p
     shutil.rmtree(fresh_triton_cache, ignore_errors=True)
     mp_ctx = multiprocessing.get_context(start_method)
-    proc = mp_ctx.Process(target=compile_empty_kernel_with_gc, args=(config, ))
+    proc = mp_ctx.Process(target=compile_empty_kernel_with_gc)
 
     # stage 3.c
     proc.start()
Original file line number	Diff line number	Diff line change
`@@ -68,7 +68,6 @@ def walk_fn(op):`
`68`	`68`	`constexprs={kernel.arg_names[i]: arg`
`69`	`69`	`for i, arg in enumerate(args)`
`70`	`70`	`if not isinstance(arg, torch.Tensor)},`
`71`		`- attrs=backend.get_attrs_descriptor(kernel.params, args),`
`72`	`71`	`)`
`73`	`72`
`74`	`73`	`context = triton._C.libtriton.ir.context()`