
Commit 9256bb3

davidberard98 authored and whitneywhtsang committed
[FRONTEND] Allow JITFunctions as arguments to other JITFunctions (#5723)
This PR allows a call to a JITFunction to pass another JITFunction as an argument. For example:

```python
@triton.jit
def fn_a(x):
    ...

@triton.jit
def fn_b(x, fn):
    ...

@triton.jit
def fn_c(x):
    return fn_b(x, fn_a)  # fn_a (a JITFunction) is passed as an argument to fn_b (another JITFunction)
```

Prior to #5220, this worked. After #5220, the user needs to annotate the JITFunction parameters with `tl.constexpr` manually (until this PR).

Use case: Inductor has some generic helper functions for implementing scans (e.g. `exclusive_scan_decoupled_lookback`) which take a `combine_fn` to implement the combination function (similar to `tl.reduce`). These helper functions have stopped working after #5220. https://github.com/pytorch/pytorch/blob/01a4d86b31365cfb484dc17885c9a7ee09c235ab/torch/_inductor/runtime/triton_helpers.py#L321
1 parent aa9630a commit 9256bb3

File tree

2 files changed

+30
-1
lines changed


python/test/unit/language/test_core.py

Lines changed: 29 additions & 0 deletions
```diff
@@ -6940,6 +6940,35 @@ def inject_layout(ir, src: torch.Tensor, axis, indices: torch.Tensor, src_layout
     torch.testing.assert_close(output, ref, rtol=0, atol=0)
 
 
+@triton.jit
+def mul_jit_function(x, y):
+    return x * y
+
+
+@triton.jit
+def apply_binary_op(x, combine_op):
+    return combine_op(x, x)
+
+
+def test_jit_function_arg(device):
+
+    @triton.jit
+    def square_kernel_jit_function(in_ptr, out_ptr, BLOCK_SIZE: tl.constexpr):
+        offsets = tl.arange(0, BLOCK_SIZE)
+        in_data = tl.load(in_ptr + offsets)
+        out_data = apply_binary_op(in_data, mul_jit_function)  # pass a JITFunction into another JITFunction
+        tl.store(out_ptr + offsets, out_data)
+
+    BLOCK_SIZE = 16
+    x = torch.full((BLOCK_SIZE, ), 3.0, device=device)
+    out = torch.empty((BLOCK_SIZE, ), device=device)
+    expect = torch.full((BLOCK_SIZE, ), 9.0, dtype=x.dtype, device=device)
+
+    square_kernel_jit_function[(1, )](x, out, BLOCK_SIZE)
+
+    torch.testing.assert_close(out, expect)
+
+
 @pytest.mark.interpreter
 def test_zero_strided_tensors(device):
```

python/triton/compiler/code_generator.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -1153,7 +1153,7 @@ def call_JitFunction(self, fn: JITFunction, args, kwargs):
         args = inspect.getcallargs(fn.fn, *args, **kwargs)
         args = [args[name] for name in fn.arg_names]
         for i, arg in enumerate(args):
-            if isinstance(arg, (language.dtype, float, int, bool)):
+            if isinstance(arg, (language.dtype, float, int, bool, JITFunction)):
                 args[i] = language.core.constexpr(arg)
         args_cst = find_paths_if(args, lambda _, x: _is_constexpr(x))
         args_cst = {path: get_iterable_path(args, path) for path in args_cst}
```
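The one-line fix above widens the `isinstance` check so that a `JITFunction` argument is wrapped as a compile-time constant, just like a dtype or a plain scalar. The following is a minimal, Triton-free sketch of that behavior; `Constexpr` and `JITFunction` here are simplified stand-ins introduced only for illustration, not the real Triton classes:

```python
class Constexpr:
    """Stand-in for triton.language.core.constexpr: wraps a compile-time constant."""

    def __init__(self, value):
        self.value = value


class JITFunction:
    """Stand-in for triton.runtime.jit.JITFunction: wraps a Python function."""

    def __init__(self, fn):
        self.fn = fn


def wrap_constexpr_args(args):
    # Mirrors the patched loop in call_JitFunction: before this PR the tuple
    # omitted JITFunction, so a JIT function passed as an argument was treated
    # as a runtime value; including it makes the callee see a constexpr.
    return [
        Constexpr(a) if isinstance(a, (float, int, bool, JITFunction)) else a
        for a in args
    ]


mul = JITFunction(lambda x, y: x * y)
wrapped = wrap_constexpr_args([3.0, mul, "runtime_tensor_placeholder"])
# wrapped[0] and wrapped[1] are Constexpr-wrapped; the placeholder is untouched.
```

In the real code path, the wrapped `constexpr` is then picked up by `find_paths_if(args, lambda _, x: _is_constexpr(x))` and specialized into the callee's signature rather than passed as a runtime argument.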
