Commit 7c4bc5b

add jax interface
1 parent 3f2ec85 commit 7c4bc5b

4 files changed: +325 −3 lines

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
@@ -2,6 +2,10 @@
 
 ## Unreleased
 
+### Added
+
+- Add `jax_interface`
+
 ## 1.1.0
 
 ### Added

tensorcircuit/interfaces/__init__.py

Lines changed: 1 addition & 3 deletions
@@ -14,6 +14,4 @@
 from .scipy import scipy_interface, scipy_optimize_interface
 from .torch import torch_interface, pytorch_interface, torch_interface_kws
 from .tensorflow import tensorflow_interface, tf_interface
-
-
-# TODO(@refraction-ray): jax interface using puer_callback and custom_vjp
+from .jax import jax_interface
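
With the TODO removed and this re-export in place, the new wrapper is reachable from the package namespace as well as from the submodule. A minimal import sketch, assuming only that tensorcircuit is installed:

# Both names refer to the function defined in tensorcircuit/interfaces/jax.py below.
import tensorcircuit as tc
from tensorcircuit.interfaces import jax_interface

assert jax_interface is tc.interfaces.jax_interface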

tensorcircuit/interfaces/jax.py

Lines changed: 187 additions & 0 deletions
@@ -0,0 +1,187 @@
+"""
+Interface wraps quantum function as a jax function
+"""
+
+from typing import Any, Callable, Tuple, Optional, Union, Sequence
+from functools import wraps, partial
+
+import jax
+from jax import custom_vjp
+
+from ..cons import backend
+from .tensortrans import general_args_to_backend
+
+Tensor = Any
+
+
+def jax_wrapper(
+    fun: Callable[..., Any],
+    enable_dlpack: bool = False,
+    output_shape: Optional[
+        Union[Tuple[int, ...], Tuple[int, ...], Sequence[Tuple[int, ...]]]
+    ] = None,
+    output_dtype: Optional[Union[Any, Sequence[Any]]] = None,
+) -> Callable[..., Any]:
+    @wraps(fun)
+    def fun_jax(*x: Any) -> Any:
+        def wrapped_fun(*args: Any) -> Any:
+            args = general_args_to_backend(args, enable_dlpack=enable_dlpack)
+            y = fun(*args)
+            y = general_args_to_backend(
+                y, target_backend="jax", enable_dlpack=enable_dlpack
+            )
+            return y
+
+        # Use provided shape and dtype if available, otherwise run test
+        if output_shape is not None and output_dtype is not None:
+            if isinstance(output_shape, Sequence) and not isinstance(
+                output_shape[0], int
+            ):
+                # Multiple outputs case
+                out_shape = tuple(
+                    jax.ShapeDtypeStruct(s, d)
+                    for s, d in zip(output_shape, output_dtype)
+                )
+            else:
+                # Single output case
+                out_shape = jax.ShapeDtypeStruct(output_shape, output_dtype)  # type: ignore
+        else:
+            # Get expected output shape by running function once
+            test_out = wrapped_fun(*x)
+            if isinstance(test_out, tuple):
+                # Multiple outputs case
+                out_shape = tuple(
+                    jax.ShapeDtypeStruct(
+                        t.shape if hasattr(t, "shape") else (),
+                        t.dtype if hasattr(t, "dtype") else x[0].dtype,
+                    )
+                    for t in test_out
+                )
+            else:
+                # Single output case
+                out_shape = jax.ShapeDtypeStruct(  # type: ignore
+                    test_out.shape if hasattr(test_out, "shape") else (),
+                    test_out.dtype if hasattr(test_out, "dtype") else x[0].dtype,
+                )
+
+        # Use pure_callback with correct output shape
+        result = jax.pure_callback(wrapped_fun, out_shape, *x)
+        return result
+
+    return fun_jax
+
+
+def jax_interface(
+    fun: Callable[..., Any],
+    jit: bool = False,
+    enable_dlpack: bool = False,
+    output_shape: Optional[Union[Tuple[int, ...], Tuple[()]]] = None,
+    output_dtype: Optional[Any] = None,
+) -> Callable[..., Any]:
+    """
+    Wrap a function on different ML backend with a jax interface.
+
+    :Example:
+
+    .. code-block:: python
+
+        tc.set_backend("tensorflow")
+
+        def f(params):
+            c = tc.Circuit(1)
+            c.rx(0, theta=params[0])
+            c.ry(0, theta=params[1])
+            return tc.backend.real(c.expectation([tc.gates.z(), [0]]))
+
+        f = tc.interfaces.jax_interface(f, jit=True)
+
+        params = jnp.ones(2)
+        value, grad = jax.value_and_grad(f)(params)
+
+    :param fun: The quantum function with tensor in and tensor out
+    :type fun: Callable[..., Any]
+    :param jit: whether to jit ``fun``, defaults to False
+    :type jit: bool, optional
+    :param enable_dlpack: whether transform tensor backend via dlpack, defaults to False
+    :type enable_dlpack: bool, optional
+    :param output_shape: Optional shape of the function output, defaults to None
+    :type output_shape: Optional[Union[Tuple[int, ...], Tuple[()]]], optional
+    :param output_dtype: Optional dtype of the function output, defaults to None
+    :type output_dtype: Optional[Any], optional
+    :return: The same quantum function but now with jax array in and jax array out
+        while AD is also supported
+    :rtype: Callable[..., Any]
+    """
+    jax_fun = create_jax_function(
+        fun,
+        enable_dlpack=enable_dlpack,
+        jit=jit,
+        output_shape=output_shape,
+        output_dtype=output_dtype,
+    )
+    return jax_fun
+
+
+def create_jax_function(
+    fun: Callable[..., Any],
+    enable_dlpack: bool = False,
+    jit: bool = False,
+    output_shape: Optional[Union[Tuple[int, ...], Tuple[()]]] = None,
+    output_dtype: Optional[Any] = None,
+) -> Callable[..., Any]:
+    if jit:
+        fun = backend.jit(fun)
+
+    wrapped = jax_wrapper(
+        fun,
+        enable_dlpack=enable_dlpack,
+        output_shape=output_shape,
+        output_dtype=output_dtype,
+    )
+
+    @custom_vjp
+    def f(*x: Any) -> Any:
+        return wrapped(*x)
+
+    def f_fwd(*x: Any) -> Tuple[Any, Tuple[Any, ...]]:
+        y = wrapped(*x)
+        return y, x
+
+    def f_bwd(res: Tuple[Any, ...], g: Any) -> Tuple[Any, ...]:
+        x = res
+
+        if len(x) == 1:
+            x = x[0]
+
+        vjp_fun = partial(backend.vjp, fun)
+        if jit:
+            vjp_fun = backend.jit(vjp_fun)  # type: ignore
+
+        def vjp_wrapped(args: Any) -> Any:
+            args = general_args_to_backend(args, enable_dlpack=enable_dlpack)
+            gb = general_args_to_backend(g, enable_dlpack=enable_dlpack)
+            r = vjp_fun(args, gb)[1]
+            r = general_args_to_backend(
+                r, target_backend="jax", enable_dlpack=enable_dlpack
+            )
+            return r
+
+        # Handle gradient shape for both single input and tuple inputs
+        if isinstance(x, tuple):
+            # Create a tuple of ShapeDtypeStruct for each input
+            grad_shape = tuple(jax.ShapeDtypeStruct(xi.shape, xi.dtype) for xi in x)
+        else:
+            grad_shape = jax.ShapeDtypeStruct(x.shape, x.dtype)
+
+        dx = jax.pure_callback(
+            vjp_wrapped,
+            grad_shape,
+            x,
+        )
+
+        if not isinstance(dx, tuple):
+            dx = (dx,)
+        return dx  # type: ignore
+
+    f.defvjp(f_fwd, f_bwd)
+    return f
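
For orientation, an end-to-end usage sketch assembled from the docstring example above and `test_jax_interface_basic` below; the explicit `tc.set_backend("tensorflow")` call stands in for the `tfb` pytest fixture and is an assumption about the intended setup:

# Usage sketch: wrap a TensorFlow-backend tensorcircuit function so it becomes
# differentiable from jax via pure_callback (forward) and custom_vjp (backward).
import jax
from jax import numpy as jnp
import tensorcircuit as tc

tc.set_backend("tensorflow")  # the wrapped function itself runs on the TF backend


def f(params):
    c = tc.Circuit(1)
    c.rx(0, theta=params[0])
    c.ry(0, theta=params[1])
    return tc.backend.real(c.expectation([tc.gates.z(), [0]]))


f_jax = tc.interfaces.jax_interface(f, jit=True)

params = jnp.ones(2)
value, grad = jax.value_and_grad(f_jax)(params)  # value ≈ 0.291927 per the basic test below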

tests/test_interfaces.py

Lines changed: 133 additions & 0 deletions
@@ -8,6 +8,7 @@
 from scipy import optimize
 import tensorflow as tf
 import jax
+from jax import numpy as jnp
 
 thisfile = os.path.abspath(__file__)
 modulepath = os.path.dirname(os.path.dirname(thisfile))
@@ -427,3 +428,135 @@ def g(a, b, c):
     assert tc.backend.shape_tuple(a[1]) == (2, 2, 2, 2)
     assert tc.backend.shape_tuple(b.eval()) == (2, 2, 2, 2, 2, 2)
     assert tc.backend.shape_tuple(c) == (2, 2, 2, 2)
+
+
+def test_jax_interface_basic(tfb):
+
+    def f(params):
+        c = tc.Circuit(1)
+        c.rx(0, theta=params[0])
+        c.ry(0, theta=params[1])
+        return tc.backend.real(c.expectation_ps(z=[0]))
+
+    f_jax = tc.interfaces.jax_interface(f, jit=True)
+    params = jnp.ones(2)
+
+    # Test forward pass
+    val = f_jax(params)
+    assert isinstance(val, jnp.ndarray)
+    np.testing.assert_allclose(val, 0.291927, atol=1e-5)
+
+    # Test gradient computation
+    val, grad = jax.value_and_grad(f_jax)(params)
+    assert isinstance(grad, jnp.ndarray)
+    assert grad.shape == params.shape
+
+
+def test_jax_interface_multiple_inputs(tfb):
+
+    def f(params1, params2):
+        c = tc.Circuit(2)
+        c.rx(0, theta=params1[0])
+        c.ry(1, theta=params2[0])
+        return tc.backend.real(c.expectation([tc.gates.z(), [0]]))
+
+    f_jax = tc.interfaces.jax_interface(f, jit=False)
+    p1 = jnp.array([1.0])
+    p2 = jnp.array([2.0])
+
+    # Test forward pass
+    val = f_jax(p1, p2)
+    assert isinstance(val, jnp.ndarray)
+
+    # Test gradient computation
+
+    val, (grad1, grad2) = jax.value_and_grad(f_jax, argnums=(0, 1))(p1, p2)
+    assert isinstance(grad1, jnp.ndarray)
+    assert isinstance(grad2, jnp.ndarray)
+    assert grad1.shape == p1.shape
+    assert grad2.shape == p2.shape
+
+
+@pytest.mark.skip(
+    reason="might fail when testing with other function",
+)
+def test_jax_interface_jit_dlpack(tfb):
+
+    def f(params):
+        c = tc.Circuit(2)
+        c.rx(range(2), theta=params)
+        return tc.backend.real(c.expectation([tc.gates.z(), [0]]))
+
+    # Test with JIT
+    f_jax = tc.interfaces.jax_interface(f, jit=True, enable_dlpack=True)
+    params = jnp.array([np.pi, np.pi], dtype=jnp.float32)
+
+    # First call compiles
+    val1 = f_jax(params)
+    # Second call should be faster
+    val2, gs = jax.value_and_grad(f_jax)(params)
+
+    assert isinstance(val1, jnp.ndarray)
+    assert isinstance(gs, jnp.ndarray)
+    np.testing.assert_allclose(val1, val2, atol=1e-5)
+
+
+def test_jax_interface_pure_callback(tfb):
+
+    def f(params):
+        # Use TF operation to test pure_callback
+        return tf.square(params)
+
+    def f_jax1(params):
+        return jnp.sum(tc.interfaces.jax_interface(f)(params))
+
+    def f_jax2(params):
+        return jnp.sum(
+            tc.interfaces.jax_interface(
+                f, jit=True, output_shape=[2], output_dtype=jnp.float32
+            )(params)
+        )
+
+    params = jnp.array([1.0, 2.0])
+
+    for f_jax in [f_jax1, f_jax2]:
+        val = f_jax(params)
+        assert isinstance(val, jnp.ndarray)
+        np.testing.assert_allclose(val, 5.0, atol=1e-5)
+
+        # Test gradient
+        grad = jax.grad(f_jax)(params)
+        assert isinstance(grad, jnp.ndarray)
+        np.testing.assert_allclose(grad, [2.0, 4.0], atol=1e-5)
+
+
+def test_jax_interface_multiple_outputs(tfb):
+
+    def f(params):
+        # Use TF operation to test pure_callback
+        return tf.square(params), params
+
+    def f_jax1(params):
+        r = tc.interfaces.jax_interface(f)(params)
+        return jnp.sum(r[0] + r[1] ** 2) / 2
+
+    def f_jax2(params):
+        r = tc.interfaces.jax_interface(
+            f,
+            jit=True,
+            output_shape=([2], [2]),
+            output_dtype=(jnp.float32, jnp.float32),
+        )(params)
+        return jnp.sum(r[0] + r[1] ** 2) / 2
+
+    params = jnp.array([1.0, 2.0])
+
+    for f_jax in [f_jax1, f_jax2]:
+        val = f_jax(params)
+        assert isinstance(val, jnp.ndarray)
+        np.testing.assert_allclose(val, 5.0, atol=1e-5)
+
+        # Test gradient
+        grad = jax.grad(f_jax)(params)
+        assert isinstance(grad, jnp.ndarray)
+        np.testing.assert_allclose(grad, [2.0, 4.0], atol=1e-5)
