memory efficient chebyshev evol

refraction-ray · refraction-ray · commit 695d1d84842c · 2025-07-31T00:12:06.000+08:00
diff --git a/tensorcircuit/backends/tensorflow_backend.py b/tensorcircuit/backends/tensorflow_backend.py
@@ -360,6 +360,38 @@ def _svd_tf(
 tensornetwork.backends.tensorflow.tensorflow_backend.TensorFlowBackend.svd = _svd_tf
 
 
+def sparse_tensor_matmul(self: Tensor, other: Tensor) -> Tensor:
+    """
+    Matrix product ``self @ other`` with a ``tf.SparseTensor`` on the left.
+
+    Intended to be installed as ``tf.SparseTensor.__matmul__``. The product is
+    delegated to ``tf.sparse.sparse_dense_matmul``; a rank-1 ``other`` is
+    expanded to a single column for that call and squeezed back afterwards.
+
+    :param self: the sparse left operand
+    :type self: Tensor
+    :param other: the dense right operand, cast to ``self.dtype`` when its
+        dtype is not compatible
+    :type other: Tensor
+    :return: the product, with a rank-1 input giving a rank-1 result
+    :rtype: Tensor
+    """
+    # Bring the dense operand to the sparse operand's dtype when they differ.
+    rhs = other
+    if not rhs.dtype.is_compatible_with(self.dtype):
+        rhs = tf.cast(rhs, self.dtype)
+
+    if len(rhs.shape) != 1:
+        # Not a vector: pass it through to the sparse-dense kernel unchanged.
+        return tf.sparse.sparse_dense_matmul(self, rhs)
+
+    # The kernel only accepts matrices, so wrap the vector as one column
+    # ([N] -> [N, 1]) and unwrap the product again ([M, 1] -> [M]).
+    column = tf.expand_dims(rhs, axis=1)
+    product = tf.sparse.sparse_dense_matmul(self, column)
+    return tf.squeeze(product, axis=1)
+
+
 class TensorFlowBackend(tensorflow_backend.TensorFlowBackend, ExtendedBackend):  # type: ignore
     """
     See the original backend API at `tensorflow backend
@@ -378,6 +410,8 @@ def __init__(self) -> None:
             )
         tf = tensorflow
         tf.sparse.SparseTensor.__add__ = tf.sparse.add
+        tf.SparseTensor.__matmul__ = sparse_tensor_matmul
+
         self.minor = int(tf.__version__.split(".")[1])
         self.name = "tensorflow"
         logger = tf.get_logger()  # .setLevel('ERROR')
diff --git a/tensorcircuit/fgs.py b/tensorcircuit/fgs.py
@@ -227,7 +227,7 @@ def get_alpha(self) -> Tensor:
         return self.alpha
 
     def get_cmatrix(self, now_i: bool = True, now_j: bool = True) -> Tensor:
-        """
+        r"""
         Calculates the correlation matrix.
 
         The correlation matrix is defined as :math:`C_{ij} = \langle c_i^\dagger c_j \rangle`.
@@ -509,7 +509,7 @@ def orthogonal(self) -> None:
 
     @staticmethod
     def hopping(chi: Tensor, i: int, j: int, L: int) -> Tensor:
-        """
+        r"""
         Constructs the hopping Hamiltonian between two sites.
 
         The hopping Hamiltonian is given by :math:`\chi c_i^\dagger c_j + h.c.`.
@@ -550,7 +550,7 @@ def evol_hp(self, i: int, j: int, chi: Tensor = 0) -> None:
 
     @staticmethod
     def chemical_potential(chi: Tensor, i: int, L: int) -> Tensor:
-        """
+        r"""
         Constructs the chemical potential Hamiltonian for a single site.
 
         The chemical potential Hamiltonian is given by :math:`\chi c_i^\dagger c_i`.
@@ -572,7 +572,7 @@ def chemical_potential(chi: Tensor, i: int, L: int) -> Tensor:
 
     @staticmethod
     def sc_pairing(chi: Tensor, i: int, j: int, L: int) -> Tensor:
-        """
+        r"""
         Constructs the superconducting pairing Hamiltonian between two sites.
 
         The superconducting pairing Hamiltonian is given by :math:`\chi c_i^\dagger c_j^\dagger + h.c.`.
@@ -637,7 +637,7 @@ def evol_icp(self, i: int, chi: Tensor = 0) -> None:
         self.evol_ihamiltonian(self.chemical_potential(chi, i, self.L))
 
     def get_bogoliubov_uv(self) -> Tuple[Tensor, Tensor]:
-        """
+        r"""
         Returns the u and v matrices of the Bogoliubov transformation.
 
         The Bogoliubov transformation is defined as:
diff --git a/tensorcircuit/timeevol.py b/tensorcircuit/timeevol.py
@@ -624,52 +624,76 @@ def chebyshev_evol(
     :return: Evolved state
     :rtype: Tensor
     """
-
+    # TODO(@refraction-ray): no support for tf backend as bessel function has no implementation
     E_max, E_min = spectral_bounds
     if E_max <= E_min:
         raise ValueError("E_max must be > E_min.")
 
     a = (E_max - E_min) / 2.0
     b = (E_max + E_min) / 2.0
-    tau = a * t  # tau is now a scalar
+    tau = a * t  # Rescaled time parameter
 
     def apply_h_norm(psi: Any) -> Any:
+        """Applies the normalized Hamiltonian to a state."""
         return ((hamiltonian @ psi) - b * psi) / a
 
-    T0 = initial_state
-    if k == 1:
-        T_k_vectors = T0[None, :]
-    else:
-        T1 = apply_h_norm(T0)
+    # With k == 0 the truncated series has no terms, so the result is all zeros.
+    if k == 0:
+        # The global phase is still applied, mirroring the k > 0 return path.
+        phase = backend.exp(-1j * b * t)
+        return phase * backend.zeros_like(initial_state)
 
-        def scan_body(carry, _):  # type: ignore
-            Tk, Tkm1 = carry
-            Tkp1 = 2 * apply_h_norm(Tk) - Tkm1
-            return (Tkp1, Tk), Tk
+    # --- 2. Calculate Chebyshev Expansion Coefficients ---
+    k_indices = backend.arange(k)
+    bessel_vals = backend.special_jv(k, tau, M)
 
-        # 假设 backend.jaxy_scan 已正确实现
-        _, T_k_stack_1_onwards = backend.jaxy_scan(
-            scan_body, (T1, T0), xs=backend.arange(k - 1)
+    # Prefactor is 1 for the order-0 term and 2 for every higher-order term.
+    prefactor = backend.ones([k])
+    if k > 1:
+        # Using concat for backend compatibility (vs. jax's .at[1:].set(2.0))
+        prefactor = backend.concat(
+            [backend.ones([1]), backend.ones([k - 1]) * 2.0], axis=0
         )
-        T_k_vectors = backend.concat([T0[None, :], T_k_stack_1_onwards], axis=0)
 
-    bessel_vals = backend.special_jv(k, tau, M)
+    ik_powers = backend.power(0 - 1j, k_indices)
+    coeffs = prefactor * ik_powers * bessel_vals
 
-    k_indices = backend.arange(k)
-    first_element = backend.ones([1])
+    # --- 3. Iteratively build the result using a scan ---
 
-    remaining_elements = backend.ones([k - 1]) * 2.0
+    # Handle the simple case of k=1 separately.
+    if k == 1:
+        psi_unphased = coeffs[0] * initial_state
+    else:  # k >= 2, use the scan operation.
+        # Initialize the first two Chebyshev vectors and the initial sum.
+        T0 = initial_state
+        T1 = apply_h_norm(T0)
+        initial_sum = coeffs[0] * T0 + coeffs[1] * T1
 
-    prefactor = backend.concat([first_element, remaining_elements], axis=0)
-    ik_powers = backend.power(0 - 1j, k_indices)
+        # The carry for the scan holds the state needed for the next iteration:
+        # (current vector T_k, previous vector T_{k-1}, and the running sum).
+        initial_carry = (T1, T0, initial_sum)
 
-    # coeffs 现在是一个清晰的 1D 向量，形状为 (n_terms,)
-    coeffs = prefactor * ik_powers * bessel_vals
+        def scan_body(carry, i):  # type: ignore
+            """The body of the scan operation."""
+            Tk, Tkm1, current_sum = carry
 
-    # 求和也变得更简单
-    psi_unphased = backend.einsum("k,kD->D", coeffs, T_k_vectors)
+            # Calculate the next Chebyshev vector using the recurrence relation.
+            Tkp1 = 2 * apply_h_norm(Tk) - Tkm1
 
-    # 加上全局相位
+            # Add its contribution to the running sum.
+            new_sum = current_sum + coeffs[i] * Tkp1
+
+            # Return the updated carry for the next step. No intermediate output is needed.
+            return (Tkp1, Tk, new_sum)
+
+        # Run the scan over the remaining coefficients (from index 2 to k-1).
+        final_carry = backend.scan(scan_body, backend.arange(2, k), initial_carry)
+
+        # The final result is the sum accumulated in the last carry state.
+        psi_unphased = final_carry[2]
+
+    # --- 4. Final Step: Apply Phase Correction ---
+    # This undoes the energy shift from the Hamiltonian normalization.
     phase = backend.exp(-1j * b * t)
     psi_final = phase * psi_unphased
 
@@ -750,19 +774,19 @@ def estimate_spectral_bounds(
         r = backend.convert_to_tensor(r)  # in case np.matrix
         r = backend.reshape(r, [-1])
         if beta != 0:
-            r -= beta * q_prev
+            r -= backend.cast(beta, dtypestr) * q_prev
 
         alpha = backend.real(backend.sum(backend.conj(q) * r))
 
         alphas.append(alpha)
 
-        r -= alpha * q
+        r -= backend.cast(alpha, dtypestr) * q
 
         q_prev = q
         beta = backend.norm(r)
         q = r / beta
+        beta = backend.abs(beta)
         betas.append(beta)
-
         if beta < 1e-8:
             break
 
diff --git a/tests/test_backends.py b/tests/test_backends.py
@@ -61,6 +61,49 @@ def f(x):
     np.testing.assert_allclose(f(a), np.ones([2]), atol=1e-5)
 
 
+def test_sparse_tensor_matmul_monkey_patch(tfb):
+    """
+    Exercise the ``@`` operator on ``tf.SparseTensor``.
+
+    The operator comes from the monkey patch
+    ``tf.SparseTensor.__matmul__ = sparse_tensor_matmul``; the right-hand side
+    is tried as a rank-1 vector, an explicit column vector and a dense matrix.
+    """
+    # 4x4 sparse operand in COO form; row 3 holds no entries.
+    sparse_matrix = tf.SparseTensor(
+        indices=tf.constant([[0, 0], [1, 1], [2, 3]], dtype=tf.int64),
+        values=tf.constant([1.0, 2.0, 3.0], dtype=tf.complex64),
+        dense_shape=[4, 4],
+    )
+
+    # Case 1a: a rank-1 right operand; the expected values follow from the
+    # three stored entries.
+    vec = tf.constant([1.0, 2.0, 3.0, 4.0], dtype=tf.complex64)
+    want_vec = tf.constant([1.0, 4.0, 12.0, 0.0], dtype=tf.complex64)
+    got_vec = sparse_matrix @ vec
+    np.testing.assert_allclose(got_vec, want_vec, atol=1e-6)
+
+    # Case 1b: the same data as an explicit [4, 1] column.
+    col = tc.backend.reshape(vec, [4, 1])
+    want_col = tc.backend.reshape(want_vec, [4, 1])
+    got_col = sparse_matrix @ col
+    np.testing.assert_allclose(got_col, want_col, atol=1e-6)
+
+    # Case 2: dense [4, 2] right operand, checked against the TF kernel.
+    mat = tf.constant(
+        [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]], dtype=tf.complex64
+    )
+    got_mat = sparse_matrix @ mat
+    want_mat = tf.sparse.sparse_dense_matmul(sparse_matrix, mat)
+    np.testing.assert_allclose(got_mat.numpy(), want_mat.numpy(), atol=1e-6)
+
+    # Case 3: the rank-1 product agrees with the backend's own
+    # sparse_dense_matmul applied to the column and flattened.
+    via_backend = tc.backend.sparse_dense_matmul(sparse_matrix, col)
+    via_backend_flat = tc.backend.reshape(via_backend, [-1])
+    np.testing.assert_allclose(got_vec, via_backend_flat, atol=1e-6)
+
+
 @pytest.mark.parametrize("backend", [lf("npb"), lf("jaxb")])
 def test_backend_jv(backend, highp):
     def calculate_M(k, x_val):
diff --git a/tests/test_timeevol.py b/tests/test_timeevol.py
@@ -279,7 +279,6 @@ def test_krylov_evol_heisenberg_6_sites(backend):
 
     # Generate Heisenberg Hamiltonian
     h = tc.quantum.heisenberg_hamiltonian(g, hzz=1.0, hxx=1.0, hyy=1.0, sparse=False)
-    print(h.dtype)
     # Initial state - all spins up except last one down
     psi0 = np.zeros((2**n,))
     psi0[62] = 1.0
@@ -454,15 +453,18 @@ def loss_function(t):
     print(gradient)
 
 
-@pytest.mark.parametrize("backend", [lf("npb"), lf("jaxb")])
-def test_chebyshev_evol_basic(backend, highp):
+@pytest.mark.parametrize(
+    "backend, sparse",
+    [[lf("npb"), True], [lf("npb"), False], [lf("jaxb"), True], [lf("jaxb"), False]],
+)
+def test_chebyshev_evol_basic(backend, highp, sparse):
     n = 6
     # Create a 1D chain graph
     g = tc.templates.graphs.Line1D(n, pbc=False)
 
     # Generate Heisenberg Hamiltonian (dense for better compatibility)
     h = tc.quantum.heisenberg_hamiltonian(
-        g, hzz=1.0, hxx=1.0, hyy=1.0, hx=0.2, sparse=False
+        g, hzz=1.0, hxx=1.0, hyy=1.0, hx=0.2, sparse=sparse
     )
 
     # Initial Neel state: |↑↓↑↓⟩
@@ -490,6 +492,8 @@ def test_chebyshev_evol_basic(backend, highp):
     np.testing.assert_allclose(norm, 1.0, atol=1e-3)
 
     # Compare with exact evolution for small system
+    if sparse is True:
+        h = tc.backend.to_dense(h)
     psi_exact = tc.timeevol.ed_evol(h, psi0, 1.0j * tc.backend.convert_to_tensor([t]))[
         0
     ]