Started heterogeneous RBC implementation

brownbaerchen · brownbaerchen · commit 76f0d93e7fdd · 2025-07-18T11:10:12.000+02:00
diff --git a/pySDC/implementations/problem_classes/RayleighBenard3D.py b/pySDC/implementations/problem_classes/RayleighBenard3D.py
@@ -357,3 +357,148 @@ def plot(self, u, t=None, fig=None, quantity='T'):  # pragma: no cover
         axs[1].set_ylabel(r'$z$')
         fig.colorbar(imT, self.cax[0])
         fig.colorbar(imV, self.cax[1])
+
+
+class RayleighBenard3DHeterogeneous(RayleighBenard3D):
+    """
+    Work-in-progress variant of ``RayleighBenard3D`` that keeps CPU copies of the matrices used in ``solve_system``.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Initialize the parent problem, then gather the matrices that should live on the CPU."""
+        super().__init__(*args, **kwargs)
+
+        # copy matrices we need on CPU
+        if self.useGPU:
+            for key in ['BC_line_zero_matrix', 'BCs']:  # TODO complete this list!
+                setattr(self.spectral, key, getattr(self.spectral, key).get())
+            for key in ['Pl', 'Pr', 'M']:  # TODO complete this list!
+                setattr(self, key, getattr(self, key).get())
+
+            self.L_CPU = self.L.get()
+        else:
+            self.L_CPU = self.L.copy()
+
+        # delete matrices we do not need on GPU
+        for key in []:  # TODO: complete list
+            delattr(self, key)
+
+    def solve_system(self, rhs, dt, u0=None, *args, skip_itransform=False, **kwargs):
+        """
+        Do an implicit Euler step to solve M u_t + Lu = rhs, with M the mass matrix and L the linear operator as setup by
+        ``GenericSpectralLinear.setup_L`` and ``GenericSpectralLinear.setup_M``.
+
+        The implicit Euler step is (M + dt L) u = M rhs. Note that M need not be invertible as long as (M + dt*L) is.
+        This means solving with dt=0 to mimic explicit methods does not work for all problems, in particular simple DAEs.
+
+        Note that by putting M rhs on the right hand side, this function can only solve algebraic conditions equal to
+        zero. If you want something else, it should be easy to overload this function.
+
+        Args:
+            rhs: Right hand side; transformed with ``self.spectral.transform`` unless ``self.spectral_space`` is set
+            dt (float): Step size; also used as the key under which factorizations are cached
+            u0: Optional initial guess that is handed to the iterative solvers
+            skip_itransform (bool): Accepted for interface compatibility; not used in this implementation
+
+        Returns:
+            The solution in spectral space if ``self.spectral_space`` is set, otherwise transformed back
+        """
+        sp = self.spectral.sparse_lib
+        solver_type = self.solver_type.lower()
+
+        if self.spectral_space:
+            rhs_hat = rhs.copy()
+            u0_hat = None if u0 is None else u0.copy().flatten()
+        else:
+            rhs_hat = self.spectral.transform(rhs)
+            u0_hat = None if u0 is None else self.spectral.transform(u0).flatten()
+
+        # apply inverse right preconditioner to initial guess, factorizing Pr only on first use
+        if u0_hat is not None and 'direct' not in solver_type:
+            if not hasattr(self, '_PR_inv'):
+                self._PR_inv = self.linalg.splu(self.Pr.astype(complex)).solve
+            u0_hat[...] = self._PR_inv(u0_hat)
+
+        rhs_hat = (self.M @ rhs_hat.flatten()).reshape(rhs_hat.shape)
+        rhs_hat = self.spectral.put_BCs_in_rhs_hat(rhs_hat)
+        rhs_hat = self.Pl @ rhs_hat.flatten()
+
+        # the system matrix is only skipped when a cached direct factorization for this dt can be reused
+        if dt not in self.cached_factorizations or solver_type != 'cached_direct':
+            A = self.M + dt * self.L_CPU
+            A = self.Pl @ self.spectral.put_BCs_in_matrix(A) @ self.Pr
+            A = sp.csc_matrix(A)
+
+        if 'ilu' in solver_type:
+            if dt not in self.cached_factorizations:
+                if len(self.cached_factorizations) >= self.max_cached_factorizations:
+                    to_evict = next(iter(self.cached_factorizations))
+                    self.cached_factorizations.pop(to_evict)
+                    self.logger.debug(f'Evicted matrix factorization for {to_evict=:.6f} from cache')
+                iLU = self.linalg.spilu(
+                    A, **{**self.preconditioner_args, 'drop_tol': dt * self.preconditioner_args['drop_tol']}
+                )
+                self.cached_factorizations[dt] = self.linalg.LinearOperator(A.shape, iLU.solve)
+                self.logger.debug(f'Cached incomplete LU factorization for {dt=:.6f}')
+                self.work_counters['factorizations']()
+            preconditioner = self.cached_factorizations[dt]
+        else:
+            preconditioner = None
+        info = 0
+
+        if solver_type == 'cached_direct':
+            if dt not in self.cached_factorizations:
+                if len(self.cached_factorizations) >= self.max_cached_factorizations:
+                    to_evict = next(iter(self.cached_factorizations))
+                    self.cached_factorizations.pop(to_evict)
+                    self.logger.debug(f'Evicted matrix factorization for {to_evict=:.6f} from cache')
+                self.cached_factorizations[dt] = self.spectral.linalg.factorized(A)
+                self.logger.debug(f'Cached matrix factorization for {dt=:.6f}')
+                self.work_counters['factorizations']()
+
+            _sol_hat = self.cached_factorizations[dt](rhs_hat)
+            self.logger.debug(f'Used cached matrix factorization for {dt=:.6f}')
+        elif solver_type == 'direct':
+            _sol_hat = sp.linalg.spsolve(A, rhs_hat)
+        elif 'gmres' in solver_type:
+            _sol_hat, info = sp.linalg.gmres(
+                A,
+                rhs_hat,
+                x0=u0_hat,
+                **self.solver_args,
+                callback=self.work_counters[self.solver_type],
+                callback_type='pr_norm',
+                M=preconditioner,
+            )
+        elif solver_type == 'cg':
+            _sol_hat, info = sp.linalg.cg(
+                A, rhs_hat, x0=u0_hat, **self.solver_args, callback=self.work_counters[self.solver_type]
+            )
+        elif 'bicgstab' in solver_type:
+            _sol_hat, info = self.linalg.bicgstab(
+                A,
+                rhs_hat,
+                x0=u0_hat,
+                **self.solver_args,
+                callback=self.work_counters[self.solver_type],
+                M=preconditioner,
+            )
+        else:
+            raise NotImplementedError(f'Solver {self.solver_type=} not implemented in {type(self).__name__}!')
+
+        if info != 0:
+            self.logger.warning(f'{self.solver_type} not converged! {info=}')
+
+        sol_hat = self.spectral.u_init_forward
+        sol_hat[...] = (self.Pr @ _sol_hat).reshape(sol_hat.shape)
+
+        if self.spectral_space:
+            return sol_hat
+        else:
+            sol = self.spectral.u_init
+            sol[:] = self.spectral.itransform(sol_hat).real
+
+            if self.spectral.debug:
+                self.spectral.check_BCs(sol)
+
+            return sol
diff --git a/pySDC/tests/test_problems/test_RayleighBenard3D.py b/pySDC/tests/test_problems/test_RayleighBenard3D.py
@@ -191,6 +191,48 @@ def test_Poisson_problem_w():
         assert np.allclose(u_exact[i], u[i]), f'Unexpected solution in component {comp}'
 
 
+@pytest.mark.mpi4py
+@pytest.mark.parametrize('solver_type', ['gmres+ilu', 'bicgstab+ilu'])
+@pytest.mark.parametrize('N', [4, 16])
+@pytest.mark.parametrize('left_preconditioner', [True, False])
+@pytest.mark.parametrize('Dirichlet_recombination', [True, False])
+def test_solver_convergence(solver_type, N, left_preconditioner, Dirichlet_recombination):
+    """Compare the iLU-preconditioned iterative solvers against a solve with ``cached_direct``."""
+    import numpy as np
+    from pySDC.implementations.problem_classes.RayleighBenard3D import RayleighBenard3D
+
+    resolution = {'nx': N, 'ny': N, 'nz': N}
+    step_size = 1.0e-3
+
+    # a smaller fill factor is used whenever any of the two preconditioning options is enabled
+    needs_less_fill = left_preconditioner or Dirichlet_recombination
+    iterative = RayleighBenard3D(
+        **resolution,
+        solver_type=solver_type,
+        solver_args={'atol': 1e-10, 'rtol': 0},
+        preconditioner_args={'fill_factor': 5 if needs_less_fill else 10, 'drop_tol': 1e-4},
+        left_preconditioner=left_preconditioner,
+        Dirichlet_recombination=Dirichlet_recombination,
+    )
+    direct = RayleighBenard3D(**resolution, solver_type='cached_direct')
+
+    rhs = iterative.u_exact(0, noise_level=1.0e-3)
+    reference = direct.solve_system(rhs.copy(), step_size)
+
+    # starting from the direct solution, the iterative solver must not perform any iteration
+    from_reference = iterative.solve_system(rhs.copy(), step_size, u0=reference.copy())
+    assert iterative.work_counters[iterative.solver_type].niter == 0
+    assert np.allclose(from_reference, reference)
+
+    # starting from the right hand side, the result must match the direct solution up to a multiple of atol
+    error = abs(iterative.solve_system(rhs.copy(), step_size, u0=rhs.copy()) - reference)
+    assert error <= iterative.solver_args['atol'] * 1e3, error
+
+    if 'ilu' in solver_type.lower():
+        size_iLU, size_LU = (me.cached_factorizations[step_size].__sizeof__() for me in (iterative, direct))
+        assert size_iLU < size_LU, 'iLU does not require less memory than LU!'
+
+
 @pytest.mark.mpi4py
 def test_libraries():
     from pySDC.implementations.problem_classes.RayleighBenard3D import RayleighBenard3D
@@ -231,9 +273,30 @@ def test_banded_matrix(preconditioning):
         ), 'One-sided bandwidth of LU decomposition is larger than that of the full matrix!'
 
 
+@pytest.mark.cupy
+def test_heterogeneous_implementation():
+    """Compare ``eval_f`` and ``solve_system`` of the heterogeneous problem against ``RayleighBenard3D``."""
+    from pySDC.implementations.problem_classes.RayleighBenard3D import RayleighBenard3D, RayleighBenard3DHeterogeneous
+
+    # NOTE(review): both problems are set up with useGPU=False despite the cupy marker - confirm this is intended
+    params = {'nx': 2, 'ny': 2, 'nz': 2, 'useGPU': False}
+    reference = RayleighBenard3D(**params)
+    heterogeneous = RayleighBenard3DHeterogeneous(**params)
+    u_init = reference.u_exact()
+
+    f_reference = reference.eval_f(u_init)
+    f_heterogeneous = heterogeneous.eval_f(u_init)
+    assert reference.xp.allclose(f_reference, f_heterogeneous)
+
+    u_reference, u_heterogeneous = (me.solve_system(u_init, 1e-3) for me in (reference, heterogeneous))
+    assert reference.xp.allclose(u_reference, u_heterogeneous)
+
+
 if __name__ == '__main__':
     # test_eval_f(2**2, 2**1, 'x', False)
     # test_libraries()
     # test_Poisson_problems(4, 'u')
     # test_Poisson_problem_w()
-    test_banded_matrix(False)
+    # test_solver_convergence('bicgstab+ilu', 32, False, True)
+    # test_banded_matrix(False)
+    test_heterogeneous_implementation()  # the one test that runs when this file is executed as a script