Implemented more general heterogeneous solves

brownbaerchen · brownbaerchen · commit f93f9942b26e · 2025-07-18T11:10:36.000+02:00
diff --git a/pySDC/implementations/problem_classes/RayleighBenard3D.py b/pySDC/implementations/problem_classes/RayleighBenard3D.py
@@ -357,147 +357,3 @@ def plot(self, u, t=None, fig=None, quantity='T'):  # pragma: no cover
         axs[1].set_ylabel(r'$z$')
         fig.colorbar(imT, self.cax[0])
         fig.colorbar(imV, self.cax[1])
-
-
-class RayleighBenard3DHeterogeneous(RayleighBenard3D):
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        CPU_only = ['BC_line_zero_matrix', 'BCs']
-        both = ['Pl', 'Pr', 'L', 'M']
-
-        # copy matrices we need on CPU
-        if self.useGPU:
-            for key in CPU_only:
-                setattr(self.spectral, key, getattr(self.spectral, key).get())
-
-            for key in both:
-                setattr(self, f'{key}_CPU', getattr(self, key).get())
-        else:
-            for key in both:
-                setattr(self, f'{key}_CPU', getattr(self, key))
-
-    def solve_system(self, rhs, dt, u0=None, *args, skip_itransform=False, **kwargs):
-        """
-        Do an implicit Euler step to solve M u_t + Lu = rhs, with M the mass matrix and L the linear operator as setup by
-        ``GenericSpectralLinear.setup_L`` and ``GenericSpectralLinear.setup_M``.
-
-        The implicit Euler step is (M - dt L) u = M rhs. Note that M need not be invertible as long as (M + dt*L) is.
-        This means solving with dt=0 to mimic explicit methods does not work for all problems, in particular simple DAEs.
-
-        Note that by putting M rhs on the right hand side, this function can only solve algebraic conditions equal to
-        zero. If you want something else, it should be easy to overload this function.
-        """
-
-        sp = self.spectral.sparse_lib
-
-        if self.spectral_space:
-            rhs_hat = rhs.copy()
-            if u0 is not None:
-                u0_hat = u0.copy().flatten()
-            else:
-                u0_hat = None
-        else:
-            rhs_hat = self.spectral.transform(rhs)
-            if u0 is not None:
-                u0_hat = self.spectral.transform(u0).flatten()
-            else:
-                u0_hat = None
-
-        # apply inverse right preconditioner to initial guess
-        if u0_hat is not None and 'direct' not in self.solver_type:
-            if not hasattr(self, '_Pr_inv'):
-                self._PR_inv = self.linalg.splu(self.Pr.astype(complex)).solve
-            u0_hat[...] = self._PR_inv(u0_hat)
-
-        rhs_hat = (self.M @ rhs_hat.flatten()).reshape(rhs_hat.shape)
-        rhs_hat = self.spectral.put_BCs_in_rhs_hat(rhs_hat)
-        rhs_hat = self.Pl @ rhs_hat.flatten()
-
-        if dt not in self.cached_factorizations.keys() or not self.solver_type.lower() == 'cached_direct':
-            A = self.M_CPU + dt * self.L_CPU
-            A = self.Pl_CPU @ self.spectral.put_BCs_in_matrix(A) @ self.Pr_CPU
-            A = self.spectral.sparse_lib.csc_matrix(A)
-
-            # if A.shape[0] < 200e20:
-            #     import matplotlib.pyplot as plt
-
-            #     # M = self.spectral.put_BCs_in_matrix(self.L.copy())
-            #     M = A  # self.L
-            #     im = plt.spy(M)
-            #     plt.show()
-
-        if 'ilu' in self.solver_type.lower():
-            if dt not in self.cached_factorizations.keys():
-                if len(self.cached_factorizations) >= self.max_cached_factorizations:
-                    to_evict = list(self.cached_factorizations.keys())[0]
-                    self.cached_factorizations.pop(to_evict)
-                    self.logger.debug(f'Evicted matrix factorization for {to_evict=:.6f} from cache')
-                iLU = self.linalg.spilu(
-                    A, **{**self.preconditioner_args, 'drop_tol': dt * self.preconditioner_args['drop_tol']}
-                )
-                self.cached_factorizations[dt] = self.linalg.LinearOperator(A.shape, iLU.solve)
-                self.logger.debug(f'Cached incomplete LU factorization for {dt=:.6f}')
-                self.work_counters['factorizations']()
-            M = self.cached_factorizations[dt]
-        else:
-            M = None
-        info = 0
-
-        if self.solver_type.lower() == 'cached_direct':
-            if dt not in self.cached_factorizations.keys():
-                if len(self.cached_factorizations) >= self.max_cached_factorizations:
-                    self.cached_factorizations.pop(list(self.cached_factorizations.keys())[0])
-                    self.logger.debug(f'Evicted matrix factorization for {dt=:.6f} from cache')
-                self.cached_factorizations[dt] = self.spectral.linalg.factorized(A)
-                self.logger.debug(f'Cached matrix factorization for {dt=:.6f}')
-                self.work_counters['factorizations']()
-
-            _sol_hat = self.cached_factorizations[dt](rhs_hat)
-            self.logger.debug(f'Used cached matrix factorization for {dt=:.6f}')
-
-        elif self.solver_type.lower() == 'direct':
-            _sol_hat = sp.linalg.spsolve(A, rhs_hat)
-        elif 'gmres' in self.solver_type.lower():
-            _sol_hat, _ = sp.linalg.gmres(
-                A,
-                rhs_hat,
-                x0=u0_hat,
-                **self.solver_args,
-                callback=self.work_counters[self.solver_type],
-                callback_type='pr_norm',
-                M=M,
-            )
-        elif self.solver_type.lower() == 'cg':
-            _sol_hat, info = sp.linalg.cg(
-                A, rhs_hat, x0=u0_hat, **self.solver_args, callback=self.work_counters[self.solver_type]
-            )
-        elif 'bicgstab' in self.solver_type.lower():
-            _sol_hat, info = self.linalg.bicgstab(
-                A,
-                rhs_hat,
-                x0=u0_hat,
-                **self.solver_args,
-                callback=self.work_counters[self.solver_type],
-                M=M,
-            )
-        else:
-            raise NotImplementedError(f'Solver {self.solver_type=} not implemented in {type(self).__name__}!')
-
-        if info != 0:
-            self.logger.warn(f'{self.solver_type} not converged! {info=}')
-
-        sol_hat = self.spectral.u_init_forward
-        sol_hat[...] = (self.Pr @ _sol_hat).reshape(sol_hat.shape)
-
-        if self.spectral_space:
-            return sol_hat
-        else:
-            sol = self.spectral.u_init
-            sol[:] = self.spectral.itransform(sol_hat).real
-
-            if self.spectral.debug:
-                self.spectral.check_BCs(sol)
-
-            return sol
diff --git a/pySDC/implementations/problem_classes/generic_spectral.py b/pySDC/implementations/problem_classes/generic_spectral.py
@@ -64,6 +64,7 @@ def __init__(
         max_cached_factorizations=12,
         spectral_space=True,
         real_spectral_coefficients=False,
+        heterogeneous=False,
         debug=False,
     ):
         """
@@ -81,6 +82,7 @@ def __init__(
             max_cached_factorizations (int): Number of matrix decompositions to cache before starting eviction
             spectral_space (bool): If yes, the solution will not be transformed back after solving and evaluating the RHS, and is expected as input in spectral space to these functions
             real_spectral_coefficients (bool): If yes, allow only real values in spectral space, otherwise, allow complex.
+            heterogeneous (bool): If yes, perform memory intensive sparse matrix operations on CPU
             debug (bool): Make additional tests at extra computational cost
         """
         solver_args = {} if solver_args is None else solver_args
@@ -100,6 +102,7 @@ def __init__(
             'comm',
             'spectral_space',
             'real_spectral_coefficients',
+            'heterogeneous',
             'debug',
             localVars=locals(),
         )
@@ -126,6 +129,29 @@ def __init__(
 
         self.cached_factorizations = {}
 
+        if self.heterogeneous:
+            self.__heterogeneous_setup = False
+
+    def heterogeneous_setup(self):
+        """Set up the ``<name>_CPU`` matrices needed for heterogeneous solves; only acts on the first call."""
+        if self.heterogeneous and not self.__heterogeneous_setup:
+            CPU_only = ['BC_line_zero_matrix', 'BCs']
+            both = ['Pl', 'Pr', 'L', 'M']
+
+            if self.useGPU:
+                # copy matrices we need on CPU; every object must be fetched with ``.get()`` exactly once
+                for key in CPU_only:
+                    setattr(self.spectral, key, getattr(self.spectral, key).get())
+
+                for key in both:
+                    setattr(self, f'{key}_CPU', getattr(self, key).get())
+            else:
+                # no GPU in use: the ``_CPU`` names simply alias the existing matrices
+                for key in both:
+                    setattr(self, f'{key}_CPU', getattr(self, key))
+
+        self.__heterogeneous_setup = True
+
     def __getattr__(self, name):
         """
         Pass requests on to the helper if they are not directly attributes of this class for convenience.
@@ -233,6 +259,8 @@ def solve_system(self, rhs, dt, u0=None, *args, skip_itransform=False, **kwargs)
 
         sp = self.spectral.sparse_lib
 
+        self.heterogeneous_setup()
+
         if self.spectral_space:
             rhs_hat = rhs.copy()
             if u0 is not None:
@@ -257,8 +285,19 @@ def solve_system(self, rhs, dt, u0=None, *args, skip_itransform=False, **kwargs)
         rhs_hat = self.Pl @ rhs_hat.flatten()
 
         if dt not in self.cached_factorizations.keys() or not self.solver_type.lower() == 'cached_direct':
-            A = self.M + dt * self.L
-            A = self.Pl @ self.spectral.put_BCs_in_matrix(A) @ self.Pr
+            if self.heterogeneous:
+                M = self.M_CPU
+                L = self.L_CPU
+                Pl = self.Pl_CPU
+                Pr = self.Pr_CPU
+            else:
+                M = self.M
+                L = self.L
+                Pl = self.Pl
+                Pr = self.Pr
+
+            A = M + dt * L
+            A = Pl @ self.spectral.put_BCs_in_matrix(A) @ Pr
 
             # if A.shape[0] < 200e20:
             #     import matplotlib.pyplot as plt
@@ -290,7 +329,21 @@ def solve_system(self, rhs, dt, u0=None, *args, skip_itransform=False, **kwargs)
                 if len(self.cached_factorizations) >= self.max_cached_factorizations:
                     self.cached_factorizations.pop(list(self.cached_factorizations.keys())[0])
                     self.logger.debug(f'Evicted matrix factorization for {dt=:.6f} from cache')
-                self.cached_factorizations[dt] = self.spectral.linalg.factorized(A)
+
+                if self.heterogeneous:
+                    import scipy.sparse as sp
+
+                    cpu_decomp = sp.linalg.splu(A)
+                    if self.useGPU:
+                        from cupyx.scipy.sparse.linalg import SuperLU
+
+                        solver = SuperLU(cpu_decomp).solve
+                    else:
+                        solver = cpu_decomp.solve
+                else:
+                    solver = self.spectral.linalg.factorized(A)
+
+                self.cached_factorizations[dt] = solver
                 self.logger.debug(f'Cached matrix factorization for {dt=:.6f}')
                 self.work_counters['factorizations']()