Parallel-in-Time
diff --git a/‎pySDC/helpers/fieldsIO.py‎
Lines changed: 2 additions & 1 deletion b/‎pySDC/helpers/fieldsIO.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎pySDC/helpers/plot_helper.py‎
Lines changed: 2 additions & 0 deletions b/‎pySDC/helpers/plot_helper.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎pySDC/helpers/spectral_helper.py‎
Lines changed: 32 additions & 5 deletions b/‎pySDC/helpers/spectral_helper.py‎
Lines changed: 32 additions & 5 deletions
diff --git a/‎pySDC/implementations/problem_classes/RayleighBenard3D.py‎
Lines changed: 75 additions & 16 deletions b/‎pySDC/implementations/problem_classes/RayleighBenard3D.py‎
Lines changed: 75 additions & 16 deletions
diff --git a/‎pySDC/implementations/problem_classes/generic_spectral.py‎
Lines changed: 27 additions & 1 deletion b/‎pySDC/implementations/problem_classes/generic_spectral.py‎
Lines changed: 27 additions & 1 deletion
@@ -154,7 +154,8 @@ def fromFile(cls, fileName):
         fieldsIO : :class:`FieldsIO`
             The specialized `FieldsIO` adapted to the file.
         """
-        assert os.path.isfile(fileName), f"not a file ({fileName})"
+        if not os.path.isfile(fileName):
+            raise FileNotFoundError(f"not a file ({fileName})")
         with open(fileName, "rb") as f:
             STRUCT, DTYPE = np.fromfile(f, dtype=H_DTYPE, count=2)
             fieldsIO: FieldsIO = cls.STRUCTS[STRUCT](DTYPES[DTYPE], fileName)
 
@@ -45,13 +45,15 @@ def figsize_by_journal(journal, scale, ratio):  # pragma: no cover
         'Springer_proceedings': 347.12354,
         'JSC_thesis': 434.26027,
         'TUHH_thesis': 426.79135,
+        'Nature_CS': 372.0,
     }
     # store text height in points here, get this from LaTeX using \the\textheight
     textheights = {
         'JSC_beamer': 214.43411,
         'JSC_thesis': 635.5,
         'TUHH_thesis': 631.65118,
         'Springer_proceedings': 549.13828,
+        'Nature_CS': 552.69478,
     }
     assert (
         journal in textwidths.keys()
 
@@ -97,6 +97,7 @@ def __init__(self, N, x0=None, x1=None, useGPU=False, useFFTW=False):
 
         if useGPU:
             self.setup_GPU()
+            self.logger.debug('Set up for GPU')
         else:
             self.setup_CPU(useFFTW=useFFTW)
 
@@ -161,6 +162,10 @@ def get_differentiation_matrix(self):
     def get_integration_matrix(self):
         raise NotImplementedError()
 
+    def get_integration_weights(self):
+        """Weights for integration across entire domain"""
+        raise NotImplementedError()
+
     def get_wavenumbers(self):
         """
         Get the grid in spectral space
@@ -379,6 +384,16 @@ def get_integration_matrix(self, lbnd=0):
             raise NotImplementedError(f'This function allows to integrate only from x=0, you attempted from x={lbnd}.')
         return S
 
+    def get_integration_weights(self):
+        """Weights for integration across entire domain"""
+        n = self.xp.arange(self.N, dtype=float)
+
+        weights = (-1) ** n + 1
+        weights[2:] /= 1 - (n**2)[2:]
+
+        weights /= 2 / self.L
+        return weights
+
     def get_differentiation_matrix(self, p=1):
         '''
         Keep in mind that the T2T differentiation matrix is dense.
@@ -808,6 +823,12 @@ def get_integration_matrix(self, p=1):
         k[0] = 1j * self.L
         return self.linalg.matrix_power(self.sparse_lib.diags(1 / (1j * k)), p)
 
+    def get_integration_weights(self):
+        """Weights for integration across entire domain"""
+        weights = self.xp.zeros(self.N)
+        weights[0] = self.L / self.N
+        return weights
+
     def get_plan(self, u, forward, *args, **kwargs):
         if self.fft_lib.__name__ == 'mpi4py_fft.fftw':
             if 'axes' in kwargs.keys():
@@ -1006,7 +1027,6 @@ def __init__(self, comm=None, useGPU=False, debug=False):
         self.BCs = None
 
         self.fft_cache = {}
-        self.fft_dealias_shape_cache = {}
 
         self.logger = logging.getLogger(name='Spectral Discretization')
         if debug:
@@ -1293,11 +1313,12 @@ def setup_BCs(self):
 
         diags = self.xp.ones(self.BCs.shape[0])
         diags[self.BC_zero_index] = 0
-        self.BC_line_zero_matrix = sp.diags(diags)
+        self.BC_line_zero_matrix = sp.diags(diags).tocsc()
 
         # prepare BCs in spectral space to easily add to the RHS
         rhs_BCs = self.put_BCs_in_rhs(self.u_init)
-        self.rhs_BCs_hat = self.transform(rhs_BCs)
+        self.rhs_BCs_hat = self.transform(rhs_BCs).view(self.xp.ndarray)
+        del self.BC_rhs_mask
 
     def check_BCs(self, u):
         """
@@ -1350,7 +1371,7 @@ def put_BCs_in_rhs_hat(self, rhs_hat):
             Generate a mask where we need to set values in the rhs in spectral space to zero, such that can replace them
             by the boundary conditions. The mask is then cached.
             """
-            self._rhs_hat_zero_mask = self.newDistArray().astype(bool)
+            self._rhs_hat_zero_mask = self.newDistArray(forward_output=True).astype(bool).view(self.xp.ndarray)
 
             for axis in range(self.ndim):
                 for bc in self.full_BCs:
@@ -1498,7 +1519,7 @@ def get_indices(self, forward_output=True):
     def get_pfft(self, axes=None, padding=None, grid=None):
         if self.ndim == 1 or self.comm is None:
             return None
-        from mpi4py_fft import PFFT
+        from mpi4py_fft import PFFT, newDistArray
 
         axes = tuple(i for i in range(self.ndim)) if axes is None else axes
         padding = list(padding if padding else [1.0 for _ in range(self.ndim)])
@@ -1530,6 +1551,10 @@ def no_transform(u, *args, **kwargs):
             transforms=transforms,
             grid=grid,
         )
+
+        # do a transform to do the planning
+        _u = newDistArray(pfft, forward_output=False)
+        pfft.backward(pfft.forward(_u))
         return pfft
 
     def get_fft(self, axes=None, direction='object', padding=None, shape=None):
@@ -1885,6 +1910,7 @@ def get_differentiation_matrix(self, axes, **kwargs):
             _D = self.axes[axis].get_differentiation_matrix(**kwargs)
             D = D @ self.expand_matrix_ND(_D, axis)
 
+        self.logger.debug(f'Set up differentiation matrix along axes {axes} with kwargs {kwargs}')
         return D
 
     def get_integration_matrix(self, axes):
@@ -1948,4 +1974,5 @@ def get_basis_change_matrix(self, axes=None, **kwargs):
             _C = self.axes[axis].get_basis_change_matrix(**kwargs)
             C = C @ self.expand_matrix_ND(_C, axis)
 
+        self.logger.debug(f'Set up basis change matrix along axes {axes} with kwargs {kwargs}')
         return C
@@ -141,6 +141,10 @@ def __init__(
         U01 = self.get_basis_change_matrix(p_in=0, p_out=1)
         U12 = self.get_basis_change_matrix(p_in=1, p_out=2)
         U02 = self.get_basis_change_matrix(p_in=0, p_out=2)
+        self.eliminate_zeros(S1)
+        self.eliminate_zeros(S2)
+        self.eliminate_zeros(Dz)
+        self.eliminate_zeros(Dzz)
 
         self.Dx = Dx
         self.Dxx = Dxx
@@ -158,13 +162,12 @@ def __init__(
 
         # construct operators
         _D = U02 @ (Dxx + Dyy) + Dzz
-        L_lhs = {
-            'p': {'u': U01 @ Dx, 'v': U01 @ Dy, 'w': Dz},  # divergence free constraint
-            'u': {'p': U02 @ Dx, 'u': -self.nu * _D},
-            'v': {'p': U02 @ Dy, 'v': -self.nu * _D},
-            'w': {'p': U12 @ Dz, 'w': -self.nu * _D, 'T': -U02 @ Id},
-            'T': {'T': -self.kappa * _D},
-        }
+        L_lhs = {}
+        L_lhs['p'] = {'u': U01 @ Dx, 'v': U01 @ Dy, 'w': Dz}  # divergence free constraint
+        L_lhs['u'] = {'p': U02 @ Dx, 'u': -self.nu * _D}
+        L_lhs['v'] = {'p': U02 @ Dy, 'v': -self.nu * _D}
+        L_lhs['w'] = {'p': U12 @ Dz, 'w': -self.nu * _D, 'T': -U02 @ Id}
+        L_lhs['T'] = {'T': -self.kappa * _D}
         self.setup_L(L_lhs)
 
         # mass matrix
@@ -308,16 +311,30 @@ def compute_Nusselt_numbers(self, u):
             u: The solution you want to compute the Nusselt numbers of
 
         Returns:
-            dict: Nusselt number averaged over the entire volume and horizontally averaged at the top and bottom.
+            dict: Nusselt number averaged over the entire volume and horizontally averaged at the top and bottom as well
+                  as computed from thermal and kinetic dissipation.
         """
-        iw, iT = self.index(['w', 'T'])
+        iu, iv, iw, iT = self.index(['u', 'v', 'w', 'T'])
         zAxis = self.spectral.axes[-1]
 
         if self.spectral_space:
             u_hat = u.copy()
         else:
             u_hat = self.transform(u)
 
+        # evaluate derivatives in x, y, and z for u, v, w, and T
+        derivatives = []
+        derivative_indices = [iu, iv, iw, iT]
+        u_hat_flat = [u_hat[i].flatten() for i in derivative_indices]
+        _D_u_hat = self.u_init_forward
+        for D in [self.Dx, self.Dy, self.Dz]:
+            _D_u_hat *= 0
+            for i in derivative_indices:
+                self.xp.copyto(_D_u_hat[i], (D @ u_hat_flat[i]).reshape(_D_u_hat[i].shape))
+            derivatives.append(
+                self.itransform(_D_u_hat).real
+            )  # derivatives[0] contains x derivatives, [2] is y and [3] is z
+
         DzT_hat = (self.Dz @ u_hat[iT].flatten()).reshape(u_hat[iT].shape)
 
         # compute wT with dealiasing
@@ -327,31 +344,56 @@ def compute_Nusselt_numbers(self, u):
         _me[0] = u_pad[iw] * u_pad[iT]
         wT_hat = self.transform(_me, padding=padding)[0]
 
+        # compute Nusselt number
         nusselt_hat = (wT_hat / self.kappa - DzT_hat) * self.axes[-1].L
 
-        if not hasattr(self, '_zInt'):
-            self._zInt = zAxis.get_integration_matrix()
+        # compute thermal dissipation
+        thermal_dissipation = self.u_init_physical
+        thermal_dissipation[0, ...] = (
+            self.kappa * (derivatives[0][iT].real + derivatives[1][iT].real + derivatives[2][iT].real) ** 2
+        )
+        thermal_dissipation_hat = self.transform(thermal_dissipation)[0]
+
+        # compute kinetic energy dissipation
+        kinetic_energy_dissipation = self.u_init_physical
+        for i in [iu, iv, iw]:
+            kinetic_energy_dissipation[0, ...] += (
+                self.nu * (derivatives[0][i].real + derivatives[1][i].real + derivatives[2][i].real) ** 2
+            )
+        kinetic_energy_dissipation_hat = self.transform(kinetic_energy_dissipation)[0]
 
-        # get coefficients for evaluation on the boundary
+        # get coefficients for evaluation on the boundary and vertical integration
+        zInt = zAxis.get_integration_weights()
         top = zAxis.get_BC(kind='Dirichlet', x=1)
         bot = zAxis.get_BC(kind='Dirichlet', x=-1)
 
+        # compute volume averages
         integral_V = 0
+        integral_V_th = 0
+        integral_V_kin = 0
         if self.comm.rank == 0:
 
-            integral_z = (self._zInt @ nusselt_hat[0, 0]).real
-            integral_z[0] = zAxis.get_integration_constant(integral_z, axis=-1)
-            integral_V = ((top - bot) * integral_z).sum() * self.axes[0].L * self.axes[1].L / self.nx / self.ny
+            integral_V = (zInt @ nusselt_hat[0, 0]).real * self.axes[0].L * self.axes[1].L / self.nx / self.ny
+            integral_V_th = (
+                (zInt @ thermal_dissipation_hat[0, 0]).real * self.axes[0].L * self.axes[1].L / self.nx / self.ny
+            )
+            integral_V_kin = (
+                (zInt @ kinetic_energy_dissipation_hat[0, 0]).real * self.axes[0].L * self.axes[1].L / self.nx / self.ny
+            )
 
+        # communicate result across all tasks
         Nusselt_V = self.comm.bcast(integral_V / self.spectral.V, root=0)
-
         Nusselt_t = self.comm.bcast(self.xp.sum(nusselt_hat.real[0, 0, :] * top, axis=-1) / self.nx / self.ny, root=0)
         Nusselt_b = self.comm.bcast(self.xp.sum(nusselt_hat.real[0, 0] * bot / self.nx / self.ny, axis=-1), root=0)
+        Nusselt_thermal = self.comm.bcast(1 / self.kappa * integral_V_th / self.spectral.V, root=0)
+        Nusselt_kinetic = self.comm.bcast(1 + 1 / self.kappa * integral_V_kin / self.spectral.V, root=0)
 
         return {
             'V': Nusselt_V,
             't': Nusselt_t,
             'b': Nusselt_b,
+            'thermal': Nusselt_thermal,
+            'kinetic': Nusselt_kinetic,
         }
 
     def get_frequency_spectrum(self, u):
@@ -420,3 +462,20 @@ def get_frequency_spectrum(self, u):
                 spectrum[..., index_global] += _spectrum[..., index_local]
 
         return xp.array(unique_k_all), spectrum
+
+    def get_vertical_profiles(self, u, components):
+        if self.spectral_space:
+            u_hat = u.copy()
+        else:
+            u_hat = self.transform(u)
+
+        _u_hat = self.axes[-1].itransform(u_hat, axes=(-1,))
+
+        avgs = {}
+        for c in components:
+            i = self.index(c)
+            avg = self.xp.ascontiguousarray(_u_hat[i, 0, 0, :].real) / self.axes[0].N / self.axes[1].N
+            self.comm.Bcast(avg, root=0)
+            avgs[c] = avg
+
+        return avgs
@@ -132,21 +132,29 @@ def __init__(
         if self.heterogeneous:
             self.__heterogeneous_setup = False
 
+        self.logger.debug('Finished GenericSpectralLinear __init__')
+
     def heterogeneous_setup(self):
         if self.heterogeneous and not self.__heterogeneous_setup:
+
             CPU_only = ['BC_line_zero_matrix', 'BCs']
             both = ['Pl', 'Pr', 'L', 'M']
 
+            self.logger.debug(f'Starting heterogeneous setup. Moving {CPU_only} and copying {both} to CPU')
+
             if self.useGPU:
                 for key in CPU_only:
+                    self.logger.debug(f'Moving {key} to CPU')
                     setattr(self.spectral, key, getattr(self.spectral, key).get())
 
                 for key in both:
+                    self.logger.debug(f'Copying {key} to CPU')
                     setattr(self, f'{key}_CPU', getattr(self, key).get())
             else:
                 for key in both:
                     setattr(self, f'{key}_CPU', getattr(self, key))
 
+            self.logger.debug('Done with heterogeneous setup')
         self.__heterogeneous_setup = True
 
     def __getattr__(self, name):
@@ -195,6 +203,7 @@ def setup_L(self, LHS):
             LHS (dict): Dictionary containing the equations.
         """
         self.L = self._setup_operator(LHS)
+        self.logger.debug('Set up L matrix')
 
     def setup_M(self, LHS):
         '''
@@ -203,6 +212,7 @@ def setup_M(self, LHS):
         diff_index = list(LHS.keys())
         self.diff_mask = [me in diff_index for me in self.components]
         self.M = self._setup_operator(LHS)
+        self.logger.debug('Set up M matrix')
 
     def setup_preconditioner(self, Dirichlet_recombination=True, left_preconditioner=True):
         """
@@ -213,8 +223,14 @@ def setup_preconditioner(self, Dirichlet_recombination=True, left_preconditioner
             left_preconditioner (bool): If True, it will interleave the variables and reverse the Kronecker product
         """
         N = np.prod(self.init[0][1:])
+        if self.useGPU:
+            from cupy_backends.cuda.libs.cusparse import CuSparseError as MemError
+        else:
+            MemError = MemoryError
 
         if left_preconditioner:
+            self.logger.debug(f'Setting up left preconditioner with {N} local degrees of freedom')
+
             # reverse Kronecker product
             if self.spectral.useGPU:
                 import scipy.sparse as sp
@@ -228,18 +244,27 @@ def setup_preconditioner(self, Dirichlet_recombination=True, left_preconditioner
                     R[i * self.ncomponents + j, j * N + i] = 1
 
             self.Pl = self.spectral.sparse_lib.csc_matrix(R, dtype=complex)
+
+            self.logger.debug('Finished setup of left preconditioner')
         else:
             Id = self.spectral.sparse_lib.eye(N)
             Pl_lhs = {comp: {comp: Id} for comp in self.components}
             self.Pl = self._setup_operator(Pl_lhs)
 
         if Dirichlet_recombination and type(self.axes[-1]).__name__ in ['ChebychevHelper', 'UltrasphericalHelper']:
+            self.logger.debug('Using Dirichlet recombination as right preconditioner')
             _Pr = self.spectral.get_Dirichlet_recombination_matrix(axis=-1)
         else:
             _Pr = self.spectral.sparse_lib.eye(N)
 
         Pr_lhs = {comp: {comp: _Pr} for comp in self.components}
-        self.Pr = self._setup_operator(Pr_lhs) @ self.Pl.T
+        operator = self._setup_operator(Pr_lhs)
+
+        try:
+            self.Pr = operator @ self.Pl.T
+        except MemError:
+            self.logger.debug('Setting up right preconditioner on CPU due to memory error')
+            self.Pr = self.spectral.sparse_lib.csc_matrix(operator.get() @ self.Pl.T.get())
 
     def solve_system(self, rhs, dt, u0=None, *args, skip_itransform=False, **kwargs):
         """
@@ -330,6 +355,7 @@ def solve_system(self, rhs, dt, u0=None, *args, skip_itransform=False, **kwargs)
                     import scipy.sparse as sp
 
                     cpu_decomp = sp.linalg.splu(A)
+
                     if self.useGPU:
                         from cupyx.scipy.sparse.linalg import SuperLU