Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 53 additions & 3 deletions pySDC/implementations/problem_classes/generic_spectral.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def __init__(
max_cached_factorizations=12,
spectral_space=True,
real_spectral_coefficients=False,
heterogeneous=False,
debug=False,
):
"""
Expand All @@ -81,6 +82,7 @@ def __init__(
max_cached_factorizations (int): Number of matrix decompositions to cache before starting eviction
spectral_space (bool): If yes, the solution will not be transformed back after solving and evaluating the RHS, and is expected as input in spectral space to these functions
real_spectral_coefficients (bool): If yes, allow only real values in spectral space, otherwise, allow complex.
heterogeneous (bool): If yes, perform memory intensive sparse matrix operations on CPU
debug (bool): Make additional tests at extra computational cost
"""
solver_args = {} if solver_args is None else solver_args
Expand All @@ -100,6 +102,7 @@ def __init__(
'comm',
'spectral_space',
'real_spectral_coefficients',
'heterogeneous',
'debug',
localVars=locals(),
)
Expand All @@ -126,6 +129,26 @@ def __init__(

self.cached_factorizations = {}

if self.heterogeneous:
self.__heterogeneous_setup = False

def heterogeneous_setup(self):
if self.heterogeneous and not self.__heterogeneous_setup:
CPU_only = ['BC_line_zero_matrix', 'BCs']
both = ['Pl', 'Pr', 'L', 'M']

if self.useGPU:
for key in CPU_only:
setattr(self.spectral, key, getattr(self.spectral, key).get())

for key in both:
setattr(self, f'{key}_CPU', getattr(self, key).get())
else:
for key in both:
setattr(self, f'{key}_CPU', getattr(self, key))

self.__heterogeneous_setup = True

def __getattr__(self, name):
"""
Pass requests on to the helper if they are not directly attributes of this class for convenience.
Expand Down Expand Up @@ -232,6 +255,8 @@ def solve_system(self, rhs, dt, u0=None, *args, skip_itransform=False, **kwargs)

sp = self.spectral.sparse_lib

self.heterogeneous_setup()

if self.spectral_space:
rhs_hat = rhs.copy()
if u0 is not None:
Expand All @@ -256,8 +281,19 @@ def solve_system(self, rhs, dt, u0=None, *args, skip_itransform=False, **kwargs)
rhs_hat = self.Pl @ rhs_hat.flatten()

if dt not in self.cached_factorizations.keys() or not self.solver_type.lower() == 'cached_direct':
A = self.M + dt * self.L
A = self.Pl @ self.spectral.put_BCs_in_matrix(A) @ self.Pr
if self.heterogeneous:
M = self.M_CPU
L = self.L_CPU
Pl = self.Pl_CPU
Pr = self.Pr_CPU
else:
M = self.M
L = self.L
Pl = self.Pl
Pr = self.Pr

A = M + dt * L
A = Pl @ self.spectral.put_BCs_in_matrix(A) @ Pr

# if A.shape[0] < 200e20:
# import matplotlib.pyplot as plt
Expand Down Expand Up @@ -289,7 +325,21 @@ def solve_system(self, rhs, dt, u0=None, *args, skip_itransform=False, **kwargs)
if len(self.cached_factorizations) >= self.max_cached_factorizations:
self.cached_factorizations.pop(list(self.cached_factorizations.keys())[0])
self.logger.debug(f'Evicted matrix factorization for {dt=:.6f} from cache')
self.cached_factorizations[dt] = self.spectral.linalg.factorized(A)

if self.heterogeneous:
import scipy.sparse as sp

cpu_decomp = sp.linalg.splu(A)
if self.useGPU:
from cupyx.scipy.sparse.linalg import SuperLU

solver = SuperLU(cpu_decomp).solve
else:
solver = cpu_decomp.solve
else:
solver = self.spectral.linalg.factorized(A)

self.cached_factorizations[dt] = solver
self.logger.debug(f'Cached matrix factorization for {dt=:.6f}')
self.work_counters['factorizations']()

Expand Down
65 changes: 64 additions & 1 deletion pySDC/tests/test_problems/test_RayleighBenard3D.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,48 @@ def test_Poisson_problem_w():
assert np.allclose(u_exact[i], u[i]), f'Unexpected solution in component {comp}'


@pytest.mark.mpi4py
@pytest.mark.parametrize('solver_type', ['gmres+ilu', 'bicgstab+ilu'])
@pytest.mark.parametrize('N', [4, 16])
@pytest.mark.parametrize('left_preconditioner', [True, False])
@pytest.mark.parametrize('Dirichlet_recombination', [True, False])
def test_solver_convergence(solver_type, N, left_preconditioner, Dirichlet_recombination):
import numpy as np
from pySDC.implementations.problem_classes.RayleighBenard3D import RayleighBenard3D

fill_factor = 5 if left_preconditioner or Dirichlet_recombination else 10

P = RayleighBenard3D(
nx=N,
ny=N,
nz=N,
solver_type=solver_type,
solver_args={'atol': 1e-10, 'rtol': 0},
preconditioner_args={'fill_factor': fill_factor, 'drop_tol': 1e-4},
left_preconditioner=left_preconditioner,
Dirichlet_recombination=Dirichlet_recombination,
)
P_direct = RayleighBenard3D(nx=N, ny=N, nz=N, solver_type='cached_direct')

u0 = P.u_exact(0, noise_level=1.0e-3)

dt = 1.0e-3
u_direct = P_direct.solve_system(u0.copy(), dt)
u_good_ig = P.solve_system(u0.copy(), dt, u0=u_direct.copy())
assert P.work_counters[P.solver_type].niter == 0
assert np.allclose(u_good_ig, u_direct)

u = P.solve_system(u0.copy(), dt, u0=u0.copy())

error = abs(u - u_direct)
assert error <= P.solver_args['atol'] * 1e3, error

if 'ilu' in solver_type.lower():
size_LU = P_direct.cached_factorizations[dt].__sizeof__()
size_iLU = P.cached_factorizations[dt].__sizeof__()
assert size_iLU < size_LU, 'iLU does not require less memory than LU!'


@pytest.mark.mpi4py
def test_libraries():
from pySDC.implementations.problem_classes.RayleighBenard3D import RayleighBenard3D
Expand Down Expand Up @@ -231,9 +273,30 @@ def test_banded_matrix(preconditioning):
), 'One-sided bandwidth of LU decomposition is larger than that of the full matrix!'


@pytest.mark.cupy
def test_heterogeneous_implementation():
from pySDC.implementations.problem_classes.RayleighBenard3D import RayleighBenard3D

params = {'nx': 2, 'ny': 2, 'nz': 2, 'useGPU': True}
gpu = RayleighBenard3D(**params)
het = RayleighBenard3D(**params, heterogeneous=True)

xp = gpu.xp

u0 = gpu.u_exact()

f = [me.eval_f(u0) for me in [gpu, het]]
assert xp.allclose(*f)

un = [me.solve_system(u0, 1e-3) for me in [gpu, het]]
assert xp.allclose(*un)


if __name__ == '__main__':
# test_eval_f(2**2, 2**1, 'x', False)
# test_libraries()
# test_Poisson_problems(4, 'u')
# test_Poisson_problem_w()
test_banded_matrix(False)
# test_solver_convergence('bicgstab+ilu', 32, False, True)
# test_banded_matrix(False)
test_heterogeneous_implementation()
Loading