Skip to content

Commit 655829f

Browse files
author
Thomas Baumann
committed
Getting started on RBC in GPU project
1 parent b2030e4 commit 655829f

File tree

12 files changed

+538
-67
lines changed

12 files changed

+538
-67
lines changed

pySDC/helpers/NCCL_communicator.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -113,3 +113,7 @@ def Bcast(self, buf, root=0):
113113
stream = cp.cuda.get_current_stream()
114114

115115
self.commNCCL.bcast(buff=buf.data.ptr, count=count, datatype=dtype, root=root, stream=stream.ptr)
116+
117+
def Barrier(self):
118+
cp.cuda.get_current_stream().synchronize()
119+
self.commMPI.Barrier()

pySDC/helpers/spectral_helper.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1748,8 +1748,6 @@ def get_aligned(self, u, axis_in, axis_out, fft=None, forward=False, **kwargs):
17481748
if self.comm.size == 1:
17491749
return u.copy()
17501750

1751-
fft = self.get_fft(**kwargs) if fft is None else fft
1752-
17531751
global_fft = self.get_fft(**kwargs)
17541752
axisA = [me.axisA for me in global_fft.transfer]
17551753
axisB = [me.axisB for me in global_fft.transfer]
@@ -1787,6 +1785,8 @@ def get_aligned(self, u, axis_in, axis_out, fft=None, forward=False, **kwargs):
17871785
else: # go the potentially slower route of not reusing transfer classes
17881786
from mpi4py_fft import newDistArray
17891787

1788+
fft = self.get_fft(**kwargs) if fft is None else fft
1789+
17901790
_in = newDistArray(fft, forward).redistribute(axis_in)
17911791
_in[...] = u
17921792

pySDC/implementations/convergence_controller_classes/estimate_polynomial_error.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -150,7 +150,7 @@ def post_iteration_processing(self, controller, S, **kwargs):
150150
if self.comm:
151151
buf = np.array(abs(u_inter - high_order_sol) if self.comm.rank == rank else 0.0)
152152
self.comm.Bcast(buf, root=rank)
153-
L.status.error_embedded_estimate = buf
153+
L.status.error_embedded_estimate = float(buf)
154154
else:
155155
L.status.error_embedded_estimate = abs(u_inter - high_order_sol)
156156

pySDC/implementations/hooks/log_solution.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -188,3 +188,15 @@ def post_step(self, step, level_number):
188188
if L.time + L.dt >= self.t_next_log and not step.status.restart:
189189
super().post_step(step, level_number)
190190
self.t_next_log = max([L.time + L.dt, self.t_next_log]) + self.time_increment
191+
192+
def pre_run(self, step, level_number):
193+
L = step.levels[level_number]
194+
L.uend = L.u[0]
195+
196+
def process_solution(L):
197+
return {
198+
**type(self).process_solution(L),
199+
't': L.time,
200+
}
201+
202+
self.log_to_file(step, level_number, type(self).logging_condition(L), process_solution=process_solution)

pySDC/implementations/problem_classes/RayleighBenard.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from pySDC.core.convergence_controller import ConvergenceController
77
from pySDC.core.hooks import Hooks
88
from pySDC.implementations.convergence_controller_classes.check_convergence import CheckConvergence
9+
from pySDC.core.problem import WorkCounter
910

1011

1112
class RayleighBenard(GenericSpectralLinear):
@@ -175,6 +176,8 @@ def __init__(
175176
)
176177
self.setup_BCs()
177178

179+
self.work_counters['rhs'] = WorkCounter()
180+
178181
def eval_f(self, u, *args, **kwargs):
179182
f = self.f_init
180183

@@ -225,6 +228,7 @@ def eval_f(self, u, *args, **kwargs):
225228
else:
226229
f.expl[:] = self.itransform(self.transform(fexpl_pad, padding=padding)).real
227230

231+
self.work_counters['rhs']()
228232
return f
229233

230234
def u_exact(self, t=0, noise_level=1e-3, seed=99):

pySDC/implementations/problem_classes/generic_spectral.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -28,21 +28,26 @@ class attributes of the spectral helper. This class will automatically switch th
2828
Pr (sparse matrix): Right preconditioner
2929
"""
3030

31-
@classmethod
32-
def setup_GPU(cls):
31+
def setup_GPU(self):
3332
"""switch to GPU modules"""
3433
import cupy as cp
3534
from pySDC.implementations.datatype_classes.cupy_mesh import cupy_mesh, imex_cupy_mesh
3635
from pySDC.implementations.datatype_classes.mesh import mesh, imex_mesh
3736

38-
cls.dtype_u = cupy_mesh
37+
self.dtype_u = cupy_mesh
3938

4039
GPU_versions = {
4140
mesh: cupy_mesh,
4241
imex_mesh: imex_cupy_mesh,
4342
}
4443

45-
cls.dtype_f = GPU_versions[cls.dtype_f]
44+
self.dtype_f = GPU_versions[self.dtype_f]
45+
46+
if self.comm is not None:
47+
from pySDC.helpers.NCCL_communicator import NCCLComm
48+
49+
if not isinstance(self.comm, NCCLComm):
50+
self.__dict__['comm'] = NCCLComm(self.comm)
4651

4752
def __init__(
4853
self,
@@ -319,7 +324,6 @@ def compute_residual_DAE(self, stage=''):
319324
res[m] += L.tau[m]
320325
# use abs function from data type here
321326
res_norm.append(abs(res[m]))
322-
# print(m, [abs(me) for me in res[m]], [abs(me) for me in L.u[0] - L.u[m + 1]])
323327

324328
# find maximal residual over the nodes
325329
if L.params.residual_type == 'full_abs':

pySDC/projects/GPU/analysis_scripts/parallel_scaling.py

Lines changed: 108 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,7 @@
44
from pySDC.helpers.stats_helper import get_sorted
55
from pySDC.projects.GPU.configs.base_config import get_config
66
from pySDC.projects.GPU.etc.generate_jobscript import write_jobscript, PROJECT_PATH
7-
from pySDC.helpers.plot_helper import setup_mpl, figsize_by_journal
8-
9-
setup_mpl()
7+
from pySDC.helpers.plot_helper import figsize_by_journal
108

119

1210
class ScalingConfig(object):
@@ -22,7 +20,6 @@ class ScalingConfig(object):
2220
max_steps_space = None
2321
max_steps_space_weak = None
2422
sbatch_options = []
25-
max_nodes = 9999
2623

2724
def __init__(self, space_time_parallel):
2825
if space_time_parallel in ['False', False]:
@@ -34,30 +31,19 @@ def get_resolution_and_tasks(self, strong, i):
3431
if strong:
3532
return self.base_resolution, [1, self._tasks_time, 2**i]
3633
else:
37-
return self.base_resolution_weak * int(self._tasks_time ** (1.0 / self.ndim)) * (2**i), [
38-
1,
39-
self._tasks_time,
40-
(2 * self.ndim) ** i,
41-
]
34+
return self.base_resolution_weak * (2**i), [1, self._tasks_time, (2 * self.ndim) ** i]
4235

4336
def run_scaling_test(self, strong=True):
4437
max_steps = self.max_steps_space if strong else self.max_steps_space_weak
4538
for i in range(max_steps):
4639
res, procs = self.get_resolution_and_tasks(strong, i)
4740

48-
_nodes = np.prod(procs) // self.tasks_per_node
49-
if _nodes > self.max_nodes:
50-
break
51-
52-
sbatch_options = [
53-
f'-n {np.prod(procs)}',
54-
f'-p {self.partition}',
55-
f'--tasks-per-node={self.tasks_per_node}',
56-
] + self.sbatch_options
57-
srun_options = [f'--tasks-per-node={self.tasks_per_node}']
41+
sbatch_options = [f'-n {np.prod(procs)}', f'-p {self.partition}'] + self.sbatch_options
5842
if self.useGPU:
59-
srun_options += ['--cpus-per-task=4', '--gpus-per-task=1']
43+
srun_options = ['--cpus-per-task=4', '--gpus-per-task=1'] + self.sbatch_options
6044
sbatch_options += ['--cpus-per-task=4', '--gpus-per-task=1']
45+
else:
46+
srun_options = []
6147

6248
procs = (''.join(f'{me}/' for me in procs))[:-1]
6349
command = f'run_experiment.py --mode=run --res={res} --config={self.config} --procs={procs}'
@@ -67,8 +53,10 @@ def run_scaling_test(self, strong=True):
6753

6854
write_jobscript(sbatch_options, srun_options, command, self.cluster)
6955

70-
def plot_scaling_test(self, strong, ax, plot_ideal=False, **plotting_params): # pragma: no cover
56+
def plot_scaling_test(self, strong, ax, plot_ideal=False, plot_range=False, **plotting_params): # pragma: no cover
7157
timings = {}
58+
max_timings = []
59+
min_timings = []
7260

7361
max_steps = self.max_steps_space if strong else self.max_steps_space_weak
7462
for i in range(max_steps):
@@ -84,20 +72,27 @@ def plot_scaling_test(self, strong, ax, plot_ideal=False, **plotting_params): #
8472
stats = pickle.load(file)
8573

8674
timing_step = get_sorted(stats, type='timing_step')
87-
timings[np.prod(procs) / self.tasks_per_node] = np.mean([me[1] for me in timing_step])
75+
76+
key = np.prod(procs) / self.tasks_per_node
77+
timings[key] = np.mean([me[1] for me in timing_step])
78+
max_timings += [np.max([me[1] for me in timing_step]) - timings[key]]
79+
min_timings += [timings[key] - np.min([me[1] for me in timing_step])]
8880
except FileNotFoundError:
8981
pass
9082

9183
if plot_ideal:
92-
if strong:
93-
ax.loglog(
94-
timings.keys(),
95-
list(timings.values())[0] * list(timings.keys())[0] / np.array(list(timings.keys())),
96-
ls='--',
97-
color='grey',
98-
label='ideal',
99-
)
100-
ax.loglog(timings.keys(), timings.values(), **plotting_params)
84+
ax.loglog(
85+
timings.keys(),
86+
list(timings.values())[0] * list(timings.keys())[0] / np.array(list(timings.keys())),
87+
ls='--',
88+
color='grey',
89+
label='ideal',
90+
)
91+
if plot_range:
92+
yerr = [min_timings, max_timings]
93+
ax.errorbar(timings.keys(), timings.values(), yerr=yerr, **plotting_params)
94+
else:
95+
ax.loglog(timings.keys(), timings.values(), **plotting_params)
10196
ax.set_xlabel(r'$N_\mathrm{nodes}$')
10297
ax.set_ylabel(r'$t_\mathrm{step}$')
10398

@@ -106,61 +101,108 @@ class CPUConfig(ScalingConfig):
106101
cluster = 'jusuf'
107102
partition = 'batch'
108103
tasks_per_node = 16
109-
max_nodes = 144
104+
sbatch_options = ['--tasks-per-node=16']
110105

111106

112107
class GPUConfig(ScalingConfig):
113108
cluster = 'booster'
114109
partition = 'booster'
115110
tasks_per_node = 4
116111
useGPU = True
117-
max_nodes = 936
118112

119113

120114
class GrayScottSpaceScalingCPU(CPUConfig, ScalingConfig):
121-
base_resolution = 8192
122-
base_resolution_weak = 512
115+
base_resolution = 4096
116+
base_resolution_weak = 256
123117
config = 'GS_scaling'
124-
max_steps_space = 11
125-
max_steps_space_weak = 11
126-
tasks_time = 4
127-
sbatch_options = ['--time=3:30:00']
118+
max_steps_space = 10
119+
max_steps_space_weak = 6
120+
tasks_time = 3
128121

129122

130123
class GrayScottSpaceScalingGPU(GPUConfig, ScalingConfig):
131-
base_resolution_weak = 1024
132-
base_resolution = 8192
124+
base_resolution_weak = 256 * 2
125+
base_resolution = 4096
133126
config = 'GS_scaling'
134-
max_steps_space = 7
135-
max_steps_space_weak = 5
127+
max_steps_space = 6
128+
max_steps_space_weak = 4
129+
tasks_time = 3
130+
131+
132+
class RayleighBenardSpaceScalingCPU(CPUConfig, ScalingConfig):
133+
base_resolution = 1024
134+
base_resolution_weak = 256
135+
config = 'RBC_scaling'
136+
max_steps_space = 10
137+
max_steps_space_weak = 6
138+
tasks_time = 4
139+
140+
141+
class RayleighBenardSpaceScalingGPU(GPUConfig, ScalingConfig):
142+
base_resolution = 1024
143+
base_resolution_weak = 256 * 2
144+
config = 'RBC_scaling'
145+
max_steps_space = 6
146+
max_steps_space_weak = 4
147+
tasks_time = 4
148+
149+
150+
class RayleighBenardDedalusComparison(CPUConfig, ScalingConfig):
151+
base_resolution = 256
152+
config = 'RBC_Tibo'
153+
max_steps_space = 6
154+
tasks_time = 4
155+
156+
157+
class RayleighBenardDedalusComparisonGPU(GPUConfig, ScalingConfig):
158+
base_resolution_weak = 256
159+
base_resolution = 256
160+
config = 'RBC_Tibo'
161+
max_steps_space = 4
162+
max_steps_space_weak = 4
136163
tasks_time = 4
137-
max_nodes = 64
138164

139165

140166
def plot_scalings(strong, problem, kwargs): # pragma: no cover
141-
if problem == 'GS':
142-
fig, ax = plt.subplots(figsize=figsize_by_journal('JSC_beamer', 1, 0.45))
167+
plottings_params = [
168+
{'plot_ideal': strong, 'marker': 'x', 'label': 'CPU space parallel'},
169+
{'marker': '>', 'label': 'CPU space time parallel'},
170+
{'marker': '^', 'label': 'GPU space parallel'},
171+
{'marker': 'o', 'label': 'GPU space time parallel'},
172+
]
173+
fig, ax = plt.subplots(figsize=figsize_by_journal('JSC_beamer', 1, 0.45))
143174

144-
plottings_params = [
145-
{'plot_ideal': True, 'marker': 'x', 'label': 'CPU space parallel'},
146-
{'marker': '>', 'label': 'CPU space time parallel'},
147-
{'marker': '^', 'label': 'GPU space parallel'},
148-
{'marker': '<', 'label': 'GPU space time parallel'},
149-
]
175+
if problem == 'GS':
150176
configs = [
151177
GrayScottSpaceScalingCPU(space_time_parallel=False),
152178
GrayScottSpaceScalingCPU(space_time_parallel=True),
153179
GrayScottSpaceScalingGPU(space_time_parallel=False),
154180
GrayScottSpaceScalingGPU(space_time_parallel=True),
155181
]
182+
elif problem == 'RBC':
183+
configs = [
184+
RayleighBenardSpaceScalingCPU(space_time_parallel=False),
185+
RayleighBenardSpaceScalingCPU(space_time_parallel=True),
186+
RayleighBenardSpaceScalingGPU(space_time_parallel=False),
187+
RayleighBenardSpaceScalingGPU(space_time_parallel=True),
188+
]
189+
elif problem == 'RBC_dedalus':
190+
configs = [
191+
RayleighBenardDedalusComparison(space_time_parallel=False),
192+
RayleighBenardDedalusComparison(space_time_parallel=True),
193+
RayleighBenardDedalusComparisonGPU(space_time_parallel=False),
194+
RayleighBenardDedalusComparisonGPU(space_time_parallel=True),
195+
]
156196

157-
for config, params in zip(configs, plottings_params):
158-
config.plot_scaling_test(strong=strong, ax=ax, **params)
159-
ax.legend(frameon=False)
160-
fig.savefig(f'{PROJECT_PATH}/plots/{"strong" if strong else "weak"}_scaling_{problem}.pdf', bbox_inches='tight')
161197
else:
162198
raise NotImplementedError
163199

200+
for config, params in zip(configs, plottings_params):
201+
config.plot_scaling_test(strong=strong, ax=ax, **params)
202+
ax.legend(frameon=False)
203+
plt.show()
204+
fig.savefig(f'{PROJECT_PATH}/plots/{"strong" if strong else "weak"}_scaling_{problem}.pdf', bbox_inches='tight')
205+
164206

165207
if __name__ == '__main__':
166208
import argparse
@@ -181,6 +223,16 @@ def plot_scalings(strong, problem, kwargs): # pragma: no cover
181223
configClass = GrayScottSpaceScalingCPU
182224
else:
183225
configClass = GrayScottSpaceScalingGPU
226+
elif args.problem == 'RBC':
227+
if args.XPU == 'CPU':
228+
configClass = RayleighBenardSpaceScalingCPU
229+
else:
230+
configClass = RayleighBenardSpaceScalingGPU
231+
elif args.problem == 'RBC_dedalus':
232+
if args.XPU == 'CPU':
233+
configClass = RayleighBenardDedalusComparison
234+
else:
235+
configClass = RayleighBenardDedalusComparisonGPU
184236
else:
185237
raise NotImplementedError(f'Don\'t know problem {args.problem!r}')
186238

0 commit comments

Comments
 (0)