|
1 | | -# Copyright 2016-2022 Swiss National Supercomputing Centre (CSCS/ETH Zurich) |
| 1 | +# Copyright Swiss National Supercomputing Centre (CSCS/ETH Zurich) |
2 | 2 | # ReFrame Project Developers. See the top-level LICENSE file for details. |
3 | 3 | # |
4 | 4 | # SPDX-License-Identifier: BSD-3-Clause |
5 | | - |
6 | 5 | import os |
7 | 6 |
|
8 | 7 | import reframe as rfm |
9 | 8 | import reframe.utility.sanity as sn |
10 | | - |
11 | | - |
12 | | -class LAMMPSCheck(rfm.RunOnlyRegressionTest): |
13 | | - scale = parameter(['small', 'large']) |
14 | | - modules = ['cray-python', 'LAMMPS'] |
15 | | - tags = {'external-resources', 'maintenance', 'production'} |
16 | | - maintainers = ['LM'] |
17 | | - strict_check = False |
18 | | - extra_resources = { |
19 | | - 'switches': { |
20 | | - 'num_switches': 1 |
21 | | - } |
22 | | - } |
23 | | - |
24 | | - @run_after('init') |
25 | | - def setup_by_system(self): |
26 | | - # Reset sources dir relative to the SCS apps prefix |
27 | | - self.sourcesdir = os.path.join(self.current_system.resourcesdir, |
28 | | - 'LAMMPS') |
29 | | - if self.current_system.name in ['eiger', 'pilatus']: |
30 | | - self.valid_prog_environs = ['cpeGNU'] |
31 | | - else: |
32 | | - self.valid_prog_environs = ['builtin'] |
33 | | - |
34 | | - @performance_function('timesteps/s') |
35 | | - def perf(self): |
36 | | - return sn.extractsingle(r'\s+(?P<perf>\S+) timesteps/s', |
37 | | - self.stdout, 'perf', float) |
| 9 | +from uenv import uarch |
| 10 | + |
# Performance references for the LAMMPS run tests.
#
# Layout: test name -> micro-architecture (as returned by ``uarch``) ->
# performance variable name -> ReFrame reference tuple
# ``(value, lower threshold, upper threshold, unit)``.
# The performance variable name has to match the name of the
# performance function defined on the test class.
lammps_references = {
    'lj': {
        'gh200': {
            'time_run': (345, None, 0.05, 's'),
        },
    },
}
| 14 | + |
# Job layout for the LAMMPS run tests.
#
# Layout: test name -> micro-architecture (as returned by ``uarch``) ->
# scheduler settings read by the run test when it prepares its job.
slurm_config = {
    'lj': {
        'gh200': {
            'nodes': 2,
            'ntasks-per-node': 32,
            'walltime': '10m',
            'gpu': True,
        },
    },
}
| 25 | + |
| 26 | + |
class lammps_download(rfm.RunOnlyRegressionTest):
    '''Download the LAMMPS source tarball with ``wget``.

    The tarball is fetched into the stage directory of this test as
    ``LAMMPS_<version>_Source.tar.gz``. Tests that use this class as a
    fixture locate the archive through ``stagedir`` and ``version``, so the
    downloaded file name is always derived from ``version``.
    '''

    descr = 'Download LAMMPS source code'

    # Version tag embedded in the tarball name on the mirror
    version = variable(str, value='20230802.3')

    # Nothing to copy into the stage directory; the archive is downloaded
    sourcesdir = None
    executable = 'wget'
    local = True

    @run_after('init')
    def set_download_url(self):
        '''Build the download URL from the requested ``version``.'''

        # Upstream alternative to the mirror (different file name scheme):
        #   https://download.lammps.org/tars/lammps-2Aug2023.tar.gz
        self.executable_opts = [
            '--quiet',
            'https://jfrog.svc.cscs.ch/artifactory/cscs-reframe-tests/lammps/'
            f'LAMMPS_{self.version}_Source.tar.gz',
        ]

    @sanity_function
    def validate_download(self):
        '''Pass only if ``wget`` exited with status 0.'''
        return sn.assert_eq(self.job.exitcode, 0)
50 | 43 |
|
51 | 44 |
|
52 | 45 | @rfm.simple_test |
53 | | -class LAMMPSGPUCheck(LAMMPSCheck): |
54 | | - valid_systems = [] |
55 | | - executable = 'lmp_mpi' |
56 | | - executable_opts = ['-sf gpu', '-pk gpu 1', '-in in.lj.gpu'] |
57 | | - env_vars = {'CRAY_CUDA_MPS': 1} |
58 | | - num_gpus_per_node = 1 |
59 | | - refs_by_scale = { |
60 | | - 'small': { |
61 | | - 'dom:gpu': {'perf': (3456.792, -0.10, None, 'timesteps/s')}, |
62 | | - 'daint:gpu': {'perf': (1566.979, -0.10, None, 'timesteps/s')} |
63 | | - }, |
64 | | - 'large': { |
65 | | - 'daint:gpu': {'perf': (2108.561, -0.10, None, 'timesteps/s')} |
66 | | - } |
67 | | - } |
68 | | - |
69 | | - @run_after('init') |
70 | | - def setup_by_scale(self): |
71 | | - self.descr = f'LAMMPS GPU check (version: {self.scale})' |
72 | | - if self.scale == 'small': |
73 | | - self.valid_systems += [] |
74 | | - self.num_tasks = 12 |
75 | | - self.num_tasks_per_node = 2 |
76 | | - else: |
77 | | - self.num_tasks = 32 |
78 | | - self.num_tasks_per_node = 2 |
class lammps_build_test(rfm.CompileOnlyRegressionTest):
    '''
    Test LAMMPS build from source using the develop-kokkos view

    The source tarball is provided by the ``lammps_download`` fixture. It
    is unpacked in the stage directory and configured with CMake in a
    ``build`` sub-directory using the Kokkos/CUDA preset. The test passes
    if the ``lmp`` executable exists in the build directory afterwards.
    '''
    descr = 'LAMMPS Build Test'
    valid_prog_environs = ['+lammps-kokkos-dev']
    valid_systems = ['*']
    maintainers = ['SSA']
    sourcesdir = None
    lammps_sources = fixture(lammps_download, scope='session')
    build_system = 'CMake'
    tags = {'uenv'}
    build_locally = False

    @run_before('compile')
    def prepare_build(self):
        '''Configure the CMake build and unpack the downloaded sources.'''

        # Directory created by unpacking the tarball, relative to the
        # build directory
        srcdir = '../lammps-2Aug2023'

        self.build_system.builddir = 'build'
        self.build_system.config_opts = [
            f'-C {srcdir}/cmake/presets/kokkos-cuda.cmake',
            '-DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF',
            '-DKokkos_ARCH_NATIVE=ON',
            '-DKokkos_ARCH_PASCAL60=OFF',
            '-DKokkos_ARCH_HOPPER90=ON',
            f'{srcdir}/cmake/',
        ]
        self.build_system.max_concurrency = 64

        # The archive lives in the stage directory of the download fixture
        tarsource = os.path.join(
            self.lammps_sources.stagedir,
            f'LAMMPS_{self.lammps_sources.version}_Source.tar.gz',
        )
        # Extract source code
        self.prebuild_cmds = [f'tar zxf {tarsource}']

    @sanity_function
    def validate_test(self):
        '''Pass only if the ``lmp`` executable was produced by the build.

        The path of the executable is also stored in
        ``self.lammps_executable``.
        '''
        self.lammps_executable = os.path.join(self.stagedir, 'build', 'lmp')

        # Use an assertion instead of a bare boolean so that a failed build
        # is reported with a meaningful message
        return sn.assert_true(
            os.path.isfile(self.lammps_executable),
            msg=f'LAMMPS executable not found: {self.lammps_executable}'
        )
81 | 83 |
|
82 | 84 |
|
83 | 85 | @rfm.simple_test |
84 | | -class LAMMPSCPUCheck(LAMMPSCheck): |
85 | | - valid_systems = ['eiger:mc', 'pilatus:mc'] |
86 | | - refs_by_scale = { |
87 | | - 'small': { |
88 | | - 'eiger:mc': {'perf': (3807.095, -0.10, None, 'timesteps/s')}, |
89 | | - 'pilatus:mc': {'perf': (4828.986, -0.10, None, 'timesteps/s')} |
90 | | - }, |
91 | | - 'large': { |
92 | | - 'eiger:mc': {'perf': (4922.81, -0.10, None, 'timesteps/s')}, |
93 | | - 'pilatus:mc': {'perf': (7247.484, -0.10, None, 'timesteps/s')} |
94 | | - } |
95 | | - } |
96 | | - |
97 | | - @run_after('init') |
98 | | - def setup_by_scale(self): |
99 | | - self.descr = f'LAMMPS CPU check (version: {self.scale})' |
100 | | - if self.current_system.name in ['eiger', 'pilatus']: |
101 | | - self.executable = 'lmp_mpi' |
102 | | - self.executable_opts = ['-in in.lj.cpu'] |
103 | | - else: |
104 | | - self.executable = 'lmp_omp' |
105 | | - self.executable_opts = ['-sf omp', '-pk omp 1', '-in in.lj.cpu'] |
106 | | - |
107 | | - if self.scale == 'small': |
108 | | - self.valid_systems += [] |
109 | | - self.num_tasks = 216 |
110 | | - self.num_tasks_per_node = 36 |
111 | | - else: |
112 | | - self.num_tasks_per_node = 36 |
113 | | - self.num_tasks = 576 |
114 | | - |
115 | | - if self.current_system.name == 'eiger': |
116 | | - self.num_tasks_per_node = 128 |
117 | | - self.num_tasks = 256 if self.scale == 'small' else 512 |
118 | | - |
119 | | - self.reference = self.refs_by_scale[self.scale] |
class lammps_gpu_test(rfm.RunOnlyRegressionTest):
    """
    Test LAMMPS run using the run-gpu:gpu view
    Untested views:
        build-gpu: develop-gpu
        build-kokkos: develop-kokkos
        run-kokkos: kokkos

    The job layout is taken from ``slurm_config`` and the performance
    reference from ``lammps_references``; both are looked up by
    ``test_name`` and by the micro-architecture of the current partition.
    The test is skipped on partitions without a job layout.
    """
    executable = './mps-wrapper.sh lmp'
    valid_prog_environs = ['+lammps-gpu-prod']
    valid_systems = ["*"]
    maintainers = ["SSA"]

    # Name of the input deck (``<test_name>.in``) and key into the
    # module-level configuration and reference tables
    test_name = variable(str, value='lj')

    # Expected value of the energy column checked by the sanity function
    energy_reference = -4.620456

    @run_before("run")
    def prepare_run(self):
        """Set up the job layout and LAMMPS options for this partition."""
        self.uarch = uarch(self.current_partition)

        # valid_systems is '*', so the test may land on a partition that
        # has no job layout; skip instead of failing with a KeyError
        config = slurm_config[self.test_name].get(self.uarch)
        self.skip_if(
            config is None,
            f'no job configuration for test {self.test_name!r} '
            f'on uarch {self.uarch!r}'
        )

        self.extra_resources = {"gres": {"gpu": 4}}
        self.job.options = [f'--nodes={config["nodes"]}']
        self.num_tasks_per_node = config["ntasks-per-node"]
        self.num_tasks = config["nodes"] * self.num_tasks_per_node
        # ReFrame's attribute is `num_tasks_per_core`; a differently named
        # attribute is silently ignored by the scheduler backend
        self.num_tasks_per_core = 1
        self.time_limit = config["walltime"]
        self.executable_opts = [f'-i {self.test_name}.in']

        if self.uarch == "gh200":
            self.env_vars["MPICH_GPU_SUPPORT_ENABLED"] = "1"

    @run_before("run")
    def prepare_reference(self):
        """Install the performance reference for this partition, if any."""
        self.uarch = uarch(self.current_partition)
        references = lammps_references[self.test_name].get(self.uarch)
        if references is not None:
            self.reference = {
                self.current_partition.fullname: references
            }

    @sanity_function
    def assert_energy_diff(self):
        """Check that the run completed and the energy matches.

        The run must print ``Total wall time`` and the energy extracted
        from the output line of step 1000 must be within 1e-4 of
        ``energy_reference``.
        """
        successful_termination = \
            sn.assert_found(r"Total wall time", self.stdout)

        # The energy is the 7th column of the line that starts with the
        # step number 1000
        energy = sn.extractsingle(
            r'^\s*1000(\s+\S+){5}\s+(?P<energy>-?\d+\.\d+)\s+',
            self.stdout, "energy", float)
        energy_diff = sn.abs(energy - self.energy_reference)
        correct_energy = sn.assert_lt(energy_diff, 1e-4)

        return sn.all([successful_termination, correct_energy])

    # INFO: The name of this function needs to match with the reference dict!
    @performance_function('s')
    def time_run(self):
        """Return the total wall time reported by LAMMPS, in seconds."""
        regex = r'Total wall time: (?P<hh>\S+):(?P<mm>\S+):(?P<ss>\S+)'
        hh = sn.extractsingle(regex, self.stdout, 'hh', int)
        mm = sn.extractsingle(regex, self.stdout, 'mm', int)
        ss = sn.extractsingle(regex, self.stdout, 'ss', int)
        return (hh*3600 + mm*60 + ss)
0 commit comments