Skip to content

Commit d69d03d

Browse files
author
Luca
committed
Merge branch 'master' of github.com:eth-cscs/reframe into namd-pilatus
2 parents 5dc9998 + bb71c5e commit d69d03d

File tree

19 files changed

+509
-186
lines changed

19 files changed

+509
-186
lines changed

cscs-checks/apps/spark/spark_check.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,22 @@
1212

1313
@rfm.simple_test
1414
class SparkCheck(rfm.RunOnlyRegressionTest):
15+
variant = parameter(['spark', 'pyspark'])
16+
1517
def __init__(self):
16-
self.descr = 'Simple calculation of pi with Spark'
18+
self.descr = f'Simple calculation of pi with {self.variant}'
1719
self.valid_systems = ['daint:gpu', 'daint:mc',
1820
'dom:gpu', 'dom:mc']
1921
self.valid_prog_environs = ['builtin']
2022
self.modules = ['Spark']
21-
self.sourcesdir = None
2223
self.prerun_cmds = ['start-all.sh']
2324
self.postrun_cmds = ['stop-all.sh']
24-
self.num_tasks = 2
25+
self.num_tasks = 3
2526
self.num_tasks_per_node = 1
2627
pi_value = sn.extractsingle(r'Pi is roughly\s+(?P<pi>\S+)',
2728
self.stdout, 'pi', float)
2829
self.sanity_patterns = sn.assert_lt(sn.abs(pi_value - math.pi), 0.01)
29-
self.maintainers = ['TM', 'TR']
30+
self.maintainers = ['TM', 'RS']
3031
self.tags = {'production'}
3132

3233
@rfm.run_before('run')
@@ -39,16 +40,24 @@ def prepare_run(self):
3940
exec_cores = 9
4041

4142
self.variables = {
42-
'SPARK_WORKER_CORES': '%s' % num_workers,
43+
'SPARK_WORKER_CORES': str(num_workers),
4344
'SPARK_LOCAL_DIRS': '"/tmp"',
4445
}
45-
self.executable = (
46-
f'spark-submit --conf spark.default.parallelism={num_workers} '
47-
f'--conf spark.executor.cores={exec_cores} '
48-
f'--conf spark.executor.memory=15g --master $SPARKURL '
49-
f'--class org.apache.spark.examples.SparkPi '
50-
f'$EBROOTSPARK/examples/jars/spark-examples*.jar 10000;'
51-
)
46+
self.executable = 'spark-submit'
47+
self.executable_opts = [
48+
f'--conf spark.default.parallelism={num_workers}',
49+
f'--conf spark.executor.cores={exec_cores}',
50+
f'--conf spark.executor.memory=15g',
51+
f'--master $SPARKURL'
52+
]
53+
if self.variant == 'spark':
54+
self.executable_opts += [
55+
'--class org.apache.spark.examples.SparkPi',
56+
'$EBROOTSPARK/examples/jars/spark-examples*.jar 10000'
57+
]
58+
else:
59+
self.executable_opts.append('spark_pi.py')
60+
5261
# The job launcher has to be changed since the `spark-submit`
5362
# script is not used with srun.
5463
self.job.launcher = getlauncher('local')()
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import random
2+
from pyspark import SparkContext, SparkConf
3+
4+
5+
conf = SparkConf().setAppName('pyspark')
6+
sc = SparkContext(conf=conf)
7+
NUM_SAMPLES = 10000000
8+
9+
10+
def inside(p):
11+
x, y = random.random(), random.random()
12+
return x*x + y*y < 1
13+
14+
15+
if __name__ == '__main__':
16+
count = sc.parallelize(range(0, NUM_SAMPLES)).filter(inside).count()
17+
print('Pi is roughly %f' % (4.0 * count / NUM_SAMPLES))

cscs-checks/microbenchmarks/cpu/dgemm/dgemm.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,10 @@ def __init__(self):
3636
self.build_system.cflags = ['-O3']
3737
self.sys_reference = {
3838
'daint:gpu': (300.0, -0.15, None, 'Gflop/s'),
39-
'daint:mc': (860.0, -0.15, None, 'Gflop/s'),
39+
'daint:mc': (1040.0, -0.15, None, 'Gflop/s'),
4040
'dom:gpu': (300.0, -0.15, None, 'Gflop/s'),
41-
'dom:mc': (860.0, -0.15, None, 'Gflop/s'),
42-
43-
# FIXME: This needs further investigation (see SD-51352)
44-
'eiger:mc': (650.0, -0.15, None, 'Gflop/s'),
41+
'dom:mc': (1040.0, -0.15, None, 'Gflop/s'),
42+
'eiger:mc': (3200.0, -0.15, None, 'Gflop/s'),
4543
}
4644
self.maintainers = ['AJ', 'VH']
4745
self.tags = {'benchmark', 'diagnostic', 'craype'}
@@ -80,7 +78,10 @@ def set_tasks(self):
8078

8179
if self.num_cpus_per_task:
8280
self.variables = {
83-
'OMP_NUM_THREADS': str(self.num_cpus_per_task)
81+
'OMP_NUM_THREADS': str(self.num_cpus_per_task),
82+
'OMP_BIND': 'cores',
83+
'OMP_PROC_BIND': 'spread',
84+
'OMP_SCHEDULE': 'static'
8485
}
8586

8687
@sn.sanity_function

cscs-checks/microbenchmarks/cpu/dgemm/src/dgemm.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,11 @@ int main(int argc, char* argv[])
4949
printf("%s: LOOP COUNT\t\t\t:\t%d \n", hostname, LOOP_COUNT);
5050
printf("\n");
5151

52+
#pragma omp parallel for
5253
for (i=0; i<m*k ; ++i) A[i] = i%3+1;
54+
#pragma omp parallel for
5355
for (i=0; i<k*n ; ++i) B[i] = i%3+1;
56+
#pragma omp parallel for
5457
for (i=0; i<m*n ; ++i) C[i] = i%3+1;
5558

5659
gflop = (2.0 * m * n * k + 3.0 * m * n) * 1E-9;

cscs-checks/system/io/ior_check.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ def __init__(self, base_dir):
2323
self.prerun_cmds = ['mkdir -p ' + self.test_dir]
2424
self.test_file = os.path.join(self.test_dir, 'ior')
2525
self.fs = {
26+
'/scratch/e1000': {
27+
'valid_systems': ['eiger:mc'],
28+
'eiger': {
29+
'num_tasks': 10,
30+
}
31+
},
2632
'/scratch/snx3000tds': {
2733
'valid_systems': ['dom:gpu', 'dom:mc'],
2834
'dom': {
@@ -105,7 +111,7 @@ def __init__(self, base_dir):
105111

106112
self.maintainers = ['SO', 'GLR']
107113

108-
systems_to_test = ['dom', 'daint']
114+
systems_to_test = ['dom', 'daint', 'eiger']
109115
if self.current_system.name in systems_to_test:
110116
self.tags |= {'production', 'external-resources'}
111117

@@ -115,7 +121,8 @@ def set_exec_opts(self):
115121
self.executable_opts += ['-o', self.test_file]
116122

117123

118-
@rfm.parameterized_test(['/scratch/snx3000tds'],
124+
@rfm.parameterized_test(['/scratch/e1000'],
125+
['/scratch/snx3000tds'],
119126
['/scratch/snx3000'],
120127
['/users'],
121128
['/scratch/shared/fulen'])
@@ -132,7 +139,8 @@ def __init__(self, base_dir):
132139
self.tags |= {'write'}
133140

134141

135-
@rfm.parameterized_test(['/scratch/snx3000tds'],
142+
@rfm.parameterized_test(['/scratch/e1000'],
143+
['/scratch/snx3000tds'],
136144
['/scratch/snx3000'],
137145
['/users'],
138146
['/scratch/shared/fulen'])

docs/Makefile

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ SPHINXOPTS =
88
SPHINXBUILD = -msphinx
99
SPHINXPROJ = ReFrame
1010
SOURCEDIR = .
11-
BUILDDIR = $(VERSION)
1211
RM = /bin/rm -rf
1312

1413
TARGET_DOCS := \
@@ -48,7 +47,7 @@ clean:
4847
-$(RM) $(TARGET_DOCS) doctrees
4948

5049
$(TARGET_DOCS): Makefile
51-
@$(PYTHON) $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
50+
@$(PYTHON) $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "" $(SPHINXOPTS) $(O)
5251

5352

5453

docs/config_reference.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,16 @@ System Partition Configuration
308308
This option is relevant only when ReFrame executes with the `asynchronous execution policy <pipeline.html#execution-policies>`__.
309309

310310

311+
.. js:attribute:: .systems[].partitions[].prepare_cmds
312+
313+
:required: No
314+
:default: ``[]``
315+
316+
List of shell commands to be emitted before any environment loading commands are emitted.
317+
318+
.. versionadded:: 3.5.0
319+
320+
311321
.. js:attribute:: .systems[].partitions[].resources
312322

313323
:required: No

docs/tutorial_advanced.rst

Lines changed: 56 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -389,13 +389,16 @@ Generally, ReFrame generates the job shell scripts using the following pattern:
389389
390390
#!/bin/bash -l
391391
{job_scheduler_preamble}
392-
{test_environment}
392+
{prepare_cmds}
393+
{env_load_cmds}
393394
{prerun_cmds}
394395
{parallel_launcher} {executable} {executable_opts}
395396
{postrun_cmds}
396397
397398
The ``job_scheduler_preamble`` contains the backend job scheduler directives that control the job allocation.
398-
The ``test_environment`` are the necessary commands for setting up the environment of the test.
399+
The ``prepare_cmds`` are commands that can be emitted before the test environment commands.
400+
These can be specified with the :js:attr:`prepare_cmds <.systems[].partitions[].prepare_cmds>` partition configuration option.
401+
The ``env_load_cmds`` are the necessary commands for setting up the environment of the test.
399402
These include any modules or environment variables set at the `system partition level <config_reference.html#system-partition-configuration>`__ or any `modules <regression_test_api.html#reframe.core.pipeline.RegressionTest.modules>`__ or `environment variables <regression_test_api.html#reframe.core.pipeline.RegressionTest.variables>`__ set at the test level.
400403
Then the commands specified in :attr:`prerun_cmds <reframe.core.pipeline.RegressionTest.prerun_cmds>` follow, while those specified in the :attr:`postrun_cmds <reframe.core.pipeline.RegressionTest.postrun_cmds>` come after the launch of the parallel job.
401404
The parallel launch itself consists of three parts:
@@ -671,13 +674,14 @@ ReFrame can be used also to test applications that run inside a container.
671674
First, we need to enable the container platform support in ReFrame's configuration and, specifically, at the partition configuration level:
672675

673676
.. literalinclude:: ../tutorials/config/settings.py
674-
:lines: 38-58
675-
:emphasize-lines: 15-20
677+
:lines: 38-62
678+
:emphasize-lines: 15-24
676679

677680
For each partition, users can define a list of container platforms supported using the :js:attr:`container_platforms` `configuration parameter <config_reference.html#.systems[].partitions[].container_platforms>`__.
678-
In this case, we define the `Singularity <https://sylabs.io>`__ platform, for which we set the :js:attr:`modules` parameter in order to instruct ReFrame to load the ``singularity`` module, whenever it needs to run with this container platform.
681+
In this case, we define the `Sarus <https://github.com/eth-cscs/sarus>`__ platform, for which we set the :js:attr:`modules` parameter in order to instruct ReFrame to load the ``sarus`` module whenever it needs to run with this container platform.
682+
Similarly, we add an entry for the `Singularity <https://sylabs.io>`__ platform.
679683

680-
The following test will use a Singularity container to run:
684+
The following parameterized test will create two tests, one for each of the supported container platforms:
681685

682686
.. code-block:: console
683687
@@ -690,29 +694,66 @@ The following test will use a Singularity container to run:
690694

691695
A container-based test can be written as :class:`RunOnlyRegressionTest <reframe.core.pipeline.RunOnlyRegressionTest>` that sets the :attr:`container_platform <reframe.core.pipeline.RegressionTest.container_platform>` attribute.
692696
This attribute accepts a string that corresponds to the name of the container platform that will be used to run the container for this test.
693-
In this case, the test will be using `Singularity <https://sylabs.io>`__ as a container platform.
694697
If such a platform is not `configured <config_reference.html#container-platform-configuration>`__ for the current system, the test will fail.
695698

696-
As soon as the container platform to be used is defined, you need to specify the container image to use and the commands to run inside the container by setting the :attr:`image <reframe.core.containers.ContainerPlatform.image>` and the :attr:`commands <reframe.core.containers.ContainerPlatform.commands>` container platform attributes.
697-
These two attributes are mandatory for container-based checks.
699+
As soon as the container platform to be used is defined, you need to specify the container image to use by setting the :attr:`image <reframe.core.containers.ContainerPlatform.image>`.
700+
In the ``Singularity`` test variant, we add the ``docker://`` prefix to the image name, in order to instruct ``Singularity`` to pull the image from `DockerHub <https://hub.docker.com/>`__.
701+
The default command that the container runs can be overridden by setting the :attr:`command <reframe.core.containers.ContainerPlatform.command>` attribute of the container platform.
702+
703+
The :attr:`image <reframe.core.containers.ContainerPlatform.image>` is the only mandatory attribute for container-based checks.
698704
It is important to note that the :attr:`executable <reframe.core.pipeline.RegressionTest.executable>` and :attr:`executable_opts <reframe.core.pipeline.RegressionTest.executable_opts>` attributes of the actual test are ignored in case of container-based tests.
699705

700-
ReFrame will run the container as follows:
706+
ReFrame will run the container according to the given platform as follows:
707+
708+
.. code-block:: bash
709+
710+
# Sarus
711+
sarus run --mount=type=bind,source="/path/to/test/stagedir",destination="/rfm_workdir" ubuntu:18.04 bash -c 'cat /etc/os-release | tee /rfm_workdir/release.txt'
712+
713+
# Singularity
714+
singularity exec -B"/path/to/test/stagedir:/rfm_workdir" docker://ubuntu:18.04 bash -c 'cat /etc/os-release | tee /rfm_workdir/release.txt'
701715
702-
.. code-block:: console
703716
704-
singularity exec -B"/path/to/test/stagedir:/workdir" docker://ubuntu:18.04 bash -c 'cd rfm_workdir; pwd; ls; cat /etc/os-release'
717+
In the ``Sarus`` case, ReFrame will prepend the following command in order to pull the container image before running the container:
705718

706-
By default ReFrame will mount the stage directory of the test under ``/rfm_workdir`` inside the container and it will always prepend a ``cd`` command to that directory.
707-
The user commands are then run from that directory one after the other.
719+
.. code-block:: bash
720+
721+
sarus pull ubuntu:18.04
722+
723+
724+
This is the default behavior of ReFrame; if pulling the image is not desired, it can be disabled by setting the :attr:`pull_image <reframe.core.containers.ContainerPlatform.pull_image>` attribute to :class:`False`.
725+
By default ReFrame will mount the stage directory of the test under ``/rfm_workdir`` inside the container.
708726
Once the commands are executed, the container is stopped and ReFrame goes on with the sanity and performance checks.
709-
Users may also change the default mount point of the stage directory by using :attr:`workdir <reframe.core.pipeline.RegressionTest.container_platform.workdir>` attribute:
710727
Besides the stage directory, additional mount points can be specified through the :attr:`mount_points <reframe.core.pipeline.RegressionTest.container_platform.mount_points>` attribute:
711728

712729
.. code-block:: python
713730
714731
self.container_platform.mount_points = [('/path/to/host/dir1', '/path/to/container/mount_point1'),
715732
('/path/to/host/dir2', '/path/to/container/mount_point2')]
716733
734+
735+
The container filesystem is ephemeral; therefore, ReFrame mounts the stage directory under ``/rfm_workdir`` inside the container, where the user can copy artifacts as needed.
736+
These artifacts will therefore be available inside the stage directory after the container execution finishes.
737+
This is very useful if the artifacts are needed for the sanity or performance checks.
738+
If the copy is not performed by the default container command, the user can override this command by setting the :attr:`command <reframe.core.containers.ContainerPlatform.command>` attribute so that it includes the appropriate copy commands.
739+
In the current test, the output of ``cat /etc/os-release`` is available both in the standard output and in the ``release.txt`` file, since we have used the command:
740+
741+
.. code-block:: bash
742+
743+
bash -c 'cat /etc/os-release | tee /rfm_workdir/release.txt'
744+
745+
746+
and ``/rfm_workdir`` corresponds to the stage directory on the host system.
747+
Therefore, the ``release.txt`` file can now be used in the subsequent sanity checks:
748+
749+
.. code-block:: python
750+
751+
os_release_pattern = r'18.04.\d+ LTS \(Bionic Beaver\)'
752+
self.sanity_patterns = sn.all([
753+
sn.assert_found(os_release_pattern, 'release.txt'),
754+
sn.assert_found(os_release_pattern, self.stdout)
755+
])
756+
757+
717758
For a complete list of the available attributes of a specific container platform, please have a look at the :ref:`container-platforms` section of the :doc:`regression_test_api` guide.
718759
On how to configure ReFrame for running containerized tests, please have a look at the :ref:`container-platform-configuration` section of the :doc:`config_reference`.

docs/tutorial_basics.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ Note that you should *not* edit this configuration file in place.
323323
Here is what the new configuration file looks like with the needed additions highlighted:
324324

325325
.. literalinclude:: ../tutorials/config/settings.py
326-
:lines: 10-24,76-97,130-
326+
:lines: 10-24,80-101,134-
327327
:emphasize-lines: 3-15,31-42
328328

329329
Here we define a system named ``catalina`` that has one partition named ``default``.
@@ -807,7 +807,7 @@ Let's extend our configuration file for Piz Daint.
807807

808808

809809
.. literalinclude:: ../tutorials/config/settings.py
810-
:lines: 10-45,58-66,73-
810+
:lines: 10-45,62-70,77-
811811
:emphasize-lines: 16-48,70-101,114-120
812812

813813

0 commit comments

Comments
 (0)