Skip to content

Commit 9090e1a

Browse files
author
ocaisa
authored
Merge pull request #3312 from boegel/get_mpi_cmd_template
flesh out get_mpi_cmd_template function from Mpi.mpi_cmd_for method
2 parents 33d0073 + 4955d8b commit 9090e1a

File tree

2 files changed

+133
-70
lines changed

2 files changed

+133
-70
lines changed

easybuild/tools/toolchain/mpi.py

Lines changed: 97 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,12 @@
2828
:author: Stijn De Weirdt (Ghent University)
2929
:author: Kenneth Hoste (Ghent University)
3030
"""
31+
import copy
3132
import os
3233
import tempfile
3334
from distutils.version import LooseVersion
3435

36+
from easybuild.base import fancylogger
3537
import easybuild.tools.environment as env
3638
import easybuild.tools.toolchain as toolchain
3739
from easybuild.tools.build_log import EasyBuildError
@@ -41,6 +43,95 @@
4143
from easybuild.tools.toolchain.toolchain import Toolchain
4244

4345

46+
# module-level logger, for functions in this module that are not methods of Mpi (and hence have no self.log)
_log = fancylogger.getLogger('tools.toolchain.mpi', fname=False)
47+
48+
49+
def _prepare_impi_mpd(params):
    """
    Set up environment and helper files needed to run 'mpirun' of old Intel MPI versions via MPD.

    Defines $I_MPI_MPD_TMPDIR, $PBS_ENVIRONMENT and $I_MPI_PROCESS_MANAGER, and creates
    an 'mpdboot' file and a 'nodes' file in a new temporary directory.

    :param params: dict with template values, 'nr_ranks' entry determines number of lines in nodes file
    :return: dict with values for the additional 'mpdbf' and 'nodesfile' templates
    """
    # set temporary dir for MPD
    # note: this needs to be kept *short*,
    # to avoid mpirun failing with "socket.error: AF_UNIX path too long"
    # exact limit is unknown, but ~20 characters seems to be OK
    env.setvar('I_MPI_MPD_TMPDIR', tempfile.gettempdir())
    mpd_tmpdir = os.environ['I_MPI_MPD_TMPDIR']
    if len(mpd_tmpdir) > 20:
        _log.warning("$I_MPI_MPD_TMPDIR should be (very) short to avoid problems: %s", mpd_tmpdir)

    # temporary location for mpdboot and nodes files
    # (not cleaned up here, the files must still exist when the MPI command is actually run)
    tmpdir = tempfile.mkdtemp(prefix='mpi_cmd_for-')

    # set PBS_ENVIRONMENT, so that --file option for mpdboot isn't stripped away
    env.setvar('PBS_ENVIRONMENT', "PBS_BATCH_MPI")

    # make sure we're always using mpd as process manager
    # only required for/picked up by Intel MPI v4.1 or higher, no harm done for others
    env.setvar('I_MPI_PROCESS_MANAGER', 'mpd')

    # create mpdboot file
    mpdboot = os.path.join(tmpdir, 'mpdboot')
    write_file(mpdboot, "localhost ifhn=localhost")

    # create nodes file, one line per MPI rank
    nodes = os.path.join(tmpdir, 'nodes')
    write_file(nodes, "localhost\n" * int(params['nr_ranks']))

    return {
        'mpdbf': "--file=%s" % mpdboot,
        'nodesfile': "-machinefile %s" % nodes,
    }


def get_mpi_cmd_template(mpi_family, params, mpi_version=None):
    """
    Return template for MPI command, for specified MPI family.

    A template specified via the 'mpi_cmd_template' build option takes precedence
    over the templates known for the different MPI families.

    :param mpi_family: MPI family to use to determine MPI command template
    :param params: dict with values for the templates used in the MPI command template
                   (like 'nr_ranks' and 'cmd'); a deep copy is made, the passed dict is left untouched
    :param mpi_version: version of the MPI library; only used for Intel MPI,
                        where it must be known unless a custom template is specified
    :return: tuple with MPI command template and (possibly extended) copy of params
    :raises EasyBuildError: if Intel MPI version is unknown, if MPI family is unknown,
                            or if template doesn't include a template for each key in params
    """
    params = copy.deepcopy(params)

    mpi_cmd_template = build_option('mpi_cmd_template')
    if mpi_cmd_template:
        _log.info("Using specified template for MPI commands: %s", mpi_cmd_template)
    else:
        # different known mpirun commands
        mpirun_n_cmd = "mpirun -n %(nr_ranks)s %(cmd)s"
        mpi_cmds = {
            toolchain.OPENMPI: mpirun_n_cmd,
            toolchain.QLOGICMPI: "mpirun -H localhost -np %(nr_ranks)s %(cmd)s",
            toolchain.INTELMPI: mpirun_n_cmd,
            toolchain.MVAPICH2: mpirun_n_cmd,
            toolchain.MPICH: mpirun_n_cmd,
            toolchain.MPICH2: mpirun_n_cmd,
        }

        # Intel MPI mpirun needs more work;
        # note: no custom template was specified if we get here (also covers an empty template value),
        # so the Intel MPI specific handling must always be considered
        if mpi_family == toolchain.INTELMPI:

            if mpi_version is None:
                raise EasyBuildError("Intel MPI version unknown, can't determine MPI command template!")

            # for old versions of Intel MPI, we need to use MPD
            if LooseVersion(mpi_version) <= LooseVersion('4.1'):
                mpi_cmds[toolchain.INTELMPI] = "mpirun %(mpdbf)s %(nodesfile)s -np %(nr_ranks)s %(cmd)s"
                params.update(_prepare_impi_mpd(params))

        if mpi_family in mpi_cmds:
            mpi_cmd_template = mpi_cmds[mpi_family]
            _log.info("Using template MPI command '%s' for MPI family '%s'", mpi_cmd_template, mpi_family)
        else:
            raise EasyBuildError("Don't know which template MPI command to use for MPI family '%s'", mpi_family)

    # every parameter must have a corresponding template in the MPI command template
    templates = ['%(' + key + ')s' for key in sorted(params)]
    missing = [tmpl for tmpl in templates if tmpl not in mpi_cmd_template]
    if missing:
        raise EasyBuildError("Missing templates in mpi-cmd-template value '%s': %s",
                             mpi_cmd_template, ', '.join(missing))

    return mpi_cmd_template, params
133+
134+
44135
class Mpi(Toolchain):
45136
"""General MPI-like class
46137
can't be used without creating new class M(Mpi)
@@ -191,79 +282,15 @@ def mpi_cmd_for(self, cmd, nr_ranks):
191282
'cmd': cmd,
192283
}
193284

194-
mpi_cmd_template = build_option('mpi_cmd_template')
195-
if mpi_cmd_template:
196-
self.log.info("Using specified template for MPI commands: %s", mpi_cmd_template)
197-
else:
198-
# different known mpirun commands
199-
mpirun_n_cmd = "mpirun -n %(nr_ranks)s %(cmd)s"
200-
mpi_cmds = {
201-
toolchain.OPENMPI: mpirun_n_cmd,
202-
toolchain.QLOGICMPI: "mpirun -H localhost -np %(nr_ranks)s %(cmd)s",
203-
toolchain.INTELMPI: mpirun_n_cmd,
204-
toolchain.MVAPICH2: mpirun_n_cmd,
205-
toolchain.MPICH: mpirun_n_cmd,
206-
toolchain.MPICH2: mpirun_n_cmd,
207-
}
208-
209285
mpi_family = self.mpi_family()
210286

211-
# Intel MPI mpirun needs more work
212-
if mpi_cmd_template is None:
213-
214-
if mpi_family == toolchain.INTELMPI:
215-
216-
# for old versions of Intel MPI, we need to use MPD
217-
impi_ver = self.get_software_version(self.MPI_MODULE_NAME)[0]
218-
if LooseVersion(impi_ver) <= LooseVersion('4.1'):
219-
220-
mpi_cmds[toolchain.INTELMPI] = "mpirun %(mpdbf)s %(nodesfile)s -np %(nr_ranks)s %(cmd)s"
221-
222-
# set temporary dir for MPD
223-
# note: this needs to be kept *short*,
224-
# to avoid mpirun failing with "socket.error: AF_UNIX path too long"
225-
# exact limit is unknown, but ~20 characters seems to be OK
226-
env.setvar('I_MPI_MPD_TMPDIR', tempfile.gettempdir())
227-
mpd_tmpdir = os.environ['I_MPI_MPD_TMPDIR']
228-
if len(mpd_tmpdir) > 20:
229-
self.log.warning("$I_MPI_MPD_TMPDIR should be (very) short to avoid problems: %s", mpd_tmpdir)
230-
231-
# temporary location for mpdboot and nodes files
232-
tmpdir = tempfile.mkdtemp(prefix='mpi_cmd_for-')
233-
234-
# set PBS_ENVIRONMENT, so that --file option for mpdboot isn't stripped away
235-
env.setvar('PBS_ENVIRONMENT', "PBS_BATCH_MPI")
236-
237-
# make sure we're always using mpd as process manager
238-
# only required for/picked up by Intel MPI v4.1 or higher, no harm done for others
239-
env.setvar('I_MPI_PROCESS_MANAGER', 'mpd')
240-
241-
# create mpdboot file
242-
mpdboot = os.path.join(tmpdir, 'mpdboot')
243-
write_file(mpdboot, "localhost ifhn=localhost")
244-
245-
params.update({'mpdbf': "--file=%s" % mpdboot})
246-
247-
# create nodes file
248-
nodes = os.path.join(tmpdir, 'nodes')
249-
write_file(nodes, "localhost\n" * int(nr_ranks))
250-
251-
params.update({'nodesfile': "-machinefile %s" % nodes})
252-
253-
if mpi_family in mpi_cmds.keys():
254-
mpi_cmd_template = mpi_cmds[mpi_family]
255-
self.log.info("Using template MPI command '%s' for MPI family '%s'", mpi_cmd_template, mpi_family)
256-
else:
257-
raise EasyBuildError("Don't know which template MPI command to use for MPI family '%s'", mpi_family)
287+
if mpi_family == toolchain.INTELMPI:
288+
mpi_version = self.get_software_version(self.MPI_MODULE_NAME)[0]
289+
else:
290+
mpi_version = None
258291

259-
missing = []
260-
for key in sorted(params.keys()):
261-
tmpl = '%(' + key + ')s'
262-
if tmpl not in mpi_cmd_template:
263-
missing.append(tmpl)
264-
if missing:
265-
raise EasyBuildError("Missing templates in mpi-cmd-template value '%s': %s",
266-
mpi_cmd_template, ', '.join(missing))
292+
mpi_cmd_template, params = get_mpi_cmd_template(mpi_family, params, mpi_version=mpi_version)
293+
self.log.info("Using MPI command template '%s' (params: %s)", mpi_cmd_template, params)
267294

268295
try:
269296
res = mpi_cmd_template % params

test/framework/toolchain.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
from test.framework.utilities import EnhancedTestCase, TestLoaderFiltered, find_full_path, init_config
4141

4242
import easybuild.tools.modules as modules
43+
import easybuild.tools.toolchain as toolchain
4344
import easybuild.tools.toolchain.compiler
4445
from easybuild.framework.easyconfig.easyconfig import EasyConfig, ActiveMNS
4546
from easybuild.toolchains.system import SystemToolchain
@@ -49,6 +50,7 @@
4950
from easybuild.tools.filetools import adjust_permissions, copy_dir, find_eb_script, mkdir, read_file, write_file, which
5051
from easybuild.tools.py2vs3 import string_type
5152
from easybuild.tools.run import run_cmd
53+
from easybuild.tools.toolchain.mpi import get_mpi_cmd_template
5254
from easybuild.tools.toolchain.toolchain import env_vars_external_module
5355
from easybuild.tools.toolchain.utilities import get_toolchain, search_toolchain
5456

@@ -1027,6 +1029,40 @@ def test_mpi_cmd_for(self):
10271029
error_pattern = "Failed to complete MPI cmd template .* with .*: KeyError 'foo'"
10281030
self.assertErrorRegex(EasyBuildError, error_pattern, tc.mpi_cmd_for, 'test', 1)
10291031

1032+
def test_get_mpi_cmd_template(self):
    """Check MPI command template and parameters returned by get_mpi_cmd_template."""

    # constants like toolchain.OPENMPI are only in place after search_toolchain was called once
    search_toolchain('')

    input_params = {'nr_ranks': 123, 'cmd': 'this_is_just_a_test'}

    # MPI families that are expected to use the plain 'mpirun -n' template
    mpirun_n_families = [toolchain.OPENMPI, toolchain.MPICH, toolchain.MPICH2, toolchain.MVAPICH2]
    for family in mpirun_n_families:
        tmpl, res_params = get_mpi_cmd_template(family, input_params)
        self.assertEqual(tmpl, "mpirun -n %(nr_ranks)s %(cmd)s")
        self.assertEqual(res_params, input_params)

    # Intel MPI is a special case: without a known MPI version, an error is expected
    self.assertErrorRegex(EasyBuildError, "Intel MPI version unknown, can't determine MPI command template!",
                          get_mpi_cmd_template, toolchain.INTELMPI, {})

    # an old Intel MPI version results in a template that also involves mpdboot and nodes files
    tmpl, res_params = get_mpi_cmd_template(toolchain.INTELMPI, input_params, mpi_version='1.0')
    self.assertEqual(tmpl, "mpirun %(mpdbf)s %(nodesfile)s -np %(nr_ranks)s %(cmd)s")
    self.assertEqual(sorted(res_params.keys()), ['cmd', 'mpdbf', 'nodesfile', 'nr_ranks'])
    self.assertEqual(res_params['cmd'], 'this_is_just_a_test')
    self.assertEqual(res_params['nr_ranks'], 123)

    # for both extra parameters: value must match expected pattern, and the file it refers to must exist;
    # the separator is used to split off the file path from the option it is passed to
    extra_params = [
        ('mpdbf', '^--file=.*/mpdboot$', '='),
        ('nodesfile', '^-machinefile /.*/nodes$', ' '),
    ]
    for key, pattern, separator in extra_params:
        value = res_params[key]
        regex = re.compile(pattern)
        self.assertTrue(regex.match(value), "'%s' should match pattern '%s'" % (value, regex.pattern))
        self.assertTrue(os.path.exists(value.split(separator)[1]))
1065+
10301066
def test_prepare_deps(self):
10311067
"""Test preparing for a toolchain when dependencies are involved."""
10321068
tc = self.get_toolchain('GCC', version='6.4.0-2.28')

0 commit comments

Comments
 (0)