|
28 | 28 | :author: Stijn De Weirdt (Ghent University) |
29 | 29 | :author: Kenneth Hoste (Ghent University) |
30 | 30 | """ |
| 31 | +import copy |
31 | 32 | import os |
32 | 33 | import tempfile |
33 | 34 | from distutils.version import LooseVersion |
34 | 35 |
|
| 36 | +from easybuild.base import fancylogger |
35 | 37 | import easybuild.tools.environment as env |
36 | 38 | import easybuild.tools.toolchain as toolchain |
37 | 39 | from easybuild.tools.build_log import EasyBuildError |
|
41 | 43 | from easybuild.tools.toolchain.toolchain import Toolchain |
42 | 44 |
|
43 | 45 |
|
| 46 | +_log = fancylogger.getLogger('tools.toolchain.mpi', fname=False) |
| 47 | + |
| 48 | + |
def get_mpi_cmd_template(mpi_family, params, mpi_version=None):
    """
    Return template for MPI command, for specified MPI family.

    If the 'mpi_cmd_template' build option holds a non-empty value, that value is used as template;
    otherwise a default template is selected based on the specified MPI family.

    For Intel MPI versions up to and including 4.1 (which use MPD), selecting the default template has
    side effects: $I_MPI_MPD_TMPDIR, $PBS_ENVIRONMENT and $I_MPI_PROCESS_MANAGER are set, and a temporary
    directory holding an 'mpdboot' file and a 'nodes' file is created.

    :param mpi_family: MPI family to use to determine MPI command template
    :param params: dict with values to complete the template with (e.g. 'nr_ranks', 'cmd');
                   the passed dict is left untouched, an updated (deep) copy is returned instead
    :param mpi_version: version of the MPI library; must be specified when MPI family is Intel MPI
                        and no custom template is specified
    :return: tuple with MPI command template and (possibly extended) copy of params
    :raises EasyBuildError: if the Intel MPI version is required but unknown, if no template is known
                            for the specified MPI family, or if the selected template does not include
                            a '%(<key>)s' placeholder for every key in params
    """

    # work on a copy, so the dict passed in by the caller is never modified
    params = copy.deepcopy(params)

    mpi_cmd_template = build_option('mpi_cmd_template')
    if mpi_cmd_template:
        _log.info("Using specified template for MPI commands: %s", mpi_cmd_template)
    else:
        # no (or empty) custom template specified: fall back to default template for this MPI family;
        # an empty value is deliberately treated the same as an unspecified one

        # different known mpirun commands
        mpirun_n_cmd = "mpirun -n %(nr_ranks)s %(cmd)s"
        mpi_cmds = {
            toolchain.OPENMPI: mpirun_n_cmd,
            toolchain.QLOGICMPI: "mpirun -H localhost -np %(nr_ranks)s %(cmd)s",
            toolchain.INTELMPI: mpirun_n_cmd,
            toolchain.MVAPICH2: mpirun_n_cmd,
            toolchain.MPICH: mpirun_n_cmd,
            toolchain.MPICH2: mpirun_n_cmd,
        }

        # Intel MPI mpirun needs more work
        if mpi_family == toolchain.INTELMPI:

            if mpi_version is None:
                raise EasyBuildError("Intel MPI version unknown, can't determine MPI command template!")

            # for old versions of Intel MPI, we need to use MPD
            if LooseVersion(mpi_version) <= LooseVersion('4.1'):

                mpi_cmds[toolchain.INTELMPI] = "mpirun %(mpdbf)s %(nodesfile)s -np %(nr_ranks)s %(cmd)s"

                # set temporary dir for MPD
                # note: this needs to be kept *short*,
                # to avoid mpirun failing with "socket.error: AF_UNIX path too long"
                # exact limit is unknown, but ~20 characters seems to be OK
                env.setvar('I_MPI_MPD_TMPDIR', tempfile.gettempdir())
                mpd_tmpdir = os.environ['I_MPI_MPD_TMPDIR']
                if len(mpd_tmpdir) > 20:
                    _log.warning("$I_MPI_MPD_TMPDIR should be (very) short to avoid problems: %s", mpd_tmpdir)

                # temporary location for mpdboot and nodes files
                tmpdir = tempfile.mkdtemp(prefix='mpi_cmd_for-')

                # set PBS_ENVIRONMENT, so that --file option for mpdboot isn't stripped away
                env.setvar('PBS_ENVIRONMENT', "PBS_BATCH_MPI")

                # make sure we're always using mpd as process manager
                # only required for/picked up by Intel MPI v4.1 or higher, no harm done for others
                env.setvar('I_MPI_PROCESS_MANAGER', 'mpd')

                # create mpdboot file
                mpdboot = os.path.join(tmpdir, 'mpdboot')
                write_file(mpdboot, "localhost ifhn=localhost")

                params.update({'mpdbf': "--file=%s" % mpdboot})

                # create nodes file, with one line per MPI rank
                nodes = os.path.join(tmpdir, 'nodes')
                write_file(nodes, "localhost\n" * int(params['nr_ranks']))

                params.update({'nodesfile': "-machinefile %s" % nodes})

        if mpi_family in mpi_cmds:
            mpi_cmd_template = mpi_cmds[mpi_family]
            _log.info("Using template MPI command '%s' for MPI family '%s'", mpi_cmd_template, mpi_family)
        else:
            raise EasyBuildError("Don't know which template MPI command to use for MPI family '%s'", mpi_family)

    # every parameter must have a corresponding '%(<key>)s' placeholder in the selected template
    placeholders = ['%(' + key + ')s' for key in sorted(params)]
    missing = [tmpl for tmpl in placeholders if tmpl not in mpi_cmd_template]
    if missing:
        raise EasyBuildError("Missing templates in mpi-cmd-template value '%s': %s",
                             mpi_cmd_template, ', '.join(missing))

    return mpi_cmd_template, params
| 133 | + |
| 134 | + |
44 | 135 | class Mpi(Toolchain): |
45 | 136 | """General MPI-like class |
46 | 137 | can't be used without creating new class M(Mpi) |
@@ -191,79 +282,15 @@ def mpi_cmd_for(self, cmd, nr_ranks): |
191 | 282 | 'cmd': cmd, |
192 | 283 | } |
193 | 284 |
|
194 | | - mpi_cmd_template = build_option('mpi_cmd_template') |
195 | | - if mpi_cmd_template: |
196 | | - self.log.info("Using specified template for MPI commands: %s", mpi_cmd_template) |
197 | | - else: |
198 | | - # different known mpirun commands |
199 | | - mpirun_n_cmd = "mpirun -n %(nr_ranks)s %(cmd)s" |
200 | | - mpi_cmds = { |
201 | | - toolchain.OPENMPI: mpirun_n_cmd, |
202 | | - toolchain.QLOGICMPI: "mpirun -H localhost -np %(nr_ranks)s %(cmd)s", |
203 | | - toolchain.INTELMPI: mpirun_n_cmd, |
204 | | - toolchain.MVAPICH2: mpirun_n_cmd, |
205 | | - toolchain.MPICH: mpirun_n_cmd, |
206 | | - toolchain.MPICH2: mpirun_n_cmd, |
207 | | - } |
208 | | - |
209 | 285 | mpi_family = self.mpi_family() |
210 | 286 |
|
211 | | - # Intel MPI mpirun needs more work |
212 | | - if mpi_cmd_template is None: |
213 | | - |
214 | | - if mpi_family == toolchain.INTELMPI: |
215 | | - |
216 | | - # for old versions of Intel MPI, we need to use MPD |
217 | | - impi_ver = self.get_software_version(self.MPI_MODULE_NAME)[0] |
218 | | - if LooseVersion(impi_ver) <= LooseVersion('4.1'): |
219 | | - |
220 | | - mpi_cmds[toolchain.INTELMPI] = "mpirun %(mpdbf)s %(nodesfile)s -np %(nr_ranks)s %(cmd)s" |
221 | | - |
222 | | - # set temporary dir for MPD |
223 | | - # note: this needs to be kept *short*, |
224 | | - # to avoid mpirun failing with "socket.error: AF_UNIX path too long" |
225 | | - # exact limit is unknown, but ~20 characters seems to be OK |
226 | | - env.setvar('I_MPI_MPD_TMPDIR', tempfile.gettempdir()) |
227 | | - mpd_tmpdir = os.environ['I_MPI_MPD_TMPDIR'] |
228 | | - if len(mpd_tmpdir) > 20: |
229 | | - self.log.warning("$I_MPI_MPD_TMPDIR should be (very) short to avoid problems: %s", mpd_tmpdir) |
230 | | - |
231 | | - # temporary location for mpdboot and nodes files |
232 | | - tmpdir = tempfile.mkdtemp(prefix='mpi_cmd_for-') |
233 | | - |
234 | | - # set PBS_ENVIRONMENT, so that --file option for mpdboot isn't stripped away |
235 | | - env.setvar('PBS_ENVIRONMENT', "PBS_BATCH_MPI") |
236 | | - |
237 | | - # make sure we're always using mpd as process manager |
238 | | - # only required for/picked up by Intel MPI v4.1 or higher, no harm done for others |
239 | | - env.setvar('I_MPI_PROCESS_MANAGER', 'mpd') |
240 | | - |
241 | | - # create mpdboot file |
242 | | - mpdboot = os.path.join(tmpdir, 'mpdboot') |
243 | | - write_file(mpdboot, "localhost ifhn=localhost") |
244 | | - |
245 | | - params.update({'mpdbf': "--file=%s" % mpdboot}) |
246 | | - |
247 | | - # create nodes file |
248 | | - nodes = os.path.join(tmpdir, 'nodes') |
249 | | - write_file(nodes, "localhost\n" * int(nr_ranks)) |
250 | | - |
251 | | - params.update({'nodesfile': "-machinefile %s" % nodes}) |
252 | | - |
253 | | - if mpi_family in mpi_cmds.keys(): |
254 | | - mpi_cmd_template = mpi_cmds[mpi_family] |
255 | | - self.log.info("Using template MPI command '%s' for MPI family '%s'", mpi_cmd_template, mpi_family) |
256 | | - else: |
257 | | - raise EasyBuildError("Don't know which template MPI command to use for MPI family '%s'", mpi_family) |
| 287 | + if mpi_family == toolchain.INTELMPI: |
| 288 | + mpi_version = self.get_software_version(self.MPI_MODULE_NAME)[0] |
| 289 | + else: |
| 290 | + mpi_version = None |
258 | 291 |
|
259 | | - missing = [] |
260 | | - for key in sorted(params.keys()): |
261 | | - tmpl = '%(' + key + ')s' |
262 | | - if tmpl not in mpi_cmd_template: |
263 | | - missing.append(tmpl) |
264 | | - if missing: |
265 | | - raise EasyBuildError("Missing templates in mpi-cmd-template value '%s': %s", |
266 | | - mpi_cmd_template, ', '.join(missing)) |
| 292 | + mpi_cmd_template, params = get_mpi_cmd_template(mpi_family, params, mpi_version=mpi_version) |
| 293 | + self.log.info("Using MPI command template '%s' (params: %s)", mpi_cmd_template, params) |
267 | 294 |
|
268 | 295 | try: |
269 | 296 | res = mpi_cmd_template % params |
|
0 commit comments