Skip to content

Commit 123df51

Browse files
authored
Move MPI behavior from HTEX to MPIExecutor (#3582)
This PR moves the following MPI-related functionality and options from HTEX to MPIExecutor: the kwarg options enable_mpi_mode and mpi_launcher are now removed from HTEX; the check that the launcher is set to SimpleLauncher and the check for a valid mpi_launcher are now in MPIExecutor. A new validate_resource_specification method is added to HTEX that currently asserts that no resource_specification is passed to it, since HTEX does not support any such options. MPIExecutor overrides validate_resource_specification to check for a valid MPI resource specification. These changes should make it easier to have executor-specific resource validation. Changed behaviour: the HTEX kwargs enable_mpi_mode and mpi_launcher are no longer supported; expect to use MPI functionality only through the MPIExecutor.
1 parent f135919 commit 123df51

File tree

10 files changed

+171
-145
lines changed

10 files changed

+171
-145
lines changed

parsl/executors/high_throughput/executor.py

Lines changed: 16 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212

1313
import typeguard
1414

15-
import parsl.launchers
1615
from parsl import curvezmq
1716
from parsl.addresses import get_all_addresses
1817
from parsl.app.errors import RemoteExceptionWrapper
@@ -25,8 +24,7 @@
2524
RandomManagerSelector,
2625
)
2726
from parsl.executors.high_throughput.mpi_prefix_composer import (
28-
VALID_LAUNCHERS,
29-
validate_resource_spec,
27+
InvalidResourceSpecification,
3028
)
3129
from parsl.executors.status_handling import BlockProviderExecutor
3230
from parsl.jobs.states import TERMINAL_STATES, JobState, JobStatus
@@ -224,17 +222,6 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
224222
Parsl will create names as integers starting with 0.
225223
226224
default: empty list
227-
228-
enable_mpi_mode: bool
229-
If enabled, MPI launch prefixes will be composed for the batch scheduler based on
230-
the nodes available in each batch job and the resource_specification dict passed
231-
from the app. This is an experimental feature, please refer to the following doc section
232-
before use: https://parsl.readthedocs.io/en/stable/userguide/mpi_apps.html
233-
234-
mpi_launcher: str
235-
This field is only used if enable_mpi_mode is set. Select one from the
236-
list of supported MPI launchers = ("srun", "aprun", "mpiexec").
237-
default: "mpiexec"
238225
"""
239226

240227
@typeguard.typechecked
@@ -263,8 +250,6 @@ def __init__(self,
263250
poll_period: int = 10,
264251
address_probe_timeout: Optional[int] = None,
265252
worker_logdir_root: Optional[str] = None,
266-
enable_mpi_mode: bool = False,
267-
mpi_launcher: str = "mpiexec",
268253
manager_selector: ManagerSelector = RandomManagerSelector(),
269254
block_error_handler: Union[bool, Callable[[BlockProviderExecutor, Dict[str, JobStatus]], None]] = True,
270255
encrypted: bool = False):
@@ -330,15 +315,6 @@ def __init__(self,
330315
self.encrypted = encrypted
331316
self.cert_dir = None
332317

333-
self.enable_mpi_mode = enable_mpi_mode
334-
assert mpi_launcher in VALID_LAUNCHERS, \
335-
f"mpi_launcher must be set to one of {VALID_LAUNCHERS}"
336-
if self.enable_mpi_mode:
337-
assert isinstance(self.provider.launcher, parsl.launchers.SimpleLauncher), \
338-
"mpi_mode requires the provider to be configured to use a SimpleLauncher"
339-
340-
self.mpi_launcher = mpi_launcher
341-
342318
if not launch_cmd:
343319
launch_cmd = DEFAULT_LAUNCH_CMD
344320
self.launch_cmd = launch_cmd
@@ -348,6 +324,8 @@ def __init__(self,
348324
self.interchange_launch_cmd = interchange_launch_cmd
349325

350326
radio_mode = "htex"
327+
enable_mpi_mode: bool = False
328+
mpi_launcher: str = "mpiexec"
351329

352330
def _warn_deprecated(self, old: str, new: str):
353331
warnings.warn(
@@ -377,6 +355,18 @@ def worker_logdir(self):
377355
return "{}/{}".format(self.worker_logdir_root, self.label)
378356
return self.logdir
379357

358+
def validate_resource_spec(self, resource_specification: dict):
359+
"""HTEX does not support *any* resource_specification options and
360+
will raise InvalidResourceSpecification is any are passed to it"""
361+
if resource_specification:
362+
raise InvalidResourceSpecification(
363+
set(resource_specification.keys()),
364+
("HTEX does not support the supplied resource_specifications."
365+
"For MPI applications consider using the MPIExecutor. "
366+
"For specifications for core count/memory/walltime, consider using WorkQueueExecutor. ")
367+
)
368+
return
369+
380370
def initialize_scaling(self):
381371
"""Compose the launch command and scale out the initial blocks.
382372
"""
@@ -660,7 +650,7 @@ def submit(self, func, resource_specification, *args, **kwargs):
660650
Future
661651
"""
662652

663-
validate_resource_spec(resource_specification, self.enable_mpi_mode)
653+
self.validate_resource_spec(resource_specification)
664654

665655
if self.bad_state_is_set:
666656
raise self.executor_exception

parsl/executors/high_throughput/mpi_executor.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,13 @@
88
GENERAL_HTEX_PARAM_DOCS,
99
HighThroughputExecutor,
1010
)
11+
from parsl.executors.high_throughput.mpi_prefix_composer import (
12+
VALID_LAUNCHERS,
13+
validate_resource_spec,
14+
)
1115
from parsl.executors.status_handling import BlockProviderExecutor
1216
from parsl.jobs.states import JobStatus
17+
from parsl.launchers import SimpleLauncher
1318
from parsl.providers import LocalProvider
1419
from parsl.providers.base import ExecutionProvider
1520

@@ -30,6 +35,11 @@ class MPIExecutor(HighThroughputExecutor):
3035
max_workers_per_block: int
3136
Maximum number of MPI applications to run at once per block
3237
38+
mpi_launcher: str
39+
Select one from the list of supported MPI launchers:
40+
("srun", "aprun", "mpiexec").
41+
default: "mpiexec"
42+
3343
{GENERAL_HTEX_PARAM_DOCS}
3444
"""
3545

@@ -60,7 +70,6 @@ def __init__(self,
6070
super().__init__(
6171
# Hard-coded settings
6272
cores_per_worker=1e-9, # Ensures there will be at least an absurd number of workers
63-
enable_mpi_mode=True,
6473
max_workers_per_node=max_workers_per_block,
6574

6675
# Everything else
@@ -82,9 +91,21 @@ def __init__(self,
8291
poll_period=poll_period,
8392
address_probe_timeout=address_probe_timeout,
8493
worker_logdir_root=worker_logdir_root,
85-
mpi_launcher=mpi_launcher,
8694
block_error_handler=block_error_handler,
8795
encrypted=encrypted
8896
)
97+
self.enable_mpi_mode = True
98+
self.mpi_launcher = mpi_launcher
8999

90100
self.max_workers_per_block = max_workers_per_block
101+
102+
if not isinstance(self.provider.launcher, SimpleLauncher):
103+
raise TypeError("mpi_mode requires the provider to be configured to use a SimpleLauncher")
104+
105+
if mpi_launcher not in VALID_LAUNCHERS:
106+
raise ValueError(f"mpi_launcher set to:{mpi_launcher} must be set to one of {VALID_LAUNCHERS}")
107+
108+
self.mpi_launcher = mpi_launcher
109+
110+
def validate_resource_spec(self, resource_specification: dict):
111+
return validate_resource_spec(resource_specification)

parsl/executors/high_throughput/mpi_prefix_composer.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,15 @@ def __str__(self):
2121
class InvalidResourceSpecification(Exception):
2222
"""Exception raised when Invalid input is supplied via resource specification"""
2323

24-
def __init__(self, invalid_keys: Set[str]):
24+
def __init__(self, invalid_keys: Set[str], message: str = ''):
2525
self.invalid_keys = invalid_keys
26+
self.message = message
2627

2728
def __str__(self):
28-
return f"Invalid resource specification options supplied: {self.invalid_keys}"
29+
return f"Invalid resource specification options supplied: {self.invalid_keys} {self.message}"
2930

3031

31-
def validate_resource_spec(resource_spec: Dict[str, str], is_mpi_enabled: bool):
32+
def validate_resource_spec(resource_spec: Dict[str, str]):
3233
"""Basic validation of keys in the resource_spec
3334
3435
Raises: InvalidResourceSpecification if the resource_spec
@@ -38,7 +39,7 @@ def validate_resource_spec(resource_spec: Dict[str, str], is_mpi_enabled: bool):
3839

3940
# empty resource_spec when mpi_mode is set causes parsl to hang
4041
# ref issue #3427
41-
if is_mpi_enabled and len(user_keys) == 0:
42+
if len(user_keys) == 0:
4243
raise MissingResourceSpecification('MPI mode requires optional parsl_resource_specification keyword argument to be configured')
4344

4445
legal_keys = set(("ranks_per_node",
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import queue
2+
from unittest import mock
3+
4+
import pytest
5+
6+
from parsl.executors import HighThroughputExecutor
7+
from parsl.executors.high_throughput.mpi_prefix_composer import (
8+
InvalidResourceSpecification,
9+
)
10+
11+
12+
def double(x):
13+
return x * 2
14+
15+
16+
@pytest.mark.local
17+
def test_submit_calls_validate():
18+
19+
htex = HighThroughputExecutor()
20+
htex.outgoing_q = mock.Mock(spec=queue.Queue)
21+
htex.validate_resource_spec = mock.Mock(spec=htex.validate_resource_spec)
22+
23+
res_spec = {}
24+
htex.submit(double, res_spec, (5,), {})
25+
htex.validate_resource_spec.assert_called()
26+
27+
28+
@pytest.mark.local
29+
def test_resource_spec_validation():
30+
htex = HighThroughputExecutor()
31+
ret_val = htex.validate_resource_spec({})
32+
assert ret_val is None
33+
34+
35+
@pytest.mark.local
36+
def test_resource_spec_validation_bad_keys():
37+
htex = HighThroughputExecutor()
38+
39+
with pytest.raises(InvalidResourceSpecification):
40+
htex.validate_resource_spec({"num_nodes": 2})
Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,48 @@
11
import pytest
22

33
from parsl import Config
4-
from parsl.executors import HighThroughputExecutor
4+
from parsl.executors import MPIExecutor
55
from parsl.launchers import AprunLauncher, SimpleLauncher, SrunLauncher
66
from parsl.providers import SlurmProvider
77

88

99
@pytest.mark.local
10-
def test_bad_launcher_with_mpi_mode():
11-
"""AssertionError if a launcher other than SimpleLauncher is supplied"""
10+
def test_bad_launcher():
11+
"""TypeError if a launcher other than SimpleLauncher is supplied"""
1212

1313
for launcher in [SrunLauncher(), AprunLauncher()]:
14-
with pytest.raises(AssertionError):
14+
with pytest.raises(TypeError):
1515
Config(executors=[
16-
HighThroughputExecutor(
17-
enable_mpi_mode=True,
16+
MPIExecutor(
1817
provider=SlurmProvider(launcher=launcher),
1918
)
2019
])
2120

2221

2322
@pytest.mark.local
24-
def test_correct_launcher_with_mpi_mode():
23+
def test_bad_mpi_launcher():
24+
"""ValueError if an unsupported mpi_launcher is specified"""
25+
26+
with pytest.raises(ValueError):
27+
Config(executors=[
28+
MPIExecutor(
29+
mpi_launcher="bad_launcher",
30+
provider=SlurmProvider(launcher=SimpleLauncher()),
31+
)
32+
])
33+
34+
35+
@pytest.mark.local
36+
@pytest.mark.parametrize(
37+
"mpi_launcher",
38+
["srun", "aprun", "mpiexec"]
39+
)
40+
def test_correct_launcher_with_mpi_mode(mpi_launcher: str):
2541
"""Confirm that SimpleLauncher works with mpi_mode"""
2642

27-
config = Config(executors=[
28-
HighThroughputExecutor(
29-
enable_mpi_mode=True,
30-
provider=SlurmProvider(launcher=SimpleLauncher()),
31-
)
32-
])
33-
assert isinstance(config.executors[0].provider.launcher, SimpleLauncher)
43+
executor = MPIExecutor(
44+
mpi_launcher=mpi_launcher,
45+
provider=SlurmProvider(launcher=SimpleLauncher()),
46+
)
47+
48+
assert isinstance(executor.provider.launcher, SimpleLauncher)

parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py

Lines changed: 0 additions & 47 deletions
This file was deleted.

parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,26 +6,34 @@
66
import pytest
77

88
import parsl
9-
from parsl import bash_app, python_app
9+
from parsl import Config, bash_app, python_app
10+
from parsl.executors import MPIExecutor
1011
from parsl.executors.high_throughput.mpi_prefix_composer import (
1112
MissingResourceSpecification,
1213
)
13-
from parsl.tests.configs.htex_local import fresh_config
14+
from parsl.launchers import SimpleLauncher
15+
from parsl.providers import LocalProvider
1416

1517
EXECUTOR_LABEL = "MPI_TEST"
1618

1719

1820
def local_setup():
19-
config = fresh_config()
20-
config.executors[0].label = EXECUTOR_LABEL
21-
config.executors[0].max_workers_per_node = 2
22-
config.executors[0].enable_mpi_mode = True
23-
config.executors[0].mpi_launcher = "mpiexec"
2421

2522
cwd = os.path.abspath(os.path.dirname(__file__))
2623
pbs_nodefile = os.path.join(cwd, "mocks", "pbs_nodefile")
2724

28-
config.executors[0].provider.worker_init = f"export PBS_NODEFILE={pbs_nodefile}"
25+
config = Config(
26+
executors=[
27+
MPIExecutor(
28+
label=EXECUTOR_LABEL,
29+
max_workers_per_block=2,
30+
mpi_launcher="mpiexec",
31+
provider=LocalProvider(
32+
worker_init=f"export PBS_NODEFILE={pbs_nodefile}",
33+
launcher=SimpleLauncher()
34+
)
35+
)
36+
])
2937

3038
parsl.load(config)
3139

parsl/tests/test_mpi_apps/test_mpiex.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
import pytest
66

7-
import parsl
87
from parsl import Config, HighThroughputExecutor
98
from parsl.executors.high_throughput.mpi_executor import MPIExecutor
109
from parsl.launchers import SimpleLauncher
@@ -42,8 +41,8 @@ def test_docstring():
4241
def test_init():
4342
"""Ensure all relevant kwargs are copied over from HTEx"""
4443

45-
new_kwargs = {'max_workers_per_block'}
46-
excluded_kwargs = {'available_accelerators', 'enable_mpi_mode', 'cores_per_worker', 'max_workers_per_node',
44+
new_kwargs = {'max_workers_per_block', 'mpi_launcher'}
45+
excluded_kwargs = {'available_accelerators', 'cores_per_worker', 'max_workers_per_node',
4746
'mem_per_worker', 'cpu_affinity', 'max_workers', 'manager_selector'}
4847

4948
# Get the kwargs from both HTEx and MPIEx

0 commit comments

Comments
 (0)