|
1 | 1 | import logging |
2 | 2 | import math |
3 | 3 | import pickle |
| 4 | +import subprocess |
4 | 5 | import threading |
5 | 6 | import typing |
6 | 7 | import warnings |
7 | 8 | from collections import defaultdict |
8 | 9 | from concurrent.futures import Future |
9 | 10 | from dataclasses import dataclass |
10 | | -from multiprocessing import Process |
11 | 11 | from typing import Callable, Dict, List, Optional, Sequence, Tuple, Union |
12 | 12 |
|
13 | 13 | import typeguard |
|
18 | 18 | from parsl.app.errors import RemoteExceptionWrapper |
19 | 19 | from parsl.data_provider.staging import Staging |
20 | 20 | from parsl.executors.errors import BadMessage, ScalingFailed |
21 | | -from parsl.executors.high_throughput import interchange, zmq_pipes |
| 21 | +from parsl.executors.high_throughput import zmq_pipes |
22 | 22 | from parsl.executors.high_throughput.errors import CommandClientTimeoutError |
23 | 23 | from parsl.executors.high_throughput.mpi_prefix_composer import ( |
24 | 24 | VALID_LAUNCHERS, |
25 | 25 | validate_resource_spec, |
26 | 26 | ) |
27 | 27 | from parsl.executors.status_handling import BlockProviderExecutor |
28 | 28 | from parsl.jobs.states import TERMINAL_STATES, JobState, JobStatus |
29 | | -from parsl.multiprocessing import ForkProcess |
30 | 29 | from parsl.process_loggers import wrap_with_logs |
31 | 30 | from parsl.providers import LocalProvider |
32 | 31 | from parsl.providers.base import ExecutionProvider |
@@ -305,7 +304,7 @@ def __init__(self, |
305 | 304 | self._task_counter = 0 |
306 | 305 | self.worker_ports = worker_ports |
307 | 306 | self.worker_port_range = worker_port_range |
308 | | - self.interchange_proc: Optional[Process] = None |
| 307 | + self.interchange_proc: Optional[subprocess.Popen] = None |
309 | 308 | self.interchange_port_range = interchange_port_range |
310 | 309 | self.heartbeat_threshold = heartbeat_threshold |
311 | 310 | self.heartbeat_period = heartbeat_period |
@@ -520,38 +519,45 @@ def _queue_management_worker(self): |
520 | 519 |
|
521 | 520 | logger.info("Queue management worker finished") |
522 | 521 |
|
def _start_local_interchange_process(self) -> None:
    """Start the interchange as a local subprocess and learn its worker ports.

    The interchange is launched with ``subprocess.Popen`` as the
    ``interchange.py`` command. Its configuration is a dict that is
    pickled and written to the child's stdin. Once the configuration has
    been sent, the command client is asked for ``WORKER_PORTS`` so that
    ``self.worker_task_port`` and ``self.worker_result_port`` hold the
    ports the interchange has bound to.

    Raises:
        Exception: if the ``WORKER_PORTS`` command does not complete
            within 120 seconds (``CommandClientTimeoutError``).
    """
    interchange_config = {
        "client_address": "127.0.0.1",
        "client_ports": (self.outgoing_q.port,
                         self.incoming_q.port,
                         self.command_client.port),
        "interchange_address": self.address,
        "worker_ports": self.worker_ports,
        "worker_port_range": self.worker_port_range,
        "hub_address": self.hub_address,
        "hub_zmq_port": self.hub_zmq_port,
        "logdir": self.logdir,
        "heartbeat_threshold": self.heartbeat_threshold,
        "poll_period": self.poll_period,
        "logging_level": logging.DEBUG if self.worker_debug else logging.INFO,
        "cert_dir": self.cert_dir,
    }

    config_pickle = pickle.dumps(interchange_config)

    self.interchange_proc = subprocess.Popen(b"interchange.py", stdin=subprocess.PIPE)
    stdin = self.interchange_proc.stdin
    # This assert narrows Optional[IO] for type checkers; Popen always
    # populates .stdin when stdin=PIPE is requested.
    assert stdin is not None, "Popen should have created an IO object (vs default None) because of PIPE mode"

    logger.debug("Popened interchange process. Writing config object")
    stdin.write(config_pickle)
    stdin.flush()
    # The config is the only thing sent over stdin, so close the pipe now:
    # this releases the file descriptor in this process and gives the child
    # an EOF instead of leaving the pipe open for the executor's lifetime.
    stdin.close()
    logger.debug("Sent config object. Requesting worker ports")
    try:
        (self.worker_task_port, self.worker_result_port) = self.command_client.run("WORKER_PORTS", timeout_s=120)
    except CommandClientTimeoutError as err:
        logger.error("Interchange has not completed initialization. Aborting")
        # Chain the timeout so the original cause stays in the traceback.
        raise Exception("Interchange failed to start") from err
    logger.debug("Got worker ports")
555 | 561 |
|
556 | 562 | def _start_queue_management_thread(self): |
557 | 563 | """Method to start the management thread as a daemon. |
@@ -810,13 +816,12 @@ def shutdown(self, timeout: float = 10.0): |
810 | 816 | logger.info("Attempting HighThroughputExecutor shutdown") |
811 | 817 |
|
812 | 818 | self.interchange_proc.terminate() |
813 | | - self.interchange_proc.join(timeout=timeout) |
814 | | - if self.interchange_proc.is_alive(): |
| 819 | + try: |
| 820 | + self.interchange_proc.wait(timeout=timeout) |
| 821 | + except subprocess.TimeoutExpired: |
815 | 822 | logger.info("Unable to terminate Interchange process; sending SIGKILL") |
816 | 823 | self.interchange_proc.kill() |
817 | 824 |
|
818 | | - self.interchange_proc.close() |
819 | | - |
820 | 825 | logger.info("Finished HighThroughputExecutor shutdown attempt") |
821 | 826 |
|
822 | 827 | def get_usage_information(self): |
|
0 commit comments