Skip to content

Commit 9bf6306

Browse files
committed
feat: add monitoring thread to _multi_connect for early timeout exit
This commit adds a background monitoring thread to the `_multi_connect` method that checks whether the MAPDL process is alive during connection attempts. This allows PyMAPDL to exit early if the process dies, rather than waiting for the full timeout period.

Key changes:
- Added a monitoring thread in `_multi_connect` that runs in parallel with the connection attempts.
- The thread checks the process status every 0.5 seconds.
- Connection attempts exit early when process death is detected.
- The thread is cleaned up properly using `Event` and `join` mechanisms.
- Only local MAPDL instances are monitored.

Tests added: 7 new tests covering valid connections, early exit on death, monitoring conditions, thread cleanup, process death detection, successful connection cleanup, and remote instance behavior.
1 parent 8efc45e commit 9bf6306

File tree

2 files changed

+389
-29
lines changed

2 files changed

+389
-29
lines changed

src/ansys/mapdl/core/mapdl_grpc.py

Lines changed: 85 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -617,40 +617,96 @@ def _multi_connect(self, n_attempts=5, timeout=15):
617617
connected = False
618618
attempt_timeout = int(timeout / n_attempts)
619619

620-
max_time = time.time() + timeout
621-
i = 1
622-
while time.time() < max_time and i <= n_attempts:
623-
self._log.debug("Connection attempt %d", i)
624-
connected = self._connect(timeout=attempt_timeout)
625-
i += 1
626-
if connected:
627-
self._log.debug("Connected")
628-
break
629-
else:
630-
# Check if mapdl process is alive
631-
msg = (
632-
f"Unable to connect to MAPDL gRPC instance at {self._channel_str}.\n"
633-
f"Reached either maximum amount of connection attempts ({n_attempts}) or timeout ({timeout} s)."
620+
# Start monitoring thread to check if MAPDL is alive
621+
monitor_stop_event = threading.Event()
622+
monitor_exception = {"error": None}
623+
624+
def monitor_mapdl_alive():
625+
"""Monitor thread to check if MAPDL process is alive."""
626+
from ansys.mapdl.core.launcher import (
627+
_check_process_is_alive,
628+
_check_file_error_created,
634629
)
630+
631+
try:
632+
while not monitor_stop_event.is_set():
633+
# Only monitor if we have a local process
634+
if self._local and self._mapdl_process and self._path:
635+
try:
636+
# Check if process is alive
637+
_check_process_is_alive(self._mapdl_process, self._path)
638+
639+
except Exception as e:
640+
# Process died or something went wrong
641+
monitor_exception["error"] = e
642+
monitor_stop_event.set()
643+
break
644+
645+
# Check every 0.5 seconds
646+
monitor_stop_event.wait(0.5)
647+
648+
except Exception as e:
649+
self._log.debug(f"Monitor thread encountered error: {e}")
650+
monitor_exception["error"] = e
651+
652+
# Start the monitoring thread
653+
monitor_thread = None
654+
if self._local and self._mapdl_process:
655+
monitor_thread = threading.Thread(target=monitor_mapdl_alive, daemon=True)
656+
monitor_thread.start()
657+
self._log.debug("Started MAPDL monitoring thread")
635658

636-
if self._mapdl_process is not None and psutil.pid_exists(
637-
self._mapdl_process.pid
638-
):
639-
# Process is alive
640-
raise MapdlConnectionError(
641-
msg
642-
+ f" The MAPDL process seems to be alive (PID: {self._mapdl_process.pid}) but PyMAPDL cannot connect to it."
643-
)
659+
try:
660+
max_time = time.time() + timeout
661+
i = 1
662+
while time.time() < max_time and i <= n_attempts:
663+
# Check if monitoring thread detected a problem
664+
if monitor_exception["error"] is not None:
665+
self._log.debug("Monitor detected MAPDL process issue, stopping connection attempts")
666+
raise monitor_exception["error"]
667+
668+
self._log.debug("Connection attempt %d", i)
669+
connected = self._connect(timeout=attempt_timeout)
670+
i += 1
671+
if connected:
672+
self._log.debug("Connected")
673+
break
674+
675+
# Check again after connection attempt
676+
if monitor_exception["error"] is not None:
677+
self._log.debug("Monitor detected MAPDL process issue after connection attempt")
678+
raise monitor_exception["error"]
644679
else:
645-
pid_msg = (
646-
f" PID: {self._mapdl_process.pid}"
647-
if self._mapdl_process is not None
648-
else ""
649-
)
650-
raise MapdlConnectionError(
651-
msg + f" The MAPDL process has died{pid_msg}."
680+
# Check if mapdl process is alive
681+
msg = (
682+
f"Unable to connect to MAPDL gRPC instance at {self._channel_str}.\n"
683+
f"Reached either maximum amount of connection attempts ({n_attempts}) or timeout ({timeout} s)."
652684
)
653685

686+
if self._mapdl_process is not None and psutil.pid_exists(
687+
self._mapdl_process.pid
688+
):
689+
# Process is alive
690+
raise MapdlConnectionError(
691+
msg
692+
+ f" The MAPDL process seems to be alive (PID: {self._mapdl_process.pid}) but PyMAPDL cannot connect to it."
693+
)
694+
else:
695+
pid_msg = (
696+
f" PID: {self._mapdl_process.pid}"
697+
if self._mapdl_process is not None
698+
else ""
699+
)
700+
raise MapdlConnectionError(
701+
msg + f" The MAPDL process has died{pid_msg}."
702+
)
703+
finally:
704+
# Stop the monitoring thread
705+
monitor_stop_event.set()
706+
if monitor_thread is not None:
707+
monitor_thread.join(timeout=1.0)
708+
self._log.debug("Stopped MAPDL monitoring thread")
709+
654710
self._exited = False
655711

656712
def _is_alive_subprocess(self): # numpydoc ignore=RT01

0 commit comments

Comments
 (0)