Skip to content

Commit 63a2da4

Browse files
committed
feat: add logging to netstat tools for process killing
- Introduced a logging function to capture messages during the killing of leftover processes in `netstat_tools.py`.
- Updated `kill_old_cluster` to accept a logging function and log messages to `scheduling.log`.
- Modified `cluster_getter.py` to pass a logging function to `kill_old_cluster`.
1 parent e3c8f41 commit 63a2da4

File tree

2 files changed

+21
-5
lines changed

2 files changed

+21
-5
lines changed

cardano_node_tests/cluster_management/cluster_getter.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,9 @@ def _respin(self, start_cmd: str = "", stop_cmd: str = "") -> bool: # noqa: C90
156156
f"stop_cmd='{startup_files.stop_script}'"
157157
)
158158

159+
def _netstat_log_func(msg: str) -> None:
    """Forward `msg` to `self.log`, prefixed with this cluster instance's tag."""
    instance_tag = f"c{self.cluster_instance_num}"
    self.log(f"{instance_tag}: {msg}")
161+
159162
excp: Exception | None = None
160163
netstat_out = ""
161164
for i in range(2):
@@ -175,7 +178,9 @@ def _respin(self, start_cmd: str = "", stop_cmd: str = "") -> bool: # noqa: C90
175178
time.sleep(10)
176179

177180
# Kill the leftover processes
178-
netstat_tools.kill_old_cluster(instance_num=self.cluster_instance_num)
181+
netstat_tools.kill_old_cluster(
182+
instance_num=self.cluster_instance_num, log_func=_netstat_log_func
183+
)
179184

180185
# Save artifacts only when produced during this test run
181186
if cluster_running_file.exists() or i > 0:

cardano_node_tests/cluster_management/netstat_tools.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import os
55
import re
66
import time
7+
import typing as tp
78

89
from cardano_node_tests.utils import cluster_nodes
910
from cardano_node_tests.utils import helpers
@@ -22,7 +23,7 @@ def get_netstat_out() -> str:
2223
return ""
2324

2425

25-
def kill_old_cluster(instance_num: int) -> None: # noqa: C901
26+
def kill_old_cluster(instance_num: int, log_func: tp.Callable[[str], None]) -> None: # noqa: C901
2627
"""Attempt to kill all processes left over from a previous cluster instance."""
2728

2829
def _get_netstat_split() -> list[str]:
@@ -39,9 +40,18 @@ def _try_kill(pid: int) -> None:
3940
try:
4041
os.kill(pid, 15)
4142
except Exception as excp:
42-
LOGGER.error(f"Failed to kill leftover process PID {pid}: {excp}") # noqa: TRY400
43+
log_func(f"Failed to kill leftover process PID {pid}: {excp}")
4344
return
4445

46+
def _get_proc_cmdline(pid: int) -> str:
    """Return the command line of process `pid` as one space-separated string.

    Reads `/proc/<pid>/cmdline`, where the arguments are separated by NUL bytes.
    This is best-effort: an empty string is returned when the file cannot be
    read (e.g. the process no longer exists or access is denied).
    """
    try:
        # Read raw bytes: arguments are arbitrary byte strings, and text-mode
        # decoding with the locale encoding could raise and lose the whole value.
        with open(f"/proc/{pid}/cmdline", "rb") as f:
            raw = f.read()
    except OSError:
        return ""

    # Undecodable bytes become U+FFFD instead of discarding the command line.
    return raw.decode("utf-8", errors="replace").replace("\0", " ").strip()
54+
4555
port_nums = cluster_nodes.get_cluster_type().cluster_scripts.get_instance_ports(instance_num)
4656
port_strs = [
4757
# Add whitespace to the end of each port number to avoid matching a port number that is a
@@ -63,7 +73,7 @@ def _try_kill(pid: int) -> None:
6373
continue
6474
pid = _get_pid(line)
6575
if pid:
66-
LOGGER.info(f"Killing supervisor process: PID {pid}")
76+
log_func(f"Killing supervisor process: PID {pid}")
6777
_try_kill(pid)
6878
time.sleep(5)
6979
break
@@ -78,7 +88,8 @@ def _try_kill(pid: int) -> None:
7888
found = True
7989
pid = _get_pid(line)
8090
if pid:
81-
LOGGER.info(f"Killing leftover process: PID {pid}")
91+
cmdline = _get_proc_cmdline(pid)
92+
log_func(f"Killing leftover process: PID {pid}; cmdline: {cmdline}")
8293
_try_kill(pid)
8394
time.sleep(5)
8495
break

0 commit comments

Comments
 (0)