Skip to content

Commit 43ab437

Browse files
authored
[ISD-3759] Change the threads to daemons, and cleanup processes (#584)
1 parent 54e6087 commit 43ab437

File tree

6 files changed

+39
-8
lines changed

6 files changed

+39
-8
lines changed

docs/changelog.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
This changelog documents user-relevant changes to the GitHub runner charm.
44

5+
## 2025-06-26
6+
7+
- Fix a process leak internal to the charm.
8+
59
## 2025-06-24
610

711
- Fix a bug where deleted GitHub Actions Job would cause an endless loop of retries.

github-runner-manager/src/github_runner_manager/cli.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,11 @@ def main(
8282
http_server_args = FlaskArgs(host=host, port=port, debug=debug)
8383

8484
thread_manager = ThreadManager()
85-
thread_manager.add_thread(target=partial(start_http_server, config, lock, http_server_args))
8685
thread_manager.add_thread(
87-
target=partial(start_reconcile_service, config, python_path_config, lock)
86+
target=partial(start_http_server, config, lock, http_server_args), daemon=True
87+
)
88+
thread_manager.add_thread(
89+
target=partial(start_reconcile_service, config, python_path_config, lock), daemon=True
8890
)
8991
thread_manager.start()
9092

src/manager_client.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ def func_with_error_handling(*args: Any, **kwargs: Any) -> Any:
6464
) from err
6565
except requests.ConnectionError as err:
6666
raise RunnerManagerServiceConnectionError(CONNECTION_ERROR_MESSAGE) from err
67+
except requests.ReadTimeout as err:
68+
raise RunnerManagerServiceConnectionError(CONNECTION_ERROR_MESSAGE) from err
6769

6870
return func_with_error_handling
6971

@@ -122,7 +124,7 @@ def check_runner(self) -> dict[str, str]:
122124
The information on the runners.
123125
"""
124126
self.wait_till_ready()
125-
response = self._request(_HTTPMethod.GET, "/runner/check")
127+
response = self._request(_HTTPMethod.GET, "/runner/check", timeout=600)
126128
runner_info = json.loads(response.text)
127129
runner_info["runners"] = tuple(runner_info["runners"])
128130
runner_info["busy_runners"] = tuple(runner_info["busy_runners"])
@@ -138,7 +140,7 @@ def flush_runner(self, busy: bool = False) -> None:
138140
"""
139141
self.wait_till_ready()
140142
params = {"flush-busy": str(busy)}
141-
self._request(_HTTPMethod.POST, "/runner/flush", params=params)
143+
self._request(_HTTPMethod.POST, "/runner/flush", params=params, timeout=600)
142144

143145
def health_check(self) -> None:
144146
"""Request a health check on the runner manager service.
@@ -151,7 +153,7 @@ def health_check(self) -> None:
151153
API requests.
152154
"""
153155
try:
154-
response = self._request(_HTTPMethod.GET, "/health")
156+
response = self._request(_HTTPMethod.GET, "/health", timeout=60)
155157
except requests.HTTPError as err:
156158
raise RunnerManagerServiceNotReadyError(NOT_READY_ERROR_MESSAGE) from err
157159
except requests.ConnectionError as err:

src/manager_service.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
JOB_MANAGER_PACKAGE_PATH = "./jobmanager/client"
3939
GITHUB_RUNNER_MANAGER_SERVICE_NAME = "github-runner-manager"
4040
GITHUB_RUNNER_MANAGER_SERVICE_LOG_DIR = Path("/var/log/github-runner-manager")
41+
GITHUB_RUNNER_MANAGER_SERVICE_EXECUTABLE_PATH = "/usr/local/bin/github-runner-manager"
4142

4243
_INSTALL_ERROR_MESSAGE = "Unable to install github-runner-manager package from source"
4344
_SERVICE_SETUP_ERROR_MESSAGE = "Unable to enable or start the github-runner-manager application"
@@ -62,6 +63,20 @@ def setup(state: CharmState, app_name: str, unit_name: str) -> None:
6263
systemd.service_stop(GITHUB_RUNNER_MANAGER_SERVICE_NAME)
6364
except SystemdError as err:
6465
raise RunnerManagerApplicationStartError(_SERVICE_SETUP_ERROR_MESSAGE) from err
66+
# Currently, there is some multiprocess issues that cause leftover processes.
67+
# This is a temp patch to clean them up.
68+
output, code = execute_command(
69+
["/usr/bin/pkill", "-f", GITHUB_RUNNER_MANAGER_SERVICE_EXECUTABLE_PATH], check_exit=False
70+
)
71+
if code == 1:
72+
logger.info("No leftover github-runner-manager process to clean up.")
73+
elif code == 0:
74+
logger.warning("Clean up leftover processes.")
75+
else:
76+
logger.warning(
77+
"Unexpected return code %s of pkill for cleanup processes: %s", code, output
78+
)
79+
6580
config = create_application_configuration(state, app_name, unit_name)
6681
config_file = _setup_config_file(config)
6782
GITHUB_RUNNER_MANAGER_SERVICE_LOG_DIR.mkdir(parents=True, exist_ok=True)

tests/integration/test_reactive.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ async def test_reactive_mode_scale_down(
250250

251251
# we assume that the runner got deleted while running the job, so we expect a failed job
252252
await wait_for_completion(run, conclusion="failure")
253+
await wait_for_reconcile(app)
253254
assert_queue_is_empty(mongodb_uri, app.name)
254255

255256
# 2. Spawn a job.

tests/unit/test_manager_service.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def mock_systemd_fixture(monkeypatch):
4242

4343
@pytest.fixture(name="mock_execute_command")
4444
def mock_execute_command_fixture(monkeypatch):
45-
mock_execute_command = MagicMock()
45+
mock_execute_command = MagicMock(return_value=("Mock", 0))
4646
monkeypatch.setattr("manager_service.execute_command", mock_execute_command)
4747
return mock_execute_command
4848

@@ -63,6 +63,7 @@ def test_setup_started(
6363
mock_service_path: Path,
6464
mock_home_path: Path,
6565
mock_systemd: MagicMock,
66+
mock_execute_command: MagicMock,
6667
):
6768
"""
6869
arrange: Mock the service to be running.
@@ -93,7 +94,10 @@ def test_setup_started(
9394

9495

9596
def test_setup_no_started(
96-
patch_file_paths: None, complete_charm_state: CharmState, mock_systemd: MagicMock
97+
patch_file_paths: None,
98+
complete_charm_state: CharmState,
99+
mock_systemd: MagicMock,
100+
mock_execute_command: MagicMock,
97101
):
98102
"""
99103
arrange: Mock the service to be not running.
@@ -108,7 +112,10 @@ def test_setup_no_started(
108112

109113

110114
def test_setup_systemd_error(
111-
patch_file_paths: None, complete_charm_state: CharmState, mock_systemd: MagicMock
115+
patch_file_paths: None,
116+
complete_charm_state: CharmState,
117+
mock_systemd: MagicMock,
118+
mock_execute_command: MagicMock,
112119
):
113120
"""
114121
arrange: Mock the systemd to raise error.

0 commit comments

Comments
 (0)