Skip to content

Commit dc36d61

Browse files
authored
fix: Specify compute api dynamically (#591)
* specify compute api dynamically * specify max nova compute api to be 2.91 * update changelog * lint * remove test_runner_manager_openstack.py
1 parent 426b54e commit dc36d61

File tree

5 files changed

+195
-44
lines changed

5 files changed

+195
-44
lines changed

docs/changelog.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22

33
This changelog documents user-relevant changes to the GitHub runner charm.
44

5+
## 2025-07-09
6+
7+
- Specify max supported nova compute api to be 2.91. This fixes an issue where the charm could fail
8+
due to a bug on the OpenStack side: https://bugs.launchpad.net/nova/+bug/2095364
9+
510
### 2025-06-30
611
- New configuration options aproxy-exclude-addresses and aproxy-redirect-ports for allowing aproxy to redirect arbitrary TCP traffic
712
- Added prometheus metrics to the GitHub runner manager application.

github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_cloud.py

Lines changed: 90 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@
3939

4040
_SSH_TIMEOUT = 30
4141
_TEST_STRING = "test_string"
42+
# Max nova compute we support is 2.91, because
43+
# - 2.96 has a bug with server list https://bugs.launchpad.net/nova/+bug/2095364
44+
# - 2.92 requires public key to be set in the keypair, which is not supported by the app
45+
# https://docs.openstack.org/api-ref/compute/#import-or-create-keypair
46+
_MAX_NOVA_COMPUTE_API_VERSION = "2.91"
4247

4348
SecurityRuleDict = dict[str, Any]
4449

@@ -153,33 +158,6 @@ def exception_handling_wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
153158
return exception_handling_wrapper
154159

155160

156-
@contextmanager
157-
def _get_openstack_connection(credentials: OpenStackCredentials) -> Iterator[OpenstackConnection]:
158-
"""Create a connection context managed object, to be used within with statements.
159-
160-
Using the context manager ensures that the connection is properly closed after use.
161-
162-
Args:
163-
credentials: The OpenStack authorization information.
164-
165-
Yields:
166-
An openstack.connection.Connection object.
167-
"""
168-
# api documents that keystoneauth1.exceptions.MissingRequiredOptions can be raised but
169-
# I could not reproduce it. Therefore, no catch here for such exception.
170-
with openstack.connect(
171-
auth_url=credentials.auth_url,
172-
project_name=credentials.project_name,
173-
username=credentials.username,
174-
password=credentials.password,
175-
region_name=credentials.region_name,
176-
user_domain_name=credentials.user_domain_name,
177-
project_domain_name=credentials.project_domain_name,
178-
) as conn:
179-
conn.authorize()
180-
yield conn
181-
182-
183161
class OpenstackCloud:
184162
"""Client to interact with OpenStack cloud.
185163
@@ -237,7 +215,7 @@ def launch_instance(
237215
instance_id = runner_identity.instance_id
238216
metadata = runner_identity.metadata
239217

240-
with _get_openstack_connection(credentials=self._credentials) as conn:
218+
with self._get_openstack_connection() as conn:
241219
security_group = OpenstackCloud._ensure_security_group(conn, ingress_tcp_ports)
242220
keypair = self._setup_keypair(conn, runner_identity.instance_id)
243221
meta = metadata.as_dict()
@@ -283,7 +261,7 @@ def get_instance(self, instance_id: InstanceID) -> OpenstackInstance | None:
283261
"""
284262
logger.info("Getting openstack server with %s", instance_id)
285263

286-
with _get_openstack_connection(credentials=self._credentials) as conn:
264+
with self._get_openstack_connection() as conn:
287265
server: OpenstackServer = conn.get_server(name_or_id=instance_id.name)
288266
if server is not None:
289267
return OpenstackInstance(server, self.prefix)
@@ -298,7 +276,7 @@ def delete_instance(self, instance_id: InstanceID) -> None:
298276
"""
299277
logger.info("Deleting openstack server with %s", instance_id)
300278

301-
with _get_openstack_connection(credentials=self._credentials) as conn:
279+
with self._get_openstack_connection() as conn:
302280
self._delete_instance(conn, instance_id)
303281

304282
def _delete_instance(self, conn: OpenstackConnection, instance_id: InstanceID) -> None:
@@ -400,7 +378,7 @@ def get_instances(self) -> tuple[OpenstackInstance, ...]:
400378
"""
401379
logger.info("Getting all openstack servers managed by the charm")
402380

403-
with _get_openstack_connection(credentials=self._credentials) as conn:
381+
with self._get_openstack_connection() as conn:
404382
instance_list = list(self._get_openstack_instances(conn))
405383
server_names = set(server.name for server in instance_list)
406384

@@ -417,7 +395,7 @@ def get_instances(self) -> tuple[OpenstackInstance, ...]:
417395
@_catch_openstack_errors
418396
def cleanup(self) -> None:
419397
"""Cleanup unused key files and openstack keypairs."""
420-
with _get_openstack_connection(credentials=self._credentials) as conn:
398+
with self._get_openstack_connection() as conn:
421399
instances = self._get_openstack_instances(conn)
422400
exclude_keyfiles_set = {
423401
self._get_key_path(InstanceID.build_from_name(self.prefix, server.name))
@@ -650,6 +628,86 @@ def _ensure_security_group(
650628
)
651629
return security_group
652630

631+
@contextmanager
632+
def _get_openstack_connection(self) -> Iterator[OpenstackConnection]:
633+
"""Create a connection context managed object, to be used within with statements.
634+
635+
Using the context manager ensures that the connection is properly closed after use.
636+
637+
Yields:
638+
An openstack.connection.Connection object.
639+
"""
640+
# api documents that keystoneauth1.exceptions.MissingRequiredOptions can be raised but
641+
# I could not reproduce it. Therefore, no catch here for such exception.
642+
643+
with openstack.connect(
644+
auth_url=self._credentials.auth_url,
645+
project_name=self._credentials.project_name,
646+
username=self._credentials.username,
647+
password=self._credentials.password,
648+
region_name=self._credentials.region_name,
649+
user_domain_name=self._credentials.user_domain_name,
650+
project_domain_name=self._credentials.project_domain_name,
651+
compute_api_version=self._max_compute_api_version,
652+
) as conn:
653+
conn.authorize()
654+
yield conn
655+
656+
@functools.cached_property
657+
def _max_compute_api_version(self) -> str:
658+
"""Determine the maximum compute API version supported by the client.
659+
660+
The sdk does not support versions greater than 2.95, so we need to ensure that the
661+
maximum version returned by the OpenStack cloud is not greater than that.
662+
https://bugs.launchpad.net/nova/+bug/2095364
663+
664+
Returns:
665+
The maximum compute API version to use for the client.
666+
"""
667+
max_version = self._determine_max_compute_api_version_by_cloud()
668+
if self._version_greater_than(max_version, _MAX_NOVA_COMPUTE_API_VERSION):
669+
logger.warning(
670+
"The maximum compute API version %s is greater than the supported version %s. "
671+
"Using the maximum supported version.",
672+
max_version,
673+
_MAX_NOVA_COMPUTE_API_VERSION,
674+
)
675+
return _MAX_NOVA_COMPUTE_API_VERSION
676+
return max_version
677+
678+
def _determine_max_compute_api_version_by_cloud(self) -> str:
679+
"""Determine the maximum compute API version supported by the OpenStack cloud.
680+
681+
Returns:
682+
The maximum compute API version as a string.
683+
"""
684+
with openstack.connect(
685+
auth_url=self._credentials.auth_url,
686+
project_name=self._credentials.project_name,
687+
username=self._credentials.username,
688+
password=self._credentials.password,
689+
region_name=self._credentials.region_name,
690+
user_domain_name=self._credentials.user_domain_name,
691+
project_domain_name=self._credentials.project_domain_name,
692+
) as conn:
693+
version_endpoint = conn.compute.get_endpoint()
694+
resp = conn.session.get(version_endpoint)
695+
return resp.json()["version"]["version"]
696+
697+
def _version_greater_than(self, version1: str, version2: str) -> bool:
698+
"""Compare two OpenStack API versions.
699+
700+
Args:
701+
version1: The first version to compare.
702+
version2: The second version to compare.
703+
704+
Returns:
705+
True if version1 is greater than version2, False otherwise.
706+
"""
707+
return tuple(int(x) for x in version1.split(".")) > tuple(
708+
int(x) for x in version2.split(".")
709+
)
710+
653711

654712
def get_missing_security_rules(
655713
security_group: OpenstackSecurityGroup, ingress_tcp_ports: list[int] | None

github-runner-manager/tests/unit/openstack_cloud/test_openstack_cloud.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
from github_runner_manager.errors import OpenStackError, SSHError
2121
from github_runner_manager.openstack_cloud.openstack_cloud import (
22+
_MAX_NOVA_COMPUTE_API_VERSION,
2223
_MIN_KEYPAIR_AGE_IN_SECONDS_BEFORE_DELETION,
2324
_TEST_STRING,
2425
DEFAULT_SECURITY_RULES,
@@ -302,3 +303,92 @@ def mock_ssh_connection(*_args, **_kwargs) -> MagicMock:
302303
pass
303304

304305
assert "No connectable SSH addresses found" in str(err.value)
306+
307+
308+
@pytest.mark.parametrize(
309+
"max_compute_api_version, expected_version",
310+
[
311+
pytest.param(
312+
"2.110",
313+
_MAX_NOVA_COMPUTE_API_VERSION,
314+
id="higher version but string is lexically smaller",
315+
),
316+
pytest.param(
317+
"2.92", _MAX_NOVA_COMPUTE_API_VERSION, id="one higher version than supported"
318+
),
319+
pytest.param("2.91", _MAX_NOVA_COMPUTE_API_VERSION, id="highest supported version"),
320+
pytest.param("2.90", "2.90", id="one version lower than supported"),
321+
pytest.param("2.1", "2.1", id="really low version"),
322+
],
323+
)
324+
def test_get_openstack_connection_sets_max_compute_api(
325+
openstack_cloud,
326+
monkeypatch: pytest.MonkeyPatch,
327+
max_compute_api_version: str,
328+
expected_version: str,
329+
):
330+
"""
331+
arrange: Setup get_compute_api to return different max versions.
332+
act: Get OpenStack connection using context manager
333+
assert: The max compute API version is set to be < 2.95.
334+
"""
335+
monkeypatch.setattr(
336+
openstack_cloud,
337+
"_determine_max_compute_api_version_by_cloud",
338+
MagicMock(return_value=max_compute_api_version),
339+
)
340+
openstack_connect_mock = MagicMock()
341+
monkeypatch.setattr(
342+
"github_runner_manager.openstack_cloud.openstack_cloud.openstack.connect",
343+
openstack_connect_mock,
344+
)
345+
346+
with openstack_cloud._get_openstack_connection():
347+
pass
348+
349+
assert openstack_connect_mock.call_args[1]["compute_api_version"] == expected_version
350+
351+
352+
def test_determine_max_api_version(
353+
openstack_cloud: OpenstackCloud, monkeypatch: pytest.MonkeyPatch
354+
):
355+
"""
356+
arrange: Mock the OpenStack connection to return a specific max API version.
357+
act: Call _determine_max_compute_api_version.
358+
assert: The returned version is as expected.
359+
"""
360+
mock_connection = MagicMock()
361+
monkeypatch.setattr(
362+
"github_runner_manager.openstack_cloud.openstack_cloud.openstack.connect",
363+
MagicMock(return_value=mock_connection),
364+
)
365+
mock_connection.__enter__.return_value = mock_connection
366+
367+
endpoint_resp_json = {
368+
"version": {
369+
"id": "v2.1",
370+
"status": "CURRENT",
371+
"version": "2.96",
372+
"min_version": "2.1",
373+
"updated": "2013-07-23T11:33:21Z",
374+
"links": [
375+
{"rel": "self", "href": "http://172.16.1.204/openstack-nova/v2.1/"},
376+
{"rel": "describedby", "type": "text/html", "href": "http://docs.openstack.org/"},
377+
],
378+
"media-types": [
379+
{
380+
"base": "application/json",
381+
"type": "application/vnd.openstack.compute+json;version=2.1",
382+
}
383+
],
384+
}
385+
}
386+
endpoint_resp = MagicMock()
387+
endpoint_resp.json.return_value = endpoint_resp_json
388+
389+
session_mock = MagicMock()
390+
mock_connection.session = session_mock
391+
session_mock.get = MagicMock(return_value=endpoint_resp)
392+
393+
max_version = openstack_cloud._determine_max_compute_api_version_by_cloud()
394+
assert max_version == "2.96"

tests/integration/helpers/openstack.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,10 +145,10 @@ async def ensure_charm_has_runner(self, app: Application) -> None:
145145
Args:
146146
app: The GitHub Runner Charm app to create the runner for.
147147
"""
148-
await OpenStackInstanceHelper._set_app_runner_amount(app, 1)
148+
await OpenStackInstanceHelper.set_app_runner_amount(app, 1)
149149

150150
@staticmethod
151-
async def _set_app_runner_amount(app: Application, num_runners: int) -> None:
151+
async def set_app_runner_amount(app: Application, num_runners: int) -> None:
152152
"""Reconcile the application to a runner amount.
153153
154154
Args:

tests/integration/test_charm_runner.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,13 @@ async def test_check_runner(app: Application, instance_helper: OpenStackInstance
4747
act: Run check_runner action.
4848
assert: Action returns result with one runner.
4949
"""
50-
await instance_helper.ensure_charm_has_runner(app)
50+
await instance_helper.set_app_runner_amount(app, 2)
5151

5252
action = await app.units[0].run_action("check-runners")
5353
await action.wait()
5454

5555
assert action.status == "completed"
56-
assert action.results["online"] == "1"
56+
assert action.results["online"] == "2"
5757
assert action.results["offline"] == "0"
5858
assert action.results["unknown"] == "0"
5959

@@ -68,17 +68,15 @@ async def test_flush_runner_and_resource_config(
6868
instance_helper: OpenStackInstanceHelper,
6969
) -> None:
7070
"""
71-
arrange: A working application with one runner.
71+
arrange: A working application with two runners.
7272
act:
73-
1. Run Check_runner action. Record the runner name for later.
74-
2. Nothing.
75-
3. Change the virtual machine resource configuration.
76-
4. Run flush_runner action.
77-
5. Dispatch a workflow to make runner busy and call flush_runner action.
73+
1. Run Check_runner action. Record the runner names for later.
74+
2. Flush runners.
75+
3. Dispatch a workflow to make runner busy and call flush_runner action.
7876
7977
assert:
80-
1. One runner exists.
81-
2. Check the resource matches the configuration.
78+
1. Two runner exists.
79+
2. Runners are recreated.
8280
3. The runner is not flushed since by default it flushes idle.
8381
8482
Test are combined to reduce number of runner spawned.

0 commit comments

Comments
 (0)