Some fixes for integration tests (#441)

javierdelapuente · web-flow · commit 5e348d5d7bf7 · 2025-02-05T14:21:59.000+01:00
diff --git a/github-runner-manager/src/github_runner_manager/templates/openstack-userdata.sh.j2 b/github-runner-manager/src/github_runner_manager/templates/openstack-userdata.sh.j2
@@ -7,6 +7,9 @@ hostnamectl set-hostname github-runner
 # Write .env contents
 su - ubuntu -c 'cd ~/actions-runner && echo "{{ env_contents }}" > .env'
 
+snap refresh --hold=24h
+snap watch --last=auto-refresh?
+
 {% if aproxy_address %}
 snap install aproxy --edge
 snap set aproxy proxy={{ aproxy_address }} listen=:54969
diff --git a/src/charm.py b/src/charm.py
@@ -110,7 +110,7 @@ class ReconcileRunnersEvent(EventBase):
 
 
 def catch_charm_errors(
-    func: Callable[["GithubRunnerCharm", EventT], None]
+    func: Callable[["GithubRunnerCharm", EventT], None],
 ) -> Callable[["GithubRunnerCharm", EventT], None]:
     """Catch common errors in charm.
 
@@ -145,7 +145,7 @@ def func_with_catch_errors(self: "GithubRunnerCharm", event: EventT) -> None:
 
 
 def catch_action_errors(
-    func: Callable[["GithubRunnerCharm", ActionEvent], None]
+    func: Callable[["GithubRunnerCharm", ActionEvent], None],
 ) -> Callable[["GithubRunnerCharm", ActionEvent], None]:
     """Catch common errors in actions.
 
@@ -339,22 +339,26 @@ def _on_config_changed(self, _: ConfigChangedEvent) -> None:
         state = self._setup_state()
         self._set_reconcile_timer()
 
+        flush_and_reconcile = False
         if state.charm_config.token != self._stored.token:
-            self._stored.token = None
+            self._stored.token = self.config[TOKEN_CONFIG_NAME]
+            flush_and_reconcile = True
         if self.config[PATH_CONFIG_NAME] != self._stored.path:
             self._stored.path = self.config[PATH_CONFIG_NAME]
+            flush_and_reconcile = True
         if self.config[LABELS_CONFIG_NAME] != self._stored.labels:
             self._stored.labels = self.config[LABELS_CONFIG_NAME]
+            flush_and_reconcile = True
 
         state = self._setup_state()
 
         if not self._get_set_image_ready_status():
             return
-        if state.charm_config.token != self._stored.token:
+        if flush_and_reconcile:
+            logger.info("Flush and reconcile on config-changed")
             runner_scaler = self._get_runner_scaler(state)
             runner_scaler.flush(flush_mode=FlushMode.FLUSH_IDLE)
             self._reconcile_openstack_runners(runner_scaler, state.runner_config.virtual_machines)
-            # TODO: 2024-04-12: Flush on token changes.
 
     @catch_charm_errors
     def _on_reconcile_runners(self, _: ReconcileRunnersEvent) -> None:
@@ -518,16 +522,14 @@ def _apt_install(self, packages: Sequence[str]) -> None:
     @catch_charm_errors
     def _on_debug_ssh_relation_changed(self, _: ops.RelationChangedEvent) -> None:
         """Handle debug ssh relation changed event."""
+        self.unit.status = MaintenanceStatus("Added debug-ssh relation")
         state = self._setup_state()
 
         if not self._get_set_image_ready_status():
             return
         runner_scaler = self._get_runner_scaler(state)
         runner_scaler.flush()
-        try:
-            runner_scaler.reconcile(state.runner_config.virtual_machines)
-        except ReconcileError:
-            logger.exception(FAILED_TO_RECONCILE_RUNNERS_MSG)
+        self._reconcile_openstack_runners(runner_scaler, state.runner_config.virtual_machines)
 
     @catch_charm_errors
     def _on_image_relation_joined(self, _: ops.RelationJoinedEvent) -> None:
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
@@ -45,7 +45,7 @@
 from tests.integration.helpers.openstack import OpenStackInstanceHelper, PrivateEndpointConfigs
 from tests.status_name import ACTIVE
 
-IMAGE_BUILDER_DEPLOY_TIMEOUT_IN_SECONDS = 30 * 60
+IMAGE_BUILDER_DEPLOY_TIMEOUT_IN_SECONDS = 20 * 60
 
 # The following line is required because we are using request.getfixturevalue in conjunction
 # with pytest-asyncio. See https://github.com/pytest-dev/pytest-asyncio/issues/112
@@ -317,7 +317,7 @@ async def app_no_runner(
 ) -> AsyncIterator[Application]:
     """Application with no runner."""
     await basic_app.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "0"})
-    await model.wait_for_idle(apps=[basic_app.name], status=ACTIVE, timeout=90 * 60)
+    await model.wait_for_idle(apps=[basic_app.name], status=ACTIVE, timeout=20 * 60)
     yield basic_app
 
 
@@ -339,7 +339,10 @@ async def image_builder_fixture(
             config={
                 "app-channel": "edge",
                 "build-interval": "12",
-                "revision-history-limit": "5",
+                # There are several tests running simultaneously, all with the same images.
+                # Until we update the image-builder to create different names for the images,
+                # the history limit should be big enough so that tests do not interfere.
+                "revision-history-limit": "15",
                 "openstack-auth-url": private_endpoint_config["auth_url"],
                 # Bandit thinks this is a hardcoded password
                 "openstack-password": private_endpoint_config["password"],  # nosec: B105
@@ -401,7 +404,9 @@ async def app_openstack_runner_fixture(
             wait_idle=False,
         )
         await model.integrate(f"{image_builder.name}:image", f"{application.name}:image")
-    await model.wait_for_idle(apps=[application.name], status=ACTIVE, timeout=90 * 60)
+    await model.wait_for_idle(
+        apps=[application.name, image_builder.name], status=ACTIVE, timeout=20 * 60
+    )
 
     return application
 
@@ -415,7 +420,7 @@ async def app_scheduled_events_fixture(
     application = app_openstack_runner
     await application.set_config({"reconcile-interval": "8"})
     await application.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "1"})
-    await model.wait_for_idle(apps=[application.name], status=ACTIVE, timeout=90 * 60)
+    await model.wait_for_idle(apps=[application.name], status=ACTIVE, timeout=20 * 60)
     await reconcile(app=application, model=model)
     return application
 
diff --git a/tests/integration/helpers/common.py b/tests/integration/helpers/common.py
@@ -156,7 +156,7 @@ async def deploy_github_runner_charm(
     )
 
     if wait_idle:
-        await model.wait_for_idle(status=ACTIVE, timeout=60 * 40)
+        await model.wait_for_idle(status=ACTIVE, timeout=60 * 20)
 
     return application
 
diff --git a/tests/integration/test_charm_upgrade.py b/tests/integration/test_charm_upgrade.py
@@ -91,10 +91,10 @@ async def test_charm_upgrade(
     )
     await model.integrate(f"{image_builder.name}:image", f"{application.name}:image")
     await model.wait_for_idle(
-        apps=[application.name],
+        apps=[application.name, image_builder.name],
         raise_on_error=False,
         wait_for_active=True,
-        timeout=180 * 60,
+        timeout=20 * 60,
         check_freq=30,
     )
     origin = client.CharmOrigin(
@@ -125,6 +125,6 @@ async def test_charm_upgrade(
         apps=[application.name],
         raise_on_error=False,
         wait_for_active=True,
-        timeout=180 * 60,
+        timeout=20 * 60,
         check_freq=30,
     )
diff --git a/tests/integration/test_debug_ssh.py b/tests/integration/test_debug_ssh.py
@@ -34,7 +34,7 @@ async def test_ssh_debug(
     act: when canonical/action-tmate is triggered.
     assert: the ssh connection info from action-log and tmate-ssh-server matches.
     """
-    await model.wait_for_idle(status=ACTIVE, timeout=60 * 120)
+    await model.wait_for_idle(status=ACTIVE, timeout=60 * 20)
 
     unit = app_no_wait_tmate.units[0]
     # We need the runner to connect to the current machine, instead of the tmate_ssh_server unit,
diff --git a/tests/integration/test_reactive.py b/tests/integration/test_reactive.py
@@ -117,7 +117,7 @@ async def _runner_installed_in_metrics_log() -> bool:
         return "runner_installed" in events
 
     try:
-        await wait_for(_runner_installed_in_metrics_log, check_interval=30, timeout=600)
+        await wait_for(_runner_installed_in_metrics_log, check_interval=30, timeout=60 * 10)
     except TimeoutError:
         assert False, "runner_installed event has not been logged"
 
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
@@ -31,7 +31,7 @@
     RunnerManagerConfig,
 )
 from github_runner_manager.metrics import events
-from github_runner_manager.openstack_cloud import health_checks
+from github_runner_manager.openstack_cloud import constants, health_checks
 from github_runner_manager.openstack_cloud.openstack_runner_manager import (
     OpenStackCredentials,
     OpenStackRunnerManager,
@@ -51,6 +51,11 @@
 
 logger = logging.getLogger(__name__)
 
+# A higher create-server timeout is reasonable for integration tests:
+# a single machine that stays in BUILD for longer than the default timeout
+# is enough to break the tests.
+constants.CREATE_SERVER_TIMEOUT = 900
+
 
 @pytest.fixture(scope="module", name="runner_label")
 def runner_label():

Original file line number	Diff line number	Diff line change
`@@ -156,7 +156,7 @@ async def deploy_github_runner_charm(`
`156`	`156`	`)`
`157`	`157`
`158`	`158`	`if wait_idle:`
`159`		`- await model.wait_for_idle(status=ACTIVE, timeout=60 * 40)`
	`159`	`+ await model.wait_for_idle(status=ACTIVE, timeout=60 * 20)`
`160`	`160`
`161`	`161`	`return application`
`162`	`162`