canonical
diff --git a/‎github-runner-manager/src/github_runner_manager/manager/runner_manager.py
Lines changed: 0 additions & 44 deletions b/‎github-runner-manager/src/github_runner_manager/manager/runner_manager.py
Lines changed: 0 additions & 44 deletions
diff --git a/‎github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_cloud.py
Lines changed: 1 addition & 1 deletion b/‎github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_cloud.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎github-runner-manager/src/github_runner_manager/reactive/consumer.py
Lines changed: 2 additions & 4 deletions b/‎github-runner-manager/src/github_runner_manager/reactive/consumer.py
Lines changed: 2 additions & 4 deletions
diff --git a/‎github-runner-manager/tests/unit/manager/test_runner_manager.py
Lines changed: 4 additions & 104 deletions b/‎github-runner-manager/tests/unit/manager/test_runner_manager.py
Lines changed: 4 additions & 104 deletions
diff --git a/‎github-runner-manager/tests/unit/test_runner_scaler.py
Lines changed: 0 additions & 2 deletions b/‎github-runner-manager/tests/unit/test_runner_scaler.py
Lines changed: 0 additions & 2 deletions
diff --git a/‎tests/integration/conftest.py
Lines changed: 2 additions & 2 deletions b/‎tests/integration/conftest.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎tests/integration/helpers/common.py
Lines changed: 25 additions & 3 deletions b/‎tests/integration/helpers/common.py
Lines changed: 25 additions & 3 deletions
diff --git a/‎tests/integration/helpers/openstack.py
Lines changed: 6 additions & 2 deletions b/‎tests/integration/helpers/openstack.py
Lines changed: 6 additions & 2 deletions
diff --git a/‎tests/integration/jobmanager/conftest.py
Lines changed: 2 additions & 2 deletions b/‎tests/integration/jobmanager/conftest.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎tests/integration/jobmanager/test_jobmanager_prespawned.py
Lines changed: 3 additions & 3 deletions b/‎tests/integration/jobmanager/test_jobmanager_prespawned.py
Lines changed: 3 additions & 3 deletions
@@ -5,7 +5,6 @@
 
 import copy
 import logging
-import time
 from dataclasses import dataclass
 from enum import Enum, auto
 from functools import partial
@@ -522,55 +521,12 @@ def _create_runner(args: _CreateRunnerArgs) -> InstanceID:
                 runner_identity=runner_identity,
                 runner_context=runner_context,
             )
-
-            # This wait should be deleted to make the runner creation as
-            # quick as possible. The waiting should only be done in the
-            # reactive case, before checking that a job was taken.
-            RunnerManager.wait_for_runner_online(
-                platform_provider=args.platform_provider,
-                runner_identity=runner_identity,
-            )
-
         except RunnerError:
             logger.warning("Deleting runner %s from platform after creation failed", instance_id)
             args.platform_provider.delete_runner(runner_info.identity)
             raise
         return instance_id
 
-    @staticmethod
-    def wait_for_runner_online(
-        platform_provider: PlatformProvider,
-        runner_identity: RunnerIdentity,
-    ) -> None:
-        """Wait until the runner is online.
-
-        The constant RUNNER_CREATION_WAITING_TIMES defines the time before calling
-        the platform provider to check if the runner is online. Besides online runner,
-        deletable runner will also be equivalent to online, as no more waiting should
-        be needed.
-
-        Args:
-            platform_provider: Platform provider to use for health checks.
-            runner_identity: Identity of the runner.
-
-        Raises:
-            RunnerError: If the runner did not come online after the specified time.
-
-        """
-        for wait_time in RUNNER_CREATION_WAITING_TIMES:
-            time.sleep(wait_time)
-            try:
-                runner_health = platform_provider.get_runner_health(runner_identity)
-            except PlatformApiError:
-                logger.exception("Error getting the runner health: %s", runner_identity)
-                continue
-            if runner_health.online or runner_health.deletable:
-                logger.info("Runner %s online", runner_identity)
-                break
-            logger.info("Runner %s not yet online", runner_identity)
-        else:
-            raise RunnerError(f"Runner {runner_identity} did not get online")
-
 
 def _filter_runner_to_delete(
     cloud_runner: CloudRunnerInstance,
 
@@ -253,7 +253,7 @@ def launch_instance(
                     userdata=cloud_init,
                     auto_ip=False,
                     timeout=CREATE_SERVER_TIMEOUT,
-                    wait=True,
+                    wait=False,
                     meta=meta,
                 )
             except openstack.exceptions.ResourceTimeout as err:
 
@@ -240,11 +240,9 @@ def _spawn_runner(
         return
     logger.info("Reactive runner spawned %s", instance_ids)
 
-    for iteration in range(5):
-        # Do not sleep on the first iteration — the job might already be taken.
+    for _ in range(5):
+        sleep(60)
         logger.info("Checking if job picked up for reactive runner %s", instance_ids)
-        if iteration != 0:
-            sleep(60)
         if platform_provider.check_job_been_picked_up(metadata=metadata, job_url=job_url):
             logger.info("Job picked %s. reactive runner ok %s", job_url, instance_ids)
             msg.ack()
 
@@ -3,12 +3,11 @@
 
 """Unit tests for the the runner_manager."""
 
-from unittest.mock import ANY, MagicMock
+from unittest.mock import MagicMock
 
 import pytest
 
 from github_runner_manager.errors import RunnerCreateError
-from github_runner_manager.manager import runner_manager as runner_manager_module
 from github_runner_manager.manager.cloud_runner_manager import (
     CloudRunnerInstance,
     CloudRunnerManager,
@@ -22,7 +21,6 @@
 from github_runner_manager.manager.runner_manager import RunnerManager
 from github_runner_manager.platform.platform_provider import (
     PlatformProvider,
-    PlatformRunnerHealth,
     RunnersHealthResponse,
 )
 from github_runner_manager.types_.github import GitHubRunnerStatus, SelfHostedRunner
@@ -104,55 +102,12 @@ def _get_runner_context(instance_id, metadata, labels):
     github_provider.delete_runner.assert_called_once_with(github_runner.identity)
 
 
-@pytest.mark.parametrize(
-    "creation_waiting_times,runner_unhealthy,runner_healthy",
-    [
-        pytest.param(
-            (0,),
-            None,
-            PlatformRunnerHealth(
-                identity=MagicMock(),
-                online=True,
-                busy=False,
-                deletable=False,
-            ),
-            id="online runner",
-        ),
-        pytest.param(
-            (0, 0),
-            PlatformRunnerHealth(
-                identity=MagicMock(),
-                online=False,
-                busy=True,
-                deletable=False,
-            ),
-            PlatformRunnerHealth(
-                identity=MagicMock(),
-                online=False,
-                busy=False,
-                deletable=True,
-            ),
-            id="deletable runner",
-        ),
-    ],
-)
-def test_create_runner(
-    monkeypatch: pytest.MonkeyPatch,
-    creation_waiting_times: tuple[int, ...],
-    runner_unhealthy: PlatformRunnerHealth | None,
-    runner_healthy: PlatformRunnerHealth,
-):
+def test_create_runner() -> None:
     """
-    arrange: Given a specific pattern for creation waiting times and a list of.
-        PlatformRunnerHealth objects being the last one a healthy runner.
+    arrange: None.
     act: call runner_manager.create_runners.
-    assert: The runner manager will create the runner and make requests to check the health
-        until it gets a healthy state.
+    assert: The runner manager will create the runner.
     """
-    monkeypatch.setattr(
-        runner_manager_module, "RUNNER_CREATION_WAITING_TIMES", creation_waiting_times
-    )
-
     cloud_runner_manager = MagicMock(spec=CloudRunnerManager)
     cloud_runner_manager.name_prefix = "unit-0"
 
@@ -161,10 +116,6 @@ def test_create_runner(
     github_runner = MagicMock()
     platform_provider.get_runner_context.return_value = (runner_context_mock, github_runner)
 
-    platform_provider.get_runner_health.side_effect = tuple(
-        runner_unhealthy for _ in range(len(creation_waiting_times) - 1)
-    ) + (runner_healthy,)
-
     runner_manager = RunnerManager(
         "managername",
         platform_provider=platform_provider,
@@ -176,54 +127,3 @@ def test_create_runner(
 
     assert instance_id
     cloud_runner_manager.create_runner.assert_called_once()
-    # The method to get the runner health was called three times
-    # until the runner was online.
-    assert platform_provider.get_runner_health.call_count == len(creation_waiting_times)
-    platform_provider.get_runner_health.assert_called()
-
-
-def test_create_runner_failed_waiting(monkeypatch: pytest.MonkeyPatch):
-    """
-    arrange: Given a specific pattern for creation waiting times and a list of.
-        PlatformRunnerHealth objects where none is healthy
-    act: call runner_manager.create_runners.
-    assert: The runner manager will create the runner, it will check for the health state,
-       but the runner will not get into healthy state and the platform api for deleting
-       the runner will be called.
-    """
-    runner_creation_waiting_times = (0, 0)
-    monkeypatch.setattr(
-        runner_manager_module, "RUNNER_CREATION_WAITING_TIMES", runner_creation_waiting_times
-    )
-
-    cloud_runner_manager = MagicMock(spec=CloudRunnerManager)
-    cloud_runner_manager.name_prefix = "unit-0"
-
-    platform_provider = MagicMock(spec=PlatformProvider)
-    runner_context_mock = MagicMock()
-    github_runner = MagicMock()
-    platform_provider.get_runner_context.return_value = (runner_context_mock, github_runner)
-
-    health_offline = PlatformRunnerHealth(
-        identity=MagicMock(), online=False, busy=False, deletable=False
-    )
-
-    platform_provider.get_runner_health.side_effect = (
-        health_offline,
-        health_offline,
-    )
-
-    runner_manager = RunnerManager(
-        "managername",
-        platform_provider=platform_provider,
-        cloud_runner_manager=cloud_runner_manager,
-        labels=["label1", "label2"],
-    )
-
-    () = runner_manager.create_runners(1, RunnerMetadata(), True)
-
-    # The runner was started even if it failed.
-    cloud_runner_manager.create_runner.assert_called_once()
-    assert platform_provider.get_runner_health.call_count == 2
-    platform_provider.get_runner_health.assert_called()
-    platform_provider.delete_runner.assert_called_once_with(ANY)
@@ -121,8 +121,6 @@ def runner_manager_fixture(
         cloud_runner_manager=mock_cloud,
         labels=["label1", "label2", "arm64", "noble", "flavorlabel"],
     )
-    # We do not want to wait in the unit tests for machines to be ready.
-    monkeypatch.setattr(runner_manager_module, "RUNNER_CREATION_WAITING_TIMES", (0,))
     return runner_manager
 
 
 
@@ -41,7 +41,7 @@
     MONGODB_APP_NAME,
     deploy_github_runner_charm,
     wait_for,
-    wait_for_reconcile,
+    wait_for_runner_ready,
 )
 from tests.integration.helpers.openstack import OpenStackInstanceHelper, PrivateEndpointConfigs
 from tests.status_name import ACTIVE
@@ -485,7 +485,7 @@ async def app_scheduled_events_fixture(
     await application.set_config({"reconcile-interval": "8"})
     await application.set_config({BASE_VIRTUAL_MACHINES_CONFIG_NAME: "1"})
     await model.wait_for_idle(apps=[application.name], status=ACTIVE, timeout=20 * 60)
-    await wait_for_reconcile(app=application, model=model)
+    await wait_for_runner_ready(app=application)
     return application
 
 
 
@@ -101,17 +101,16 @@ async def get_reconcile_id(unit: Unit) -> str:
     return stdout
 
 
-async def wait_for_reconcile(app: Application, model: Model) -> None:
+async def wait_for_reconcile(app: Application) -> None:
     """Wait until a reconcile has happened.
 
     Uses the first unit found in the application.
 
     Args:
         app: The GitHub Runner Charm application.
-        model: The machine charm model.
     """
     # Wait the application is actively reconciling. Avoid waiting for image, etc.
-    await model.wait_for_idle(apps=[app.name], status=ACTIVE)
+    await app.model.wait_for_idle(apps=[app.name], status=ACTIVE)
 
     unit = app.units[0]
     base_id = await get_reconcile_id(unit)
@@ -122,6 +121,29 @@ async def wait_for_reconcile(app: Application, model: Model) -> None:
             return
 
 
+async def wait_for_runner_ready(app: Application) -> None:
+    """Wait until a runner is online, raising AssertionError on timeout.
+
+    Uses the first unit found in the application.
+
+    Args:
+        app: The GitHub Runner Charm application.
+    """
+    await wait_for_reconcile(app)
+
+    # Poll "check-runners" up to 20 times, 30 seconds apart (about 10 minutes).
+    for _ in range(20):
+        action = await app.units[0].run_action("check-runners")
+        await action.wait()
+
+        if action.status == "completed" and int(action.results["online"]) >= 1:
+            break
+
+        await sleep(30)
+    else:
+        raise AssertionError("Timeout waiting for runner to be ready")
+
+
 async def deploy_github_runner_charm(
     model: Model,
     charm_file: str,
 
@@ -12,7 +12,11 @@
 from openstack.compute.v2.server import Server
 
 from charm_state import BASE_VIRTUAL_MACHINES_CONFIG_NAME
-from tests.integration.helpers.common import run_in_unit, wait_for, wait_for_reconcile
+from tests.integration.helpers.common import (
+    run_in_unit,
+    wait_for,
+    wait_for_runner_ready,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -152,7 +156,7 @@ async def _set_app_runner_amount(app: Application, num_runners: int) -> None:
             num_runners: The number of runners.
         """
         await app.set_config({BASE_VIRTUAL_MACHINES_CONFIG_NAME: f"{num_runners}"})
-        await wait_for_reconcile(app=app, model=app.model)
+        await wait_for_runner_ready(app=app)
 
     async def get_runner_names(self, unit: Unit) -> list[str]:
         """Get the name of all the runners in the unit.
 
@@ -98,7 +98,7 @@ async def app_fixture(
             PATH_CONFIG_NAME: jobmanager_base_url,
         }
     )
-    await wait_for_reconcile(app_for_jobmanager, app_for_jobmanager.model)
+    await wait_for_reconcile(app_for_jobmanager)
 
     httpserver.clear_all_handlers()
 
@@ -111,4 +111,4 @@ async def app_fixture(
             RECONCILE_INTERVAL_CONFIG_NAME: str(DEFAULT_RECONCILE_INTERVAL),
         }
     )
-    await wait_for_reconcile(app_for_jobmanager, app_for_jobmanager.model)
+    await wait_for_reconcile(app_for_jobmanager)
@@ -15,7 +15,7 @@
     RECONCILE_INTERVAL_CONFIG_NAME,
 )
 from tests.integration.conftest import DEFAULT_RECONCILE_INTERVAL
-from tests.integration.helpers.common import wait_for, wait_for_reconcile
+from tests.integration.helpers.common import wait_for, wait_for_reconcile, wait_for_runner_ready
 from tests.integration.helpers.openstack import OpenStackInstanceHelper
 from tests.integration.jobmanager.helpers import (
     GetRunnerHealthEndpoint,
@@ -147,7 +147,7 @@ async def _prepare_runner() -> bool:
     # TMP: hack to trigger reconcile by changing the configuration, which cause config_changed hook
     # to restart the reconcile service.
     await app.set_config({RECONCILE_INTERVAL_CONFIG_NAME: str(DEFAULT_RECONCILE_INTERVAL + 1)})
-    await wait_for_reconcile(app, app.model)
+    await wait_for_runner_ready(app)
 
     # At this point there should be a runner
     await _assert_runners(app, online=1, busy=1, offline=0, unknown=0)
@@ -159,6 +159,6 @@ async def _prepare_runner() -> bool:
     # TMP: hack to trigger reconcile by changing the configuration, which cause config_changed hook
     # to restart the reconcile service.
     await app.set_config({RECONCILE_INTERVAL_CONFIG_NAME: str(DEFAULT_RECONCILE_INTERVAL + 2)})
-    await wait_for_reconcile(app, app.model)
+    await wait_for_reconcile(app)
 
     await _assert_runners(app, online=0, busy=0, offline=0, unknown=0)
Original file line number	Diff line number	Diff line change
`@@ -253,7 +253,7 @@ def launch_instance(`
`253`	`253`	`userdata=cloud_init,`
`254`	`254`	`auto_ip=False,`
`255`	`255`	`timeout=CREATE_SERVER_TIMEOUT,`
`256`		`- wait=True,`
	`256`	`+ wait=False,`
`257`	`257`	`meta=meta,`
`258`	`258`	`)`
`259`	`259`	`except openstack.exceptions.ResourceTimeout as err:`
Original file line number	Diff line number	Diff line change
`@@ -121,8 +121,6 @@ def runner_manager_fixture(`
`121`	`121`	`cloud_runner_manager=mock_cloud,`
`122`	`122`	`labels=["label1", "label2", "arm64", "noble", "flavorlabel"],`
`123`	`123`	`)`
`124`		`- # We do not want to wait in the unit tests for machines to be ready.`
`125`		`- monkeypatch.setattr(runner_manager_module, "RUNNER_CREATION_WAITING_TIMES", (0,))`
`126`	`124`	`return runner_manager`
`127`	`125`
`128`	`126`
Original file line number	Diff line number	Diff line change
`@@ -98,7 +98,7 @@ async def app_fixture(`
`98`	`98`	`PATH_CONFIG_NAME: jobmanager_base_url,`
`99`	`99`	`}`
`100`	`100`	`)`
`101`		`- await wait_for_reconcile(app_for_jobmanager, app_for_jobmanager.model)`
	`101`	`+ await wait_for_reconcile(app_for_jobmanager)`
`102`	`102`
`103`	`103`	`httpserver.clear_all_handlers()`
`104`	`104`
`@@ -111,4 +111,4 @@ async def app_fixture(`
`111`	`111`	`RECONCILE_INTERVAL_CONFIG_NAME: str(DEFAULT_RECONCILE_INTERVAL),`
`112`	`112`	`}`
`113`	`113`	`)`
`114`		`- await wait_for_reconcile(app_for_jobmanager, app_for_jobmanager.model)`
	`114`	`+ await wait_for_reconcile(app_for_jobmanager)`