Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,8 @@

from github_runner_manager import constants
from github_runner_manager.errors import GithubMetricsError, RunnerError
from github_runner_manager.manager.cloud_runner_manager import (
CloudRunnerInstance,
CloudRunnerManager,
CloudRunnerState,
HealthState,
)
from github_runner_manager.manager.models import InstanceID, RunnerIdentity, RunnerMetadata
from github_runner_manager.manager.vm_manager import VM, CloudRunnerManager, HealthState, VMState
from github_runner_manager.metrics import events as metric_events
from github_runner_manager.metrics import github as github_metrics
from github_runner_manager.metrics import runner as runner_metrics
Expand Down Expand Up @@ -76,14 +71,25 @@ class RunnerInstance:
name: str
instance_id: InstanceID
metadata: RunnerMetadata
health: HealthState
platform_state: PlatformRunnerState | None
cloud_state: CloudRunnerState
cloud_state: VMState

@property
def health(self) -> HealthState:
    """Overall health state of the runner instance.

    The health is derived solely from ``platform_state``:

    - no platform state: ``HealthState.UNHEALTHY``.
    - platform state is ``BUSY`` or ``IDLE``: ``HealthState.HEALTHY``.
    - any other platform state: ``HealthState.UNKNOWN``.

    Returns:
        The health state derived from the platform state of the runner.
    """
    if not self.platform_state:
        return HealthState.UNHEALTHY
    # Membership test, not equality: a single state never compares equal to the
    # two-element tuple, so using ``==`` here made the HEALTHY branch unreachable.
    if self.platform_state in (
        PlatformRunnerState.BUSY,
        PlatformRunnerState.IDLE,
    ):
        return HealthState.HEALTHY
    return HealthState.UNKNOWN

@classmethod
def from_cloud_and_platform_health(
cls,
cloud_instance: CloudRunnerInstance,
cloud_instance: VM,
platform_health_state: PlatformRunnerHealth | None,
) -> "RunnerInstance":
"""Construct an instance.
Expand All @@ -96,10 +102,9 @@ def from_cloud_and_platform_health(
The RunnerInstance instantiated from cloud instance and platform state.
"""
return cls(
name=cloud_instance.name,
name=cloud_instance.instance_id.name,
instance_id=cloud_instance.instance_id,
metadata=cloud_instance.metadata,
health=cloud_instance.health,
platform_state=(
PlatformRunnerState.from_platform_health(platform_health_state)
if platform_health_state is not None
Expand Down Expand Up @@ -188,16 +193,9 @@ def get_runners(self) -> tuple[RunnerInstance, ...]:
for cloud_runner in cloud_runners:
if cloud_runner.instance_id not in health_runners_map:
runner_instance = RunnerInstance.from_cloud_and_platform_health(cloud_runner, None)
runner_instance.health = HealthState.UNKNOWN
runner_instances.append(runner_instance)
continue
health_runner = health_runners_map[cloud_runner.instance_id]
if health_runner.deletable:
cloud_runner.health = HealthState.UNHEALTHY
elif health_runner.online:
cloud_runner.health = HealthState.HEALTHY
else:
cloud_runner.health = HealthState.UNHEALTHY
runner_instance = RunnerInstance.from_cloud_and_platform_health(
cloud_runner, health_runner
)
Expand Down Expand Up @@ -317,7 +315,7 @@ def _cleanup_resources(

def _delete_cloud_runners(
self,
cloud_runners: Sequence[CloudRunnerInstance],
cloud_runners: Sequence[VM],
runners_health: Sequence[PlatformRunnerHealth],
delete_busy_runners: bool = False,
) -> Iterable[runner_metrics.RunnerMetrics]:
Expand Down Expand Up @@ -551,7 +549,7 @@ def _create_runner(args: _CreateRunnerArgs) -> InstanceID:


def _filter_runner_to_delete(
cloud_runner: CloudRunnerInstance,
cloud_runner: VM,
health: PlatformRunnerHealth | None,
*,
clean_idle: bool = False,
Expand Down Expand Up @@ -598,7 +596,7 @@ def _filter_runner_to_delete(


def _runner_deletion_sort_key(
health_runners_map: dict[InstanceID, PlatformRunnerHealth], cloud_runner: CloudRunnerInstance
health_runners_map: dict[InstanceID, PlatformRunnerHealth], cloud_runner: VM
) -> int:
"""Order the runners in accordance to how inconvenient it is to delete them.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@
MissingServerConfigError,
ReconcileError,
)
from github_runner_manager.manager.cloud_runner_manager import HealthState
from github_runner_manager.manager.runner_manager import (
FlushMode,
IssuedMetricEventsStats,
RunnerInstance,
RunnerManager,
RunnerMetadata,
)
from github_runner_manager.manager.vm_manager import HealthState
from github_runner_manager.metrics import events as metric_events
from github_runner_manager.metrics.reconcile import (
BUSY_RUNNERS_COUNT,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class HealthState(Enum):
UNKNOWN = auto()


class CloudRunnerState(str, Enum):
class VMState(str, Enum):
"""Represent state of the instance hosting the runner.

Attributes:
Expand All @@ -61,7 +61,7 @@ class CloudRunnerState(str, Enum):
@staticmethod
def from_openstack_server_status( # pragma: no cover
openstack_server_status: str,
) -> "CloudRunnerState":
) -> "VMState":
"""Create from openstack server status.

The openstack server status are documented here:
Expand All @@ -73,72 +73,41 @@ def from_openstack_server_status( # pragma: no cover
Returns:
The state of the runner.
"""
state = CloudRunnerState.UNEXPECTED
state = VMState.UNEXPECTED
match openstack_server_status:
case "BUILD":
state = CloudRunnerState.CREATED
state = VMState.CREATED
case "REBUILD":
state = CloudRunnerState.CREATED
state = VMState.CREATED
case "ACTIVE":
state = CloudRunnerState.ACTIVE
state = VMState.ACTIVE
case "ERROR":
state = CloudRunnerState.ERROR
state = VMState.ERROR
case "STOPPED":
state = CloudRunnerState.STOPPED
state = VMState.STOPPED
case "DELETED":
state = CloudRunnerState.DELETED
state = VMState.DELETED
case "UNKNOWN":
state = CloudRunnerState.UNKNOWN
state = VMState.UNKNOWN
case _:
state = CloudRunnerState.UNEXPECTED
state = VMState.UNEXPECTED
return state


class CloudInitStatus(str, Enum):
    """Represents the state of the cloud-init script.

    The cloud-init script is used to launch ephemeral GitHub runners. While the script is
    being initialized, the GitHub runner is listening for jobs, or the GitHub runner is running
    a job, the cloud-init script should report the "running" status.

    Refer to the official documentation on cloud-init status:
    https://cloudinit.readthedocs.io/en/latest/howto/status.html.

    Attributes:
        NOT_STARTED: The cloud-init script has not yet been started.
        RUNNING: The cloud-init script is running.
        DONE: The cloud-init script has completed successfully.
        ERROR: There was an error while running the cloud-init script.
        DEGRADED: There was a non-critical issue while running the cloud-init script.
        DISABLED: Cloud-init was disabled by other system configurations.
    """

    NOT_STARTED = "not started"
    RUNNING = "running"
    DONE = "done"
    ERROR = "error"
    DEGRADED = "degraded"
    DISABLED = "disabled"


@dataclass
class CloudRunnerInstance:
class VM:
"""Information on the runner on the cloud.

Attributes:
name: Name of the instance hosting the runner.
instance_id: ID of the instance.
metadata: Metadata of the runner.
health: Health state of the runner.
state: State of the instance hosting the runner.
instance_id: VM instance identifier (NOT VM UUID).
metadata: Metadata associated with the VM.
state: The VM state.
created_at: Creation time of the runner in the cloud provider.
"""

name: str
instance_id: InstanceID
metadata: RunnerMetadata
health: HealthState
state: CloudRunnerState
state: VMState
created_at: datetime

def is_older_than(self, seconds: float) -> bool:
Expand Down Expand Up @@ -248,7 +217,7 @@ def create_runner(
self,
runner_identity: RunnerIdentity,
runner_context: RunnerContext,
) -> CloudRunnerInstance:
) -> VM:
"""Create a self-hosted runner.

Args:
Expand All @@ -257,10 +226,11 @@ def create_runner(
"""

@abc.abstractmethod
def get_runners(self) -> Sequence[CloudRunnerInstance]:
def get_runners(self) -> Sequence[VM]:
"""Get cloud self-hosted runners."""

@abc.abstractmethod
# Abstract methods do not have a return value, ignore the docstring error DCO031
def delete_vms(self, instance_ids: Sequence[InstanceID]) -> list[InstanceID]:
"""Delete cloud VM instances.

Expand All @@ -269,9 +239,10 @@ def delete_vms(self, instance_ids: Sequence[InstanceID]) -> list[InstanceID]:

Returns:
The deleted instance IDs.
"""
""" # noqa: DCO031

@abc.abstractmethod
# Abstract methods do not have a return value, ignore the docstring error DCO031
def extract_metrics(self, instance_ids: Sequence[InstanceID]) -> list[RunnerMetrics]:
"""Extract metrics from cloud VMs.

Expand All @@ -280,7 +251,7 @@ def extract_metrics(self, instance_ids: Sequence[InstanceID]) -> list[RunnerMetr

Returns:
The fetched runner metrics.
"""
""" # noqa: DCO031

@abc.abstractmethod
def cleanup(self) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pydantic import BaseModel, NonNegativeFloat

from github_runner_manager.errors import IssueMetricEventError
from github_runner_manager.manager.cloud_runner_manager import CodeInformation
from github_runner_manager.manager.vm_manager import CodeInformation

METRICS_LOG_PATH = Path("/var/log/github-runner-metrics.log")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,8 @@
from pydantic import ValidationError

from github_runner_manager.errors import IssueMetricEventError, RunnerMetricsError, SSHError
from github_runner_manager.manager.cloud_runner_manager import (
PostJobMetrics,
PreJobMetrics,
RunnerMetrics,
)
from github_runner_manager.manager.models import InstanceID
from github_runner_manager.manager.vm_manager import PostJobMetrics, PreJobMetrics, RunnerMetrics
from github_runner_manager.metrics import events as metric_events
from github_runner_manager.metrics.type import GithubJobMetrics
from github_runner_manager.openstack_cloud.constants import (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,8 @@
OpenStackError,
RunnerCreateError,
)
from github_runner_manager.manager.cloud_runner_manager import (
CloudRunnerInstance,
CloudRunnerManager,
CloudRunnerState,
RunnerMetrics,
)
from github_runner_manager.manager.models import InstanceID, RunnerContext, RunnerIdentity
from github_runner_manager.manager.runner_manager import HealthState
from github_runner_manager.manager.vm_manager import VM, CloudRunnerManager, RunnerMetrics, VMState
from github_runner_manager.metrics import runner as runner_metrics
from github_runner_manager.openstack_cloud.constants import (
CREATE_SERVER_TIMEOUT,
Expand Down Expand Up @@ -94,7 +88,7 @@ def create_runner(
self,
runner_identity: RunnerIdentity,
runner_context: RunnerContext,
) -> CloudRunnerInstance:
) -> VM:
"""Create a self-hosted runner.

Args:
Expand Down Expand Up @@ -127,7 +121,7 @@ def create_runner(
logger.info("Runner %s created successfully", instance.instance_id)
return self._build_cloud_runner_instance(instance)

def get_runners(self) -> Sequence[CloudRunnerInstance]:
def get_runners(self) -> Sequence[VM]:
"""Get cloud self-hosted runners.

Returns:
Expand All @@ -140,15 +134,13 @@ def cleanup(self) -> None:
"""Cleanup runner and resource on the cloud."""
self._openstack_cloud.delete_expired_keys()

def _build_cloud_runner_instance(self, instance: OpenstackInstance) -> CloudRunnerInstance:
def _build_cloud_runner_instance(self, instance: OpenstackInstance) -> VM:
"""Build a new cloud runner instance from an openstack instance."""
metadata = instance.metadata
return CloudRunnerInstance(
name=instance.instance_id.name,
return VM(
metadata=metadata,
instance_id=instance.instance_id,
health=HealthState.UNKNOWN,
state=CloudRunnerState.from_openstack_server_status(instance.status),
state=VMState.from_openstack_server_status(instance.status),
created_at=instance.created_at,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import factory

from github_runner_manager.manager.cloud_runner_manager import CodeInformation
from github_runner_manager.manager.vm_manager import CodeInformation
from github_runner_manager.metrics.events import Event, RunnerInstalled, RunnerStop


Expand Down
Loading
Loading