Skip to content

Commit 6e1b652

Browse files
author
Vitaliy Zakaznikov
committed
Fixing #87
1 parent a37e990 commit 6e1b652

File tree

3 files changed

+76
-5
lines changed

3 files changed

+76
-5
lines changed

testflows/github/hetzner/runners/dashboard/metrics/servers.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
tracker.track("github_hetzner_runners_servers_total", states=states)
2929
tracker.track("github_hetzner_runners_zombie_servers_total_count")
3030
tracker.track("github_hetzner_runners_unused_runners_total_count")
31-
tracker.track("github_hetzner_runners_recycled_servers_total")
31+
tracker.track("github_hetzner_runners_recycled_servers_total_count")
3232
# Register individual standby server status metrics
3333
for status in states:
3434
tracker.track(
@@ -157,7 +157,9 @@ def recycled_total_count():
157157
Returns:
158158
int: Number of recycled servers, or 0 if metric not available
159159
"""
160-
return int(recycled_summary()["total"] or 0)
160+
return int(
161+
get.metric_value("github_hetzner_runners_recycled_servers_total_count") or 0
162+
)
161163

162164

163165
def labels_info():
@@ -402,7 +404,7 @@ def health_history(cutoff_minutes=15):
402404

403405
# Get recycled servers total count history
404406
recycled_timestamps, recycled_values = history.data(
405-
"github_hetzner_runners_recycled_servers_total",
407+
"github_hetzner_runners_recycled_servers_total_count",
406408
cutoff_minutes=cutoff_minutes,
407409
)
408410
health_metrics["recycled"] = {

testflows/github/hetzner/runners/metrics.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from prometheus_client import Counter, Gauge, Histogram, Info
2424
from .estimate import get_server_price
2525
from .constants import standby_server_name_prefix
26+
from .constants import recycle_server_name_prefix
2627
from .server import get_runner_server_name
2728

2829

@@ -257,6 +258,24 @@
257258
],
258259
)
259260

261+
# Recycled servers metrics

# Number of recycled servers, one series per (server_type, location) pair.
RECYCLED_SERVERS_TOTAL = Gauge(
    name="github_hetzner_runners_recycled_servers_total",
    documentation="Total number of recycled servers",
    labelnames=["server_type", "location"],
)

# Unlabelled grand total of recycled servers.
RECYCLED_SERVERS_TOTAL_COUNT = Gauge(
    name="github_hetzner_runners_recycled_servers_total_count",
    documentation="Total number of recycled servers across all types and locations",
)

# One series per recycled server; the sample value is the server age in seconds.
RECYCLED_SERVER_INFO = Gauge(
    name="github_hetzner_runners_recycled_server",
    documentation="Recycled server information including age in seconds",
    labelnames=[
        "server_id",
        "server_name",
        "server_type",
        "location",
        "status",
        "created",
    ],
)
278+
260279
# Runner pool metrics
261280
RUNNER_POOL_STATUS = Gauge(
262281
"github_hetzner_runners_pool_status",
@@ -1201,6 +1220,55 @@ def update_unused_runners(unused_runners_dict):
12011220
)
12021221

12031222

1223+
def update_recycled_servers(servers):
    """Update recycled server metrics.

    Rebuilds the per-server age gauge, the per (server type, location)
    count gauge and the overall total gauge from scratch on every call,
    so label sets that belonged to servers no longer present are dropped.

    Args:
        servers: Iterable of server objects to check for recycled servers.
            Only servers whose name starts with ``recycle_server_name_prefix``
            are counted. ``None`` is treated as an empty collection.
    """
    # Drop stale label sets through the public API rather than reaching
    # into the private ``_metrics`` dict of the metric objects.
    RECYCLED_SERVERS_TOTAL.clear()
    RECYCLED_SERVER_INFO.clear()

    recycled_counts = {}
    current_time = time.time()

    for server in servers or ():
        # Recycled servers are identified purely by their name prefix
        if not server.name.startswith(recycle_server_name_prefix):
            continue

        server_type = server.server_type.name
        location = server.datacenter.location.name
        key = (server_type, location)

        # Count by type and location
        recycled_counts[key] = recycled_counts.get(key, 0) + 1

        # Track recycled server age; a missing creation time reports age 0
        # and an empty "created" label
        created = server.created
        server_age = current_time - created.timestamp() if created else 0
        RECYCLED_SERVER_INFO.labels(
            server_id=str(server.id),
            server_name=server.name,
            server_type=server_type,
            location=location,
            status=server.status,
            created=created.isoformat() if created else "",
        ).set(server_age)

    # Set the total once, to its final value, so a concurrent scrape never
    # observes a transient 0 while servers are still being counted.
    RECYCLED_SERVERS_TOTAL_COUNT.set(sum(recycled_counts.values()))

    # Set counts by type and location
    for (server_type, location), count in recycled_counts.items():
        RECYCLED_SERVERS_TOTAL.labels(
            server_type=server_type, location=location
        ).set(count)
1270+
1271+
12041272
def record_server_creation(server_type: str, location: str, creation_time: float):
12051273
"""Record metrics for a server creation.
12061274

testflows/github/hetzner/runners/scale_down.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -438,14 +438,15 @@ def scale_down(
438438
current_interval
439439
)
440440

441-
# Update zombie and unused runner metrics
441+
# Update zombie, unused runner, and recycled server metrics
442442
with Action(
443-
"Updating zombie and unused runner metrics",
443+
"Updating zombie, unused runner, and recycled server metrics",
444444
level=logging.DEBUG,
445445
interval=interval,
446446
):
447447
metrics.update_zombie_servers(zombie_servers)
448448
metrics.update_unused_runners(unused_runners)
449+
metrics.update_recycled_servers(servers)
449450

450451
with Action(
451452
"Checking for scale up failures", level=logging.DEBUG, interval=interval

0 commit comments

Comments
 (0)