Skip to content

Commit 5776ff5

Browse files
authored
add job share to dashboard (#535)
* add job share * update docs/changelog
1 parent 223edfc commit 5776ff5

File tree

3 files changed

+165
-64
lines changed

3 files changed

+165
-64
lines changed

docs/changelog.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,18 @@
11
# Changelog
22

3+
4+
### 2025-04-28
5+
6+
- Add a visualization of the share of jobs started per application.
7+
8+
39
### 2025-04-22
410

511
- Add how-to landing page.
612

713
### 2025-04-15
814

15+
916
- Fix a race condition where keypairs were being deleted while the server was still being built, potentially killing active GitHub Actions runs.
1017

1118
### 2025-04-09

docs/reference/cos.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ called "GitHub Self-Hosted Runner Metrics (Long-Term)".
1111
The "GitHub Self-Hosted Runner Metrics" metrics dashboard presents the following rows:
1212

1313
- General: Displays general metrics about the charm and runners, such as:
14+
- Share of jobs per application: A pie chart showing the percentage of started jobs attributed to each application.
1415
- Lifecycle counters: Tracks the frequency of Runner initialisation, start, stop, and crash events.
1516
- Available runners: A horizontal bar graph showing the number of runners available (and max expected) during the last reconciliation event. Note: This data is updated after each reconciliation event and is not real-time.
1617
- Runners after reconciliation: A time series graph showing, over time, the number of runners marked as active or idle, the number of expected runners, and the difference between the expected count and the sum of active and idle runners (shown as "unknown") during the last reconciliation event. Note: This data is updated after each reconciliation event and is not real-time.

src/grafana_dashboards/metrics.json

Lines changed: 157 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
"editable": true,
2525
"fiscalYearStartMonth": 0,
2626
"graphTooltip": 0,
27-
"id": 1085,
27+
"id": 51,
2828
"links": [],
2929
"liveNow": false,
3030
"panels": [
@@ -46,57 +46,21 @@
4646
"type": "loki",
4747
"uid": "${lokids}"
4848
},
49+
"description": "It counts the number of jobs that have been started per deployed GitHub Runner application and displays the percentage.",
4950
"fieldConfig": {
5051
"defaults": {
5152
"color": {
5253
"mode": "palette-classic"
5354
},
5455
"custom": {
55-
"axisCenteredZero": false,
56-
"axisColorMode": "text",
57-
"axisLabel": "",
58-
"axisPlacement": "auto",
59-
"barAlignment": 0,
60-
"drawStyle": "line",
61-
"fillOpacity": 0,
62-
"gradientMode": "none",
6356
"hideFrom": {
6457
"legend": false,
6558
"tooltip": false,
6659
"viz": false
67-
},
68-
"lineInterpolation": "linear",
69-
"lineWidth": 1,
70-
"pointSize": 5,
71-
"scaleDistribution": {
72-
"type": "linear"
73-
},
74-
"showPoints": "auto",
75-
"spanNulls": false,
76-
"stacking": {
77-
"group": "A",
78-
"mode": "none"
79-
},
80-
"thresholdsStyle": {
81-
"mode": "off"
8260
}
8361
},
84-
"decimals": 0,
8562
"mappings": [],
86-
"thresholds": {
87-
"mode": "absolute",
88-
"steps": [
89-
{
90-
"color": "green",
91-
"value": null
92-
},
93-
{
94-
"color": "red",
95-
"value": 80
96-
}
97-
]
98-
},
99-
"unit": "none"
63+
"unit": "short"
10064
},
10165
"overrides": []
10266
},
@@ -106,46 +70,46 @@
10670
"x": 0,
10771
"y": 1
10872
},
109-
"id": 3,
73+
"id": 25,
11074
"options": {
75+
"displayLabels": [
76+
"percent"
77+
],
11178
"legend": {
112-
"calcs": [],
11379
"displayMode": "list",
114-
"placement": "bottom",
115-
"showLegend": true
80+
"placement": "right",
81+
"showLegend": true,
82+
"values": []
83+
},
84+
"pieType": "pie",
85+
"reduceOptions": {
86+
"calcs": [
87+
"lastNotNull"
88+
],
89+
"fields": "",
90+
"values": true
11691
},
11792
"tooltip": {
11893
"mode": "single",
11994
"sort": "none"
12095
}
12196
},
97+
"pluginVersion": "9.5.3",
12298
"targets": [
12399
{
124100
"datasource": {
125101
"type": "loki",
126102
"uid": "${lokids}"
127103
},
128104
"editorMode": "code",
129-
"expr": "sum by(filename, event) (count_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\", timestamp=\"timestamp\", flavor=\"flavor\" | label_format timestamp=\"{{__timestamp__ | unixEpoch | mulf 1000}}\" | event =~ \"runner_start|runner_stop|runner_installed\" | flavor=~\"$flavor\" | timestamp >= ${__from} [$__range]))",
105+
"expr": "sum by(flavor) (count_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\", timestamp=\"timestamp\", flavor=\"flavor\" | event =~ \"runner_start\" | flavor=~\"$flavor\" [$__range]))",
130106
"key": "Q-f7c42eab-69be-43b5-a807-35c071f708a0-0",
131107
"legendFormat": "{{event}}",
132-
"queryType": "range",
108+
"queryType": "instant",
133109
"refId": "A"
134-
},
135-
{
136-
"datasource": {
137-
"type": "loki",
138-
"uid": "${lokids}"
139-
},
140-
"editorMode": "builder",
141-
"expr": "sum by(filename) (sum_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\", crashed_runners=\"crashed_runners\", timestamp=\"timestamp\", flavor=\"flavor\" | label_format timestamp=\"{{__timestamp__ | unixEpoch | mulf 1000}}\" | event = `reconciliation` | flavor=~\"$flavor\" | timestamp >= ${__from} | unwrap crashed_runners [$__range]))",
142-
"hide": false,
143-
"legendFormat": "Crashed",
144-
"queryType": "range",
145-
"refId": "D"
146110
}
147111
],
148-
"title": "Lifecycle Status",
112+
"title": "Share of jobs per application",
149113
"transformations": [
150114
{
151115
"id": "renameByRegex",
@@ -169,7 +133,7 @@
169133
}
170134
}
171135
],
172-
"type": "timeseries"
136+
"type": "piechart"
173137
},
174138
{
175139
"datasource": {
@@ -804,7 +768,7 @@
804768
"uid": "${lokids}"
805769
},
806770
"editorMode": "code",
807-
"expr": "sum by(filename)(last_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",expected_runners=\"expected_runners\",flavor=\"flavor\" | __error__=\"\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap expected_runners[60m])) - sum by(filename)(last_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",idle_runners=\"idle_runners\",flavor=\"flavor\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap idle_runners[60m])) - sum by(filename)(last_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",active_runners=\"active_runners\",flavor=\"flavor\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap active_runners[60m]))",
771+
"expr": "((sum by(filename)(last_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",expected_runners=\"expected_runners\",flavor=\"flavor\" | __error__=\"\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap expected_runners[60m])) - sum by(filename)(last_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",idle_runners=\"idle_runners\",flavor=\"flavor\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap idle_runners[60m]))) - sum by(filename)(last_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",active_runners=\"active_runners\",flavor=\"flavor\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap active_runners[60m])))",
808772
"hide": false,
809773
"legendFormat": "Unknown",
810774
"queryType": "range",
@@ -1070,6 +1034,136 @@
10701034
],
10711035
"type": "barchart"
10721036
},
1037+
{
1038+
"datasource": {
1039+
"type": "loki",
1040+
"uid": "${lokids}"
1041+
},
1042+
"fieldConfig": {
1043+
"defaults": {
1044+
"color": {
1045+
"mode": "palette-classic"
1046+
},
1047+
"custom": {
1048+
"axisCenteredZero": false,
1049+
"axisColorMode": "text",
1050+
"axisLabel": "",
1051+
"axisPlacement": "auto",
1052+
"barAlignment": 0,
1053+
"drawStyle": "line",
1054+
"fillOpacity": 0,
1055+
"gradientMode": "none",
1056+
"hideFrom": {
1057+
"legend": false,
1058+
"tooltip": false,
1059+
"viz": false
1060+
},
1061+
"lineInterpolation": "linear",
1062+
"lineWidth": 1,
1063+
"pointSize": 5,
1064+
"scaleDistribution": {
1065+
"type": "linear"
1066+
},
1067+
"showPoints": "auto",
1068+
"spanNulls": false,
1069+
"stacking": {
1070+
"group": "A",
1071+
"mode": "none"
1072+
},
1073+
"thresholdsStyle": {
1074+
"mode": "off"
1075+
}
1076+
},
1077+
"decimals": 0,
1078+
"mappings": [],
1079+
"thresholds": {
1080+
"mode": "absolute",
1081+
"steps": [
1082+
{
1083+
"color": "green",
1084+
"value": null
1085+
},
1086+
{
1087+
"color": "red",
1088+
"value": 80
1089+
}
1090+
]
1091+
},
1092+
"unit": "none"
1093+
},
1094+
"overrides": []
1095+
},
1096+
"gridPos": {
1097+
"h": 8,
1098+
"w": 12,
1099+
"x": 0,
1100+
"y": 33
1101+
},
1102+
"id": 3,
1103+
"options": {
1104+
"legend": {
1105+
"calcs": [],
1106+
"displayMode": "list",
1107+
"placement": "bottom",
1108+
"showLegend": true
1109+
},
1110+
"tooltip": {
1111+
"mode": "single",
1112+
"sort": "none"
1113+
}
1114+
},
1115+
"targets": [
1116+
{
1117+
"datasource": {
1118+
"type": "loki",
1119+
"uid": "${lokids}"
1120+
},
1121+
"editorMode": "code",
1122+
"expr": "sum by(filename, event) (count_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\", timestamp=\"timestamp\", flavor=\"flavor\" | label_format timestamp=\"{{__timestamp__ | unixEpoch | mulf 1000}}\" | event =~ \"runner_start|runner_stop|runner_installed\" | flavor=~\"$flavor\" | timestamp >= ${__from} [$__range]))",
1123+
"key": "Q-f7c42eab-69be-43b5-a807-35c071f708a0-0",
1124+
"legendFormat": "{{event}}",
1125+
"queryType": "range",
1126+
"refId": "A"
1127+
},
1128+
{
1129+
"datasource": {
1130+
"type": "loki",
1131+
"uid": "${lokids}"
1132+
},
1133+
"editorMode": "builder",
1134+
"expr": "sum by(filename) (sum_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\", crashed_runners=\"crashed_runners\", timestamp=\"timestamp\", flavor=\"flavor\" | label_format timestamp=\"{{__timestamp__ | unixEpoch | mulf 1000}}\" | event = `reconciliation` | flavor=~\"$flavor\" | timestamp >= ${__from} | unwrap crashed_runners [$__range]))",
1135+
"hide": false,
1136+
"legendFormat": "Crashed",
1137+
"queryType": "range",
1138+
"refId": "D"
1139+
}
1140+
],
1141+
"title": "Lifecycle Status",
1142+
"transformations": [
1143+
{
1144+
"id": "renameByRegex",
1145+
"options": {
1146+
"regex": "runner_installed",
1147+
"renamePattern": "Initialized"
1148+
}
1149+
},
1150+
{
1151+
"id": "renameByRegex",
1152+
"options": {
1153+
"regex": "runner_start",
1154+
"renamePattern": "Started"
1155+
}
1156+
},
1157+
{
1158+
"id": "renameByRegex",
1159+
"options": {
1160+
"regex": "runner_stop",
1161+
"renamePattern": "Stopped"
1162+
}
1163+
}
1164+
],
1165+
"type": "timeseries"
1166+
},
10731167
{
10741168
"datasource": {
10751169
"type": "loki",
@@ -1837,8 +1931,7 @@
18371931
"mode": "absolute",
18381932
"steps": [
18391933
{
1840-
"color": "green",
1841-
"value": null
1934+
"color": "green"
18421935
},
18431936
{
18441937
"color": "red",
@@ -2134,12 +2227,12 @@
21342227
]
21352228
},
21362229
"time": {
2137-
"from": "now-24h",
2230+
"from": "now-6h",
21382231
"to": "now"
21392232
},
21402233
"timepicker": {},
21412234
"timezone": "",
21422235
"title": "GitHub Self-Hosted Runner Metrics",
2143-
"version": 19,
2236+
"version": 20,
21442237
"weekStart": ""
21452238
}

0 commit comments

Comments
 (0)