@@ -26,7 +26,67 @@ class JobMetrics:
     workflow_id: int


-def get_metrics(github_repo: github.Repository, workflows_to_track: dict[str, int]):
+@dataclass
+class GaugeMetric:
+    name: str
+    value: int
+    time_ns: int
+
+
+def get_sampled_workflow_metrics(github_repo: github.Repository):
+    """Gets global statistics about the Github workflow queue.
+
+    Args:
+      github_repo: A github repo object to use to query the relevant information.
+
+    Returns:
+      Returns a list of GaugeMetric objects, containing the relevant metrics about
+      the workflow.
+    """
+
+    # Other states are available (pending, waiting, etc.), but the meaning
+    # is not documented (see #70540).
+    # "queued" seems to be the info we want.
+    queued_workflow_count = len(
+        [
+            x
+            for x in github_repo.get_workflow_runs(status="queued")
+            if x.name in WORKFLOWS_TO_TRACK
+        ]
+    )
+    running_workflow_count = len(
+        [
+            x
+            for x in github_repo.get_workflow_runs(status="in_progress")
+            if x.name in WORKFLOWS_TO_TRACK
+        ]
+    )
+
+    workflow_metrics = []
+    workflow_metrics.append(
+        GaugeMetric(
+            "workflow_queue_size",
+            queued_workflow_count,
+            time.time_ns(),
+        )
+    )
+    workflow_metrics.append(
+        GaugeMetric(
+            "running_workflow_count",
+            running_workflow_count,
+            time.time_ns(),
+        )
+    )
+    # Always send a heartbeat metric so we can monitor whether this container
+    # is still able to log to Grafana.
+    workflow_metrics.append(
+        GaugeMetric("metrics_container_heartbeat", 1, time.time_ns())
+    )
+    return workflow_metrics
+
+
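A minimal sketch of how `get_sampled_workflow_metrics` could be exercised on its own; the repo name and token handling here are illustrative assumptions, and `WORKFLOWS_TO_TRACK` is the module-level allowlist this script already defines:

```python
import os

from github import Github  # PyGithub, as used by the rest of this script

# Hypothetical smoke test: poll the queue counters once and print them.
gh = Github(os.environ["GITHUB_TOKEN"])  # assumes a token in the environment
repo = gh.get_repo("llvm/llvm-project")
for metric in get_sampled_workflow_metrics(repo):
    print(f"{metric.name} value={metric.value} at {metric.time_ns}")
```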
+def get_per_workflow_metrics(
+    github_repo: github.Repository, workflows_to_track: dict[str, int]
+):
     """Gets the metrics for specified Github workflows.
 
     This function takes in a list of workflows to track, and optionally the
@@ -43,14 +103,14 @@ def get_metrics(github_repo: github.Repository, workflows_to_track: dict[str, in
       Returns a list of JobMetrics objects, containing the relevant metrics about
       the workflow.
     """
-    workflow_runs = iter(github_repo.get_workflow_runs())
-
     workflow_metrics = []
 
     workflows_to_include = set(workflows_to_track.keys())
 
-    while len(workflows_to_include) > 0:
-        workflow_run = next(workflow_runs)
+    for workflow_run in iter(github_repo.get_workflow_runs()):
+        if len(workflows_to_include) == 0:
+            break
+
         if workflow_run.status != "completed":
             continue
@@ -139,12 +199,27 @@ def upload_metrics(workflow_metrics, metrics_userid, api_key):
139199 metrics_userid: The userid to use for the upload.
140200 api_key: The API key to use for the upload.
141201 """
202+
203+ if len (workflow_metrics ) == 0 :
204+ print ("No metrics found to upload." , file = sys .stderr )
205+ return
206+
142207 metrics_batch = []
143208 for workflow_metric in workflow_metrics :
144- workflow_formatted_name = workflow_metric .job_name .lower ().replace (" " , "_" )
145- metrics_batch .append (
146- f"{ workflow_formatted_name } queue_time={ workflow_metric .queue_time } ,run_time={ workflow_metric .run_time } ,status={ workflow_metric .status } { workflow_metric .created_at_ns } "
147- )
209+ if isinstance (workflow_metric , GaugeMetric ):
210+ name = workflow_metric .name .lower ().replace (" " , "_" )
211+ metrics_batch .append (
212+ f"{ name } value={ workflow_metric .value } { workflow_metric .time_ns } "
213+ )
214+ elif isinstance (workflow_metric , JobMetrics ):
215+ name = workflow_metric .job_name .lower ().replace (" " , "_" )
216+ metrics_batch .append (
217+ f"{ name } queue_time={ workflow_metric .queue_time } ,run_time={ workflow_metric .run_time } ,status={ workflow_metric .status } { workflow_metric .created_at_ns } "
218+ )
219+ else :
220+ raise ValueError (
221+ f"Unsupported object type { type (workflow_metric )} : { str (workflow_metric )} "
222+ )
148223
149224 request_data = "\n " .join (metrics_batch )
150225 response = requests .post (
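For reference, the strings batched here follow the Influx line protocol shape `name field=value timestamp` that the Grafana push endpoint consumes. A sketch of the gauge formatting in isolation (the timestamp comes from `time.time_ns()`, so the printed value will differ per run):

```python
import time

# Assumes the GaugeMetric dataclass defined earlier in this file.
gauge = GaugeMetric("workflow_queue_size", 3, time.time_ns())
name = gauge.name.lower().replace(" ", "_")
print(f"{name} value={gauge.value} {gauge.time_ns}")
# -> e.g. "workflow_queue_size value=3 1739822400000000000"
```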
@@ -176,16 +251,21 @@ def main():
     # Enter the main loop. Every five minutes we wake up and dump metrics for
     # the relevant jobs.
     while True:
-        current_metrics = get_metrics(github_repo, workflows_to_track)
-        if len(current_metrics) == 0:
-            print("No metrics found to upload.", file=sys.stderr)
-            continue
+        current_metrics = get_per_workflow_metrics(github_repo, workflows_to_track)
+        current_metrics += get_sampled_workflow_metrics(github_repo)
+        # Always send a heartbeat metric so we can monitor whether this container
+        # is still able to log to Grafana.
+        current_metrics.append(
+            GaugeMetric("metrics_container_heartbeat", 1, time.time_ns())
+        )
 
         upload_metrics(current_metrics, grafana_metrics_userid, grafana_api_key)
         print(f"Uploaded {len(current_metrics)} metrics", file=sys.stderr)
 
         for workflow_metric in reversed(current_metrics):
-            workflows_to_track[workflow_metric.job_name] = workflow_metric.workflow_id
+            if isinstance(workflow_metric, JobMetrics):
+                workflows_to_track[
+                    workflow_metric.job_name
+                ] = workflow_metric.workflow_id
 
         time.sleep(SCRAPE_INTERVAL_SECONDS)
 
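The `isinstance` guard on the bookkeeping loop is load-bearing now that `current_metrics` mixes metric types: a `GaugeMetric` carries no `job_name` or `workflow_id`, so the pre-patch unconditional assignment would raise `AttributeError` on the heartbeat entry. A quick check:

```python
# GaugeMetric lacks the JobMetrics bookkeeping fields.
heartbeat = GaugeMetric("metrics_container_heartbeat", 1, 0)
print(hasattr(heartbeat, "job_name"))  # False -> must filter before updating
```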