@@ -24,6 +24,7 @@ class JobMetrics:
     status: int
     created_at_ns: int
     workflow_id: int
+    workflow_name: str


 @dataclass
@@ -43,40 +44,60 @@ def get_sampled_workflow_metrics(github_repo: github.Repository):
     Returns a list of GaugeMetric objects, containing the relevant metrics about
     the workflow
     """
+    queued_job_counts = {}
+    running_job_counts = {}

     # Other states are available (pending, waiting, etc), but the meaning
     # is not documented (See #70540).
     # "queued" seems to be the info we want.
-    queued_workflow_count = len(
-        [
-            x
-            for x in github_repo.get_workflow_runs(status="queued")
-            if x.name in WORKFLOWS_TO_TRACK
-        ]
-    )
-    running_workflow_count = len(
-        [
-            x
-            for x in github_repo.get_workflow_runs(status="in_progress")
-            if x.name in WORKFLOWS_TO_TRACK
-        ]
-    )
+    for queued_workflow in github_repo.get_workflow_runs(status="queued"):
+        if queued_workflow.name not in WORKFLOWS_TO_TRACK:
+            continue
+        for queued_workflow_job in queued_workflow.jobs():
+            job_name = queued_workflow_job.name
+            # Workflows marked as queued can potentially only have some jobs
+            # queued, so make sure to also count jobs currently in progress.
+            if queued_workflow_job.status == "queued":
+                if job_name not in queued_job_counts:
+                    queued_job_counts[job_name] = 1
+                else:
+                    queued_job_counts[job_name] += 1
+            elif queued_workflow_job.status == "in_progress":
+                if job_name not in running_job_counts:
+                    running_job_counts[job_name] = 1
+                else:
+                    running_job_counts[job_name] += 1
+
+    for running_workflow in github_repo.get_workflow_runs(status="in_progress"):
+        if running_workflow.name not in WORKFLOWS_TO_TRACK:
+            continue
+        for running_workflow_job in running_workflow.jobs():
+            job_name = running_workflow_job.name
+            if running_workflow_job.status != "in_progress":
+                continue
+
+            if job_name not in running_job_counts:
+                running_job_counts[job_name] = 1
+            else:
+                running_job_counts[job_name] += 1

     workflow_metrics = []
-    workflow_metrics.append(
-        GaugeMetric(
-            "workflow_queue_size",
-            queued_workflow_count,
-            time.time_ns(),
+    for queued_job in queued_job_counts:
+        workflow_metrics.append(
+            GaugeMetric(
+                f"workflow_queue_size_{queued_job}",
+                queued_job_counts[queued_job],
+                time.time_ns(),
+            )
         )
-    )
-    workflow_metrics.append(
-        GaugeMetric(
-            "running_workflow_count",
-            running_workflow_count,
-            time.time_ns(),
+    for running_job in running_job_counts:
+        workflow_metrics.append(
+            GaugeMetric(
+                f"running_workflow_count_{running_job}",
+                running_job_counts[running_job],
+                time.time_ns(),
+            )
         )
-    )
     # Always send a heartbeat metric so we can monitor if this container is still able to log to Grafana.
     workflow_metrics.append(
         GaugeMetric("metrics_container_heartbeat", 1, time.time_ns())
@@ -157,7 +178,7 @@ def get_per_workflow_metrics(
             # longer in a testing state and we can directly assert the workflow
             # result.
             for step in workflow_job.steps:
-                if step.conclusion != "success":
+                if step.conclusion != "success" and step.conclusion != "skipped":
                     job_result = 0
                     break

@@ -179,6 +200,7 @@ def get_per_workflow_metrics(
                 job_result,
                 created_at_ns,
                 workflow_run.id,
+                workflow_run.name,
             )
         )

@@ -235,8 +257,6 @@ def upload_metrics(workflow_metrics, metrics_userid, api_key):
 def main():
     # Authenticate with Github
     auth = Auth.Token(os.environ["GITHUB_TOKEN"])
-    github_object = Github(auth=auth)
-    github_repo = github_object.get_repo("llvm/llvm-project")

     grafana_api_key = os.environ["GRAFANA_API_KEY"]
     grafana_metrics_userid = os.environ["GRAFANA_METRICS_USERID"]
@@ -248,20 +268,19 @@ def main():
     # Enter the main loop. Every five minutes we wake up and dump metrics for
     # the relevant jobs.
     while True:
+        github_object = Github(auth=auth)
+        github_repo = github_object.get_repo("llvm/llvm-project")
+
         current_metrics = get_per_workflow_metrics(github_repo, workflows_to_track)
         current_metrics += get_sampled_workflow_metrics(github_repo)
-        # Always send a heartbeat metric so we can monitor if this container is still able to log to Grafana.
-        current_metrics.append(
-            GaugeMetric("metrics_container_heartbeat", 1, time.time_ns())
-        )

         upload_metrics(current_metrics, grafana_metrics_userid, grafana_api_key)
         print(f"Uploaded {len(current_metrics)} metrics", file=sys.stderr)

         for workflow_metric in reversed(current_metrics):
             if isinstance(workflow_metric, JobMetrics):
                 workflows_to_track[
-                    workflow_metric.job_name
+                    workflow_metric.workflow_name
                 ] = workflow_metric.workflow_id

         time.sleep(SCRAPE_INTERVAL_SECONDS)
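
The net effect of the get_sampled_workflow_metrics change is that the two aggregate gauges ("workflow_queue_size" and "running_workflow_count") are replaced by one gauge per job name. A minimal standalone sketch of that per-job counting pattern, with made-up job dictionaries standing in for the PyGithub job objects the patch iterates over, looks roughly like this:

from collections import Counter

# Made-up stand-ins for the PyGithub job objects; real names and statuses
# come from workflow_run.jobs() in the patch.
jobs = [
    {"name": "check-clang", "status": "queued"},
    {"name": "check-clang", "status": "in_progress"},
    {"name": "check-llvm", "status": "queued"},
]

# Count queued and running jobs per job name, as queued_job_counts /
# running_job_counts do in the patch.
queued_job_counts = Counter(j["name"] for j in jobs if j["status"] == "queued")
running_job_counts = Counter(j["name"] for j in jobs if j["status"] == "in_progress")

# Each job name becomes its own gauge, matching the new
# f"workflow_queue_size_{...}" / f"running_workflow_count_{...}" names.
for name, count in queued_job_counts.items():
    print(f"workflow_queue_size_{name} = {count}")
for name, count in running_job_counts.items():
    print(f"running_workflow_count_{name} = {count}")

The patch itself uses plain dicts with explicit membership checks rather than Counter, but the resulting metric names are the same.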