@@ -43,40 +43,60 @@ def get_sampled_workflow_metrics(github_repo: github.Repository):
4343 Returns a list of GaugeMetric objects, containing the relevant metrics about
4444 the workflow
4545 """
46+ queued_job_counts = {}
47+ running_job_counts = {}
4648
4749 # Other states are available (pending, waiting, etc), but the meaning
4850 # is not documented (See #70540).
4951 # "queued" seems to be the info we want.
50- queued_workflow_count = len (
51- [
52- x
53- for x in github_repo .get_workflow_runs (status = "queued" )
54- if x .name in WORKFLOWS_TO_TRACK
55- ]
56- )
57- running_workflow_count = len (
58- [
59- x
60- for x in github_repo .get_workflow_runs (status = "in_progress" )
61- if x .name in WORKFLOWS_TO_TRACK
62- ]
63- )
52+ for queued_workflow in github_repo .get_workflow_runs (status = "queued" ):
53+ if queued_workflow .name not in WORKFLOWS_TO_TRACK :
54+ continue
55+ for queued_workflow_job in queued_workflow .jobs ():
56+ job_name = queued_workflow_job .name
57+ # Workflows marked as queued can potentially only have some jobs
58+ # queued, so make sure to also count jobs currently in progress.
59+ if queued_workflow_job .status == "queued" :
60+ if job_name not in queued_job_counts :
61+ queued_job_counts [job_name ] = 1
62+ else :
63+ queued_job_counts [job_name ] += 1
64+ elif queued_workflow_job .status == "in_progress" :
65+ if job_name not in running_job_counts :
66+ running_job_counts [job_name ] = 1
67+ else :
68+ running_job_counts [job_name ] += 1
69+
70+ for running_workflow in github_repo .get_workflow_runs (status = "in_progress" ):
71+ if running_workflow .name not in WORKFLOWS_TO_TRACK :
72+ continue
73+ for running_workflow_job in running_workflow .jobs ():
74+ job_name = running_workflow_job .name
75+ if running_workflow_job .status != "in_progress" :
76+ continue
77+
78+ if job_name not in running_job_counts :
79+ running_job_counts [job_name ] = 1
80+ else :
81+ running_job_counts [job_name ] += 1
6482
6583 workflow_metrics = []
66- workflow_metrics .append (
67- GaugeMetric (
68- "workflow_queue_size" ,
69- queued_workflow_count ,
70- time .time_ns (),
84+ for queued_job in queued_job_counts :
85+ workflow_metrics .append (
86+ GaugeMetric (
87+ f"workflow_queue_size_{ queued_job } " ,
88+ queued_job_counts [queued_job ],
89+ time .time_ns (),
90+ )
7191 )
72- )
73- workflow_metrics .append (
74- GaugeMetric (
75- "running_workflow_count" ,
76- running_workflow_count ,
77- time .time_ns (),
92+ for running_job in running_job_counts :
93+ workflow_metrics .append (
94+ GaugeMetric (
95+ f"running_workflow_count_{ running_job } " ,
96+ running_job_counts [running_job ],
97+ time .time_ns (),
98+ )
7899 )
79- )
80100 # Always send a heartbeat metric so we can monitor if this container is still able to log to Grafana.
81101 workflow_metrics .append (
82102 GaugeMetric ("metrics_container_heartbeat" , 1 , time .time_ns ())
@@ -157,7 +177,7 @@ def get_per_workflow_metrics(
157177 # longer in a testing state and we can directly assert the workflow
158178 # result.
159179 for step in workflow_job .steps :
160- if step .conclusion != "success" :
180+ if step .conclusion != "success" and step . conclusion != "skipped" :
161181 job_result = 0
162182 break
163183
@@ -235,8 +255,6 @@ def upload_metrics(workflow_metrics, metrics_userid, api_key):
235255def main ():
236256 # Authenticate with Github
237257 auth = Auth .Token (os .environ ["GITHUB_TOKEN" ])
238- github_object = Github (auth = auth )
239- github_repo = github_object .get_repo ("llvm/llvm-project" )
240258
241259 grafana_api_key = os .environ ["GRAFANA_API_KEY" ]
242260 grafana_metrics_userid = os .environ ["GRAFANA_METRICS_USERID" ]
@@ -248,12 +266,11 @@ def main():
248266 # Enter the main loop. Every five minutes we wake up and dump metrics for
249267 # the relevant jobs.
250268 while True :
269+ github_object = Github (auth = auth )
270+ github_repo = github_object .get_repo ("llvm/llvm-project" )
271+
251272 current_metrics = get_per_workflow_metrics (github_repo , workflows_to_track )
252273 current_metrics += get_sampled_workflow_metrics (github_repo )
253- # Always send a hearbeat metric so we can monitor is this container is still able to log to Grafana.
254- current_metrics .append (
255- GaugeMetric ("metrics_container_heartbeat" , 1 , time .time_ns ())
256- )
257274
258275 upload_metrics (current_metrics , grafana_metrics_userid , grafana_api_key )
259276 print (f"Uploaded { len (current_metrics )} metrics" , file = sys .stderr )
0 commit comments