# This metric name is also used as a key in the job->name map.
GITHUB_WORKFLOW_TO_TRACK = {
    "CI Checks": "github_llvm_premerge_checks",
-    "Build and Test libc++": "github_libc++_premerge_checks",
+    "Build and Test libc++": "github_libcxx_premerge_checks",
}

# Lists the Github jobs to track for a given workflow. The key is the stable
        "Build and Test Linux": "premerge_linux",
        "Build and Test Windows": "premerge_windows",
    },
-    "github_libc++_premerge_checks": {
-        "libc++ Stage1 Testing": "premerge_libcxx_stage1",
-        "libc++ Stage2 Testing": "premerge_libcxx_stage2",
-        "libc++ Stage3 Testing": "premerge_libcxx_stage3",
+    "github_libcxx_premerge_checks": {
+        "stage1": "premerge_libcxx_stage1",
+        "stage2": "premerge_libcxx_stage2",
+        "stage3": "premerge_libcxx_stage3",
    },
}

# by trial and error).
GRAFANA_METRIC_MAX_AGE_MN = 120

-
@dataclass
class JobMetrics:
    job_name: str
    queue_time: int
    run_time: int
    status: int
+    created_at_ns: int
+    started_at_ns: int
    completed_at_ns: int
    workflow_id: int
    workflow_name: str
@@ -89,6 +90,139 @@ class GaugeMetric:
    time_ns: int


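+# One AggregateMetric is created per libc++ stage per workflow run by
+# create_and_append_libcxx_aggregates below. The aggregate queue and run
+# times are in seconds, and aggregate_status is the logical 'or' of the
+# member job statuses.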
+@dataclass
+class AggregateMetric:
+    aggregate_name: str
+    aggregate_queue_time: int
+    aggregate_run_time: int
+    aggregate_status: int
+    workflow_id: int
+
+
+def create_and_append_libcxx_aggregates(
+    workflow_metrics: list[JobMetrics]) -> None:
+    """
+    Find libc++ JobMetrics entries and create aggregate metrics for them.
+
+    Group the libc++ JobMetrics entries by workflow id, and within each
+    workflow id group them by stage. Create an aggregate metric for each
+    stage of each unique workflow id, and append each aggregate metric to
+    the workflow_metrics list.
+
+    How aggregates are computed:
+    queue time: Time from when the first job in the group is created until
+    the last job in the group has started.
+    run time: Time from when the first job in the group starts running until
+    the last job in the group finishes running.
+    status: logical 'or' of all the job statuses in the group.
+    """
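+    # Worked example (hypothetical numbers): for a stage with two jobs, one
+    # created at t=0s, started at t=60s and completed at t=300s, the other
+    # created at t=10s, started at t=90s and completed at t=250s, the
+    # aggregate queue time is 90s (latest start - earliest creation) and the
+    # aggregate run time is 240s (latest completion - earliest start).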
+    # Separate the jobs by workflow_id. Only look at JobMetrics entries.
+    aggregate_data = dict()
+    for job in workflow_metrics:
+        # Only want to look at JobMetrics.
+        if not isinstance(job, JobMetrics):
+            continue
+        # Only want libc++ jobs.
+        if job.workflow_name != "Build and Test libc++":
+            continue
+        if job.workflow_id not in aggregate_data.keys():
+            aggregate_data[job.workflow_id] = [job]
+        else:
+            aggregate_data[job.workflow_id].append(job)
+
+    # Go through each aggregate_data list (workflow id) and find all the
+    # needed data.
+    for ag_workflow_id in aggregate_data:
+        job_list = aggregate_data[ag_workflow_id]
+        stage1_jobs = list()
+        stage2_jobs = list()
+        stage3_jobs = list()
+        # Sort jobs into stage1, stage2, & stage3.
+        for job in job_list:
+            if job.job_name.find('stage1') > 0:
+                stage1_jobs.append(job)
+            elif job.job_name.find('stage2') > 0:
+                stage2_jobs.append(job)
+            elif job.job_name.find('stage3') > 0:
+                stage3_jobs.append(job)
+
+        for job_list in [stage1_jobs, stage2_jobs, stage3_jobs]:
+            if len(job_list) < 1:
+                # No jobs in that stage this time around.
+                continue
+
+            # Get the aggregate name.
+            ag_name = "github_libcxx_premerge_checks_"
+            if job_list[0].job_name.find('stage1') > 0:
+                ag_name = ag_name + "stage1_aggregate"
+            elif job_list[0].job_name.find('stage2') > 0:
+                ag_name = ag_name + "stage2_aggregate"
+            elif job_list[0].job_name.find('stage3') > 0:
+                ag_name = ag_name + "stage3_aggregate"
+            else:
+                ag_name = ag_name + "unknown_aggregate"
+
+            # Initialize the rest of the aggregate values.
+            earliest_create = job_list[0].created_at_ns
+            earliest_start = job_list[0].started_at_ns
+            earliest_complete = job_list[0].completed_at_ns
+            latest_start = job_list[0].started_at_ns
+            latest_complete = job_list[0].completed_at_ns
+            ag_status = job_list[0].status
+
+            # Go through the rest of the jobs for this workflow id, updating stats.
+            for job in job_list[1:]:
+                # Update the status.
+                ag_status = ag_status or job.status
+                # Get the earliest & latest times.
+                if job.created_at_ns < earliest_create:
+                    earliest_create = job.created_at_ns
+                if job.completed_at_ns < earliest_complete:
+                    earliest_complete = job.completed_at_ns
+                if job.started_at_ns > latest_start:
+                    latest_start = job.started_at_ns
+                if job.started_at_ns < earliest_start:
+                    earliest_start = job.started_at_ns
+                if job.completed_at_ns > latest_complete:
+                    latest_complete = job.completed_at_ns
+
+            # Compute aggregate run time (in seconds, not ns).
+            ag_run_time = (latest_complete - earliest_start) / 1000000000
+            # Compute aggregate queue time (in seconds, not ns).
+            ag_queue_time = (latest_start - earliest_create) / 1000000000
+            # Append the aggregate metrics to the workflow metrics list.
+            workflow_metrics.append(
+                AggregateMetric(
+                    ag_name, ag_queue_time, ag_run_time, ag_status,
+                    ag_workflow_id
+                )
+            )
+    return
+
+def clean_up_libcxx_job_name(old_name: str) -> str:
+    """
+    Convert libc++ job names into strings that can be used in metric names.
+
+    Take a name like 'stage1 (generic-cxx03, clang-22, clang++-22)'
+    and convert it to 'stage1_generic_cxx03__clang_22__clangxx_22'.
+    (Remove the parentheses; replace commas, hyphens and spaces with
+    underscores; replace '+' with 'x'.)
+    """
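+    # For instance, a hypothetical job name "stage2 (generic-gcc, gcc-15, g++-15)"
+    # would come back as "stage2_generic_gcc__gcc_15__gxx_15".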
+    # Names should have exactly one set of parentheses, so break on that. If
+    # they don't have any parentheses, then don't update them at all.
+    if old_name.find('(') == -1:
+        return old_name
+    stage, remainder = old_name.split('(')
+    stage = stage.strip()
+    if remainder[-1] == ')':
+        remainder = remainder[:-1]
+    remainder = remainder.replace('-', '_')
+    remainder = remainder.replace(',', '_')
+    remainder = remainder.replace(' ', '_')
+    remainder = remainder.replace('+', 'x')
+    new_name = stage + '_' + remainder
+    return new_name
+
def github_get_metrics(
    github_repo: github.Repository, last_workflows_seen_as_completed: set[int]
) -> tuple[list[JobMetrics], int]:
@@ -151,9 +285,14 @@ def github_get_metrics(
            break

        # This workflow is not interesting to us.
-        if task.name not in GITHUB_WORKFLOW_TO_TRACK:
+        if (task.name not in GITHUB_WORKFLOW_TO_TRACK
+            and task.name != "Build and Test libc++"):
            continue

+        libcxx_testing = False
+        if task.name == "Build and Test libc++":
+            libcxx_testing = True
+
        if task.status == "completed":
            workflow_seen_as_completed.add(task.id)

@@ -163,11 +302,20 @@ def github_get_metrics(

        name_prefix = GITHUB_WORKFLOW_TO_TRACK[task.name]
        for job in task.jobs():
+            if libcxx_testing:
+                # We're not running macos or windows libc++ tests on our
+                # infrastructure.
+                if (job.name.find("macos") != -1 or
+                    job.name.find("windows") != -1):
+                    continue
            # This job is not interesting to us.
-            if job.name not in GITHUB_JOB_TO_TRACK[name_prefix]:
+            elif job.name not in GITHUB_JOB_TO_TRACK[name_prefix]:
                continue

-            name_suffix = GITHUB_JOB_TO_TRACK[name_prefix][job.name]
+            if libcxx_testing:
+                name_suffix = clean_up_libcxx_job_name(job.name)
+            else:
+                name_suffix = GITHUB_JOB_TO_TRACK[name_prefix][job.name]
            metric_name = name_prefix + "_" + name_suffix

            if task.status != "completed":
@@ -216,21 +364,29 @@ def github_get_metrics(
                continue

            logging.info(f"Adding a job metric for job {job.id} in workflow {task.id}")
-            # The timestamp associated with the event is expected by Grafana to be
-            # in nanoseconds.
+            # The timestamp associated with the event is expected by Grafana to
+            # be in nanoseconds.
+            created_at_ns = int(created_at.timestamp()) * 10**9
+            started_at_ns = int(started_at.timestamp()) * 10**9
            completed_at_ns = int(completed_at.timestamp()) * 10**9
            workflow_metrics.append(
                JobMetrics(
                    metric_name,
                    queue_time.seconds,
                    run_time.seconds,
                    job_result,
+                    created_at_ns,
+                    started_at_ns,
                    completed_at_ns,
                    task.id,
                    task.name,
                )
            )

+    # Finished collecting the JobMetrics for all jobs; now create the
+    # aggregates for any libc++ jobs.
+    create_and_append_libcxx_aggregates(workflow_metrics)
+
    for name, value in queued_count.items():
        workflow_metrics.append(
            GaugeMetric(f"workflow_queue_size_{name}", value, time.time_ns())