# Lists the Github workflows we want to track. Maps the Github job name to
# the metric name prefix in grafana.
# This metric name is also used as a key in the job->name map.
GITHUB_WORKFLOW_TO_TRACK = {
    "CI Checks": "github_llvm_premerge_checks",
    "Build and Test libc++": "github_libcxx_premerge_checks",
}
3235
# Lists the Github jobs to track for a given workflow. The key is the stable
# name (metric name) of the workflow (see GITHUB_WORKFLOW_TO_TRACK).
# Each value maps a Github job name to the metric name suffix used when
# building the full Grafana metric name (prefix + "_" + suffix).
GITHUB_JOB_TO_TRACK = {
    "github_llvm_premerge_checks": {
        "Build and Test Linux": "premerge_linux",
        "Build and Test Windows": "premerge_windows",
    },
    "github_libcxx_premerge_checks": {
        "stage1": "premerge_libcxx_stage1",
        "stage2": "premerge_libcxx_stage2",
        "stage3": "premerge_libcxx_stage3",
    },
}
4351
4452# The number of workflows to pull when sampling Github workflows. 
6270# by trial and error). 
6371GRAFANA_METRIC_MAX_AGE_MN  =  120 
6472
65- 
@dataclass
class JobMetrics:
    """Metrics recorded for a single Github job within a tracked workflow."""

    # Grafana metric name for this job (workflow prefix + "_" + job suffix).
    job_name: str
    # Time the job spent queued, in seconds.
    queue_time: int
    # Time the job spent running, in seconds.
    run_time: int
    # Job result encoded as an int (see job_result where this is built).
    status: int
    # Creation, start, and completion timestamps, all in nanoseconds since
    # the epoch. Grafana only consumes completed_at_ns, but all three are
    # kept in ns so libc++ aggregate math can mix them freely.
    created_at_ns: int
    started_at_ns: int
    completed_at_ns: int
    # Id and name of the workflow run this job belongs to.
    workflow_id: int
    workflow_name: str
@@ -81,6 +90,159 @@ class GaugeMetric:
8190    time_ns : int 
8291
8392
@dataclass
class AggregateMetric:
    """Aggregate metrics computed from a group of related libc++ jobs.

    Built by _construct_aggregate from the JobMetrics of one stage of one
    workflow run.
    """

    # Grafana metric name for the aggregate.
    aggregate_name: str
    # Seconds from first job created to last job started (float: computed
    # with true division by 10**9).
    aggregate_queue_time: float
    # Seconds from first job started to last job completed (float, as above).
    aggregate_run_time: float
    # Logical 'and' of all job statuses in the group.
    aggregate_status: int
    # Completion time (ns since epoch) of the last job in the group.
    completed_at_ns: int
    # Id of the workflow run the aggregated jobs belong to.
    workflow_id: int
101+ 
102+ 
def _construct_aggregate(ag_name: str, job_list: list[JobMetrics]) -> AggregateMetric:
    """Create a libc++ AggregateMetric from a list of libc++ JobMetrics.

    How aggregates are computed:
    queue time: Time from when first job in group is created until last job in
                group has started.
    run time: Time from when first job in group starts running until last job
              in group finishes running.
    status: logical 'and' of all the job statuses in the group.

    Args:
      ag_name: The name for this particular AggregateMetric.
      job_list: The list of JobMetrics to be combined into the AggregateMetric.
        The input list should contain all (and only!) the libc++ JobMetrics
        for a particular stage and a particular workflow_id. Must be
        non-empty.

    Returns:
      Returns the AggregateMetric constructed from the inputs.
    """
    # Gather the boundary timestamps across the whole group. (The previous
    # version also tracked the earliest completion time, but it was never
    # used, so it has been dropped.)
    earliest_create = min(job.created_at_ns for job in job_list)
    earliest_start = min(job.started_at_ns for job in job_list)
    latest_start = max(job.started_at_ns for job in job_list)
    latest_complete = max(job.completed_at_ns for job in job_list)

    # Chained 'and' (not all()) deliberately preserves the int status value
    # of the original jobs rather than converting to bool.
    ag_status = job_list[0].status
    for job in job_list[1:]:
        ag_status = ag_status and job.status

    # Compute aggregate run time (in seconds, not ns).
    ag_run_time = (latest_complete - earliest_start) / 10**9
    # Compute aggregate queue time (in seconds, not ns).
    ag_queue_time = (latest_start - earliest_create) / 10**9
    # All jobs in the group share one workflow run, so take the id from any.
    return AggregateMetric(
        ag_name, ag_queue_time, ag_run_time, ag_status, latest_complete, job_list[0].workflow_id
    )
156+ 
157+ 
158+ def  create_and_append_libcxx_aggregates (workflow_metrics : list [JobMetrics ]):
159+     """Find libc++ JobMetric entries and create aggregate metrics for them. 
160+ 
161+     Sort the libc++ JobMetric entries by workflow id, and for each workflow 
162+     id group them by stages. Call _construct_aggregate to reate an aggregate 
163+     metric for each stage for each unique workflow id. Append each aggregate 
164+     metric to the input workflow_metrics list. 
165+ 
166+      Args: 
167+       workflow_metrics: A list of JobMetrics entries collected so far. 
168+     """ 
169+     # Separate the jobs by workflow_id. Only look at JobMetrics entries. 
170+     aggregate_data  =  dict ()
171+     for  job  in  workflow_metrics :
172+         # Only want to look at JobMetrics 
173+         if  not  isinstance (job , JobMetrics ):
174+             continue 
175+         # Only want libc++ jobs. 
176+         if  job .workflow_name  !=  "Build and Test libc++" :
177+             continue 
178+         if  job .workflow_id  not  in   aggregate_data .keys ():
179+             aggregate_data [job .workflow_id ] =  [job ]
180+         else :
181+             aggregate_data [job .workflow_id ].append (job )
182+ 
183+     # Go through each aggregate_data list (workflow id) and find all the 
184+     # needed data 
185+     for  ag_workflow_id  in  aggregate_data :
186+         job_list  =  aggregate_data [ag_workflow_id ]
187+         stage1_jobs  =  list ()
188+         stage2_jobs  =  list ()
189+         stage3_jobs  =  list ()
190+         # sort jobs into stage1, stage2, & stage3. 
191+         for  job  in  job_list :
192+             if  job .job_name .find ("stage1" ) >  0 :
193+                 stage1_jobs .append (job )
194+             elif  job .job_name .find ("stage2" ) >  0 :
195+                 stage2_jobs .append (job )
196+             elif  job .job_name .find ("stage3" ) >  0 :
197+                 stage3_jobs .append (job )
198+ 
199+         if  len (stage1_jobs ) >  0 :
200+             aggregate  =  _construct_aggregate (
201+                 "github_libcxx_premerge_checks_stage1_aggregate" , stage1_jobs 
202+             )
203+             workflow_metrics .append (aggregate )
204+         if  len (stage2_jobs ) >  0 :
205+             aggregate  =  _construct_aggregate (
206+                 "github_libcxx_premerge_checks_stage2_aggregate" , stage2_jobs 
207+             )
208+             workflow_metrics .append (aggregate )
209+         if  len (stage3_jobs ) >  0 :
210+             aggregate  =  _construct_aggregate (
211+                 "github_libcxx_premerge_checks_stage3_aggregate" , stage3_jobs 
212+             )
213+             workflow_metrics .append (aggregate )
214+ 
215+ 
def clean_up_libcxx_job_name(old_name: str) -> str:
    """Convert libcxx job names to generically legal strings.

    Take a name like 'stage1 (generic-cxx03, clang-22, clang++-22)'
    and convert it to 'stage1_generic_cxx03__clang_22__clangxx_22'.
    (Remove parentheses; replace commas, hyphens and spaces with
    underscores; replace '+' with 'x'.)

    Args:
      old_name: A string with the full name of the libc++ test that was run.

    Returns:
      Returns the input string with characters that might not be acceptable
        in some identifier strings replaced with safer characters. Names
        containing no parentheses are returned unchanged.
    """
    # Names should have exactly one set of parentheses, so break on the
    # first one. If they don't have any parentheses, then don't update them
    # at all. maxsplit=1 keeps the two-way unpack safe even if a name ever
    # contains more than one '(' (the old unbounded split would raise).
    if "(" not in old_name:
        return old_name
    stage, remainder = old_name.split("(", 1)
    stage = stage.strip()
    remainder = remainder.removesuffix(")")
    # One translate pass instead of four chained str.replace calls.
    remainder = remainder.translate(
        str.maketrans({"-": "_", ",": "_", " ": "_", "+": "x"})
    )
    return stage + "_" + remainder
245+ 
84246def  github_get_metrics (
85247    github_repo : github .Repository , last_workflows_seen_as_completed : set [int ]
86248) ->  tuple [list [JobMetrics ], int ]:
@@ -146,6 +308,10 @@ def github_get_metrics(
146308        if  task .name  not  in   GITHUB_WORKFLOW_TO_TRACK :
147309            continue 
148310
311+         libcxx_testing  =  False 
312+         if  task .name  ==  "Build and Test libc++" :
313+             libcxx_testing  =  True 
314+ 
149315        if  task .status  ==  "completed" :
150316            workflow_seen_as_completed .add (task .id )
151317
@@ -155,11 +321,19 @@ def github_get_metrics(
155321
156322        name_prefix  =  GITHUB_WORKFLOW_TO_TRACK [task .name ]
157323        for  job  in  task .jobs ():
324+             if  libcxx_testing :
325+                 # We're not running macos or windows libc++ tests on our 
326+                 # infrastructure. 
327+                 if  job .name .find ("macos" ) !=  - 1  or  job .name .find ("windows" ) !=  - 1 :
328+                     continue 
158329            # This job is not interesting to us. 
159-             if  job .name  not  in   GITHUB_JOB_TO_TRACK [name_prefix ]:
330+             elif  job .name  not  in   GITHUB_JOB_TO_TRACK [name_prefix ]:
160331                continue 
161332
162-             name_suffix  =  GITHUB_JOB_TO_TRACK [name_prefix ][job .name ]
333+             if  libcxx_testing :
334+                 name_suffix  =  clean_up_libcxx_job_name (job .name )
335+             else :
336+                 name_suffix  =  GITHUB_JOB_TO_TRACK [name_prefix ][job .name ]
163337            metric_name  =  name_prefix  +  "_"  +  name_suffix 
164338
165339            if  task .status  !=  "completed" :
@@ -208,21 +382,32 @@ def github_get_metrics(
208382                continue 
209383
210384            logging .info (f"Adding a job metric for job { job .id }   in workflow { task .id }  " )
211-             # The timestamp associated with the event is expected by Grafana to be 
212-             # in nanoseconds. 
385+             # The completed_at_ns timestamp associated with the event is 
386+             # expected by Grafana to be in nanoseconds. Because we do math using 
387+             # all three times (when creating libc++ aggregates), we need them 
388+             # all to be in nanoseconds, even though created_at and started_at 
389+             # are not returned to Grafana. 
390+             created_at_ns  =  int (created_at .timestamp ()) *  10 ** 9 
391+             started_at_ns  =  int (started_at .timestamp ()) *  10 ** 9 
213392            completed_at_ns  =  int (completed_at .timestamp ()) *  10 ** 9 
214393            workflow_metrics .append (
215394                JobMetrics (
216395                    metric_name ,
217396                    queue_time .seconds ,
218397                    run_time .seconds ,
219398                    job_result ,
399+                     created_at_ns ,
400+                     started_at_ns ,
220401                    completed_at_ns ,
221402                    task .id ,
222403                    task .name ,
223404                )
224405            )
225406
407+     # Finished collecting the JobMetrics for all jobs; now create the 
408+     # aggregates for any libc++ jobs. 
409+     create_and_append_libcxx_aggregates (workflow_metrics )
410+ 
226411    for  name , value  in  queued_count .items ():
227412        workflow_metrics .append (
228413            GaugeMetric (f"workflow_queue_size_{ name }  " , value , time .time_ns ())
@@ -278,6 +463,11 @@ def upload_metrics(workflow_metrics, metrics_userid, api_key):
278463            metrics_batch .append (
279464                f"{ name }   queue_time={ workflow_metric .queue_time }  ,run_time={ workflow_metric .run_time }  ,status={ workflow_metric .status }   { workflow_metric .completed_at_ns }  " 
280465            )
466+         elif  isinstance (workflow_metric , AggregateMetric ):
467+             name  =  workflow_metric .aggregate_name .lower ().replace (" " , "_" )
468+             metrics_batch .append (
469+                 f"{ name }   queue_time={ workflow_metric .aggregate_queue_time }  ,run_time={ workflow_metric .aggregate_run_time }  ,status={ workflow_metric .aggregate_status }   { workflow_metric .completed_at_ns }  " 
470+             )
281471        else :
282472            raise  ValueError (
283473                f"Unsupported object type { type (workflow_metric )}  : { str (workflow_metric )}  " 
0 commit comments