Skip to content

Commit 61ac179

Browse files
committed
Remove fake document (used for poking test runs).
Finish updating metrics.py to collect libc++ testing metrics.
1 parent c8c42b7 commit 61ac179

File tree

2 files changed

+167
-15
lines changed

2 files changed

+167
-15
lines changed

.ci/metrics/metrics.py

Lines changed: 167 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
# This metric name is also used as a key in the job->name map.
3131
GITHUB_WORKFLOW_TO_TRACK = {
3232
"CI Checks": "github_llvm_premerge_checks",
33-
"Build and Test libc++": "github_libc++_premerge_checks",
33+
"Build and Test libc++": "github_libcxx_premerge_checks",
3434
}
3535

3636
# Lists the Github jobs to track for a given workflow. The key is the stable
@@ -42,10 +42,10 @@
4242
"Build and Test Linux": "premerge_linux",
4343
"Build and Test Windows": "premerge_windows",
4444
},
45-
"github_libc++_premerge_checks": {
46-
"libc++ Stage1 Testing": "premerge_libcxx_stage1",
47-
"libc++ Stage2 Testing": "premerge_libcxx_stage2",
48-
"libc++ Stage3 Testing": "premerge_libcxx_stage3",
45+
"github_libcxx_premerge_checks": {
46+
"stage1": "premerge_libcxx_stage1",
47+
"stage2": "premerge_libcxx_stage2",
48+
"stage3": "premerge_libcxx_stage3",
4949
},
5050
}
5151

@@ -70,13 +70,14 @@
7070
# by trial and error).
7171
GRAFANA_METRIC_MAX_AGE_MN = 120
7272

73-
7473
@dataclass
7574
class JobMetrics:
7675
job_name: str
7776
queue_time: int
7877
run_time: int
7978
status: int
79+
created_at_ns: int
80+
started_at_ns: int
8081
completed_at_ns: int
8182
workflow_id: int
8283
workflow_name: str
@@ -89,6 +90,139 @@ class GaugeMetric:
8990
time_ns: int
9091

9192

93+
@dataclass
class AggregateMetric:
    # Stable aggregate metric name, e.g.
    # "github_libcxx_premerge_checks_stage1_aggregate".
    aggregate_name: str
    # Seconds from when the first job in the group was created until the
    # last job in the group started.
    aggregate_queue_time: int
    # Seconds from when the first job in the group started running until
    # the last job in the group finished running.
    aggregate_run_time: int
    # Logical 'or' of the statuses of all jobs in the group.
    aggregate_status: int
    # Github workflow run id that the aggregated jobs belong to.
    workflow_id: int
100+
101+
102+
def create_and_append_libcxx_aggregates(
    workflow_metrics: list[JobMetrics],
) -> None:
    """Find libc++ JobMetric entries and create aggregate metrics for them.

    Group the libc++ JobMetric entries by workflow id, and within each
    workflow id group them by stage (stage1/stage2/stage3). Create an
    aggregate metric for each non-empty stage of each workflow id and
    append it to workflow_metrics (in-place; nothing is returned).

    How aggregates are computed:
    queue time: Time from when the first job in the group is created until
        the last job in the group has started.
    run time: Time from when the first job in the group starts running until
        the last job in the group finishes running.
    status: logical 'or' of all the job statuses in the group.
    """
    # Bucket libc++ jobs by workflow_id. Only look at JobMetrics entries;
    # the list may also contain GaugeMetric entries.
    jobs_by_workflow: dict[int, list[JobMetrics]] = {}
    for metric in workflow_metrics:
        if not isinstance(metric, JobMetrics):
            continue
        # Only want libc++ jobs.
        if metric.workflow_name != "Build and Test libc++":
            continue
        jobs_by_workflow.setdefault(metric.workflow_id, []).append(metric)

    for ag_workflow_id, job_list in jobs_by_workflow.items():
        # Sort jobs into stage1, stage2, & stage3. Use 'in' rather than
        # str.find(...) > 0 so a job name that *starts* with the stage tag
        # (find() == 0) is still matched.
        stage_groups: dict[str, list[JobMetrics]] = {
            "stage1": [],
            "stage2": [],
            "stage3": [],
        }
        for job in job_list:
            for stage, group in stage_groups.items():
                if stage in job.job_name:
                    group.append(job)
                    break

        for stage, group in stage_groups.items():
            if not group:
                # No jobs in that stage this time around.
                continue

            ag_name = f"github_libcxx_premerge_checks_{stage}_aggregate"

            # Earliest/latest event times across the group, and the logical
            # 'or' of all the job statuses.
            earliest_create = min(job.created_at_ns for job in group)
            earliest_start = min(job.started_at_ns for job in group)
            latest_start = max(job.started_at_ns for job in group)
            latest_complete = max(job.completed_at_ns for job in group)
            ag_status = group[0].status
            for job in group[1:]:
                ag_status = ag_status or job.status

            # Compute aggregate run time (in seconds, not ns).
            ag_run_time = (latest_complete - earliest_start) / 1000000000
            # Compute aggregate queue time (in seconds, not ns).
            ag_queue_time = (latest_start - earliest_create) / 1000000000
            # Append the aggregate metrics to the workflow metrics list.
            workflow_metrics.append(
                AggregateMetric(
                    ag_name, ag_queue_time, ag_run_time, ag_status, ag_workflow_id
                )
            )
201+
202+
def clean_up_libcxx_job_name(old_name: str) -> str:
    """Convert libcxx job names to generically legal strings.

    Take a name like 'stage1 (generic-cxx03, clang-22, clang++-22)'
    and convert it to 'stage1_generic_cxx03__clang_22__clangxx_22'.
    (Remove parentheses; replace commas, hyphens and spaces with
    underscores; replace '+' with 'x'.)

    Names should have exactly one set of parentheses. If they don't have
    any parentheses, they are returned unchanged.
    """
    # partition() never raises, unlike split('(') which blows up on a name
    # containing more than one '('; an empty separator means no '(' at all.
    stage, sep, remainder = old_name.partition("(")
    if not sep:
        return old_name
    stage = stage.strip()
    # Drop a single trailing ')' if present (removesuffix is a no-op when
    # the remainder is empty or doesn't end with ')').
    remainder = remainder.removesuffix(")")
    # All substitutions map one character to one character, so a single
    # translate() pass replaces the four chained .replace() calls.
    remainder = remainder.translate(str.maketrans("-, +", "___x"))
    return stage + "_" + remainder
225+
92226
def github_get_metrics(
93227
github_repo: github.Repository, last_workflows_seen_as_completed: set[int]
94228
) -> tuple[list[JobMetrics], int]:
@@ -151,9 +285,14 @@ def github_get_metrics(
151285
break
152286

153287
# This workflow is not interesting to us.
154-
if task.name not in GITHUB_WORKFLOW_TO_TRACK:
288+
if (task.name not in GITHUB_WORKFLOW_TO_TRACK
289+
and task.name != "Build and Test libc++"):
155290
continue
156291

292+
libcxx_testing = False
293+
if task.name == "Build and Test libc++":
294+
libcxx_testing = True
295+
157296
if task.status == "completed":
158297
workflow_seen_as_completed.add(task.id)
159298

@@ -163,11 +302,20 @@ def github_get_metrics(
163302

164303
name_prefix = GITHUB_WORKFLOW_TO_TRACK[task.name]
165304
for job in task.jobs():
305+
if libcxx_testing:
306+
# We're not running macos or windows libc++ tests on our
307+
# infrastructure.
308+
if (job.name.find("macos") != -1 or
309+
job.name.find("windows") != -1):
310+
continue
166311
# This job is not interesting to us.
167-
if job.name not in GITHUB_JOB_TO_TRACK[name_prefix]:
312+
elif job.name not in GITHUB_JOB_TO_TRACK[name_prefix]:
168313
continue
169314

170-
name_suffix = GITHUB_JOB_TO_TRACK[name_prefix][job.name]
315+
if libcxx_testing:
316+
name_suffix = clean_up_libcxx_job_name(job.name)
317+
else:
318+
name_suffix = GITHUB_JOB_TO_TRACK[name_prefix][job.name]
171319
metric_name = name_prefix + "_" + name_suffix
172320

173321
if task.status != "completed":
@@ -216,21 +364,29 @@ def github_get_metrics(
216364
continue
217365

218366
logging.info(f"Adding a job metric for job {job.id} in workflow {task.id}")
219-
# The timestamp associated with the event is expected by Grafana to be
220-
# in nanoseconds.
367+
# The timestamp associated with the event is expected by Grafana to
368+
# be in nanoseconds.
369+
created_at_ns = int(created_at.timestamp()) * 10**9
370+
started_at_ns = int(started_at.timestamp()) * 10**9
221371
completed_at_ns = int(completed_at.timestamp()) * 10**9
222372
workflow_metrics.append(
223373
JobMetrics(
224374
metric_name,
225375
queue_time.seconds,
226376
run_time.seconds,
227377
job_result,
378+
created_at_ns,
379+
started_at_ns,
228380
completed_at_ns,
229381
task.id,
230382
task.name,
231383
)
232384
)
233385

386+
# Finished collecting the JobMetrics for all jobs; now create the
387+
# aggregates for any libc++ jobs.
388+
create_and_append_libcxx_aggregates(workflow_metrics)
389+
234390
for name, value in queued_count.items():
235391
workflow_metrics.append(
236392
GaugeMetric(f"workflow_queue_size_{name}", value, time.time_ns())

libcxx/docs/CarolineTest.txt

Lines changed: 0 additions & 4 deletions
This file was deleted.

0 commit comments

Comments
 (0)