Skip to content

Commit 84a5909

Browse files
committed
modify last workflow tracking & use map
1 parent ef103a5 commit 84a5909

File tree

1 file changed

+60
-67
lines changed

1 file changed

+60
-67
lines changed

.ci/metrics/metrics.py

Lines changed: 60 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,28 @@
1414
GRAFANA_URL = (
1515
"https://influx-prod-13-prod-us-east-0.grafana.net/api/v1/push/influx/write"
1616
)
17-
GITHUB_PROJECT = "llvm/llvm-project"
18-
WORKFLOWS_TO_TRACK = ["LLVM Premerge Checks"]
1917
SCRAPE_INTERVAL_SECONDS = 5 * 60
2018

2119
# Number of builds to fetch per page. Since we scrape regularly, this can
2220
# remain small.
2321
BUILDKITE_GRAPHQL_BUILDS_PER_PAGE = 10
2422

23+
# Lists the Github workflows we want to track. Maps the Github workflow name to
24+
# the metric name prefix in grafana.
25+
# This metric name is also used as a key in the job->name map.
26+
GITHUB_WORKFLOW_TO_TRACK = {"LLVM Premerge Checks": "github_llvm_premerge_checks"}
27+
28+
# Lists the Github jobs to track for a given workflow. The key is the stable
29+
# name (metric name) of the workflow (see GITHUB_WORKFLOW_TO_TRACK).
30+
# Each value is a map to link the github job name to the corresponding metric
31+
# name.
32+
GITHUB_JOB_TO_TRACK = {
33+
"github_llvm_premerge_checks": {
34+
"Linux Premerge Checks (Test Only - Please Ignore Results)": "premerge_linux",
35+
"Windows Premerge Checks (Test Only - Please Ignore Results)": "premerge_windows",
36+
}
37+
}
38+
2539
# Lists the BuildKite jobs we want to track. Maps the BuildKite job name to
2640
# the metric name in Grafana. This is important to avoid losing metrics history
2741
# if the workflow name changes.
@@ -179,7 +193,7 @@ def buildkite_get_metrics(
179193

180194
last_recorded_build = last_cursor
181195
output = []
182-
for build in builds:
196+
for build in reversed(builds):
183197
info = buildkite_get_build_info(build["number"])
184198
last_recorded_build = build["cursor"]
185199
for job in info["jobs"]:
@@ -196,7 +210,7 @@ def buildkite_get_metrics(
196210
queue_time = (started_at - scheduled_at).seconds
197211
run_time = (finished_at - started_at).seconds
198212
status = bool(job["passed"])
199-
created_at_ns = int(created_at.timestamp()) * 10**9
213+
finished_at_ns = int(finished_at.timestamp()) * 10**9
200214
workflow_id = build["number"]
201215
workflow_name = "Github pull requests"
202216
output.append(
@@ -205,7 +219,7 @@ def buildkite_get_metrics(
205219
queue_time,
206220
run_time,
207221
status,
208-
created_at_ns,
222+
finished_at_ns,
209223
workflow_id,
210224
workflow_name,
211225
)
@@ -231,7 +245,7 @@ def get_sampled_workflow_metrics(github_repo: github.Repository):
231245
# is not documented (See #70540).
232246
# "queued" seems to be the info we want.
233247
for queued_workflow in github_repo.get_workflow_runs(status="queued"):
234-
if queued_workflow.name not in WORKFLOWS_TO_TRACK:
248+
if queued_workflow.name not in GITHUB_WORKFLOW_TO_TRACK:
235249
continue
236250
for queued_workflow_job in queued_workflow.jobs():
237251
job_name = queued_workflow_job.name
@@ -249,7 +263,7 @@ def get_sampled_workflow_metrics(github_repo: github.Repository):
249263
running_job_counts[job_name] += 1
250264

251265
for running_workflow in github_repo.get_workflow_runs(status="in_progress"):
252-
if running_workflow.name not in WORKFLOWS_TO_TRACK:
266+
if running_workflow.name not in GITHUB_WORKFLOW_TO_TRACK:
253267
continue
254268
for running_workflow_job in running_workflow.jobs():
255269
job_name = running_workflow_job.name
@@ -284,70 +298,54 @@ def get_sampled_workflow_metrics(github_repo: github.Repository):
284298
)
285299
return workflow_metrics
286300

287-
def get_per_workflow_metrics(
288-
github_repo: github.Repository, workflows_to_track: dict[str, int]
289-
):
301+
302+
def get_per_workflow_metrics(github_repo: github.Repository, last_workflow_id: str):
290303
"""Gets the metrics for specified Github workflows.
291304
292-
This function takes in a list of workflows to track, and optionally the
293-
workflow ID of the last tracked invocation. It grabs the relevant data
294-
from Github, returning it to the caller.
305+
This function loads the last workflows from GitHub up to
306+
`last_workflow_id` and collects their metrics if they are referenced in
307+
GITHUB_WORKFLOW_TO_TRACK.
308+
The function returns a list of metrics, and the most recent processed
309+
workflow.
310+
If `last_workflow_id` is None, no metrics are returned, and the last
311+
completed github workflow ID is returned. This is used once when the
312+
program starts.
295313
296314
Args:
297315
github_repo: A github repo object to use to query the relevant information.
298-
workflows_to_track: A dictionary mapping workflow names to the last
299-
invocation ID where metrics have been collected, or None to collect the
300-
last five results.
316+
last_workflow_id: the last workflow we checked.
301317
302318
Returns:
303319
Returns a tuple: a list of JobMetrics objects, containing the relevant metrics about
304320
the workflow, and the ID of the last workflow run examined (or `last_workflow_id` if none was).
305321
"""
306322
workflow_metrics = []
323+
last_checked_workflow_id = last_workflow_id
307324

308-
workflows_to_include = set(workflows_to_track.keys())
309-
310-
for workflow_run in iter(github_repo.get_workflow_runs()):
311-
if len(workflows_to_include) == 0:
325+
for workflow_run in iter(github_repo.get_workflow_runs(status="completed")):
326+
last_checked_workflow_id = workflow_run.id
327+
# If we saw this workflow already, break. We also break if no
328+
# workflow has been seen, as this means the script just started.
329+
if last_workflow_id == workflow_run.id or last_workflow_id is None:
312330
break
313331

314-
if workflow_run.status != "completed":
315-
continue
316-
317-
# This workflow was already sampled for this run, or is not tracked at
318-
# all. Ignoring.
319-
if workflow_run.name not in workflows_to_include:
320-
continue
321-
322-
# There were no new workflow invocations since the previous scrape.
323-
# The API returns a sorted list with the most recent invocations first,
324-
# so we can stop looking for this particular workflow. Continue to grab
325-
# information on the other workflows of interest, if present.
326-
if workflows_to_track[workflow_run.name] == workflow_run.id:
327-
workflows_to_include.remove(workflow_run.name)
332+
# This workflow is not interesting to us. Skipping.
333+
if workflow_run.name not in GITHUB_WORKFLOW_TO_TRACK:
328334
continue
329335

330-
workflow_jobs = workflow_run.jobs()
331-
if workflow_jobs.totalCount == 0:
332-
continue
336+
workflow_key = GITHUB_WORKFLOW_TO_TRACK[workflow_run.name]
333337

334-
if (
335-
workflows_to_track[workflow_run.name] is None
336-
or workflows_to_track[workflow_run.name] == workflow_run.id
337-
):
338-
workflows_to_include.remove(workflow_run.name)
339-
if (
340-
workflows_to_track[workflow_run.name] is not None
341-
and len(workflows_to_include) == 0
342-
):
343-
break
338+
for workflow_job in workflow_run.jobs():
339+
# This job is not interesting, skipping.
340+
if workflow_job.name not in GITHUB_JOB_TO_TRACK[workflow_key]:
341+
continue
344342

345-
for workflow_job in workflow_jobs:
346343
created_at = workflow_job.created_at
347344
started_at = workflow_job.started_at
348345
completed_at = workflow_job.completed_at
349-
350346
job_result = int(workflow_job.conclusion == "success")
347+
job_key = GITHUB_JOB_TO_TRACK[workflow_key][workflow_job.name]
348+
351349
if job_result:
352350
# We still might want to mark the job as a failure if one of the steps
353351
# failed. This is required due to us setting continue-on-error in
@@ -377,7 +375,7 @@ def get_per_workflow_metrics(
377375

378376
workflow_metrics.append(
379377
JobMetrics(
380-
workflow_run.name + "-" + workflow_job.name,
378+
workflow_key + "-" + job_key,
381379
queue_time.seconds,
382380
run_time.seconds,
383381
job_result,
@@ -387,7 +385,7 @@ def get_per_workflow_metrics(
387385
)
388386
)
389387

390-
return workflow_metrics
388+
return workflow_metrics, last_checked_workflow_id
391389

392390
def upload_metrics(workflow_metrics, metrics_userid, api_key):
393391
"""Upload metrics to Grafana.
@@ -441,33 +439,28 @@ def main():
441439
grafana_metrics_userid = os.environ["GRAFANA_METRICS_USERID"]
442440
buildkite_token = os.environ["BUILDKITE_TOKEN"]
443441

444-
# The last buildkite build recorded.
442+
# This script only records workflows/jobs/builds finished after it
443+
# started. So we need to keep track of the last known build.
445444
buildkite_last_cursor = None
446-
447-
workflows_to_track = {}
448-
for workflow_to_track in WORKFLOWS_TO_TRACK:
449-
workflows_to_track[workflow_to_track] = None
445+
github_last_workflow_id = None
450446

451447
# Enter the main loop. Every five minutes we wake up and dump metrics for
452448
# the relevant jobs.
453449
while True:
454450
github_object = Github(auth=auth)
455451
github_repo = github_object.get_repo("llvm/llvm-project")
456452

457-
current_metrics, buildkite_last_cursor = buildkite_get_metrics(
453+
buildkite_metrics, buildkite_last_cursor = buildkite_get_metrics(
458454
buildkite_token, buildkite_last_cursor
459455
)
460-
current_metrics += get_per_workflow_metrics(github_repo, workflows_to_track)
461-
current_metrics += get_sampled_workflow_metrics(github_repo)
462-
463-
upload_metrics(current_metrics, grafana_metrics_userid, grafana_api_key)
464-
logging.info(f"Uploaded {len(current_metrics)} metrics")
456+
github_metrics, github_last_workflow_id = get_per_workflow_metrics(
457+
github_repo, github_last_workflow_id
458+
)
459+
sampled_metrics = get_sampled_workflow_metrics(github_repo)
465460

466-
for workflow_metric in reversed(current_metrics):
467-
if isinstance(workflow_metric, JobMetrics):
468-
workflows_to_track[
469-
workflow_metric.workflow_name
470-
] = workflow_metric.workflow_id
461+
metrics = buildkite_metrics + github_metrics + sampled_metrics
462+
upload_metrics(metrics, grafana_metrics_userid, grafana_api_key)
463+
logging.info(f"Uploaded {len(metrics)} metrics")
471464

472465
time.sleep(SCRAPE_INTERVAL_SECONDS)
473466

0 commit comments

Comments
 (0)