# Grafana InfluxDB-compatible endpoint that metrics are pushed to.
GRAFANA_URL = (
    "https://influx-prod-13-prod-us-east-0.grafana.net/api/v1/push/influx/write"
)

# How long the main loop sleeps between scrapes (five minutes).
SCRAPE_INTERVAL_SECONDS = 5 * 60

# Number of builds to fetch per page. Since we scrape regularly, this can
# remain small.
BUILDKITE_GRAPHQL_BUILDS_PER_PAGE = 10

# Lists the Github workflows we want to track. Maps the Github workflow name to
# the metric name prefix in grafana.
# This metric name is also used as a key in the job->name map.
GITHUB_WORKFLOW_TO_TRACK = {"LLVM Premerge Checks": "github_llvm_premerge_checks"}

# Lists the Github jobs to track for a given workflow. The key is the stable
# name (metric name) of the workflow (see GITHUB_WORKFLOW_TO_TRACK).
# Each value is a map to link the github job name to the corresponding metric
# name.
GITHUB_JOB_TO_TRACK = {
    "github_llvm_premerge_checks": {
        "Linux Premerge Checks (Test Only - Please Ignore Results)": "premerge_linux",
        "Windows Premerge Checks (Test Only - Please Ignore Results)": "premerge_windows",
    }
}
38
+
25
39
# Lists the BuildKite jobs we want to track. Maps the BuildKite job name to
26
40
# the metric name in Grafana. This is important not to lose metrics history
27
41
# if the workflow name changes.
@@ -179,7 +193,7 @@ def buildkite_get_metrics(
179
193
180
194
last_recorded_build = last_cursor
181
195
output = []
182
- for build in builds :
196
+ for build in reversed ( builds ) :
183
197
info = buildkite_get_build_info (build ["number" ])
184
198
last_recorded_build = build ["cursor" ]
185
199
for job in info ["jobs" ]:
@@ -196,7 +210,7 @@ def buildkite_get_metrics(
196
210
queue_time = (started_at - scheduled_at ).seconds
197
211
run_time = (finished_at - started_at ).seconds
198
212
status = bool (job ["passed" ])
199
- created_at_ns = int (created_at .timestamp ()) * 10 ** 9
213
+ finished_at_ns = int (finished_at .timestamp ()) * 10 ** 9
200
214
workflow_id = build ["number" ]
201
215
workflow_name = "Github pull requests"
202
216
output .append (
@@ -205,7 +219,7 @@ def buildkite_get_metrics(
205
219
queue_time ,
206
220
run_time ,
207
221
status ,
208
- created_at_ns ,
222
+ finished_at_ns ,
209
223
workflow_id ,
210
224
workflow_name ,
211
225
)
@@ -231,7 +245,7 @@ def get_sampled_workflow_metrics(github_repo: github.Repository):
231
245
# is not documented (See #70540).
232
246
# "queued" seems to be the info we want.
233
247
for queued_workflow in github_repo .get_workflow_runs (status = "queued" ):
234
- if queued_workflow .name not in WORKFLOWS_TO_TRACK :
248
+ if queued_workflow .name not in GITHUB_WORKFLOW_TO_TRACK :
235
249
continue
236
250
for queued_workflow_job in queued_workflow .jobs ():
237
251
job_name = queued_workflow_job .name
@@ -249,7 +263,7 @@ def get_sampled_workflow_metrics(github_repo: github.Repository):
249
263
running_job_counts [job_name ] += 1
250
264
251
265
for running_workflow in github_repo .get_workflow_runs (status = "in_progress" ):
252
- if running_workflow .name not in WORKFLOWS_TO_TRACK :
266
+ if running_workflow .name not in GITHUB_WORKFLOW_TO_TRACK :
253
267
continue
254
268
for running_workflow_job in running_workflow .jobs ():
255
269
job_name = running_workflow_job .name
@@ -284,70 +298,54 @@ def get_sampled_workflow_metrics(github_repo: github.Repository):
284
298
)
285
299
return workflow_metrics
286
300
287
- def get_per_workflow_metrics (
288
- github_repo : github .Repository , workflows_to_track : dict [str , int ]
289
- ):
301
+
302
+ def get_per_workflow_metrics (github_repo : github .Repository , last_workflow_id : str ):
290
303
"""Gets the metrics for specified Github workflows.
291
304
292
- This function takes in a list of workflows to track, and optionally the
293
- workflow ID of the last tracked invocation. It grabs the relevant data
294
- from Github, returning it to the caller.
305
+ This function loads the last workflows from GitHub up to
306
+ `last_workflow_id` and logs their metrics if they are referenced in
307
+ GITHUB_WORKFLOW_TO_TRACK.
308
+ The function returns a list of metrics, and the most recent processed
309
+ workflow.
310
+ If `last_workflow_id` is None, no metrics are returned, and the last
311
+ completed github workflow ID is returned. This is used once when the
312
+ program starts.
295
313
296
314
Args:
297
315
github_repo: A github repo object to use to query the relevant information.
298
- workflows_to_track: A dictionary mapping workflow names to the last
299
- invocation ID where metrics have been collected, or None to collect the
300
- last five results.
316
+ last_workflow_id: the last workflow we checked.
301
317
302
318
Returns:
303
319
Returns a list of JobMetrics objects, containing the relevant metrics about
304
320
the workflow.
305
321
"""
306
322
workflow_metrics = []
323
+ last_checked_workflow_id = last_workflow_id
307
324
308
- workflows_to_include = set (workflows_to_track .keys ())
309
-
310
- for workflow_run in iter (github_repo .get_workflow_runs ()):
311
- if len (workflows_to_include ) == 0 :
325
+ for workflow_run in iter (github_repo .get_workflow_runs (status = "completed" )):
326
+ last_checked_workflow_id = workflow_run .id
327
+ # If we saw this workflow already, break. We also break if no
328
+ # workflow has been seen, as this means the script just started.
329
+ if last_workflow_id == workflow_run .id or last_workflow_id is None :
312
330
break
313
331
314
- if workflow_run .status != "completed" :
315
- continue
316
-
317
- # This workflow was already sampled for this run, or is not tracked at
318
- # all. Ignoring.
319
- if workflow_run .name not in workflows_to_include :
320
- continue
321
-
322
- # There were no new workflow invocations since the previous scrape.
323
- # The API returns a sorted list with the most recent invocations first,
324
- # so we can stop looking for this particular workflow. Continue to grab
325
- # information on the other workflows of interest, if present.
326
- if workflows_to_track [workflow_run .name ] == workflow_run .id :
327
- workflows_to_include .remove (workflow_run .name )
332
+ # This workflow is not interesting to us. Skipping.
333
+ if workflow_run .name not in GITHUB_WORKFLOW_TO_TRACK :
328
334
continue
329
335
330
- workflow_jobs = workflow_run .jobs ()
331
- if workflow_jobs .totalCount == 0 :
332
- continue
336
+ workflow_key = GITHUB_WORKFLOW_TO_TRACK [workflow_run .name ]
333
337
334
- if (
335
- workflows_to_track [workflow_run .name ] is None
336
- or workflows_to_track [workflow_run .name ] == workflow_run .id
337
- ):
338
- workflows_to_include .remove (workflow_run .name )
339
- if (
340
- workflows_to_track [workflow_run .name ] is not None
341
- and len (workflows_to_include ) == 0
342
- ):
343
- break
338
+ for workflow_job in workflow_run .jobs ():
339
+ # This job is not interesting, skipping.
340
+ if workflow_job .name not in GITHUB_JOB_TO_TRACK [workflow_key ]:
341
+ continue
344
342
345
- for workflow_job in workflow_jobs :
346
343
created_at = workflow_job .created_at
347
344
started_at = workflow_job .started_at
348
345
completed_at = workflow_job .completed_at
349
-
350
346
job_result = int (workflow_job .conclusion == "success" )
347
+ job_key = GITHUB_JOB_TO_TRACK [workflow_key ][workflow_job .name ]
348
+
351
349
if job_result :
352
350
# We still might want to mark the job as a failure if one of the steps
353
351
# failed. This is required due to use setting continue-on-error in
@@ -377,7 +375,7 @@ def get_per_workflow_metrics(
377
375
378
376
workflow_metrics .append (
379
377
JobMetrics (
380
- workflow_run . name + "-" + workflow_job . name ,
378
+ workflow_key + "-" + job_key ,
381
379
queue_time .seconds ,
382
380
run_time .seconds ,
383
381
job_result ,
@@ -387,7 +385,7 @@ def get_per_workflow_metrics(
387
385
)
388
386
)
389
387
390
- return workflow_metrics
388
+ return workflow_metrics , last_checked_workflow_id
391
389
392
390
def upload_metrics (workflow_metrics , metrics_userid , api_key ):
393
391
"""Upload metrics to Grafana.
@@ -441,33 +439,28 @@ def main():
441
439
grafana_metrics_userid = os .environ ["GRAFANA_METRICS_USERID" ]
442
440
buildkite_token = os .environ ["BUILDKITE_TOKEN" ]
443
441
444
- # The last buildkite build recorded.
442
+ # This script only records workflows/jobs/builds finished after it
443
+ # started. So we need to keep track of the last known build.
445
444
buildkite_last_cursor = None
446
-
447
- workflows_to_track = {}
448
- for workflow_to_track in WORKFLOWS_TO_TRACK :
449
- workflows_to_track [workflow_to_track ] = None
445
+ github_last_workflow_id = None
450
446
451
447
# Enter the main loop. Every five minutes we wake up and dump metrics for
452
448
# the relevant jobs.
453
449
while True :
454
450
github_object = Github (auth = auth )
455
451
github_repo = github_object .get_repo ("llvm/llvm-project" )
456
452
457
- current_metrics , buildkite_last_cursor = buildkite_get_metrics (
453
+ buildkite_metrics , buildkite_last_cursor = buildkite_get_metrics (
458
454
buildkite_token , buildkite_last_cursor
459
455
)
460
- current_metrics += get_per_workflow_metrics (github_repo , workflows_to_track )
461
- current_metrics += get_sampled_workflow_metrics (github_repo )
462
-
463
- upload_metrics (current_metrics , grafana_metrics_userid , grafana_api_key )
464
- logging .info (f"Uploaded { len (current_metrics )} metrics" )
456
+ github_metrics , github_last_workflow_id = get_per_workflow_metrics (
457
+ github_repo , github_last_workflow_id
458
+ )
459
+ sampled_metrics = get_sampled_workflow_metrics (github_repo )
465
460
466
- for workflow_metric in reversed (current_metrics ):
467
- if isinstance (workflow_metric , JobMetrics ):
468
- workflows_to_track [
469
- workflow_metric .workflow_name
470
- ] = workflow_metric .workflow_id
461
+ metrics = buildkite_metrics + github_metrics + sampled_metrics
462
+ upload_metrics (metrics , grafana_metrics_userid , grafana_api_key )
463
+ logging .info (f"Uploaded { len (metrics )} metrics" )
471
464
472
465
time .sleep (SCRAPE_INTERVAL_SECONDS )
473
466
0 commit comments