Skip to content

Commit 147b38c

Browse files
[CI] Upstream metrics script and container definition
This patch adds the script that pulls workflow information from GitHub and pushes it to Grafana. The script is currently running in the cluster and pushes data to https://llvm.grafana.net/public-dashboards/6a1c1969b6794e0a8ee5d494c72ce2cd. It is designed so that additional jobs can be tracked relatively easily, and it can be readily modified to collect other metrics.
1 parent a0ef12c commit 147b38c

File tree

4 files changed

+523
-0
lines changed

4 files changed

+523
-0
lines changed

.ci/metrics/Dockerfile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Container image that runs the CI metrics script (metrics.py).
FROM python:3.12

# Python block-buffers stdout when it is not attached to a terminal, so
# without this the script's print() status messages can reach the container
# logs late.
ENV PYTHONUNBUFFERED=1

# Install the dependencies before copying the script so this layer can be
# reused from the build cache when only metrics.py changes.
COPY requirements.lock.txt ./
RUN pip3 install --no-cache-dir -r requirements.lock.txt

COPY metrics.py ./

CMD ["python3", "metrics.py"]

.ci/metrics/metrics.py

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
import requests
2+
import time
3+
import os
4+
from dataclasses import dataclass
5+
6+
from github import Github
7+
from github import Auth
8+
9+
# Endpoint that the collected metrics are POSTed to.
GRAFANA_URL = (
    "https://influx-prod-13-prod-us-east-0.grafana.net"
    "/api/v1/push/influx/write"
)

# GitHub repository, in "owner/name" form, that this script reports on.
GITHUB_PROJECT = "llvm/llvm-project"

# Names of the workflows for which metrics are gathered.
WORKFLOWS_TO_TRACK = ["Check code formatting"]
14+
15+
16+
@dataclass
class JobMetrics:
    """Measurements recorded for the single job of one workflow run."""

    # Name of the workflow run the job belongs to.
    job_name: str
    # Seconds the job waited between being created and being started.
    queue_time: int
    # Seconds the job took from start to completion.
    run_time: int
    # 1 if the job's conclusion was "success", 0 otherwise.
    status: int
    # Job creation time, in nanoseconds since the Unix epoch.
    created_at_ns: int
    # ID of the workflow run the job belongs to.
    workflow_id: int
24+
25+
26+
def get_metrics(github_repo, workflows_to_track):
    """Gets the metrics for specified Github workflows.

    Walks the repository's workflow runs in the order Github returns them
    (NOTE: assumed to be newest first - confirm against the Github API) and
    records one JobMetrics entry for every completed run of a tracked
    workflow, until a stopping point has been reached for every tracked
    workflow or the list of runs is exhausted.

    Runs that are not completed, have no jobs, or whose job took zero seconds
    are skipped.

    Args:
      github_repo: A github repo object to use to query the relevant information.
      workflows_to_track: A dictionary mapping workflow names to the ID of the
        last workflow run for which metrics have already been collected, or
        None to collect only the first usable run that is encountered.

    Returns:
      Returns a list of JobMetrics objects, containing the relevant metrics about
      the workflow, in the order the runs were encountered. The run whose ID is
      already recorded in workflows_to_track is never included again.

    Raises:
      ValueError: If a tracked workflow run has more than one job.
    """
    workflow_metrics = []

    # Workflows for which no stopping point has been reached yet.
    workflows_pending = set(workflows_to_track)
    if not workflows_pending:
        return workflow_metrics

    # A for loop (instead of calling next() in a while loop) ends cleanly when
    # the runs are exhausted, e.g. when the last tracked run is no longer
    # listed, rather than escaping with StopIteration.
    for workflow_run in github_repo.get_workflow_runs():
        if workflow_run.status != "completed":
            continue
        if workflow_run.name not in workflows_pending:
            continue

        workflow_jobs = workflow_run.jobs()
        if workflow_jobs.totalCount == 0:
            continue
        if workflow_jobs.totalCount > 1:
            raise ValueError(
                f"Encountered an unexpected number of jobs: {workflow_jobs.totalCount}"
            )

        job = workflow_jobs[0]
        created_at = job.created_at
        started_at = job.started_at

        # total_seconds() covers the whole interval; timedelta.seconds would
        # drop whole days and wrap around for negative intervals.
        queue_time = int((started_at - created_at).total_seconds())
        run_time = int((job.completed_at - started_at).total_seconds())

        if run_time == 0:
            continue

        last_tracked_id = workflows_to_track[workflow_run.name]
        already_reported = last_tracked_id == workflow_run.id
        if last_tracked_id is None or already_reported:
            # Either only one run was wanted, or we caught up with the
            # previous invocation: this workflow is done.
            workflows_pending.discard(workflow_run.name)

        if not already_reported:
            workflow_metrics.append(
                JobMetrics(
                    workflow_run.name,
                    queue_time,
                    run_time,
                    int(job.conclusion == "success"),
                    int(created_at.timestamp()) * 10**9,
                    workflow_run.id,
                )
            )

        if not workflows_pending:
            break

    return workflow_metrics
117+
118+
119+
def upload_metrics(workflow_metrics, metrics_userid, api_key, *, timeout=30):
    """Upload metrics to Grafana.

    Takes in a list of workflow metrics and then uploads them to Grafana
    through a single REST request. Each metric becomes one line of the
    request body, named after the lower-cased job name with spaces replaced
    by underscores. A non-2xx response is reported on stdout; it does not
    raise.

    Args:
      workflow_metrics: A list of metrics to upload to Grafana. If it is
        empty, no request is made.
      metrics_userid: The userid to use for the upload.
      api_key: The API key to use for the upload.
      timeout: Seconds to wait for Grafana before giving up on the request.

    Raises:
      requests.exceptions.RequestException: If the request cannot be completed,
        including when it times out.
    """
    if not workflow_metrics:
        # Nothing to report; do not POST an empty body.
        return

    metrics_batch = []
    for workflow_metric in workflow_metrics:
        workflow_formatted_name = workflow_metric.job_name.lower().replace(" ", "_")
        metrics_batch.append(
            f"{workflow_formatted_name} queue_time={workflow_metric.queue_time},run_time={workflow_metric.run_time},status={workflow_metric.status} {workflow_metric.created_at_ns}"
        )

    request_data = "\n".join(metrics_batch)
    response = requests.post(
        GRAFANA_URL,
        headers={"Content-Type": "text/plain"},
        data=request_data,
        auth=(metrics_userid, api_key),
        # Without a timeout a stalled connection would block the caller
        # indefinitely.
        timeout=timeout,
    )

    if response.status_code < 200 or response.status_code >= 300:
        print(f"Failed to submit data to Grafana: {response.status_code}")
147+
148+
149+
def main():
    """Collects workflow metrics from Github and uploads them to Grafana forever.

    Reads the GITHUB_TOKEN, GRAFANA_API_KEY and GRAFANA_METRICS_USERID
    environment variables, then loops: gather the metrics that are new since
    the previous iteration, upload them, remember the newest workflow run seen
    for each workflow, and sleep for five minutes. This function does not
    return.

    Raises:
      KeyError: If one of the required environment variables is not set.
    """
    # Authenticate with Github
    auth = Auth.Token(os.environ["GITHUB_TOKEN"])
    github_object = Github(auth=auth)
    github_repo = github_object.get_repo(GITHUB_PROJECT)

    grafana_api_key = os.environ["GRAFANA_API_KEY"]
    grafana_metrics_userid = os.environ["GRAFANA_METRICS_USERID"]

    # None means no run of that workflow has been reported yet.
    workflows_to_track = dict.fromkeys(WORKFLOWS_TO_TRACK)

    # Enter the main loop. Every five minutes we wake up and dump metrics for
    # the relevant jobs.
    while True:
        current_metrics = get_metrics(github_repo, workflows_to_track)
        if current_metrics:
            upload_metrics(current_metrics, grafana_metrics_userid, grafana_api_key)
            print(f"Uploaded {len(current_metrics)} metrics")

            # Iterate in reverse so that, for each workflow, the entry that
            # comes first in the list is the one that ends up recorded.
            for workflow_metric in reversed(current_metrics):
                workflows_to_track[workflow_metric.job_name] = workflow_metric.workflow_id
        else:
            print("No metrics found to upload.")

        # Sleep on every iteration, including when nothing was found, so the
        # loop never polls Github back-to-back.
        time.sleep(5 * 60)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)