import logging
import os
import git
+from google.cloud import bigquery
import requests

GRAFANA_URL = (
GITHUB_GRAPHQL_API_URL = "https://api.github.com/graphql"
REPOSITORY_URL = "https://github.com/llvm/llvm-project.git"

+# BigQuery dataset and tables to write metrics to.
+OPERATIONAL_METRICS_DATASET = "operational_metrics"
+LLVM_COMMITS_TABLE = "llvm_commits"
+
# How many commits to query the GitHub GraphQL API for at a time.
# Querying too many commits at once often leads to the call failing.
GITHUB_API_BATCH_SIZE = 50
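
# Illustrative sketch (hypothetical helper, not part of this change; the actual
# batching loop is not shown in this section): a batch size like the one above
# is typically consumed by chunking the commit list and issuing one GraphQL
# request per chunk.
#
#   def batch_commits(commit_shas: list[str], size: int = GITHUB_API_BATCH_SIZE):
#     for i in range(0, len(commit_shas), size):
#       yield commit_shas[i : i + size]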
  commit_{commit_sha}:
    object(oid:"{commit_sha}") {{
      ... on Commit {{
+        author {{
+          user {{
+            login
+          }}
+        }}
        associatedPullRequests(first: 1) {{
          totalCount
          pullRequest: nodes {{
            number
            reviewDecision
+            reviews(first: 10) {{
+              nodes {{
+                reviewer: author {{
+                  login
+                }}
+              }}
+            }}
          }}
        }}
      }}
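
# For reference (values illustrative), the shape of the per-commit JSON this
# fragment yields, as consumed by query_for_reviews below:
#   "commit_<sha>": {
#     "author": {"user": {"login": "<author-login>"}},
#     "associatedPullRequests": {
#       "totalCount": 1,
#       "pullRequest": [{
#         "number": 12345,
#         "reviewDecision": "APPROVED",
#         "reviews": {"nodes": [{"reviewer": {"login": "<reviewer-login>"}}]}
#       }]
#     }
#   }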
@dataclasses.dataclass
class LLVMCommitInfo:
  commit_sha: str
-  commit_datetime: datetime.datetime
  commit_timestamp_seconds: int
+  files_modified: set[str]
+  commit_author: str = ""  # GitHub username of the author; unknown until the API call.
  has_pull_request: bool = False
-  pr_number: int = 0
+  pull_request_number: int = 0
  is_reviewed: bool = False
  is_approved: bool = False
+  reviewers: set[str] = dataclasses.field(default_factory=set)
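
# Construction sketch (illustrative values, mirroring the keyword call in
# query_for_reviews below): only the git-derived fields are set up front; the
# author, pull-request, and review fields are filled in after the GitHub
# GraphQL call.
#
#   info = LLVMCommitInfo(
#       commit_sha="deadbeef",
#       commit_timestamp_seconds=1_700_000_000,
#       files_modified={"llvm/lib/Support/APInt.cpp"},
#   )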


def scrape_new_commits_by_date(
@@ -99,7 +118,9 @@ def query_for_reviews(
  # Create a map of commit SHA to commit info.
  new_commits = {
      commit.hexsha: LLVMCommitInfo(
-          commit.hexsha, commit.committed_datetime, commit.committed_date
+          commit_sha=commit.hexsha,
+          commit_timestamp_seconds=commit.committed_date,
+          files_modified=set(commit.stats.files.keys()),
      )
      for commit in new_commits
  }
@@ -140,29 +161,41 @@ def query_for_reviews(
      },
      json={"query": query},
  )
+
+  # Exit if the API call fails.
+  # A failed API call means a large batch of data is missing and will not be
+  # reflected in the dashboard. The dashboard will silently misrepresent
+  # commit data if we continue execution, so it's better to fail loudly.
  if response.status_code < 200 or response.status_code >= 300:
    logging.error("Failed to query GitHub GraphQL API: %s", response.text)
+    exit(1)
+
  api_commit_data.update(response.json()["data"]["repository"])

+  # Amend commit information with GitHub data.
  for commit_sha, data in api_commit_data.items():
-    # Verify that push commit has no pull requests
    commit_sha = commit_sha.removeprefix("commit_")
+    commit_info = new_commits[commit_sha]
+    commit_info.commit_author = data["author"]["user"]["login"]

    # If commit has no pull requests, skip it. No data to update.
    if data["associatedPullRequests"]["totalCount"] == 0:
      continue

    pull_request = data["associatedPullRequests"]["pullRequest"][0]
-    commit_info = new_commits[commit_sha]
    commit_info.has_pull_request = True
-    commit_info.pr_number = pull_request["number"]
+    commit_info.pull_request_number = pull_request["number"]
    commit_info.is_reviewed = pull_request["reviewDecision"] is not None
    commit_info.is_approved = pull_request["reviewDecision"] == "APPROVED"
+    commit_info.reviewers = set([
+        review["reviewer"]["login"]
+        for review in pull_request["reviews"]["nodes"]
+    ])

  return list(new_commits.values())


-def upload_daily_metrics(
+def upload_daily_metrics_to_grafana(
    grafana_api_key: str,
    grafana_metrics_userid: str,
    new_commits: list[LLVMCommitInfo],
@@ -205,6 +238,22 @@ def upload_daily_metrics(
    logging.error("Failed to submit data to Grafana: %s", response.text)


+def upload_daily_metrics_to_bigquery(new_commits: list[LLVMCommitInfo]) -> None:
+  """Upload processed commit metrics to a BigQuery dataset.
+
+  Args:
+    new_commits: List of commits to process & upload to BigQuery.
+  """
+  bq_client = bigquery.Client()
+  table_ref = bq_client.dataset(OPERATIONAL_METRICS_DATASET).table(
+      LLVM_COMMITS_TABLE
+  )
+  table = bq_client.get_table(table_ref)
+  commit_records = [dataclasses.asdict(commit) for commit in new_commits]
+  bq_client.insert_rows(table, commit_records)
+  bq_client.close()
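
# Assumption, not shown in this diff: insert_rows() converts each row dict
# against the live table schema fetched by get_table(), so the llvm_commits
# table is presumed to mirror LLVMCommitInfo's fields, e.g. STRING for
# commit_sha and commit_author, INTEGER for commit_timestamp_seconds and
# pull_request_number, BOOLEAN for the flags, and REPEATED STRING for
# files_modified and reviewers.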
+
+
def main() -> None:
  github_token = os.environ["GITHUB_TOKEN"]
  grafana_api_key = os.environ["GRAFANA_API_KEY"]
@@ -227,7 +276,12 @@ def main() -> None:
  new_commit_info = query_for_reviews(new_commits, github_token)

  logging.info("Uploading metrics to Grafana.")
-  upload_daily_metrics(grafana_api_key, grafana_metrics_userid, new_commit_info)
+  upload_daily_metrics_to_grafana(
+      grafana_api_key, grafana_metrics_userid, new_commit_info
+  )
+
+  logging.info("Uploading metrics to BigQuery.")
+  upload_daily_metrics_to_bigquery(new_commit_info)


if __name__ == "__main__":