9
9
# Grafana InfluxDB-compatible push endpoint that daily metrics are written to.
GRAFANA_URL = (
    "https://influx-prod-13-prod-us-east-0.grafana.net/api/v1/push/influx/write"
)

# Endpoint for GitHub's GraphQL API, used to look up pull-request data.
GITHUB_GRAPHQL_API_URL = "https://api.github.com/graphql"

# The repository whose commits this script analyzes.
REPOSITORY_URL = "https://github.com/llvm/llvm-project.git"

# How many commits to query the GitHub GraphQL API for at a time.
# Querying too many commits at once often leads to the call failing.
GITHUB_API_BATCH_SIZE = 75
14
19
# Number of days to look back for new commits
15
20
# We allow some buffer time between when a commit is made and when it is queried
16
21
# for reviews. This is to allow time for any events to propagate in the GitHub
44
49
AND JSON_VALUE(pr_event.payload, '$.pull_request.merge_commit_sha') IS NOT NULL
45
50
"""
46
51
52
# Template GraphQL subquery to check if a commit has an associated pull request
# and whether that pull request has been reviewed and approved.
# The "commit_{sha}" alias makes each commit's result addressable in a batched
# response; doubled braces become literal braces after str.format().
COMMIT_GRAPHQL_SUBQUERY_TEMPLATE = """
commit_{commit_sha}:
  object(oid:"{commit_sha}") {{
    ... on Commit {{
      associatedPullRequests(first: 1) {{
        totalCount
        pullRequest: nodes {{
          number
          reviewDecision
        }}
      }}
    }}
  }}
"""
68
+
47
69
48
70
@dataclasses .dataclass
49
71
class LLVMCommitInfo :
@@ -153,6 +175,85 @@ def query_for_reviews(
153
175
return list (new_commits .values ())
154
176
155
177
178
def validate_push_commits(
    new_commits: list[LLVMCommitInfo], github_token: str
) -> None:
  """Validate that push commits don't have a pull request.

  To address lossiness of data from GitHub Archive BigQuery, we check each
  commit to see if it actually has an associated pull request.

  Args:
    new_commits: List of commits to validate. Entries that turn out to have a
      pull request are amended in place with the PR's number/review state.
    github_token: The access token to use with the GitHub GraphQL API.
  """

  # Get all push commits from new commits and form their subqueries.
  commit_subqueries = []
  potential_push_commits = {}
  for commit in new_commits:
    if commit.has_pull_request:
      continue
    potential_push_commits[commit.commit_sha] = commit
    commit_subqueries.append(
        COMMIT_GRAPHQL_SUBQUERY_TEMPLATE.format(commit_sha=commit.commit_sha)
    )
  logging.info("Found %d potential push commits", len(potential_push_commits))
  if not potential_push_commits:
    # Nothing to validate; avoid posting an empty query to the API.
    return

  # Query GitHub GraphQL API for pull requests associated with push commits.
  # We query in batches as large queries often fail.
  api_commit_data = {}
  query_template = """
  query {
    repository(owner:"llvm", name:"llvm-project"){
      %s
    }
  }
  """
  # Ceiling division: `// batch + 1` would produce a spurious empty trailing
  # batch whenever the count is an exact multiple of the batch size.
  num_batches = -(-len(commit_subqueries) // GITHUB_API_BATCH_SIZE)
  logging.info("Querying GitHub GraphQL API in %d batches", num_batches)
  for i in range(num_batches):
    subquery_batch = commit_subqueries[
        i * GITHUB_API_BATCH_SIZE : (i + 1) * GITHUB_API_BATCH_SIZE
    ]
    query = query_template % "".join(subquery_batch)

    logging.info(
        "Querying batch %d of %d (%d commits)",
        i + 1,
        num_batches,
        len(subquery_batch),
    )
    response = requests.post(
        url=GITHUB_GRAPHQL_API_URL,
        headers={
            "Authorization": f"bearer {github_token}",
        },
        json={"query": query},
    )
    if response.status_code < 200 or response.status_code >= 300:
      # Error responses carry an "errors" key instead of "data"; skip this
      # batch rather than crashing on the missing key below.
      logging.error("Failed to query GitHub GraphQL API: %s", response.text)
      continue
    api_commit_data.update(response.json()["data"]["repository"])

  amend_count = 0
  for commit_sha, data in api_commit_data.items():
    # Subquery aliases were prefixed with "commit_"; strip it to recover the
    # SHA used as the key in potential_push_commits.
    commit_sha = commit_sha.removeprefix("commit_")

    # Verify that push commit has no pull requests.
    if data["associatedPullRequests"]["totalCount"] == 0:
      continue

    # Amend fields with new data from API.
    pull_request = data["associatedPullRequests"]["pullRequest"][0]
    commit_info = potential_push_commits[commit_sha]
    commit_info.has_pull_request = True
    commit_info.pr_number = pull_request["number"]
    commit_info.is_reviewed = pull_request["reviewDecision"] is not None
    commit_info.is_approved = pull_request["reviewDecision"] == "APPROVED"
    amend_count += 1

  logging.info("Amended %d commits", amend_count)
256
+
156
257
def upload_daily_metrics (
157
258
grafana_api_key : str ,
158
259
grafana_metrics_userid : str ,
@@ -164,9 +265,6 @@ def upload_daily_metrics(
164
265
grafana_api_key: The key to make API requests with.
165
266
grafana_metrics_userid: The user to make API requests with.
166
267
new_commits: List of commits to process & upload to Grafana.
167
-
168
- Returns:
169
- None
170
268
"""
171
269
# Count each type of commit made
172
270
approval_count = 0
@@ -200,6 +298,7 @@ def upload_daily_metrics(
200
298
201
299
202
300
def main () -> None :
301
+ github_token = os .environ ["GITHUB_TOKEN" ]
203
302
grafana_api_key = os .environ ["GRAFANA_API_KEY" ]
204
303
grafana_metrics_userid = os .environ ["GRAFANA_METRICS_USERID" ]
205
304
@@ -219,6 +318,9 @@ def main() -> None:
219
318
logging .info ("Querying for reviews of new commits." )
220
319
new_commit_info = query_for_reviews (new_commits , date_to_scrape )
221
320
321
+ logging .info ("Validating push commits." )
322
+ validate_push_commits (new_commit_info , github_token )
323
+
222
324
logging .info ("Uploading metrics to Grafana." )
223
325
upload_daily_metrics (grafana_api_key , grafana_metrics_userid , new_commit_info )
224
326
0 commit comments