Skip to content

Commit 859653d

Browse files
authored
[CI] Track unique contributors to LLVM over time (#534)
This change uploads additional metrics to Grafana so that we may visualize LLVM contributor data. From newly scraped commits and stored data from BigQuery, we can determine the number of unique LLVM contributors per day, the number of all time unique LLVM contributors, and the number of new contributors to LLVM per day.
1 parent ff8bf02 commit 859653d

File tree

1 file changed

+59
-13
lines changed

1 file changed

+59
-13
lines changed

llvm-ops-metrics/ops-container/process_llvm_commits.py

Lines changed: 59 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -195,23 +195,56 @@ def query_for_reviews(
195195
return list(new_commits.values())
196196

197197

198+
def get_past_contributors(bq_client: bigquery.Client) -> set[str]:
    """Get past contributors to LLVM from BigQuery dataset.

    Queries the distinct, non-NULL `commit_author` values stored in the
    `OPERATIONAL_METRICS_DATASET.LLVM_COMMITS_TABLE` table.

    Args:
      bq_client: The BigQuery client to use.

    Returns:
      Set of unique past contributors to LLVM.
    """
    # NOTE(review): the dataset and table names are presumably module-level
    # constants defined elsewhere in this file, not external input, so
    # interpolating them into the query text is fine -- confirm.
    query = f"""
        SELECT
          DISTINCT commit_author
        FROM {OPERATIONAL_METRICS_DATASET}.{LLVM_COMMITS_TABLE}
        WHERE commit_author IS NOT NULL
    """
    rows = bq_client.query(query).result()
    return {row.commit_author for row in rows}
214+
215+
198216
def upload_daily_metrics_to_grafana(
199217
grafana_api_key: str,
200218
grafana_metrics_userid: str,
201219
new_commits: list[LLVMCommitInfo],
220+
past_contributors: set[str],
202221
) -> None:
203222
"""Upload daily commit metrics to Grafana.
204223
205224
Args:
206225
grafana_api_key: The key to make API requests with.
207226
grafana_metrics_userid: The user to make API requests with.
208227
new_commits: List of commits to process & upload to Grafana.
228+
past_contributors: Set of unique past contributors to LLVM.
209229
"""
230+
231+
def post_data(data: str) -> None:
    """Helper function to post data to Grafana.

    Sends `data` as a plain-text request body to GRAFANA_URL, authenticating
    with the enclosing function's Grafana user id and API key, and logs an
    error when the response status is outside the 2xx range.

    Args:
      data: The request body to send.
    """
    response = requests.post(
        GRAFANA_URL,
        headers={"Content-Type": "text/plain"},
        data=data,
        auth=(grafana_metrics_userid, grafana_api_key),
        # requests applies no timeout unless one is given; bound the wait so
        # a stalled endpoint cannot hang the whole job.
        timeout=30,
    )
    if not 200 <= response.status_code < 300:
        logging.error("Failed to submit data to Grafana: %s", response.text)
241+
210242
# Count each type of commit made
211243
approval_count = 0
212244
review_count = 0
213245
pull_request_count = 0
214246
push_count = 0
247+
contributors = set()
215248
for commit in new_commits:
216249
if commit.is_approved:
217250
approval_count += 1
@@ -221,44 +254,49 @@ def upload_daily_metrics_to_grafana(
221254
pull_request_count += 1
222255
else:
223256
push_count += 1
257+
contributors.add(commit.commit_author)
224258

225259
# Post data via InfluxDB API call
260+
# Commit data
226261
request_data = (
227262
"llvm_project_main_daily_commits"
228263
" approval_count={},review_count={},pull_request_count={},push_count={}"
229264
).format(approval_count, review_count, pull_request_count, push_count)
230-
response = requests.post(
231-
GRAFANA_URL, # Set timestamp precision to seconds
232-
headers={"Content-Type": "text/plain"},
233-
data=request_data,
234-
auth=(grafana_metrics_userid, grafana_api_key),
235-
)
265+
post_data(request_data)
236266

237-
if response.status_code < 200 or response.status_code >= 300:
238-
logging.error("Failed to submit data to Grafana: %s", response.text)
267+
# Contributor data
268+
request_data = (
269+
"llvm_project_main"
270+
" daily_unique_contributor_count={},all_time_unique_contributor_count={}"
271+
.format(len(contributors), len(contributors | past_contributors))
272+
)
273+
post_data(request_data)
239274

240275

241-
def upload_daily_metrics_to_bigquery(new_commits: list[LLVMCommitInfo]) -> None:
276+
def upload_daily_metrics_to_bigquery(
    bq_client: bigquery.Client, new_commits: list[LLVMCommitInfo]
) -> None:
    """Upload processed commit metrics to a BigQuery dataset.

    Each commit is converted to a dict with `dataclasses.asdict` and inserted
    as one row of the `OPERATIONAL_METRICS_DATASET.LLVM_COMMITS_TABLE` table.
    Insertion errors returned by BigQuery are logged instead of being
    silently discarded.

    Args:
      bq_client: The BigQuery client to use. This function does not close it.
      new_commits: List of commits to process & upload to BigQuery.
    """
    if not new_commits:
        # Nothing to insert; skip the table lookup and the insert request.
        return

    table_ref = bq_client.dataset(OPERATIONAL_METRICS_DATASET).table(
        LLVM_COMMITS_TABLE
    )
    table = bq_client.get_table(table_ref)
    commit_records = [dataclasses.asdict(commit) for commit in new_commits]
    # insert_rows reports rows that failed to insert through its return
    # value, so it has to be inspected explicitly.
    errors = bq_client.insert_rows(table, commit_records)
    if errors:
        logging.error("Failed to insert rows into BigQuery: %s", errors)
254-
bq_client.close()
255291

256292

257293
def main() -> None:
258294
github_token = os.environ["GITHUB_TOKEN"]
259295
grafana_api_key = os.environ["GRAFANA_API_KEY"]
260296
grafana_metrics_userid = os.environ["GRAFANA_METRICS_USERID"]
261297

298+
bq_client = bigquery.Client()
299+
262300
# Scrape new commits
263301
date_to_scrape = datetime.datetime.now(
264302
datetime.timezone.utc
@@ -275,13 +313,21 @@ def main() -> None:
275313
logging.info("Querying for reviews of new commits.")
276314
new_commit_info = query_for_reviews(new_commits, github_token)
277315

316+
logging.info("Getting set of past LLVM contributors.")
317+
past_contributors = get_past_contributors(bq_client)
318+
278319
logging.info("Uploading metrics to Grafana.")
279320
upload_daily_metrics_to_grafana(
280-
grafana_api_key, grafana_metrics_userid, new_commit_info
321+
grafana_api_key,
322+
grafana_metrics_userid,
323+
new_commit_info,
324+
past_contributors,
281325
)
282326

283327
logging.info("Uploading metrics to BigQuery.")
284-
upload_daily_metrics_to_bigquery(new_commit_info)
328+
upload_daily_metrics_to_bigquery(bq_client, new_commit_info)
329+
330+
bq_client.close()
285331

286332

287333
if __name__ == "__main__":

0 commit comments

Comments
 (0)