diff --git a/.github/workflows/commit-access-review.py b/.github/workflows/commit-access-review.py index 8ea9b1fcc2fb0..e812ba10b0d91 100644 --- a/.github/workflows/commit-access-review.py +++ b/.github/workflows/commit-access-review.py @@ -62,57 +62,9 @@ def __repr__(self): ) -def run_graphql_query( - query: str, variables: dict, token: str, retry: bool = True -) -> dict: - """ - This function submits a graphql query and returns the results as a - dictionary. - """ - s = requests.Session() - retries = requests.adapters.Retry(total=8, backoff_factor=2, status_forcelist=[504]) - s.mount("https://", requests.adapters.HTTPAdapter(max_retries=retries)) - - headers = { - "Authorization": "bearer {}".format(token), - # See - # https://github.blog/2021-11-16-graphql-global-id-migration-update/ - "X-Github-Next-Global-ID": "1", - } - request = s.post( - url="https://api.github.com/graphql", - json={"query": query, "variables": variables}, - headers=headers, - ) - - rate_limit = request.headers.get("X-RateLimit-Remaining") - print(rate_limit) - if rate_limit and int(rate_limit) < 10: - reset_time = int(request.headers["X-RateLimit-Reset"]) - while reset_time - int(time.time()) > 0: - time.sleep(60) - print( - "Waiting until rate limit reset", - reset_time - int(time.time()), - "seconds remaining", - ) - - if request.status_code == 200: - if "data" not in request.json(): - print(request.json()) - sys.exit(1) - return request.json()["data"] - elif retry: - return run_graphql_query(query, variables, token, False) - else: - raise Exception( - "Failed to run graphql query\nquery: {}\nerror: {}".format( - query, request.json() - ) - ) - - -def check_manual_requests(start_date: datetime.datetime, token: str) -> list[str]: +def check_manual_requests( + gh: github.Github, start_date: datetime.datetime +) -> list[str]: """ Return a list of users who have been asked since ``start_date`` if they want to keep their commit access. @@ -140,7 +92,10 @@ def check_manual_requests(start_date: datetime.datetime, token: str) -> list[str "query": f"type:issue created:>{formatted_start_date} org:llvm repo:llvm-project label:infrastructure:commit-access" } - data = run_graphql_query(query, variables, token) + res_header, res_data = gh._Github__requester.graphql_query( + query=query, variables=variables + ) + data = res_data["data"] users = [] for issue in data["search"]["nodes"]: users.extend([user[1:] for user in re.findall("@[^ ,\n]+", issue["body"])]) @@ -148,7 +103,7 @@ def check_manual_requests(start_date: datetime.datetime, token: str) -> list[str return users -def get_num_commits(user: str, start_date: datetime.datetime, token: str) -> int: +def get_num_commits(gh: github.Github, user: str, start_date: datetime.datetime) -> int: """ Get number of commits that ``user`` has been made since ``start_date`. """ @@ -166,7 +121,10 @@ def get_num_commits(user: str, start_date: datetime.datetime, token: str) -> int } """ - data = run_graphql_query(user_query, variables, token) + res_header, res_data = gh._Github__requester.graphql_query( + query=user_query, variables=variables + ) + data = res_data["data"] variables["user_id"] = data["user"]["id"] query = """ @@ -193,7 +151,10 @@ def get_num_commits(user: str, start_date: datetime.datetime, token: str) -> int } """ count = 0 - data = run_graphql_query(query, variables, token) + res_header, res_data = gh._Github__requester.graphql_query( + query=query, variables=variables + ) + data = res_data["data"] for repo in data["organization"]["teams"]["nodes"][0]["repositories"]["nodes"]: count += int(repo["ref"]["target"]["history"]["totalCount"]) if count >= User.THRESHOLD: @@ -202,7 +163,7 @@ def get_num_commits(user: str, start_date: datetime.datetime, token: str) -> int def is_new_committer_query_repo( - user: str, start_date: datetime.datetime, token: str + gh: github.Github, user: str, start_date: datetime.datetime ) -> bool: """ Determine if ``user`` is a new committer. A new committer can keep their @@ -220,7 +181,10 @@ def is_new_committer_query_repo( } """ - data = run_graphql_query(user_query, variables, token) + res_header, res_data = gh._Github__requester.graphql_query( + query=user_query, variables=variables + ) + data = res_data["data"] variables["owner"] = "llvm" variables["user_id"] = data["user"]["id"] variables["start_date"] = start_date.strftime("%Y-%m-%dT%H:%M:%S") @@ -245,7 +209,10 @@ def is_new_committer_query_repo( } """ - data = run_graphql_query(query, variables, token) + res_header, res_data = gh._Github__requester.graphql_query( + query=query, variables=variables + ) + data = res_data["data"] repo = data["organization"]["repository"] commits = repo["ref"]["target"]["history"]["nodes"] if len(commits) == 0: @@ -256,18 +223,22 @@ def is_new_committer_query_repo( return True -def is_new_committer(user: str, start_date: datetime.datetime, token: str) -> bool: +def is_new_committer( + gh: github.Github, user: str, start_date: datetime.datetime +) -> bool: """ Wrapper around is_new_commiter_query_repo to handle exceptions. """ try: - return is_new_committer_query_repo(user, start_date, token) + return is_new_committer_query_repo(gh, user, start_date) except: pass return True -def get_review_count(user: str, start_date: datetime.datetime, token: str) -> int: +def get_review_count( + gh: github.Github, user: str, start_date: datetime.datetime +) -> int: """ Return the number of reviews that ``user`` has done since ``start_date``. """ @@ -286,11 +257,14 @@ def get_review_count(user: str, start_date: datetime.datetime, token: str) -> in "query": f"type:pr commenter:{user} -author:{user} merged:>{formatted_start_date} org:llvm", } - data = run_graphql_query(query, variables, token) + res_header, res_data = gh._Github__requester.graphql_query( + query=query, variables=variables + ) + data = res_data["data"] return int(data["search"]["issueCount"]) -def count_prs(triage_list: dict, start_date: datetime.datetime, token: str): +def count_prs(gh: github.Github, triage_list: dict, start_date: datetime.datetime): """ Fetch all the merged PRs for the project since ``start_date`` and update ``triage_list`` with the number of PRs merged for each user. @@ -329,7 +303,10 @@ def count_prs(triage_list: dict, start_date: datetime.datetime, token: str): has_next_page = True while has_next_page: print(variables) - data = run_graphql_query(query, variables, token) + res_header, res_data = gh._Github__requester.graphql_query( + query=query, variables=variables + ) + data = res_data["data"] for pr in data["search"]["nodes"]: # Users can be None if the user has been deleted. if not pr["author"]: @@ -365,14 +342,14 @@ def main(): print("Start:", len(triage_list), "triagers") # Step 0 Check if users have requested commit access in the last year. - for user in check_manual_requests(one_year_ago, token): + for user in check_manual_requests(gh, one_year_ago): if user in triage_list: print(user, "requested commit access in the last year.") del triage_list[user] print("After Request Check:", len(triage_list), "triagers") # Step 1 count all PRs authored or merged - count_prs(triage_list, one_year_ago, token) + count_prs(gh, triage_list, one_year_ago) print("After PRs:", len(triage_list), "triagers") @@ -381,7 +358,7 @@ def main(): # Step 2 check for reviews for user in list(triage_list.keys()): - review_count = get_review_count(user, one_year_ago, token) + review_count = get_review_count(gh, user, one_year_ago) triage_list[user].add_reviewed(review_count) print("After Reviews:", len(triage_list), "triagers") @@ -391,7 +368,7 @@ def main(): # Step 3 check for number of commits for user in list(triage_list.keys()): - num_commits = get_num_commits(user, one_year_ago, token) + num_commits = get_num_commits(gh, user, one_year_ago) # Override the total number of commits to not double count commits and # authored PRs. triage_list[user].set_authored(num_commits) @@ -401,7 +378,7 @@ def main(): # Step 4 check for new committers for user in list(triage_list.keys()): print("Checking", user) - if is_new_committer(user, one_year_ago, token): + if is_new_committer(gh, user, one_year_ago): print("Removing new committer: ", user) del triage_list[user] diff --git a/llvm/utils/git/requirements.txt b/llvm/utils/git/requirements.txt index 9ed52610c5343..bbb9059b6b260 100644 --- a/llvm/utils/git/requirements.txt +++ b/llvm/utils/git/requirements.txt @@ -222,9 +222,9 @@ pycparser==2.22 \ --hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \ --hash=sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc # via cffi -pygithub==2.2.0 \ - --hash=sha256:41042ea53e4c372219db708c38d2ca1fd4fadab75475bac27d89d339596cfad1 \ - --hash=sha256:e39be7c4dc39418bdd6e3ecab5931c636170b8b21b4d26f9ecf7e6102a3b51c3 +pygithub==2.4.0 \ + --hash=sha256:6601e22627e87bac192f1e2e39c6e6f69a43152cfb8f307cee575879320b3051 \ + --hash=sha256:81935aa4bdc939fba98fee1cb47422c09157c56a27966476ff92775602b9ee24 # via -r requirements.txt.in pyjwt[crypto]==2.9.0 \ --hash=sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850 \ @@ -254,9 +254,9 @@ typing-extensions==4.12.2 \ --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 # via pygithub -urllib3==2.2.2 \ - --hash=sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 \ - --hash=sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168 +urllib3==2.2.3 \ + --hash=sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac \ + --hash=sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9 # via # pygithub # requests diff --git a/llvm/utils/git/requirements.txt.in b/llvm/utils/git/requirements.txt.in index 512b80b60e1d2..e880e94e1de80 100644 --- a/llvm/utils/git/requirements.txt.in +++ b/llvm/utils/git/requirements.txt.in @@ -4,6 +4,8 @@ # pip-compile -o requirements.txt requirements.txt.in certifi>=2023.7.22 # https://security.snyk.io/vuln/SNYK-PYTHON-CERTIFI-5805047 -PyGithub==2.2.0 # >=1.59.1 For WorkflowRun.name +PyGithub==2.4.0 # >=1.59.1 For WorkflowRun.name # >= 2.2.0 for permission arg to Repository.get_collaborators + # >= Fix for https://github.com/PyGithub/PyGithub/issues/3001 + # (variables in graphql query). GitPython>=3.1.32 # https://security.snyk.io/vuln/SNYK-PYTHON-GITPYTHON-5840584