Skip to content

Commit 72bf400

Browse files
committed
add handling of graphql response for org and repo level
1 parent ef0c4d1 commit 72bf400

File tree

1 file changed

+147
-36
lines changed

1 file changed

+147
-36
lines changed

gh_org_mgr/_gh_org.py

Lines changed: 147 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ class GHorg: # pylint: disable=too-many-instance-attributes, too-many-lines
4040
configured_teams: dict[str, dict | None] = field(default_factory=dict)
4141
newly_added_users: list[NamedUser] = field(default_factory=list)
4242
current_repos_teams: dict[Repository, dict[Team, str]] = field(default_factory=dict)
43+
graphql_repos_collaborators: dict[str, list[dict]] = field(default_factory=dict)
4344
current_repos_collaborators: dict[Repository, dict[str, str]] = field(default_factory=dict)
4445
configured_repos_collaborators: dict[str, dict[str, str]] = field(default_factory=dict)
4546
archived_repos: list[Repository] = field(default_factory=list)
@@ -946,18 +947,18 @@ def _convert_graphql_perm_to_rest(self, permission: str) -> str:
946947

947948
return permission
948949

949-
def _fetch_collaborators_of_all_organization_repos(self):
950+
def _fetch_collaborators_of_all_organization_repos(self) -> None:
950951
"""Get all collaborators (individuals) of all repos of a GitHub
951952
organization with their permissions using the GraphQL API"""
952953

953954
graphql_query = """
954955
query($owner: String!, $cursor: String) {
955956
organization(login: $owner) {
956-
repositories(first: 100, after: $cursor) {
957+
repositories(first: 5, after: $cursor) {
957958
edges {
958959
node {
959960
name
960-
collaborators(first: 100) {
961+
collaborators(first: 20) {
961962
edges {
962963
node {
963964
login
@@ -980,70 +981,180 @@ def _fetch_collaborators_of_all_organization_repos(self):
980981
}
981982
"""
982983

983-
# Initial query parameters
984+
# Initial query parameters for org-level request
984985
variables = {"owner": self.org.login, "cursor": None}
985986

986-
# dicts in which we store the collaborators of each repo, and the repos
987-
# for which there are more than 100 collaborators
988-
repo_collaborators: dict[str, dict] = {}
989-
missing_data_for_repos: dict[str, str] = {}
987+
# dict in which we store repos that have more collaborators than the
988+
# org-level query returned, and their respective end cursors
989+
next_page_cursors_for_repos: dict[str, str] = {}
990990

991-
more_repos = True
992-
while more_repos:
991+
more_repos_in_org = True
992+
while more_repos_in_org:
993993
logging.debug("Requesting collaborators for %s", self.org.login)
994-
result = run_graphql_query(graphql_query, variables, self.gh_token)
995-
self._process_graphql_response(result, repo_collaborators, missing_data_for_repos)
996-
more_repos = result["data"]["organization"]["repositories"]["pageInfo"]["hasNextPage"]
997-
variables["cursor"] = result["data"]["organization"]["repositories"]["pageInfo"][
998-
"endCursor"
999-
]
994+
org_result = run_graphql_query(graphql_query, variables, self.gh_token)
995+
more_repos_in_org, variables["cursor"] = self._extract_data_from_graphql_response(
996+
graphql_response=org_result, next_page_cursors_for_repos=next_page_cursors_for_repos
997+
)
1000998

1001-
if missing_data_for_repos:
1002-
logging.warning(
999+
# If a repo has more collaborators than the org-level query returned, we
1000+
# need to fetch the rest via individual GraphQL queries
1001+
if next_page_cursors_for_repos:
1002+
logging.debug(
10031003
"Not all collaborators of all repos have been fetched. Missing data: %s",
1004-
missing_data_for_repos,
1004+
next_page_cursors_for_repos,
10051005
)
1006-
# TODO: Need to make individual graphql queries for these repos
1006+
for repo_name, end_cursor in next_page_cursors_for_repos.items():
1007+
more_collaborators_in_repo = True
1008+
while more_collaborators_in_repo:
1009+
logging.debug("Requesting additional collaborators for repo %s", repo_name)
1010+
# Initial query parameters for repo-level request
1011+
repo_variables = {
1012+
"owner": self.org.login,
1013+
"repo": repo_name,
1014+
"cursor": end_cursor,
1015+
}
1016+
repo_query = """
1017+
query($owner: String!, $repo: String!, $cursor: String) {
1018+
repository(owner: $owner, name: $repo) {
1019+
collaborators(first: 100, after: $cursor) {
1020+
edges {
1021+
node {
1022+
login
1023+
}
1024+
permission
1025+
}
1026+
pageInfo {
1027+
endCursor
1028+
hasNextPage
1029+
}
1030+
}
1031+
}
1032+
}
1033+
"""
1034+
repo_result = run_graphql_query(repo_query, repo_variables, self.gh_token)
1035+
more_collaborators_in_repo, end_cursor = (
1036+
self._extract_data_from_graphql_response(
1037+
graphql_response=repo_result,
1038+
next_page_cursors_for_repos=next_page_cursors_for_repos,
1039+
single_repo_name=repo_name,
1040+
)
1041+
)
10071042

1008-
self._populate_current_repos_collaborators(repo_collaborators)
1043+
# All collaborators from all repos have been fetched, now populate the
1044+
# actual dictionary
1045+
self._populate_current_repos_collaborators()
10091046

1010-
def _process_graphql_response(
1011-
self, result, repo_collaborators: dict[str, dict], missing_data_for_repos: dict[str, str]
1012-
):
1013-
"""Process the GraphQL response and extract collaborators"""
1014-
try:
1015-
for repo_edges in result["data"]["organization"]["repositories"]["edges"]:
1047+
def _extract_data_from_graphql_response(
1048+
self,
1049+
graphql_response: dict,
1050+
next_page_cursors_for_repos: dict[str, str],
1051+
single_repo_name: str = "",
1052+
) -> tuple[bool, str]:
1053+
"""
1054+
Extracts collaborator data from a GraphQL response for either an
1055+
organization or a single repository.
1056+
1057+
Args:
1058+
graphql_response (dict): The GraphQL response containing the data.
1059+
next_page_cursors_for_repos (dict[str, str]): A dictionary to store
1060+
the next page cursors for repositories.
1061+
single_repo_name (str, optional): The name of a single repository to
1062+
extract data for. Defaults to "".
1063+
1064+
Returns:
1065+
tuple[bool, str]: A tuple containing a boolean indicating if there
1066+
is a next page and a string for the cursor.
1067+
- For organization level extraction:
1068+
- bool: Indicates if there is a next page of repositories.
1069+
- str: The cursor for the next page of repositories.
1070+
- For single repository extraction:
1071+
- bool: Indicates if there is a next page of collaborators.
1072+
- str: The cursor for the next page of collaborators.
1073+
1074+
Raises:
1075+
SystemExit: If a repository name is not found in the GraphQL
1076+
response at the organization level.
1077+
1078+
This method processes the GraphQL response to extract information about
1079+
repositories and their collaborators. It handles pagination by
1080+
identifying if there are more pages of repositories or collaborators to
1081+
be fetched.
1082+
"""
1083+
if not single_repo_name and "organization" in graphql_response["data"]:
1084+
logging.debug("Extracting collaborators for organization from GraphQL response")
1085+
1086+
# Initialise returns
1087+
org_has_next_page = graphql_response["data"]["organization"]["repositories"][
1088+
"pageInfo"
1089+
]["hasNextPage"]
1090+
org_cursor = graphql_response["data"]["organization"]["repositories"]["pageInfo"][
1091+
"endCursor"
1092+
]
1093+
1094+
for repo_edges in graphql_response["data"]["organization"]["repositories"]["edges"]:
10161095
try:
10171096
repo_name: str = repo_edges["node"]["name"]
10181097
logging.debug(
10191098
"Extracting collaborators for %s from GraphQL response", repo_name
10201099
)
10211100
except KeyError:
10221101
logging.error(
1023-
"Did not find a repo name in the GraphQL response which "
1024-
"seems to hint to a bug: %s",
1102+
"Did not find a repo name in the GraphQL response "
1103+
"(organization level) which seems to hint to a bug: %s",
10251104
repo_edges,
10261105
)
10271106
sys.exit(1)
10281107

10291108
# fill in collaborators of repo
1030-
repo_collaborators[repo_name] = repo_edges["node"]["collaborators"]["edges"]
1109+
try:
1110+
self.graphql_repos_collaborators[repo_name].extend(
1111+
repo_edges["node"]["collaborators"]["edges"]
1112+
)
1113+
except (TypeError, KeyError):
1114+
logging.debug("Repo %s does not seem to have any collaborators", repo_name)
10311115

10321116
# Find out if this repo has more collaborators than the
10331117
# GraphQL response contains (i.e. another page exists)
10341118
if repo_edges["node"]["collaborators"]["pageInfo"]["hasNextPage"]:
1035-
missing_data_for_repos[repo_name] = repo_edges["node"]["collaborators"][
1119+
next_page_cursors_for_repos[repo_name] = repo_edges["node"]["collaborators"][
10361120
"pageInfo"
10371121
]["endCursor"]
1038-
except (TypeError, KeyError):
1039-
logging.debug("Repo does not seem to have any collaborators")
10401122

1041-
def _populate_current_repos_collaborators(self, repo_collaborators: dict[str, dict]):
1123+
return org_has_next_page, org_cursor
1124+
1125+
if single_repo_name and "repository" in graphql_response["data"]:
1126+
logging.debug(
1127+
"Extracting collaborators for repository %s from GraphQL response", single_repo_name
1128+
)
1129+
1130+
# Initialise returns
1131+
repo_has_next_page = graphql_response["data"]["repository"]["collaborators"][
1132+
"pageInfo"
1133+
]["hasNextPage"]
1134+
repo_cursor = graphql_response["data"]["repository"]["collaborators"]["pageInfo"][
1135+
"endCursor"
1136+
]
1137+
1138+
# fill in collaborators of repo
1139+
try:
1140+
self.graphql_repos_collaborators[single_repo_name].extend(
1141+
graphql_response["data"]["repository"]["collaborators"]["edges"]
1142+
)
1143+
except (TypeError, KeyError):
1144+
logging.debug("Repo %s does not seem to have any collaborators", single_repo_name)
1145+
1146+
return repo_has_next_page, repo_cursor
1147+
1148+
logging.warning("No relevant data found in GraphQL response")
1149+
logging.debug("GraphQL response: %s", graphql_response)
1150+
return False, ""
1151+
1152+
def _populate_current_repos_collaborators(self) -> None:
10421153
"""Populate self.current_repos_collaborators with data from self.graphql_repos_collaborators"""
10431154
for repo, collaborators in self.current_repos_collaborators.items():
1044-
if repo.name in repo_collaborators:
1155+
if repo.name in self.graphql_repos_collaborators:
10451156
# Extract each collaborator from the GraphQL response for this repo
1046-
for collaborator in repo_collaborators[repo.name]:
1157+
for collaborator in self.graphql_repos_collaborators[repo.name]:
10471158
login: str = collaborator["node"]["login"]
10481159
# Skip entry if collaborator is org owner, which is "admin" anyway
10491160
if login.lower() in [user.login.lower() for user in self.current_org_owners]:

0 commit comments

Comments
 (0)