Skip to content

Commit 9350b90

Browse files
authored
Merge pull request #61 from OpenRailAssociation/graphql-all-collaborators-at-once
2 parents 419bca5 + 83ea3a7 commit 9350b90

File tree

1 file changed

+199
-39
lines changed

1 file changed

+199
-39
lines changed

gh_org_mgr/_gh_org.py

Lines changed: 199 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ class GHorg: # pylint: disable=too-many-instance-attributes, too-many-lines
4040
configured_teams: dict[str, dict | None] = field(default_factory=dict)
4141
newly_added_users: list[NamedUser] = field(default_factory=list)
4242
current_repos_teams: dict[Repository, dict[Team, str]] = field(default_factory=dict)
43+
graphql_repos_collaborators: dict[str, list[dict]] = field(default_factory=dict)
4344
current_repos_collaborators: dict[Repository, dict[str, str]] = field(default_factory=dict)
4445
configured_repos_collaborators: dict[str, dict[str, str]] = field(default_factory=dict)
4546
archived_repos: list[Repository] = field(default_factory=list)
@@ -115,7 +116,7 @@ def censor_half_string(string: str) -> str:
115116
half2 = len(string) - half1
116117
return string[:half1] + "*" * (half2)
117118

118-
sensible_keys = ["gh_token"]
119+
sensible_keys = ["gh_token", "gh_app_private_key"]
119120
for key in sensible_keys:
120121
if value := dictionary.get(key, ""):
121122
dictionary[key] = censor_half_string(value)
@@ -946,69 +947,228 @@ def _convert_graphql_perm_to_rest(self, permission: str) -> str:
946947

947948
return permission
948949

949-
def _fetch_collaborators_of_repo(self, repo: Repository):
950-
"""Get all collaborators (individuals) of a GitHub repo with their
951-
permissions using the GraphQL API"""
952-
# TODO: Consider doing this for all repositories at once, but calculate
953-
# costs beforehand
950+
def _fetch_collaborators_of_all_organization_repos(self) -> None:
    """Fetch the collaborators (individuals) of all repos of the organization.

    A single paginated organization-level GraphQL query returns up to 100
    repositories per page, each with up to 100 collaborators and their
    permissions. Repositories with more than 100 collaborators are then
    completed with repository-level queries that continue from the stored
    end cursor. The raw edges are collected by
    ``_extract_data_from_graphql_response`` and finally converted by
    ``_populate_current_repos_collaborators``.
    """
    # Organization-level query: pages through repositories, first page of
    # collaborators for each of them
    org_query = """
        query($owner: String!, $cursor: String) {
            organization(login: $owner) {
                repositories(first: 100, after: $cursor) {
                    edges {
                        node {
                            name
                            collaborators(first: 100) {
                                edges {
                                    node {
                                        login
                                    }
                                    permission
                                }
                                pageInfo {
                                    endCursor
                                    hasNextPage
                                }
                            }
                        }
                    }
                    pageInfo {
                        endCursor
                        hasNextPage
                    }
                }
            }
        }
    """

    # Repository-level query: pages through the remaining collaborators of
    # one repository. Defined once here because it never changes between
    # requests.
    repo_query = """
        query($owner: String!, $repo: String!, $cursor: String) {
            repository(owner: $owner, name: $repo) {
                collaborators(first: 100, after: $cursor) {
                    edges {
                        node {
                            login
                        }
                        permission
                    }
                    pageInfo {
                        endCursor
                        hasNextPage
                    }
                }
            }
        }
    """

    # Initial query parameters for org-level request
    org_variables = {"owner": self.org.login, "cursor": None}

    # dict in which we store repos for which there are more than 100
    # collaborators, and their respective end cursors
    next_page_cursors_for_repos: dict[str, str] = {}

    more_repos_in_org = True
    while more_repos_in_org:
        logging.debug("Requesting collaborators for %s", self.org.login)
        org_result = run_graphql_query(org_query, org_variables, self.gh_token)
        more_repos_in_org, org_variables["cursor"] = self._extract_data_from_graphql_response(
            graphql_response=org_result,
            next_page_cursors_for_repos=next_page_cursors_for_repos,
        )

    # If there are more than 100 collaborators in a repo, we need to fetch
    # the rest via individual GraphQL queries
    if next_page_cursors_for_repos:
        logging.debug(
            "Not all collaborators of all repos have been fetched. Missing data: %s",
            next_page_cursors_for_repos,
        )
        # Iterate over a snapshot: the dict is also handed to the extraction
        # helper, which writes to it when it processes organization-level data
        for repo_name, end_cursor in list(next_page_cursors_for_repos.items()):
            more_collaborators_in_repo = True
            while more_collaborators_in_repo:
                logging.debug("Requesting additional collaborators for repo %s", repo_name)
                # Query parameters for this page of the repo-level request
                repo_variables = {
                    "owner": self.org.login,
                    "repo": repo_name,
                    "cursor": end_cursor,
                }
                repo_result = run_graphql_query(repo_query, repo_variables, self.gh_token)
                more_collaborators_in_repo, end_cursor = (
                    self._extract_data_from_graphql_response(
                        graphql_response=repo_result,
                        next_page_cursors_for_repos=next_page_cursors_for_repos,
                        single_repo_name=repo_name,
                    )
                )

    # All collaborators from all repos have been fetched, now populate the
    # actual dictionary
    self._populate_current_repos_collaborators()
1046+
1047+
def _extract_data_from_graphql_response(
1048+
self,
1049+
graphql_response: dict,
1050+
next_page_cursors_for_repos: dict[str, str],
1051+
single_repo_name: str = "",
1052+
) -> tuple[bool, str]:
1053+
"""
1054+
Extracts collaborator data from a GraphQL response for either an
1055+
organization or a single repository.
1056+
1057+
Args:
1058+
graphql_response (dict): The GraphQL response containing the data.
1059+
next_page_cursors_for_repos (dict[str, str]): A dictionary to store
1060+
the next page cursors for repositories.
1061+
single_repo_name (str, optional): The name of a single repository to
1062+
extract data for. Defaults to "".
1063+
1064+
Returns:
1065+
tuple[bool, str]: A tuple containing a boolean indicating if there
1066+
is a next page and a string for the cursor.
1067+
- For organization level extraction:
1068+
- bool: Indicates if there is a next page of repositories.
1069+
- str: The cursor for the next page of repositories.
1070+
- For single repository extraction:
1071+
- bool: Indicates if there is a next page of collaborators.
1072+
- str: The cursor for the next page of collaborators.
1073+
1074+
Raises:
1075+
SystemExit: If a repository name is not found in the GraphQL
1076+
response at the organization level.
1077+
1078+
This method processes the GraphQL response to extract information about
1079+
repositories and their collaborators. It handles pagination by
1080+
identifying if there are more pages of repositories or collaborators to
1081+
be fetched.
1082+
"""
1083+
if not single_repo_name and "organization" in graphql_response["data"]:
1084+
logging.debug("Extracting collaborators for organization from GraphQL response")
1085+
1086+
# Initialise returns
1087+
org_has_next_page = graphql_response["data"]["organization"]["repositories"][
1088+
"pageInfo"
1089+
]["hasNextPage"]
1090+
org_cursor = graphql_response["data"]["organization"]["repositories"]["pageInfo"][
1091+
"endCursor"
1092+
]
1093+
1094+
for repo_edges in graphql_response["data"]["organization"]["repositories"]["edges"]:
1095+
try:
1096+
repo_name: str = repo_edges["node"]["name"]
1097+
logging.debug(
1098+
"Extracting collaborators for %s from GraphQL response", repo_name
1099+
)
1100+
except KeyError:
1101+
logging.error(
1102+
"Did not find a repo name in the GraphQL response "
1103+
"(organization level) which seems to hint to a bug: %s",
1104+
repo_edges,
1105+
)
1106+
sys.exit(1)
1107+
1108+
# fill in collaborators of repo
1109+
try:
1110+
repo_collaborators = repo_edges["node"]["collaborators"]["edges"]
1111+
self.graphql_repos_collaborators[repo_name] = repo_collaborators
1112+
except (TypeError, KeyError):
1113+
logging.debug("Repo %s does not seem to have any collaborators", repo_name)
1114+
1115+
# Find out if there are more than 100 collaborators in the
1116+
# GraphQL response for this repo
1117+
if repo_edges["node"]["collaborators"]["pageInfo"]["hasNextPage"]:
1118+
next_page_cursors_for_repos[repo_name] = repo_edges["node"]["collaborators"][
1119+
"pageInfo"
1120+
]["endCursor"]
9751121

976-
collaborators = []
977-
has_next_page = True
1122+
return org_has_next_page, org_cursor
9781123

979-
while has_next_page:
980-
logging.debug("Requesting collaborators for %s", repo.name)
981-
result = run_graphql_query(graphql_query, variables, self.gh_token)
1124+
if single_repo_name and "repository" in graphql_response["data"]:
1125+
logging.debug(
1126+
"Extracting collaborators for repository %s from GraphQL response", single_repo_name
1127+
)
1128+
1129+
# Initialise returns
1130+
repo_has_next_page = graphql_response["data"]["repository"]["collaborators"][
1131+
"pageInfo"
1132+
]["hasNextPage"]
1133+
repo_cursor = graphql_response["data"]["repository"]["collaborators"]["pageInfo"][
1134+
"endCursor"
1135+
]
1136+
1137+
# fill in collaborators of repo
9821138
try:
983-
collaborators.extend(result["data"]["repository"]["collaborators"]["edges"])
984-
has_next_page = result["data"]["repository"]["collaborators"]["pageInfo"][
985-
"hasNextPage"
986-
]
987-
variables["cursor"] = result["data"]["repository"]["collaborators"]["pageInfo"][
988-
"endCursor"
1139+
repo_collaborators = graphql_response["data"]["repository"]["collaborators"][
1140+
"edges"
9891141
]
1142+
self.graphql_repos_collaborators[single_repo_name].extend(repo_collaborators)
9901143
except (TypeError, KeyError):
991-
logging.debug("Repo %s does not seem to have any collaborators", repo.name)
992-
continue
993-
994-
# Extract relevant data
995-
for collaborator in collaborators:
996-
login: str = collaborator["node"]["login"]
997-
# Skip entry if collaborator is org owner, which is "admin" anyway
998-
if login.lower() in [user.login.lower() for user in self.current_org_owners]:
999-
continue
1000-
permission = self._convert_graphql_perm_to_rest(collaborator["permission"])
1001-
self.current_repos_collaborators[repo][login.lower()] = permission
1144+
logging.debug("Repo %s does not seem to have any collaborators", single_repo_name)
1145+
1146+
return repo_has_next_page, repo_cursor
1147+
1148+
logging.warning("No relevant data found in GraphQL response")
1149+
logging.debug("GraphQL response: %s", graphql_response)
1150+
return False, ""
1151+
1152+
def _populate_current_repos_collaborators(self) -> None:
1153+
"""Populate self.current_repos_collaborators with data from repo_collaborators"""
1154+
for repo, collaborators in self.current_repos_collaborators.items():
1155+
if repo.name in self.graphql_repos_collaborators:
1156+
# Extract each collaborator from the GraphQL response for this repo
1157+
for collaborator in self.graphql_repos_collaborators[repo.name]:
1158+
login: str = collaborator["node"]["login"]
1159+
# Skip entry if collaborator is org owner, which is "admin" anyway
1160+
if login.lower() in [user.login.lower() for user in self.current_org_owners]:
1161+
continue
1162+
permission = self._convert_graphql_perm_to_rest(collaborator["permission"])
1163+
collaborators[login.lower()] = permission
10021164

10031165
def _get_current_repos_and_user_perms(self):
10041166
"""Get all repos, their current collaborators and their permissions"""
10051167
# We copy the list of repos from self.current_repos_teams
10061168
for repo in self.current_repos_teams:
10071169
self.current_repos_collaborators[repo] = {}
10081170

1009-
for repo in self.current_repos_collaborators:
1010-
# Get users for this repo
1011-
self._fetch_collaborators_of_repo(repo)
1171+
self._fetch_collaborators_of_all_organization_repos()
10121172

10131173
def _get_default_repository_permission(self):
10141174
"""Get the default repository permission for all users. Convert to

0 commit comments

Comments
 (0)