Skip to content

Commit 012e3b1

Browse files
committed
start improving graphql query so that all collaborators of all repos are fetched at once
This should fix hitting the secondary rate limits.

Missing:
* GraphQL limits may be hit for the 100 repos x 100 collaborators requested per query. Investigate.
* If a repo has more than 100 collaborators, a routine to fetch the remaining ones is missing.
* Refactoring: the function is HUGE.
1 parent 419bca5 commit 012e3b1

File tree

1 file changed

+77
-31
lines changed

1 file changed

+77
-31
lines changed

gh_org_mgr/_gh_org.py

Lines changed: 77 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -946,69 +946,115 @@ def _convert_graphql_perm_to_rest(self, permission: str) -> str:
946946

947947
return permission
948948

949-
def _fetch_collaborators_of_repo(self, repo: Repository):
950-
"""Get all collaborators (individuals) of a GitHub repo with their
951-
permissions using the GraphQL API"""
952-
# TODO: Consider doing this for all repositories at once, but calculate
953-
# costs beforehand
949+
def _fetch_collaborators_of_all_organization_repos(self):
950+
"""Get all collaborators (individuals) of all repos of a GitHub
951+
organization with their permissions using the GraphQL API"""
952+
954953
graphql_query = """
955-
query($owner: String!, $name: String!, $cursor: String) {
956-
repository(owner: $owner, name: $name) {
957-
collaborators(first: 100, after: $cursor) {
954+
query($owner: String!, $cursor: String) {
955+
organization(login: $owner) {
956+
repositories(first: 100, after: $cursor) {
958957
edges {
959958
node {
960-
login
959+
name
960+
collaborators(first: 100) {
961+
edges {
962+
node {
963+
login
964+
}
965+
permission
966+
}
967+
pageInfo {
968+
endCursor
969+
hasNextPage
970+
}
971+
}
961972
}
962-
permission
963973
}
964974
pageInfo {
965975
endCursor
966976
hasNextPage
967977
}
978+
}
968979
}
969980
}
970-
}
971981
"""
972982

973983
# Initial query parameters
974-
variables = {"owner": self.org.login, "name": repo.name, "cursor": None}
984+
variables = {"owner": self.org.login, "cursor": None}
975985

976-
collaborators = []
977-
has_next_page = True
986+
repo_collaborators: dict[str, dict] = {}
987+
missing_data_for_repos = {}
988+
more_repos = True
978989

979-
while has_next_page:
980-
logging.debug("Requesting collaborators for %s", repo.name)
990+
while more_repos:
991+
logging.debug("Requesting collaborators for %s", self.org.login)
981992
result = run_graphql_query(graphql_query, variables, self.gh_token)
982993
try:
983-
collaborators.extend(result["data"]["repository"]["collaborators"]["edges"])
984-
has_next_page = result["data"]["repository"]["collaborators"]["pageInfo"][
994+
for repo in result["data"]["organization"]["repositories"]["edges"]:
995+
try:
996+
repo_name = repo["node"]["name"]
997+
logging.debug(
998+
"Extracting collaborators for %s from GraphQL response", repo_name
999+
)
1000+
except KeyError:
1001+
logging.error(
1002+
"Did not find a repo name in the GraphQL response which "
1003+
"seems to hint to a bug: %s",
1004+
repo,
1005+
)
1006+
sys.exit(1)
1007+
1008+
# fill in collaborators of repo
1009+
collaborators = repo["node"]["collaborators"]["edges"]
1010+
repo_collaborators[repo_name] = collaborators
1011+
1012+
# Find out if there are more than 100 collaborators in the
1013+
# GraphQL response for this repo
1014+
if repo["node"]["collaborators"]["pageInfo"]["hasNextPage"]:
1015+
missing_data_for_repos[repo_name] = repo["node"]["collaborators"][
1016+
"pageInfo"
1017+
]["endCursor"]
1018+
1019+
# Find out if there are more than 100 repos in the GraphQL
1020+
# response. If so, get cursor
1021+
more_repos = result["data"]["organization"]["repositories"]["pageInfo"][
9851022
"hasNextPage"
9861023
]
987-
variables["cursor"] = result["data"]["repository"]["collaborators"]["pageInfo"][
1024+
variables["cursor"] = result["data"]["organization"]["repositories"]["pageInfo"][
9881025
"endCursor"
9891026
]
9901027
except (TypeError, KeyError):
991-
logging.debug("Repo %s does not seem to have any collaborators", repo.name)
1028+
logging.debug("Repo %s does not seem to have any collaborators", repo_name)
9921029
continue
9931030

994-
# Extract relevant data
995-
for collaborator in collaborators:
996-
login: str = collaborator["node"]["login"]
997-
# Skip entry if collaborator is org owner, which is "admin" anyway
998-
if login.lower() in [user.login.lower() for user in self.current_org_owners]:
999-
continue
1000-
permission = self._convert_graphql_perm_to_rest(collaborator["permission"])
1001-
self.current_repos_collaborators[repo][login.lower()] = permission
1031+
if missing_data_for_repos:
1032+
logging.warning(
1033+
"Not all collaborators of all repos have been fetched. Missing data: %s",
1034+
missing_data_for_repos,
1035+
)
1036+
# TODO: Need to make individual graphql queries for these repos
1037+
1038+
# Iterate repos, and fill self.current_repos_collaborators[repo] with
1039+
# collaborators as fetched from GraphQL and put into repo_collaborators
1040+
for repo, collaborators in self.current_repos_collaborators.items():
1041+
if repo.name in repo_collaborators:
1042+
# Extract each collaborator from the GraphQL response for this repo
1043+
for collaborator in repo_collaborators[repo.name]:
1044+
login: str = collaborator["node"]["login"]
1045+
# Skip entry if collaborator is org owner, which is "admin" anyway
1046+
if login.lower() in [user.login.lower() for user in self.current_org_owners]:
1047+
continue
1048+
permission = self._convert_graphql_perm_to_rest(collaborator["permission"])
1049+
collaborators[login.lower()] = permission
10021050

10031051
def _get_current_repos_and_user_perms(self):
10041052
"""Get all repos, their current collaborators and their permissions"""
10051053
# We copy the list of repos from self.current_repos_teams
10061054
for repo in self.current_repos_teams:
10071055
self.current_repos_collaborators[repo] = {}
10081056

1009-
for repo in self.current_repos_collaborators:
1010-
# Get users for this repo
1011-
self._fetch_collaborators_of_repo(repo)
1057+
self._fetch_collaborators_of_all_organization_repos()
10121058

10131059
def _get_default_repository_permission(self):
10141060
"""Get the default repository permission for all users. Convert to

0 commit comments

Comments
 (0)