@@ -40,6 +40,7 @@ class GHorg: # pylint: disable=too-many-instance-attributes, too-many-lines
4040 configured_teams : dict [str , dict | None ] = field (default_factory = dict )
4141 newly_added_users : list [NamedUser ] = field (default_factory = list )
4242 current_repos_teams : dict [Repository , dict [Team , str ]] = field (default_factory = dict )
43+ graphql_repos_collaborators : dict [str , list [dict ]] = field (default_factory = dict )
4344 current_repos_collaborators : dict [Repository , dict [str , str ]] = field (default_factory = dict )
4445 configured_repos_collaborators : dict [str , dict [str , str ]] = field (default_factory = dict )
4546 archived_repos : list [Repository ] = field (default_factory = list )
@@ -946,18 +947,18 @@ def _convert_graphql_perm_to_rest(self, permission: str) -> str:
946947
947948 return permission
948949
949- def _fetch_collaborators_of_all_organization_repos (self ):
950+ def _fetch_collaborators_of_all_organization_repos (self ) -> None :
950951 """Get all collaborators (individuals) of all repos of a GitHub
951952 organization with their permissions using the GraphQL API"""
952953
953954 graphql_query = """
954955 query($owner: String!, $cursor: String) {
955956 organization(login: $owner) {
956- repositories(first: 100 , after: $cursor) {
957+ repositories(first: 5 , after: $cursor) {
957958 edges {
958959 node {
959960 name
960- collaborators(first: 100 ) {
961+ collaborators(first: 20 ) {
961962 edges {
962963 node {
963964 login
@@ -980,70 +981,180 @@ def _fetch_collaborators_of_all_organization_repos(self):
980981 }
981982 """
982983
983- # Initial query parameters
984+ # Initial query parameters for org-level request
984985 variables = {"owner" : self .org .login , "cursor" : None }
985986
986- # dicts in which we store the collaborators of each repo, and the repos
987- # for which there are more than 100 collaborators
988- repo_collaborators : dict [str , dict ] = {}
989- missing_data_for_repos : dict [str , str ] = {}
987+ # dict in which we store repos for which there are more than 100
988+ # collaborators, and their respective end cursors
989+ next_page_cursors_for_repos : dict [str , str ] = {}
990990
991- more_repos = True
992- while more_repos :
991+ more_repos_in_org = True
992+ while more_repos_in_org :
993993 logging .debug ("Requesting collaborators for %s" , self .org .login )
994- result = run_graphql_query (graphql_query , variables , self .gh_token )
995- self ._process_graphql_response (result , repo_collaborators , missing_data_for_repos )
996- more_repos = result ["data" ]["organization" ]["repositories" ]["pageInfo" ]["hasNextPage" ]
997- variables ["cursor" ] = result ["data" ]["organization" ]["repositories" ]["pageInfo" ][
998- "endCursor"
999- ]
994+ org_result = run_graphql_query (graphql_query , variables , self .gh_token )
995+ more_repos_in_org , variables ["cursor" ] = self ._extract_data_from_graphql_response (
996+ graphql_response = org_result , next_page_cursors_for_repos = next_page_cursors_for_repos
997+ )
1000998
1001- if missing_data_for_repos :
1002- logging .warning (
999+ # If there are more than 100 collaborators in a repo, we need to fetch
1000+ # rest via individual GraphQL queries
1001+ if next_page_cursors_for_repos :
1002+ logging .debug (
10031003 "Not all collaborators of all repos have been fetched. Missing data: %s" ,
1004- missing_data_for_repos ,
1004+ next_page_cursors_for_repos ,
10051005 )
1006- # TODO: Need to make individual graphql queries for these repos
1006+ for repo_name , end_cursor in next_page_cursors_for_repos .items ():
1007+ more_collaborators_in_repo = True
1008+ while more_collaborators_in_repo :
1009+ logging .debug ("Requesting additional collaborators for repo %s" , repo_name )
1010+ # Initial query parameters for repo-level request
1011+ repo_variables = {
1012+ "owner" : self .org .login ,
1013+ "repo" : repo_name ,
1014+ "cursor" : end_cursor ,
1015+ }
1016+ repo_query = """
1017+ query($owner: String!, $repo: String!, $cursor: String) {
1018+ repository(owner: $owner, name: $repo) {
1019+ collaborators(first: 100, after: $cursor) {
1020+ edges {
1021+ node {
1022+ login
1023+ }
1024+ permission
1025+ }
1026+ pageInfo {
1027+ endCursor
1028+ hasNextPage
1029+ }
1030+ }
1031+ }
1032+ }
1033+ """
1034+ repo_result = run_graphql_query (repo_query , repo_variables , self .gh_token )
1035+ more_collaborators_in_repo , end_cursor = (
1036+ self ._extract_data_from_graphql_response (
1037+ graphql_response = repo_result ,
1038+ next_page_cursors_for_repos = next_page_cursors_for_repos ,
1039+ single_repo_name = repo_name ,
1040+ )
1041+ )
10071042
1008- self ._populate_current_repos_collaborators (repo_collaborators )
1043+ # All collaborators from all repos have been fetched, now populate the
1044+ # actual dictionary
1045+ self ._populate_current_repos_collaborators ()
10091046
1010- def _process_graphql_response (
1011- self , result , repo_collaborators : dict [str , dict ], missing_data_for_repos : dict [str , str ]
1012- ):
1013- """Process the GraphQL response and extract collaborators"""
1014- try :
1015- for repo_edges in result ["data" ]["organization" ]["repositories" ]["edges" ]:
def _extract_data_from_graphql_response(
    self,
    graphql_response: dict,
    next_page_cursors_for_repos: dict[str, str],
    single_repo_name: str = "",
) -> tuple[bool, str]:
    """
    Extract collaborator data from a GraphQL response for either an
    organization or a single repository.

    The collaborator edges found in the response are appended to
    ``self.graphql_repos_collaborators`` under the repository's name. A
    repository seen for the first time gets a new list; a repository that
    already has an entry is extended, so follow-up pages accumulate.

    Args:
        graphql_response (dict): The GraphQL response containing the data.
        next_page_cursors_for_repos (dict[str, str]): Filled, in
            organization mode only, with ``repo name -> end cursor`` for
            every repository whose collaborator list did not fit into the
            page contained in the response. Not touched in single
            repository mode.
        single_repo_name (str, optional): Name of the repository the
            response belongs to. An empty string (the default) selects
            organization mode.

    Returns:
        tuple[bool, str]: Whether there is a next page, and its cursor.
            - Organization mode: next page of *repositories*.
            - Single repository mode: next page of *collaborators*.
            - ``(False, "")`` if the response holds no relevant data, or
              if the single repository carries no collaborator data.

    Raises:
        SystemExit: If a repository entry without a name is found in an
            organization level response.
    """
    # A missing or null "data" object is handled like any other response
    # without relevant content (see the fallback at the end)
    data = graphql_response.get("data") or {}

    if not single_repo_name and data.get("organization"):
        logging.debug("Extracting collaborators for organization from GraphQL response")

        repositories = data["organization"]["repositories"]
        org_has_next_page = repositories["pageInfo"]["hasNextPage"]
        org_cursor = repositories["pageInfo"]["endCursor"]

        for repo_edges in repositories["edges"]:
            try:
                repo_name: str = repo_edges["node"]["name"]
                logging.debug(
                    "Extracting collaborators for %s from GraphQL response", repo_name
                )
            except KeyError:
                logging.error(
                    "Did not find a repo name in the GraphQL response "
                    "(organization level) which seems to hint to a bug: %s",
                    repo_edges,
                )
                sys.exit(1)

            # Only the response lookups are guarded here. The dictionary
            # access on self.graphql_repos_collaborators must stay outside of
            # this try block, otherwise a not-yet-known repo name would be
            # mistaken for "no collaborators".
            try:
                collaborators = repo_edges["node"]["collaborators"]
                collaborator_edges = list(collaborators["edges"])
            except (TypeError, KeyError):
                logging.debug("Repo %s does not seem to have any collaborators", repo_name)
                # Without collaborator data there is no pagination info either
                continue

            self.graphql_repos_collaborators.setdefault(repo_name, []).extend(
                collaborator_edges
            )

            # Remember the cursor if this repo has more collaborators than
            # were returned in this page
            if collaborators["pageInfo"]["hasNextPage"]:
                next_page_cursors_for_repos[repo_name] = collaborators["pageInfo"]["endCursor"]

        return org_has_next_page, org_cursor

    if single_repo_name and data.get("repository"):
        logging.debug(
            "Extracting collaborators for repository %s from GraphQL response", single_repo_name
        )

        try:
            collaborators = data["repository"]["collaborators"]
            collaborator_edges = list(collaborators["edges"])
        except (TypeError, KeyError):
            logging.debug("Repo %s does not seem to have any collaborators", single_repo_name)
            # Nothing to paginate over, make the caller stop requesting pages
            return False, ""

        self.graphql_repos_collaborators.setdefault(single_repo_name, []).extend(
            collaborator_edges
        )

        return collaborators["pageInfo"]["hasNextPage"], collaborators["pageInfo"]["endCursor"]

    logging.warning("No relevant data found in GraphQL response")
    logging.debug("GraphQL response: %s", graphql_response)
    return False, ""
1151+
1152+ def _populate_current_repos_collaborators (self ) -> None :
10421153 """Populate self.current_repos_collaborators with data from repo_collaborators"""
10431154 for repo , collaborators in self .current_repos_collaborators .items ():
1044- if repo .name in repo_collaborators :
1155+ if repo .name in self . graphql_repos_collaborators :
10451156 # Extract each collaborator from the GraphQL response for this repo
1046- for collaborator in repo_collaborators [repo .name ]:
1157+ for collaborator in self . graphql_repos_collaborators [repo .name ]:
10471158 login : str = collaborator ["node" ]["login" ]
10481159 # Skip entry if collaborator is org owner, which is "admin" anyway
10491160 if login .lower () in [user .login .lower () for user in self .current_org_owners ]:
0 commit comments