@@ -40,6 +40,7 @@ class GHorg: # pylint: disable=too-many-instance-attributes, too-many-lines
4040 configured_teams : dict [str , dict | None ] = field (default_factory = dict )
4141 newly_added_users : list [NamedUser ] = field (default_factory = list )
4242 current_repos_teams : dict [Repository , dict [Team , str ]] = field (default_factory = dict )
43+ graphql_repos_collaborators : dict [str , list [dict ]] = field (default_factory = dict )
4344 current_repos_collaborators : dict [Repository , dict [str , str ]] = field (default_factory = dict )
4445 configured_repos_collaborators : dict [str , dict [str , str ]] = field (default_factory = dict )
4546 archived_repos : list [Repository ] = field (default_factory = list )
@@ -115,7 +116,7 @@ def censor_half_string(string: str) -> str:
115116 half2 = len (string ) - half1
116117 return string [:half1 ] + "*" * (half2 )
117118
118- sensible_keys = ["gh_token" ]
119+ sensible_keys = ["gh_token" , "gh_app_private_key" ]
119120 for key in sensible_keys :
120121 if value := dictionary .get (key , "" ):
121122 dictionary [key ] = censor_half_string (value )
@@ -946,69 +947,228 @@ def _convert_graphql_perm_to_rest(self, permission: str) -> str:
946947
947948 return permission
948949
949- def _fetch_collaborators_of_repo (self , repo : Repository ):
950- """Get all collaborators (individuals) of a GitHub repo with their
951- permissions using the GraphQL API"""
952- # TODO: Consider doing this for all repositories at once, but calculate
953- # costs beforehand
def _fetch_collaborators_of_all_organization_repos(self) -> None:
    """Get all collaborators (individuals) of all repos of a GitHub
    organization with their permissions using the GraphQL API.

    The organization-level query pages through the repositories (100 per
    page) and asks for up to 100 collaborators of each of them. Repositories
    that report a further page of collaborators are remembered together with
    their end cursor and are completed afterwards with repository-level
    queries. The responses are handed to
    self._extract_data_from_graphql_response(), and at the end
    self._populate_current_repos_collaborators() is called to fill
    self.current_repos_collaborators.
    """
    # Both queries are loop-invariant, so they are defined once up front
    org_query = """
        query($owner: String!, $cursor: String) {
            organization(login: $owner) {
                repositories(first: 100, after: $cursor) {
                    edges {
                        node {
                            name
                            collaborators(first: 100) {
                                edges {
                                    node {
                                        login
                                    }
                                    permission
                                }
                                pageInfo {
                                    endCursor
                                    hasNextPage
                                }
                            }
                        }
                    }
                    pageInfo {
                        endCursor
                        hasNextPage
                    }
                }
            }
        }
    """
    repo_query = """
        query($owner: String!, $repo: String!, $cursor: String) {
            repository(owner: $owner, name: $repo) {
                collaborators(first: 100, after: $cursor) {
                    edges {
                        node {
                            login
                        }
                        permission
                    }
                    pageInfo {
                        endCursor
                        hasNextPage
                    }
                }
            }
        }
    """

    # Initial query parameters for org-level request
    variables = {"owner": self.org.login, "cursor": None}

    # dict in which we store repos for which there are more than 100
    # collaborators, and their respective end cursors
    next_page_cursors_for_repos: dict[str, str] = {}

    more_repos_in_org = True
    while more_repos_in_org:
        logging.debug("Requesting collaborators for %s", self.org.login)
        org_result = run_graphql_query(org_query, variables, self.gh_token)
        more_repos_in_org, variables["cursor"] = self._extract_data_from_graphql_response(
            graphql_response=org_result,
            next_page_cursors_for_repos=next_page_cursors_for_repos,
        )

    # If there are more than 100 collaborators in a repo, we need to fetch
    # the rest via individual GraphQL queries
    if next_page_cursors_for_repos:
        logging.debug(
            "Not all collaborators of all repos have been fetched. Missing data: %s",
            next_page_cursors_for_repos,
        )
        # Iterate over a snapshot: the same dict is handed to the extraction
        # helper, which writes to it when it handles org-level responses
        for repo_name, end_cursor in list(next_page_cursors_for_repos.items()):
            # Initial query parameters for repo-level request; only the
            # cursor changes between pages
            repo_variables = {
                "owner": self.org.login,
                "repo": repo_name,
                "cursor": end_cursor,
            }
            more_collaborators_in_repo = True
            while more_collaborators_in_repo:
                logging.debug("Requesting additional collaborators for repo %s", repo_name)
                repo_result = run_graphql_query(repo_query, repo_variables, self.gh_token)
                more_collaborators_in_repo, repo_variables["cursor"] = (
                    self._extract_data_from_graphql_response(
                        graphql_response=repo_result,
                        next_page_cursors_for_repos=next_page_cursors_for_repos,
                        single_repo_name=repo_name,
                    )
                )

    # All collaborators from all repos have been fetched, now populate the
    # actual dictionary
    self._populate_current_repos_collaborators()
1046+
def _extract_data_from_graphql_response(
    self,
    graphql_response: dict,
    next_page_cursors_for_repos: dict[str, str],
    single_repo_name: str = "",
) -> tuple[bool, str]:
    """
    Extracts collaborator data from a GraphQL response for either an
    organization or a single repository.

    Args:
        graphql_response (dict): The GraphQL response containing the data.
        next_page_cursors_for_repos (dict[str, str]): A dictionary to store
            the next page cursors for repositories. Only written to during
            organization level extraction.
        single_repo_name (str, optional): The name of a single repository to
            extract data for. Defaults to "".

    Returns:
        tuple[bool, str]: A tuple containing a boolean indicating if there
        is a next page and the cursor as found in the response.
            - For organization level extraction:
                - bool: Indicates if there is a next page of repositories.
                - str: The cursor for the next page of repositories.
            - For single repository extraction:
                - bool: Indicates if there is a next page of collaborators.
                - str: The cursor for the next page of collaborators.
            - (False, "") if the response holds no usable organization or
              repository data, or if the single repository has no
              collaborators object.

    Raises:
        SystemExit: If a repository name is not found in the GraphQL
            response at the organization level.

    This method processes the GraphQL response to extract information about
    repositories and their collaborators. The collaborator edges are stored
    in self.graphql_repos_collaborators, keyed by repository name. It handles
    pagination by identifying if there are more pages of repositories or
    collaborators to be fetched.
    """
    # A response without (or with an empty/null) "data" entry is treated
    # like any other response without relevant data instead of raising
    data = (graphql_response or {}).get("data") or {}

    if not single_repo_name and data.get("organization"):
        logging.debug("Extracting collaborators for organization from GraphQL response")
        repositories = data["organization"]["repositories"]

        for repo_edge in repositories["edges"]:
            try:
                repo_name: str = repo_edge["node"]["name"]
            except KeyError:
                logging.error(
                    "Did not find a repo name in the GraphQL response "
                    "(organization level) which seems to hint to a bug: %s",
                    repo_edge,
                )
                sys.exit(1)
            logging.debug("Extracting collaborators for %s from GraphQL response", repo_name)

            # fill in collaborators of repo
            try:
                collaborators = repo_edge["node"]["collaborators"]
                self.graphql_repos_collaborators[repo_name] = collaborators["edges"]
            except (TypeError, KeyError):
                logging.debug("Repo %s does not seem to have any collaborators", repo_name)
                # Without a collaborators object there is no pageInfo to
                # inspect either, so move on to the next repo
                continue

            # Find out if there are more than 100 collaborators in the
            # GraphQL response for this repo
            page_info = collaborators.get("pageInfo") or {}
            if page_info.get("hasNextPage"):
                next_page_cursors_for_repos[repo_name] = page_info["endCursor"]

        org_page_info = repositories["pageInfo"]
        return org_page_info["hasNextPage"], org_page_info["endCursor"]

    if single_repo_name and data.get("repository"):
        logging.debug(
            "Extracting collaborators for repository %s from GraphQL response", single_repo_name
        )

        collaborators = data["repository"].get("collaborators")
        if not collaborators:
            logging.debug("Repo %s does not seem to have any collaborators", single_repo_name)
            return False, ""

        # fill in collaborators of repo; setdefault keeps the data even if the
        # repo has not been recorded by an organization level extraction
        self.graphql_repos_collaborators.setdefault(single_repo_name, []).extend(
            collaborators.get("edges") or []
        )

        repo_page_info = collaborators["pageInfo"]
        return repo_page_info["hasNextPage"], repo_page_info["endCursor"]

    logging.warning("No relevant data found in GraphQL response")
    logging.debug("GraphQL response: %s", graphql_response)
    return False, ""
1151+
def _populate_current_repos_collaborators(self) -> None:
    """Populate self.current_repos_collaborators with the data collected in
    self.graphql_repos_collaborators.

    For every repo that is a key of self.current_repos_collaborators and
    whose name is present in self.graphql_repos_collaborators, each
    collaborator is stored under their lower-cased login with the permission
    returned by self._convert_graphql_perm_to_rest(). Organization owners are
    left out.
    """
    # Build the lower-cased owner logins once instead of once per collaborator
    owner_logins = {user.login.lower() for user in self.current_org_owners}

    for repo, collaborators in self.current_repos_collaborators.items():
        # Repos without an entry in the GraphQL data are left untouched
        for collaborator in self.graphql_repos_collaborators.get(repo.name, []):
            login: str = collaborator["node"]["login"].lower()
            # Skip entry if collaborator is org owner, which is "admin" anyway
            if login in owner_logins:
                continue
            collaborators[login] = self._convert_graphql_perm_to_rest(
                collaborator["permission"]
            )
10021164
def _get_current_repos_and_user_perms(self):
    """Collect every repo together with its current collaborators and their
    permissions"""
    # The repos are taken over from self.current_repos_teams; each of them
    # starts out with its own empty collaborator mapping
    self.current_repos_collaborators.update(
        {known_repo: {} for known_repo in self.current_repos_teams}
    )

    # One organization-wide request fills in the collaborators of all repos
    self._fetch_collaborators_of_all_organization_repos()
10121172
10131173 def _get_default_repository_permission (self ):
10141174 """Get the default repository permission for all users. Convert to
0 commit comments