@@ -70,7 +70,13 @@ def get_repo_owner_and_name(repo_http_url):
7070 # The first group contains the owner of the github repo extracted from the url
7171 # The second group contains the name of the github repo extracted from the url
7272 # 'But what is a regular expression?' ----> https://docs.python.org/3/howto/regex.html
73- regex = r"https?:\/\/github\.com\/([A-Za-z0-9 \- _]+)\/([A-Za-z0-9 \- _ \.]+)(.git)?\/?$"
73+ if 'github' in repo_http_url :
74+ regex = r"https?:\/\/github\.com\/([A-Za-z0-9 \- _]+)\/([A-Za-z0-9 \- _ \.]+)(.git)?\/?$"
75+ elif 'gitlab' in repo_http_url :
76+ regex = r"https?:\/\/gitlab\.com\/([A-Za-z0-9 \- _]+)\/([A-Za-z0-9 \- _ \.]+)(.git)?\/?$"
77+ elif 'bitbucket' in repo_http_url :
78+ regex = r"https?:\/\/bitbucket\.org\/([A-Za-z0-9 \- _]+)\/([A-Za-z0-9 \- _ \.]+)(.git)?\/?$"
79+
7480 result = re .search (regex , repo_http_url )
7581
7682 if not result :
@@ -86,7 +92,7 @@ def get_repo_owner_and_name(repo_http_url):
8692
8793
8894class IndexGenerator :
89- def __init__ (self , agency : str , version : str , token : Optional [str ] = None ,):
95+ def __init__ (self , agency : str , version : str , token : Optional [str ] = None , bitbucket_user : Optional [ str ] = None , bitbucket_password : Optional [ str ] = None , gitlab_token : Optional [ str ] = None ):
9096
9197 # user can change agency and version depending on parameters
9298 self .index = {
@@ -99,6 +105,9 @@ def __init__(self, agency: str, version: str, token: Optional[str] = None,):
99105 }
100106
101107 self .token = token
108+ self .gitlab_token = gitlab_token
109+ self .bitbucket_user = bitbucket_user
110+ self .bitbucket_password = bitbucket_password
102111
103112 def get_code_json_github (self ,repo : str ) -> Optional [Dict ]:
104113 try :
@@ -116,14 +125,45 @@ def get_code_json_github(self,repo : str) -> Optional[Dict]:
116125 print (f"JSON Error: { str (e )} " )
117126 return None
118127
119- def get_code_json_other (self ,repo : str ) -> Optional [Dict ]:
120- return None
def get_code_json_gitlab(self, repo: str) -> Optional[Dict]:
    """Fetch and decode ``code.json`` from a gitlab.com project.

    Args:
        repo: HTTP(S) URL of the GitLab repository.

    Returns:
        The parsed ``code.json`` content, or ``None`` when the API call
        fails, the response has no ``content`` field, or the content is not
        valid base64-encoded JSON.
    """
    # Local import keeps this method self-contained.
    from urllib.parse import quote

    try:
        owner, name = get_repo_owner_and_name(repo)
        # Percent-encode "owner/name" as a whole so it forms one path segment.
        project_path = quote(f"{owner}/{name}", safe="")
        code_json_endpoint = (
            f"https://gitlab.com/api/v4/projects/{project_path}"
            "/repository/files/code.json?ref=HEAD"
        )
        content_dict = hit_endpoint(code_json_endpoint, self.gitlab_token)
    except Exception:
        print("Problem querying the Gitlab API")
        return None

    try:
        decoded_content = base64.b64decode(content_dict['content'])
        return json.loads(decoded_content)
    except (KeyError, TypeError, ValueError) as e:
        # KeyError/TypeError: the response carried no 'content' (presumably an
        # error payload such as a missing file -- confirm against hit_endpoint).
        # ValueError covers both bad base64 and json.JSONDecodeError.
        print(f"JSON Error {e}")
        return None
143+
def get_code_json_bitbucket(self, repo: str) -> Optional[Dict]:
    """Fetch and parse ``code.json`` from a bitbucket.org repository.

    Args:
        repo: HTTP(S) URL of the Bitbucket repository.

    Returns:
        The parsed ``code.json`` content, or ``None`` when the request fails,
        the server answers with a non-success status, or the body is not
        valid JSON.
    """
    try:
        owner, name = get_repo_owner_and_name(repo)
        code_json_endpoint = f"https://bitbucket.org/{owner}/{name}/raw/HEAD/code.json"

        # The context manager closes the session's connections when done.
        with requests.Session() as session:
            # Only attach basic auth when both credentials were supplied;
            # otherwise the request goes out anonymously.
            if self.bitbucket_user and self.bitbucket_password:
                session.auth = (self.bitbucket_user, self.bitbucket_password)

            # No preliminary POST: it sent the credentials over plain HTTP
            # and its result was never used.
            response = session.get(code_json_endpoint, timeout=30)
    except Exception as e:
        print(f"Exception when querying bitbucket.org: {e}")
        return None

    if not response.ok:
        print(f"Bitbucket returned HTTP {response.status_code} for {code_json_endpoint}")
        return None

    try:
        return json.loads(response.text)
    except ValueError as e:
        # json.JSONDecodeError is a ValueError subclass.
        print(f"JSON Error {e}")
        return None
121157
def get_code_json(self, repo: str) -> Optional[Dict]:
    """Route *repo* to the fetcher for its hosting provider.

    The provider is chosen by substring match on the URL, tested in the
    order github, gitlab, bitbucket; the first match wins. Returns whatever
    that fetcher returns, or ``None`` when no provider name occurs in *repo*.
    """
    provider_fetchers = (
        ('github', 'get_code_json_github'),
        ('gitlab', 'get_code_json_gitlab'),
        ('bitbucket', 'get_code_json_bitbucket'),
    )
    for provider, fetcher_name in provider_fetchers:
        if provider in repo:
            # Resolved lazily so only the selected fetcher is ever touched.
            return getattr(self, fetcher_name)(repo)
    return None
127167
128168 def save_code_json (self , repo : str , output_path : str ) -> Optional [str ]:
129169
@@ -147,7 +187,7 @@ def update_index(self, index: Dict, code_json: Dict, org_name: str, repo_name: s
147187
148188 index ['releases' ].append (baseline )
149189
150- def get_org_repos (self , org_name : str ) -> list [Dict ]:
190+ def get_github_org_repos (self , org_name : str ) -> list [Dict ]:
151191 try :
152192 org_endpoint = f"https://api.github.com/orgs/{ org_name } /repos"
153193 print (f"\n Processing organization: { org_name } " )
@@ -162,34 +202,64 @@ def get_org_repos(self, org_name: str) -> list[Dict]:
162202 except Exception as e :
163203 raise e
164204
165- def save_organization_files (self , org_name : str , codeJSONPath ) -> None :
166- raise NotImplementedError
def _enumerate_repo_orgs(self, org_name, repo_name, url, total_repos,
                         codeJSONPath=None, add_to_index=True, repo_index=None):
    """Fetch one repository's code.json and optionally add it to the index.

    Args:
        org_name: Organization/group the repository belongs to.
        repo_name: Repository name, used for log output and the saved file name.
        url: Repository URL handed to the code.json fetcher.
        total_repos: Number of repositories being processed (progress output).
        codeJSONPath: When set, the code.json is fetched via ``save_code_json``
            into ``<codeJSONPath>/<repo_name>.json`` instead of ``get_code_json``.
        add_to_index: When true, a found code.json is added to ``self.index``.
        repo_index: 1-based position of this repository in the run; shown as
            ``?`` when the caller does not supply it.
    """
    position = "?" if repo_index is None else repo_index
    print(f"\nChecking {repo_name} [{position}/{total_repos}]")

    if not codeJSONPath:
        code_json = self.get_code_json(url)
    else:
        repoPath = os.path.join(codeJSONPath, (repo_name + '.json'))
        # NOTE(review): save_code_json is annotated as returning Optional[str]
        # while update_index takes a Dict -- confirm what it actually returns.
        code_json = self.save_code_json(url, repoPath)

    if code_json and add_to_index:
        print(f"✅ Found code.json in {repo_name}")
        self.update_index(self.index, code_json, org_name, repo_name)
    elif not code_json:
        print(f"❌ No code.json found in {repo_name}")


def process_github_org_files(self, org_name: str, add_to_index=True, codeJSONPath=None) -> None:
    """Process every repository of a GitHub organization.

    Args:
        org_name: GitHub organization name.
        add_to_index: Forwarded to the per-repository step; controls whether
            found code.json files are added to ``self.index``.
        codeJSONPath: Optional directory in which code.json files are saved.

    Any exception is reported on stdout and ends processing of this
    organization; nothing is re-raised.
    """
    try:
        repos = self.get_github_org_repos(org_name)
        total_repos = len(repos)

        for position, repo in enumerate(repos, 1):
            self._enumerate_repo_orgs(
                org_name, repo['name'], repo['svn_url'], total_repos,
                codeJSONPath=codeJSONPath,
                add_to_index=add_to_index,
                repo_index=position,
            )
    except Exception as e:
        print(f"Error processing organization {org_name}: {e}")


def get_gitlab_org_repos(self, org_name: str) -> list[Dict]:
    """Return the projects of a gitlab.com group.

    Args:
        org_name: Group path; nested groups use ``/`` separators.

    Returns:
        The project list returned by the GitLab API, or an empty list when
        the query fails (the failure is printed, not raised).
    """
    # Local import keeps this method self-contained.
    from urllib.parse import quote

    try:
        # The group path must be a single percent-encoded path segment.
        url_encoded_org_name = quote(org_name, safe="")
        org_endpoint = f"https://gitlab.com/api/v4/groups/{url_encoded_org_name}/projects"

        # NOTE(review): only what hit_endpoint returns is counted; if it does
        # not follow pagination the list may be partial -- confirm.
        repo_list = hit_endpoint(org_endpoint, self.gitlab_token)

        print(f"Found {len(repo_list)} public repositories")

        # Return the list itself: callers take len() of it and iterate it.
        return repo_list
    except Exception as e:
        print(f"Ran into Exception when querying Gitlab Repos in group {org_name}: {e}")
        return []


def process_gitlab_org_files(self, org_name: str, add_to_index=True, codeJSONPath=None) -> None:
    """Process every project of a GitLab group.

    Args:
        org_name: GitLab group path.
        add_to_index: Forwarded to the per-repository step; controls whether
            found code.json files are added to ``self.index``.
        codeJSONPath: Optional directory in which code.json files are saved.

    Any exception is reported on stdout and ends processing of this group;
    nothing is re-raised.
    """
    try:
        repos = self.get_gitlab_org_repos(org_name)
        total_repos = len(repos)

        for position, repo in enumerate(repos, 1):
            self._enumerate_repo_orgs(
                org_name, repo['name'], repo['web_url'], total_repos,
                codeJSONPath=codeJSONPath,
                add_to_index=add_to_index,
                repo_index=position,
            )
    except Exception as e:
        print(f"Error processing organization {org_name}: {e}")
190260
191261 def save_index (self , output_path : str ) -> None :
192- # sorts index by organizaiton then by name
262+ # sorts index by organization then by name
193263 self .index ['releases' ].sort (key = lambda x : (x .get ('organization' , '' ), x .get ('name' , '' )))
194264
195265 with open (output_path , 'w' ) as f :
0 commit comments