32
32
import diskcache
33
33
from datetime import datetime
34
34
35
- CACHE_DIR = f"github_cache-all_repos-{ datetime .now ().strftime ('%Y%m%d ' )} "
35
+ CACHE_DIR = f"github_cache-all_repos-{ datetime .now ().strftime ('%Y%m' )} "
36
36
cache = diskcache .Cache (CACHE_DIR )
37
37
38
38
@@ -98,7 +98,7 @@ def get_packages(url):
98
98
}
99
99
100
100
101
- async def list_repos (orgs ):
101
+ async def list_github_repos (orgs ):
102
102
async with trio .open_nursery () as nursery :
103
103
results = []
104
104
for org in orgs :
@@ -164,63 +164,80 @@ async def main(config_file: str = "all_repos.txt"):
164
164
github_name , pypi_name = item .split (":" , maxsplit = 1 )
165
165
# pypi name may be empty for repo with no packages.
166
166
# and one repo can create multiple pypi packages.
167
- known_mapping .append ((github_name .strip (), pypi_name .strip ()))
167
+ known_mapping .append ((github_name .strip (" /" ), pypi_name .strip (" " )))
168
168
169
169
# get all packages in the pypi jupyter org
170
170
packages = get_packages (f"https://pypi.org/org/jupyter/" )
171
171
packages_urls = [f"https://pypi.org/project/{ p } " for p in packages ]
172
172
print (f"Found { len (packages )} packages in the pypi jupyter org" )
173
173
174
- missing_from_pypi_org = set ([p for _ , p in known_mapping ]) - set (packages_urls )
175
- if missing_from_pypi_org :
174
+ missing_from_pypi_org = (
175
+ set ([p for _ , p in known_mapping ]) - set (packages_urls ) - {"" }
176
+ )
177
+
178
+ async with trio .open_nursery () as nursery :
179
+ targets = []
180
+ semaphore = trio .Semaphore (15 ) # Throttle to 15 concurrent requests
181
+ for package_url in missing_from_pypi_org :
182
+
183
+ async def _loc (targets , package_url ):
184
+ async with semaphore : # Wait for semaphore to be available
185
+ package = package_url .split ("/" )[- 1 ]
186
+ maintainers = await get_package_maintainers (package )
187
+ targets .append (
188
+ (
189
+ package_url ,
190
+ maintainers ,
191
+ )
192
+ )
193
+
194
+ nursery .start_soon (_loc , targets , package_url )
195
+
196
+ if targets :
197
+ print ()
176
198
print (
177
- "Repos missing from pypi org – they are listed on the config file, with a corresponding Pypi pacakge, but the package is not part of Pypi org:"
199
+ "TO add to PiPy org – they are listed on the config file, with a "
200
+ "corresponding Pypi package, but the package is not part of Pypi org:"
178
201
)
179
- for repo in missing_from_pypi_org :
180
- print (f" { repo } " )
202
+ for package_url , maintainers in targets :
203
+ print (f" [red]{ package_url } [/red] maintained by" )
204
+ for maintainer in maintainers :
205
+ print (f" pypi: `@{ maintainer } `" )
206
+ print ()
181
207
182
208
missing_from_github_org = set (packages_urls ) - set ([p for _ , p in known_mapping ])
183
209
if missing_from_github_org :
184
210
print (
185
- "Packages missing from github org, they are on PyPI, but I don't know the source github repo...:"
211
+ "Packages missing from github org, they are on PyPI, but I don't know"
212
+ " the source github repo...:"
186
213
)
187
214
for repo in sorted (missing_from_github_org ):
188
215
print (f" { repo } " )
189
216
190
217
todo = []
191
- # now we loop over all the org/repo and check if:
192
- # - it is in the jupyter org:
193
- # - it has a pypi package (or not) in the mapping, if not,
194
- # it's ok it's not supposed to have one.
195
- # - if it has:
196
- # all repos in the mapping should be in the Pypi org
197
- # - if not, add to todo list.
198
-
199
- async for org , repo in list_repos (default_orgs ):
218
+
219
+ # we've verified the existing mapping,
220
+ # now move on to all the org/repo that are not in there
221
+
222
+ print (
223
+ "listing all org and repo under jupyter purview, and filtering one without"
224
+ " mathching github repos."
225
+ )
226
+
227
+ known_org_rep = {k for k , v in known_mapping }
228
+ async for org , repo in list_github_repos (default_orgs ):
200
229
org_repo = f"{ org } /{ repo } "
201
- listed = [k for k , _ in known_mapping ]
202
- if not listed :
203
- # not listed in config. We search by default.
204
- todo .append ((org , repo ))
205
- continue
206
- candidates = [v for k , v in known_mapping if k == org_repo ]
207
- if not candidates :
208
- # not listed in config. We search by default.
209
- todo .append ((org , repo ))
230
+ if org_repo in known_org_rep :
210
231
continue
211
- for candidate in candidates :
212
- if candidate == "" :
213
- continue
214
- # not supposed to have a Pypi package
215
- elif candidate in packages_urls :
216
- pass
217
- # print(f"OK: {org_repo} -> {candidate}"")
218
- else :
219
- print (f"Missing: { org_repo } -> { candidate } " )
220
- todo .append ((org , repo ))
232
+
233
+ todo .append ((org , repo ))
221
234
222
235
print ()
223
- print ("check potentially matching Pypi names:" )
236
+ print (
237
+ "check potentially matching Pypi names, all the following seem to correspond"
238
+ " to an existing pypi package, if they indeed are part of Jupyter, "
239
+ "you can copy past the lines as is in the config file. If not just append: `{org}/{repo}:`"
240
+ )
224
241
225
242
async with trio .open_nursery () as nursery :
226
243
targets = []
@@ -229,7 +246,8 @@ async def main(config_file: str = "all_repos.txt"):
229
246
230
247
async def _loc (targets , org , repo ):
231
248
async with semaphore : # Wait for semaphore to be available
232
- maintainers = await get_package_maintainers (repo )
249
+ # maintainers = await get_package_maintainers(repo)
250
+ maintainers = []
233
251
targets .append (
234
252
(
235
253
org ,
@@ -254,14 +272,17 @@ async def _loc(targets, org, repo):
254
272
f" : https://pypi.org/project/{ repo } " ,
255
273
)
256
274
257
- for maintainer in maintainers :
258
- if maintainer in maintainers_name_map :
259
- print (f" @{ maintainers_name_map [maintainer ]} ({ maintainer } )" )
260
- else :
261
- print (f" @{ maintainer } " )
275
+ # for maintainer in maintainers:
276
+ # if maintainer in maintainers_name_map:
277
+ # print(f" @{maintainers_name_map[maintainer]} ({maintainer})")
278
+ # else:
279
+ # print(f" @{maintainer}")
262
280
263
281
print ()
264
- print ("repos with no Pypi package:" )
282
+ print (
283
+ "repos with no Pypi package, either manually add `{org}/{repo}: "
284
+ "{pypi_url}` or `{org}/{repo}: <blank>` to config file."
285
+ )
265
286
corg = ""
266
287
for org , repo , status , maintainers in sorted (targets ):
267
288
if org != corg :
0 commit comments