diff --git a/all_repos.txt b/all_repos.txt new file mode 100644 index 0000000..f4fdda1 --- /dev/null +++ b/all_repos.txt @@ -0,0 +1,516 @@ +# github_org/repo : pypi url +# the Pypi url can be empty if the repo has not matching package, or a dash +# is the repo is eponymous + + +jupyter-incubator/declarativewidgets_explorer : +jupyter-incubator/sparkmagic : - +jupyter-incubator/contentmanagement : jupyter_cms +jupyter-incubator/proposals : +jupyter-widgets/ipywidgets : - +jupyter-server/jupyter-resource-usage : - +jupyter-server/jupyter_server : jupyter-server +jupyter-server/pytest-jupyter : - +jupyter-server/jupyter_server_mathjax : - +jupyter-server/gateway_provisioners : gateway-provisioners +jupyter-server/jupyter_releaser : - +jupyter-server/fps : - +jupyter-server/jupyter_server_terminals : - +jupyter-server/jupyter_ydoc : - +jupyter-server/jupyter_server_fileid : - +jupyter-server/jupyter-scheduler : - +jupyter-server/pycrdt : - +jupyter-server/pycrdt-websocket : - +jupyter-server/enterprise_gateway : jupyter_enterprise_gateway +jupyter-server/contributing-hour : +ipython/ipython : - +ipython/traitlets : - +ipython/ipython_genutils : ipython_genutils +ipython/ipyparallel : - +ipython/ipykernel : - +ipython/ipynb : - +ipython/rlipython : - +ipython/disp : - +ipython/matplotlib-inline : - +ipython/comm : - +ipython/ipython-pygments-lexers : - +jupyter-book/jupyter-book : - +jupyter/nbviewer : - +jupyter/nbgrader : - +jupyter/terminado : - +jupyter/jupyter_core : - +jupyter/nbformat : - +jupyter/jupyter_client : - +jupyter/notebook : - +jupyter/nbconvert : - +jupyter/jupyter_console : - +jupyter/jupyter_kernel_test : - +jupyter/jupyter : - +jupyter/nbdime : - +jupyter/jupyterlab_geojson : - +jupyter/jupyter-sphinx : - +jupyter/jupyter-packaging : - +jupyter/jupyterlab-fasta : - +jupyter/nbclassic : - +jupyter/nbclient : - +jupyter/papyri : - +jupyter/notebook_shim : - +jupyter/jupyter_events : - +jupyterlab/jupyterlab : - +jupyterlab/jupyterlab-hdf5 : 
jupyterlab-hdf +jupyterlab/pytest-check-links : - +# legacy server was launcher +jupyterlab/jupyterlab_server : jupyterlab-launcher +jupyterlab/jupyterlab_server : - +jupyterlab/jupyterlab-git : - +jupyterlab/jupyterlab-github : - +jupyterlab/jupyterlab-latex : - +jupyterlab/jupyterlab-telemetry : - +jupyterlab/jupyterlab_pygments : - +jupyterlab/jupyterlab_apod : - +jupyterlab/jupyterlab-plugin-playground : - +jupyterlab/jupyterlab-translate : - +jupyterlab/hatch-jupyter-builder : - +jupyterlab/jupyter-collaboration : - +jupyterlab/jupyter-collaboration : jupyter-server-ydoc +jupyter/telemetry : jupyter-telemetry +jupyter-standards/schemas : + +jupyterhealth/jupyter-smart-on-fhir : - +jupyterhealth/jupyterhealth-client : - + +jupyterlab/jupyterlab-renderers :jupyterlab-katex +jupyterlab/jupyterlab-renderers :jupyterlab-mathjax2 +jupyterlab/jupyterlab-renderers :jupyterlab-mathjax3 +jupyterlab/jupyterlab-renderers :jupyterlab-vega2 +jupyterlab/jupyterlab-renderers :jupyterlab-vega3 + +jupyterlab/pull-requests :jupyterlab-pullrequests + +# All these are from the same repo: +jupyterlab/language-packs : jupyterlab-pseudo-language-pack +jupyterlab/language-packs : jupyterlab-language-pack-ar-SA +jupyterlab/language-packs : jupyterlab-language-pack-ca-ES +jupyterlab/language-packs : jupyterlab-language-pack-cs-CZ +jupyterlab/language-packs : jupyterlab-language-pack-da-DK +jupyterlab/language-packs : jupyterlab-language-pack-de-DE +jupyterlab/language-packs : jupyterlab-language-pack-el-GR +jupyterlab/language-packs : jupyterlab-language-pack-es-ES +jupyterlab/language-packs : jupyterlab-language-pack-et-EE +jupyterlab/language-packs : jupyterlab-language-pack-fi-FI +jupyterlab/language-packs : jupyterlab-language-pack-fr-FR +jupyterlab/language-packs : jupyterlab-language-pack-he-IL +jupyterlab/language-packs : jupyterlab-language-pack-hu-HU +jupyterlab/language-packs : jupyterlab-language-pack-hy-AM +jupyterlab/language-packs : jupyterlab-language-pack-id-ID 
+jupyterlab/language-packs : jupyterlab-language-pack-it-IT +jupyterlab/language-packs : jupyterlab-language-pack-ja-JP +jupyterlab/language-packs : jupyterlab-language-pack-ko-KR +jupyterlab/language-packs : jupyterlab-language-pack-lt-LT +jupyterlab/language-packs : jupyterlab-language-pack-nl-NL +jupyterlab/language-packs : jupyterlab-language-pack-no-NO +jupyterlab/language-packs : jupyterlab-language-pack-pl-PL +jupyterlab/language-packs : jupyterlab-language-pack-pt-BR +jupyterlab/language-packs : jupyterlab-language-pack-ro-RO +jupyterlab/language-packs : jupyterlab-language-pack-ru-RU +jupyterlab/language-packs : jupyterlab-language-pack-tr-TR +jupyterlab/language-packs : jupyterlab-language-pack-uk-UA +jupyterlab/language-packs : jupyterlab-language-pack-vi-VN +jupyterlab/language-packs : jupyterlab-language-pack-zh-CN +jupyterlab/language-packs : jupyterlab-language-pack-zh-TW + +jupyterlab/jupyter-ai : - +jupyterlab/jupyter-builder : - +jupyterlab/jupyterlab-data-explorer : - + +# +jupyter/jupyter-sphinx-theme : - + +# variation in names: +jupyterlab/jupyter-collaboration : jupyter-collaboration-ui +jupyterlab/jupyter-collaboration : jupyter-docprovider +jupyter-server/kernel_gateway : jupyter-kernel-gateway +jupyter-server/synchronizer : jupyter-server-synchronizer + +# repo with no packages: +binderhub-ci-repos/lfs: +jupyter/design: +jupyter/security: +jupyter/ideas: +binderhub-ci-repos/lfs: +jupyter/governance: +jupyter/accessibility: +jupyter/cve: + +# security forks: +# is it safe to list those URL ? 
+# REDACTED + +# .github repos +jupyter-standard/.github : +jupyter-server/.github : +jupyter-xeus/.github : +jupyter-standards/.github : +ipython/.github : +ipython/.github-1 : +jupyterhub/.github : +voila-dashboards/.github : +jupyter/.github : +jupyterlab/.github : +jupyter-governance/.github : + + +jupyter-governance/jupyter-foundation-governing-board : + +# .github.io repos +ipython/ipython.github.com: +jupyter-xeus/jupyter-xeus.github.io: +jupyterhub/jupyterhub.github.io: +voila-gallery/voila-gallery.github.io: +voila-dashboards/voila-dashboards.github.io: +jupyter/jupyter.github.io: + +# team compass +jupyter/docs-team-compass: +jupyter/foundations-and-standards-team-compass: +jupyter/ipython-components: +jupyter/kernels-team-compass: +jupyter/notebook-team-compass: +jupyter/software-steering-council-team-compass: +jupyter/team-compass-template: +jupyter-book/team-compass: +jupyter-governance/ec-team-compass: +jupyter-server/team-compass: +jupyter-widgets/team-compass: +jupyterhub/team-compass: +jupyterlab/frontends-team-compass: + +# misc websites: + +jupyter/cdn.jupyter.org: +jupyter/colaboratory.jupyter.org: +jupyter/nbviewer.org-deploy: +jupyter/try.jupyter.org: +jupyter-book/mystmd.org: +jupyterhub/mybinder.org-deploy: +jupyterhub/mybinder.org-user-guide: +voila-gallery/voila-gallery.org-deploy: +ipython/ipython-website: + +# various tutorials +jupyter/ngcm-tutorial: +jupyter/scipy-advanced-tutorial: +jupyter/strata-sv-2015-tutorial: +jupyter/tutorial-dashboards-declarativewidgets: +jupyter/tutorial-devteam-jupyterhub-2017: +jupyter/tutorial-devteam-projectmgt-2017: +jupyter-widgets/tutorial: +jupyter-widgets/tutorial-jupyterlite: +jupyterhub/jupyterhub-tutorial: +jupyterlab/jupytercon-jupyterlab-tutorial: +jupyterlab/scipy2018-jupyterlab-tutorial: +jupyterlab/scipy2019-jupyterlab-tutorial: + +# exemples and cookie cutter +voila-gallery/hello-world-example: +binderhub-ci-repos/cached-minimal-dockerfile: +binderhub-ci-repos/minimal-dockerfile: 
+binderhub-ci-repos/repo2docker-ci-clone-depth: +binderhub-ci-repos/repo2docker-subdir-support: +binderhub-ci-repos/requirements: + +# +ipython/pickleshare : - +jupyter/echo_kernel : - +jupyter/jupyter-alabaster-theme : - +jupyter/jupyter_markdown : jupyter_markdown +jupyter/jupyterhub-carina : - +jupyter/nb2kg : - +jupyter/qtconsole : - +jupyter/sphinxcontrib_github_alt : sphinxcontrib_github_alt +jupyter/testpath : - +jupyter/tmpnb : - +jupyter-book/hatch-deps-selector : - +jupyter-book/jupyter_releaser : - +jupyter-book/jupyterlab-myst : - +jupyter-book/jupyterlab-myst-quickstart : +jupyter-book/myst-spec : - +jupyter-book/mystmd : - +jupyter-book/sphinx-ext-mystmd : - + +jupyter-book/book-theme-tailwind-quickstart : +jupyter-book/jupyterlab-myst-quickstart : +jupyter-book/legacy-demo-book : +jupyter-book/myst-enhancement-proposals : +jupyter-book/myst-theme : +jupyter-book/mystmd-quickstart : +jupyter-book/vscode-mystmd : + +# jupyter widgets + +jupyter-widgets/ipydatagrid : - +jupyter-widgets/ipyleaflet : - +jupyter-widgets/pythreejs : - +jupyter-widgets/traittypes : - + + +# jupyterhub +jupyterhub/design : +jupyterhub/action-get-quayio-tags : +jupyterhub/action-k3s-helm : +jupyterhub/action-k8s-await-workloads : +jupyterhub/action-k8s-namespace-report : +jupyterhub/action-major-minor-tag-calculator: + + +jupyterhub/alabaster-jupyterhub : - +jupyterhub/batchspawner : - +jupyterhub/binderhub : - +jupyterhub/chartpress : - +jupyterhub/configurable-http-proxy : - +jupyterhub/docker-image-cleaner : - +jupyterhub/dockerspawner : - +jupyterhub/gh-scoped-creds : - +jupyterhub/jupyter-remote-desktop-proxy : - +jupyterhub/jupyter-rsession-proxy : - +jupyterhub/jupyter-server-proxy : - +jupyterhub/nbgitpuller : - +jupyterhub/jupyterhub : - +jupyterhub/jupyterhub-idle-culler : - +jupyterhub/jupyterhub-sphinx-theme : - +jupyterhub/nbgitpuller-downloader-dropbox : - +jupyterhub/nbgitpuller-downloader-generic-web : - +jupyterhub/nbgitpuller-downloader-googledrive : - 
+jupyterhub/nullauthenticator : - +jupyterhub/oauthenticator : - +jupyterhub/pytest-jupyterhub : - +jupyterhub/simpervisor : - +jupyterhub/sudospawner : - +jupyterhub/wrapspawner : - +jupyterhub/escapism : - +jupyterhub/pamela : - +jupyterhub/autodoc-traits : - + +# seem to append jupyter- in front of the repo name +jupyterhub/repo2docker : jupyter-repo2docker + +# seem to append jupyterhub- in front of the repo name +jupyterhub/kerberosauthenticator : jupyterhub-kerberosauthenticator +jupyterhub/ldapauthenticator : jupyterhub-ldapauthenticator +jupyterhub/ltiauthenticator : jupyterhub-ltiauthenticator +jupyterhub/nativeauthenticator : jupyterhub-nativeauthenticator +jupyterhub/dummyauthenticator : jupyterhub-dummyauthenticator +jupyterhub/firstuseauthenticator : jupyterhub-firstuseauthenticator +jupyterhub/simplespawner : jupyterhub-simplespawner +jupyterhub/systemdspawner : jupyterhub-systemdspawner +jupyterhub/tmpauthenticator : jupyterhub-tmpauthenticator +jupyterhub/traefik-proxy : jupyterhub-traefik-proxy +jupyterhub/yarnspawner : jupyterhub-yarnspawner +jupyterhub/kubespawner : jupyterhub-kubespawner + + +# misc name don't match +jupyter-widgets/jupyterlab-sidecar : sidecar +jupyter-widgets/midicontrols : ipymidicontrols +jupyter-widgets/xeus : xeus-python + +jupyter/declarativewidgets : jupyter_declarativewidgets/ + +jupyter/kernel_gateway_bundlers : jupyter_kernel_gateway_bundlers/ + + + + +# Jupyter XEUS + +jupyter-xeus/robotframework-interpreter : robotframework-interpreter +jupyter-xeus/xeus-python : xeus-python +jupyter-xeus/xeus-python-shell : xeus-python-shell +jupyter-xeus/xeus-robot : xeus-robot +jupyter-xeus/xeus-robot-wheel : xeus-robot + + +# Voilà + +voila-dashboards/hotpot_km : hotpot_km +voila-dashboards/voici : voici +voila-dashboards/voila : voila +voila-dashboards/voila-debug : voila-debug +voila-dashboards/voila-gridstack : voila-gridstack +voila-dashboards/voila-material : voila-material +voila-dashboards/voila-reveal : voila-reveal 
+voila-dashboards/voila-topbar : voila-topbar +voila-dashboards/voila-vuetify : voila-vuetify + + +jupyter-server/jupyverse : - +jupyterlab/lumino : +jupyterlab/retrolab : - +jupyterlab/rtc : + +# not packages +ipython/dfflib : +ipython/front-to-back : +ipython/ipython-ansible-jenkins : +ipython/ipython-doc : +ipython/ipython-in-depth: +ipython/ipython-sec : +ipython/marketing : +ipython/mozfest2014 : +ipython/nbviewer-deploy : +ipython/paper-notebook2013 : +ipython/salt-states-nbviewer : +ipython/sloan-2013-reports : +ipython/talks : +ipython/usersurveys : +ipython/xkcd-font: + +jupyter/dashboards : jupyter_dashboards + +# nodes packages + +jupyter/jupyter-js-cells : +jupyter/jupyter-js-docmanager : +jupyter/jupyter-js-domutils : +jupyter/jupyter-js-editor : +jupyter/jupyter-js-filebrowser : +jupyter/jupyter-js-input-area : +jupyter/jupyter-js-notebook : +jupyter/jupyter-js-output-area : +jupyter/jupyter-js-phosphide : +jupyter/jupyter-js-plugins : +jupyter/jupyter-js-terminal : +jupyter/jupyter-js-ui : +jupyter/jupyter-js-utils : + + +# Own by the matplotlib org: +voila-gallery/ipympl : ipympl + + +jupyterlab/jupyter-chat : jupyterlab-chat + +# misc + +jupyter/nbmanager : +jupyter/conf-event-log: +jupyter/cookiecutter-docker-stacks: + + +jupyter/dashboards_bundlers : jupyter_dashboards_bundlers +jupyter/dashboards_server : +jupyter/dashboards_setup : +jupyter/distinguished-contributors: +jupyter/docker-demo-images: +jupyter/docker-notebook: +jupyter/docker-stacks: +jupyter/enhancement-proposals: +jupyter/etph: + + +jupyter-widgets/widget-cookiecutter: +jupyter-widgets/widget-ts-cookiecutter: + +voila-gallery/cpp-xleaflet : +voila-gallery/electricitymap_dashboard : +voila-gallery/gaussian-density : +voila-gallery/render-stl : +voila-gallery/voila-spotify : + + +jupyterlab/ui-profiler: jupyterlab-ui-profiler +jupyterlab/ux-research: +jupyterlab/vscode-config-template: +jupyterlab/richoutput-js: +jupyterlab/jupyterlab_xkcd: +jupyterlab/mimerender-cookiecutter: 
+jupyterlab/jupyterlab-commenting: jupyterlab-commenting-service +jupyterlab/jupyterlab-celltags: +jupyterlab/jupyterlab-bot: + + + +voila-dashboards/tljh-voila-gallery : +voila-dashboards/voici-demo : +voila-dashboards/voici-gallery : +voila-dashboards/voila-heroku : +voila-dashboards/voila-huggingface : +voila-dashboards/voila-railway : +voila-dashboards/voila-template-cookiecutter: + +jupyterlab/jupyterlab-mp4 : +jupyterlab/jupyterlab-demo: +jupyterlab/galata : +jupyterhub/outreachy : +jupyterhub/pebble-helm-chart : +jupyterhub/repo2docker-action : +jupyterhub/research-facilities : +jupyterhub/the-littlest-jupyterhub : +jupyterhub/zero-to-jupyterhub-k8s : +jupyterhub/mybinder-tools : +jupyterhub/jupyterlab-hub : +jupyterhub/jupyterhub-the-hard-way : +jupyterhub/hubshare : + + +jupyter/scipy-2015-advanced-topics : +jupyter/scipy-sprint : +jupyter/surveys : +jupyter/nbshot : +jupyter/nbcache : +jupyter/nature-demo : +jupyter/jupyter_logger : +jupyter/lbnl-jupyterday : +jupyter/joss-papers : +jupyter/help : +jupyter/jupyter-blog-theme : +jupyter/jupyter-communitycalls : + + +jupyter-book/blog : + +jupyterlab/benchmarks : +jupyterlab/debugger : +jupyter-book/meta : +jupyter/tmpnb-redirector : +jupyter/atom-notebook : +jupyter/jupyterlab_json : +jupyter/jupyterhub-2016-workshop : +jupyter/ipython-py3k : +jupyter/debugger : +jupyter/experiments : +jupyter/kernels : +jupyter/markdown : +jupyterhub/jhub-proposals : + + +jupyter/colaboratory : +jupyter-server/extension-cookiecutter : +jupyter/extension-builder : +jupyter/jsplugins : +jupyter/jupyter-drive : jupyterdrive +jupyter/jupyter-sprints : +jupyter/jupytercon2023 : +jupyterhub/jupyterhub-deploy-docker : +jupyterhub/jupyterhub-deploy-hpc : +jupyterhub/jupyterhub-deploy-teaching : + +jupyter/jvm-magics : +jupyter/jvm-repr : +jupyter/kernel-protocol : +jupyter/nbindex : +jupyter/tmpnb-deploy : +jupyter/nbconvert-examples : +jupyterhub/binder-billing : +jupyter-xeus/xeus-calc : +jupyter/kernel_gateway_demos : 
async def get_package_maintainers(package: str) -> tuple[set[str], bool]:
    """Get the maintainers of a package from PyPI.

    The JSON API does not have the right information, so the project page is
    scraped. Successful lookups are stored in the module-level ``cache``
    under the package name and served from there on later calls.

    Parameters
    ----------
    package:
        PyPI project name; must be longer than two characters once stripped.

    Returns
    -------
    tuple[set[str], bool]
        ``(maintainers, True)`` when the names were read from the cache or
        scraped from the page. Otherwise a one-element set holding an
        ``"unknown (...)"`` placeholder together with ``False``.

    Raises
    ------
    ValueError
        If ``package`` is empty or too short, or if the request to the JSON
        endpoint fails (for example because the project does not exist).
    """
    # Validate before any cache or network access; explicit raises survive
    # ``python -O`` whereas ``assert`` statements do not.
    if not package:
        raise ValueError("package is required")
    if len(package.strip()) <= 2:
        raise ValueError(f"package name is too short: {package!r}")

    cached = cache.get(package)
    if cached is not None:
        print("c", end="", flush=True)
        return cached, True

    # The HTML page served through fastly answers 200 even when the package is
    # not found, so existence is checked against the JSON endpoint first. This
    # also avoids downloading the HTML page for a project that does not exist.
    try:
        (await asks.get(f"https://pypi.org/pypi/{package}/json")).raise_for_status()
    except Exception as e:
        raise ValueError("For package", package) from e

    response = await asks.get(f"https://pypi.org/project/{package}/")
    if response.status_code != 200:
        print("f", end="", flush=True)
        return {"unknown (status code: " + str(response.status_code) + ")"}, False

    soup = BeautifulSoup(response.text, "html.parser")
    maintainers = soup.find_all("span", class_="sidebar-section__maintainer")
    if not maintainers:
        # A 200 page without maintainer entries is not cached, so the lookup
        # is retried on the next run.
        print("x", end="", flush=True)
        return {"unknown (blocked by fastly?)"}, False

    res = {a.text.strip() for a in maintainers}
    cache[package] = res
    print(".", end="", flush=True)
    return res, True
def _parse_known_mapping(lines):
    """Parse ``org/repo : pypi`` config lines into ``(github_slug, pypi_url)`` pairs.

    Lines starting with ``#`` and blank lines are skipped. The PyPI field may be
    empty (stored as ``""``), ``-`` (package named after the repo, with ``_``
    replaced by ``-``), a bare project name, or a full
    ``https://pypi.org/project/...`` URL. One repo may appear on several lines.

    Prints the offending line and exits with status 1 when a line has no ``:``
    or when its left-hand side is not exactly ``org/repo``.
    """
    project_prefix = "https://pypi.org/project/"
    known_mapping = []
    for item in lines:
        if item.startswith("#") or not item.strip():
            continue
        if ":" not in item:
            print(f"Invalid line: {item}")
            raise SystemExit(1)
        github_slug, pypi_url = item.split(":", maxsplit=1)
        github_slug = github_slug.strip(" /")
        parts = github_slug.split("/")
        if len(parts) != 2 or not parts[1]:
            # Report a malformed slug the same way as a missing colon instead
            # of failing with a bare unpacking error.
            print(f"Invalid line: {item}")
            raise SystemExit(1)
        repo = parts[1]
        pypi_url = pypi_url.strip(" /")
        if pypi_url == "-":
            pypi_url = f"{project_prefix}{repo.replace('_', '-')}"
        elif pypi_url and project_prefix not in pypi_url:
            pypi_url = f"{project_prefix}{pypi_url}"

        # pypi name may be empty for repo with no packages.
        # and one repo can create multiple pypi packages.
        assert pypi_url != project_prefix  # internal invariant
        known_mapping.append((github_slug, pypi_url))
    return known_mapping


def _status_markup(archived, private):
    """Return the rich markup that flags archived and/or private repositories."""
    return (
        f"[yellow]{'(archived)' if archived else ''}[/yellow]"
        f" [red]{'(private)' if private else ''}[/red]"
    )


async def main(config_file: str = "all_repos.txt"):
    """Cross-check the repo/PyPI config file against PyPI and GitHub.

    Steps, each printing a report:

    1. Parse ``config_file`` into ``(github_slug, pypi_url)`` pairs.
    2. List the packages of the PyPI jupyter org via ``get_packages``.
    3. For configured packages that are not in that list, look up their
       maintainers and print them per package, then grouped by maintainer.
    4. Print PyPI org packages that have no entry in the config file.
    5. List the repos of ``default_orgs`` via ``list_github_repos`` and keep
       those absent from the config file.
    6. Probe ``https://pypi.org/pypi/{repo}/json`` for each of them and print
       the repos that answer 200 (candidate config lines), then the others.

    Parameters
    ----------
    config_file:
        Path of the mapping file, one ``org/repo : pypi`` entry per line.
    """
    from pathlib import Path

    # The config file contains non-ASCII text, so do not rely on the platform
    # default encoding.
    config_lines = Path(config_file).read_text(encoding="utf-8").splitlines()
    known_mapping = _parse_known_mapping(config_lines)
    known_urls = {url for _, url in known_mapping}

    # get all packages in the pypi jupyter org
    packages = get_packages("https://pypi.org/org/jupyter/")
    packages_urls = [f"https://pypi.org/project/{p}" for p in packages]
    print(f"Found {len(packages)} packages in the pypi jupyter org")

    missing_from_pypi_org = known_urls - set(packages_urls) - {""}

    org_candidates = []
    semaphore = trio.Semaphore(15)  # at most 15 lookups in flight

    async def _fetch_maintainers(package_url):
        async with semaphore:
            package = package_url.strip("/").split("/")[-1]
            assert package != "project", package_url
            assert package, f"package is required {package_url}"
            maintainers, is_ok = await get_package_maintainers(package)
            org_candidates.append((package_url, maintainers, is_ok))

    async with trio.open_nursery() as nursery:
        for package_url in missing_from_pypi_org:
            nursery.start_soon(_fetch_maintainers, package_url)

    by_maintainers = {}
    if org_candidates:
        print()
        print(
            f"{len(org_candidates)} To add to PiPy org – they are listed on the config file, with a "
            "corresponding Pypi package, but the package is not part of Pypi org:"
        )
        for package_url, maintainers, is_ok in org_candidates:
            print(f" [yellow]{package_url}[/yellow] maintained by")
            color, end = ("[green]", "[/green]") if is_ok else ("[red]", "[/red]")
            for maintainer in maintainers:
                print(f"{color} pypi: `@{maintainer}` {end}")
                if is_ok:
                    by_maintainers.setdefault(maintainer, []).append(package_url)
        print()

    # Maintainers with the most packages first.
    ranked = sorted(by_maintainers.items(), key=lambda item: len(item[1]), reverse=True)
    for maintainer, maintained in ranked:
        print(
            f"[green]`@{maintainer}`[/green] {len(maintained)} packages (https://pypi.org/user/{maintainer}/):"
        )
        for package in sorted(maintained):
            print(f" {package}")

    missing_from_github_org = set(packages_urls) - known_urls
    if missing_from_github_org:
        print(
            "Packages missing from github org, they are on PyPI, but I don't know"
            " the source github repo...:"
        )
        for repo in sorted(missing_from_github_org):
            print(f" {repo}")

    # The existing mapping has been verified; now look at every org/repo that
    # is not listed in it.
    print(
        "listing all org and repo under jupyter purview, and filtering one without"
        " mathching github repos."
    )

    known_org_rep = {slug for slug, _ in known_mapping}
    todo = []
    async for org, repo, archived, private in list_github_repos(default_orgs):
        if f"{org}/{repo}" in known_org_rep:
            continue
        todo.append((org, repo, archived, private))

    print()
    print(
        "check potentially matching Pypi names, all the following seem to correspond"
        " to an existing pypi package, if they indeed are part of Jupyter, "
        "you can copy past the lines as is in the config file. If not just append: `{org}/{repo}:`"
    )

    repo_statuses = []
    semaphore = trio.Semaphore(15)  # at most 15 probes in flight

    async def _probe_pypi(org, repo, archived, private):
        async with semaphore:
            response = await asks.get(f"https://pypi.org/pypi/{repo}/json")
            # TODO: maintainers are not looked up for unmapped repos yet; the
            # empty list keeps the tuple shape stable for when they are.
            repo_statuses.append(
                (org, repo, response.status_code, [], archived, private)
            )

    async with trio.open_nursery() as nursery:
        for org, repo, archived, private in todo:
            nursery.start_soon(_probe_pypi, org, repo, archived, private)

    # Order by (org, repo) only, so the optional archived/private values
    # (possibly None) are never compared.
    ordered = sorted(repo_statuses, key=lambda entry: (entry[0], entry[1]))

    corg = ""
    for org, repo, status, _maintainers, archived, private in ordered:
        if org != corg:
            print()
            corg = org
        if status == 200:
            print(
                f"https://github.com/{org}/{repo}".ljust(40),
                f" : https://pypi.org/project/{repo}",
                _status_markup(archived, private),
            )

    print()
    print(
        "repos with no Pypi package, either manually add `{org}/{repo}: "
        "{pypi_url}` or `{org}/{repo}: ` to config file."
    )
    corg = ""
    counter = 0
    for org, repo, status, _maintainers, archived, private in ordered:
        if org != corg:
            print()
            corg = org
        if status != 200:
            counter += 1
            print(
                counter,
                f"https://github.com/{org}/{repo}".ljust(30),
                _status_markup(archived, private),
            )
async def get_package_maintainers(
    package: str, session: aiohttp.ClientSession
) -> List[str]:
    """Scrape the PyPI project page of ``package`` for maintainer names.

    The JSON endpoint does not carry this information, hence the HTML scraping.

    Returns the stripped text of every ``<a class="package-header__author-link">``
    element on the page, or an empty list when the page does not answer with
    HTTP 200.
    """
    # NOTE(review): tools/all_repos.py scrapes ``span.sidebar-section__maintainer``
    # for the same purpose -- confirm which selector matches PyPI's markup.
    async with session.get(f"https://pypi.org/project/{package}/") as response:
        if response.status != 200:
            return []
        page = BeautifulSoup(await response.text(), "html.parser")
        author_links = page.find_all("a", class_="package-header__author-link")
        return [link.text.strip() for link in author_links]