Skip to content

Commit 1a94b9a

Browse files
committed
Add a tool to check if all the repos have matching packages names
1 parent 16f98cf commit 1a94b9a

File tree

1 file changed

+182
-0
lines changed

1 file changed

+182
-0
lines changed

tools/all_repos.py

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
# https://packaging.python.org/en/latest/specifications/inline-script-metadata/
2+
# /// script
3+
# requires-python = ">=3.12"
4+
# dependencies = [
5+
# "requests",
6+
# "rich",
7+
# "beautifulsoup4",
# "asks",
# "trio",
8+
# ]
9+
# ///
10+
"""GitHub Organization Activity Tracker
11+
12+
This module lists the repositories of the Jupyter-related GitHub organizations and checks whether each one has a matching package name on PyPI.
13+
It scrapes the PyPI jupyter organization page for package names and queries the GitHub and PyPI APIs concurrently.
14+
"""
15+
16+
import os
17+
import asks
18+
from rich import print
19+
import trio
20+
21+
import requests
22+
from rich import print
23+
from bs4 import BeautifulSoup
24+
25+
26+
def get_packages(url):
    """Return the sorted texts of all ``<h3>`` headings on the page at *url*.

    The caller treats these headings as the package names listed on a PyPI
    organization page.

    Args:
        url: Address of the page to download and scrape.

    Returns:
        A non-empty, sorted list with the stripped text of every ``<h3>`` tag.

    Raises:
        SystemExit: With code 1, after printing a message, when the response
            status is not 200, when the body contains the "couldn't load"
            blocking notice, or when no ``<h3>`` tag is found.
        requests.exceptions.RequestException: When the request itself fails,
            including when the timeout expires.
    """
    # Send a GET request to the webpage with a custom user agent.
    request_headers = {"User-Agent": "python/request/jupyter"}
    # The timeout keeps the script from hanging forever on a stalled server.
    response = requests.get(
        url, headers=request_headers, allow_redirects=True, timeout=30
    )

    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        raise SystemExit(1)

    # A 200 response may still carry a blocking notice instead of the listing.
    if "A required part of this site couldn’t load" in response.text:
        print("Fastly is blocking us. Status code: 403")
        raise SystemExit(1)

    # Parse the HTML content and collect the text of every <h3> tag, sorted.
    soup = BeautifulSoup(response.content, "html.parser")
    h3_tags = sorted(h3.get_text(strip=True) for h3 in soup.find_all("h3"))

    if not h3_tags:
        print("No packages found")
        raise SystemExit(1)
    return h3_tags
52+
53+
54+
# GitHub organizations whose repositories are listed and compared with PyPI.
default_orgs = [
    "binder-examples",
    "binderhub-ci-repos",
    "ipython",
    "jupyter",
    "jupyter-attic",
    "jupyter-book",
    "jupyter-governance",
    "jupyter-incubator",
    "jupyter-resources",
    "jupyter-server",
    "jupyter-standard",
    "jupyter-standards",
    "jupyter-widgets",
    "jupyter-xeus",
    "jupytercon",
    "jupyterhub",
    "jupyterlab",
    "voila-dashboards",
    "voila-gallery",
    "pickleshare",
]

# GitHub token taken from the environment; the script refuses to run without it.
token = os.getenv("GH_TOKEN")
if not token:
    print("[red]Error: GH_TOKEN environment variable not set[/red]")
    # `exit()` is a convenience injected by the `site` module for interactive
    # use; raising SystemExit directly has the same effect and always works.
    raise SystemExit(1)

# Headers passed along with the GitHub API requests.
headers = {
    "Authorization": f"token {token}",
    "Accept": "application/vnd.github.v3+json",
}
86+
87+
88+
async def list_repos(orgs):
    """Yield ``(org, repo)`` pairs for every repository of each organization in *orgs*.

    One task per organization is started in a trio nursery. Pairs are yielded
    only once all tasks have finished, grouped per organization in the order
    in which the tasks completed.
    """
    per_org = []

    async def _fetch(name):
        # Each task appends its organization's complete list when it is done.
        per_org.append(await list_repos_for_org(name))

    async with trio.open_nursery() as nursery:
        for name in orgs:
            nursery.start_soon(_fetch, name)

    # The nursery has exited here, so every task has completed.
    for pairs in per_org:
        for org, repo in pairs:
            yield org, repo
100+
101+
102+
async def list_repos_for_org(org):
    """Return ``(org, repo_name)`` tuples for all repositories of *org*.

    Pages of 100 repositories are requested from the GitHub API until a page
    comes back with fewer than 100 entries.

    Args:
        org: Name of the GitHub organization.

    Returns:
        A list of ``(org, repo_name)`` tuples in the order returned by the API.

    Raises:
        RuntimeError: When a page request does not answer with status 200.
            Without this check an error payload (a JSON object) would be
            iterated like a repository list and fail with an opaque TypeError.
    """
    found = []
    page = 1
    while True:
        response = await asks.get(
            f"https://api.github.com/orgs/{org}/repos?per_page=100&page={page}",
            headers=headers,
        )
        if response.status_code != 200:
            raise RuntimeError(
                f"GitHub API returned status {response.status_code} "
                f"while listing repositories of {org!r} (page {page})"
            )
        repos = response.json()
        found.extend((org, repo["name"]) for repo in repos)
        # A short (or empty) page means there is nothing left to fetch.
        if len(repos) < 100:
            return found
        page += 1
115+
116+
117+
async def main():
    """Compare the repositories of ``default_orgs`` with PyPI package names.

    Prints four sections in order: repositories whose name matches a package
    of the PyPI jupyter organization, remaining repositories whose name
    answers with HTTP 200 on PyPI's JSON endpoint, remaining repositories that
    do not, and finally the organization packages that matched no repository.
    """
    packages = get_packages("https://pypi.org/org/jupyter/")
    print(f"Found {len(packages)} packages in the pypi jupyter org")

    # Key: lower-cased name with dashes turned into underscores.
    # Value: the package name as listed on the organization page.
    unmatched = {}
    for package in packages:
        unmatched[package.lower().replace("-", "_")] = package

    todo = []
    async for org, repo in list_repos(default_orgs):
        key = repo.lower().replace("-", "_")
        if key not in unmatched:
            todo.append((org, repo))
            continue
        print(
            f"{org}/{repo}".ljust(40),
            f"https://pypi.org/project/{unmatched[key]}",
            " in jupyter org",
        )
        # Each organization package is matched by at most one repository.
        del unmatched[key]

    print()
    print("check potentially matching Pypi names:")

    targets = []

    async def _probe(org, repo):
        # Record the HTTP status PyPI returns for a project named like the repo.
        response = await asks.get(f"https://pypi.org/pypi/{repo}/json")
        targets.append((org, repo, response.status_code))

    async with trio.open_nursery() as nursery:
        for org, repo in todo:
            nursery.start_soon(_probe, org, repo)

    ordered = sorted(targets)

    previous_org = ""
    for org, repo, status in ordered:
        # A blank line separates organizations, whatever the status is.
        if org != previous_org:
            print()
            previous_org = org
        if status == 200:
            print(
                f"https://github.com/{org}/{repo}".ljust(70),
                f"{status} for https://pypi.org/project/{repo}",
            )

    print()
    print("repos with no Pypi package:")
    previous_org = ""
    for org, repo, status in ordered:
        if org != previous_org:
            print()
            previous_org = org
        if status != 200:
            print(f"https://github.com/{org}/{repo}")

    print()
    print("Packages with no repos.")
    print(unmatched)
180+
181+
182+
# Script entry point: run the async main() to completion with trio.
trio.run(main)

0 commit comments

Comments
 (0)