
Commit 26b445b

need throttle
1 parent 60cb706 commit 26b445b

2 files changed: +44, -4 lines changed


tools/all_repos.py

Lines changed: 25 additions & 4 deletions
@@ -115,8 +115,24 @@ async def list_repos_for_org(org):
     return reps
 
 
-async def main():
+async def get_package_maintainers(package: str) -> list[str]:
+    """Get the maintainers of a package from PyPI.
+
+    The json does not have the right information, so we need to scrape the page.
+    """
+    url = f"https://pypi.org/project/{package}/"
+    response = await asks.get(url)
+    if response.status_code == 200:
+        html = response.text
+        soup = BeautifulSoup(html, "html.parser")
+        maintainers = soup.find_all("a", class_="package-header__author-link")
+        if not maintainers:
+            return ["unknown (blocked by fastly?)"]
+        return [a.text.strip() for a in maintainers]
+    return ["unknown (status code: " + str(response.status_code) + ")"]
+
 
+async def main():
     packages = get_packages(f"https://pypi.org/org/jupyter/")
     print(f"Found {len(packages)} packages in the pypi jupyter org")
 
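The new helper keys on PyPI's package-header__author-link anchor class. A stand-alone sketch of just that extraction step, run against a canned HTML fragment rather than a live PyPI project page (the fragment and user names are made up):

# Sketch of the extraction step used above, against a made-up HTML
# fragment instead of a live PyPI project page.
from bs4 import BeautifulSoup

html = """
<a class="package-header__author-link" href="/user/alice/"> alice </a>
<a class="package-header__author-link" href="/user/bob/"> bob </a>
"""
soup = BeautifulSoup(html, "html.parser")
links = soup.find_all("a", class_="package-header__author-link")
print([a.text.strip() for a in links])  # -> ['alice', 'bob']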
@@ -143,19 +159,22 @@ async def main():
         for org, repo in todo:
 
             async def _loc(targets, org, repo):
+                maintainers = await get_package_maintainers(repo)
                 targets.append(
                     (
                         org,
                         repo,
                         (
                             await asks.get(f"https://pypi.org/pypi/{repo}/json")
                         ).status_code,
+                        maintainers,
                     )
                 )
 
             nursery.start_soon(_loc, targets, org, repo)
+
     corg = ""
-    for org, repo, status in sorted(targets):
+    for org, repo, status, maintainers in sorted(targets):
         if org != corg:
             print()
             corg = org
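The commit message says "need throttle": _loc is started once per (org, repo) pair, so every PyPI request fires concurrently, which is presumably what provokes the "blocked by fastly?" responses the helper guards against. A minimal sketch of one way to cap the fan-out with trio's CapacityLimiter; the limiter, its limit of 5, and throttled_get are illustrative, not part of this commit:

# Hypothetical throttle (not in this commit): a shared CapacityLimiter
# bounds how many requests to PyPI are in flight at once.
import trio
import asks

limiter = trio.CapacityLimiter(5)  # illustrative cap

async def throttled_get(url):
    async with limiter:  # waits while 5 requests are already running
        return await asks.get(url)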
@@ -165,16 +184,18 @@ async def _loc(targets, org, repo):
             f"{status} for https://pypi.org/project/{repo}",
         )
 
+        for maintainer in maintainers:
+            print(f" |{maintainer}")
+
     print()
     print("repos with no Pypi package:")
     corg = ""
-    for org, repo, status in sorted(targets):
+    for org, repo, status, maintainers in sorted(targets):
         if org != corg:
             print()
             corg = org
         if status != 200:
             print(f"https://github.com/{org}/{repo}")
-
     print()
     print("Packages with no repos.")
     print(map)

tools/last_user_activity.py

Lines changed: 19 additions & 0 deletions
@@ -17,6 +17,7 @@
 import aiohttp
 import diskcache
 import humanize
+from bs4 import BeautifulSoup
 from rich import print
 
 default_orgs = [
@@ -212,6 +213,24 @@ async def get_user_activity(
     return None
 
 
+async def get_package_maintainers(
+    package: str, session: aiohttp.ClientSession
+) -> List[str]:
+    """Get the maintainers of a package from PyPI.
+
+    The json does not have the right information, so we need to scrape the page.
+    """
+    url = f"https://pypi.org/project/{package}/"
+    async with session.get(url) as response:
+        if response.status == 200:
+            html = await response.text()
+            soup = BeautifulSoup(html, "html.parser")
+            maintainers = soup.find_all("a", class_="package-header__author-link")
+            return [a.text.strip() for a in maintainers]
+        else:
+            return []
+
+
 def get_cache_size() -> str:
     """Get the current cache size in a human-readable format."""
     try:
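A minimal usage sketch for this aiohttp variant, assuming an asyncio entry point; the package name "notebook" is only an example:

# Hypothetical caller for get_package_maintainers above; the package
# name is just an example.
import asyncio
import aiohttp

async def demo():
    async with aiohttp.ClientSession() as session:
        print(await get_package_maintainers("notebook", session))

asyncio.run(demo())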
