Skip to content

Commit 037bc07

Browse files
committed
minimal caching
1 parent f4a75ad commit 037bc07

File tree

1 file changed

+11
-1
lines changed

1 file changed

+11
-1
lines changed

tools/all_repos.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,12 @@
2626
"mbussonn": "Carreau",
2727
}
2828

29+
import diskcache
30+
from datetime import datetime
31+
32+
CACHE_DIR = f"github_cache-all_repos-{datetime.now().strftime('%Y%m%d')}"
33+
cache = diskcache.Cache(CACHE_DIR)
34+
2935

3036
def get_packages(url):
3137
# Send a GET request to the webpage with a custom user agent
@@ -125,14 +131,18 @@ async def get_package_maintainers(package: str) -> list[str]:
125131
The json does not have the right information, so we need to scrape the page.
126132
"""
127133
url = f"https://pypi.org/project/{package}/"
134+
if package in cache:
135+
return cache[package]
128136
response = await asks.get(url)
129137
if response.status_code == 200:
130138
html = response.text
131139
soup = BeautifulSoup(html, "html.parser")
132140
maintainers = soup.find_all("span", class_="sidebar-section__maintainer")
133141
if not maintainers:
134142
return set(["unknown (blocked by fastly?)"])
135-
return set(a.text.strip() for a in maintainers)
143+
res = set(a.text.strip() for a in maintainers)
144+
cache[package] = res
145+
return res
136146
return set(["unknown (status code: " + str(response.status_code) + ")"])
137147

138148

0 commit comments

Comments
 (0)