11import json
22import logging
3+ from dataclasses import dataclass
34from datetime import datetime
45from pathlib import Path
5- from typing import Any
6+ from typing import Any , Callable , Optional
67from zoneinfo import ZoneInfo
78
89import click
@@ -25,16 +26,35 @@ def parse_pypi(data: dict[str, Any]) -> list[str]:
2526 return [row ["project" ] for row in data ["rows" ]]
2627
2728
class ServerError(Exception):
    """Raised for HTTP 5xx responses so the retry machinery retries them."""


@dataclass(frozen=True)
class Ecosystem:
    """Static configuration for fetching one registry's top packages.

    Attributes:
        url: Endpoint returning the package listing.
        parser: Turns the decoded JSON payload into a list of package names.
        params: Optional query parameters sent with each request.
        pages: Number of pages to fetch; ``None`` means a single request.
    """

    url: str
    parser: Callable[[dict[str, Any]], list[str]]
    params: Optional[dict[str, Any]] = None
    pages: Optional[int] = None


# NOTE(review): the previous revision used dataclass *subclasses* whose bare,
# unannotated class attributes were never real dataclass fields — the classes
# could not even be instantiated (inherited fields had no defaults) and only
# worked by accidental class-attribute lookup. Plain frozen instances express
# the same configuration directly and keep the `.url/.params/.pages/.parser`
# attribute interface used by `download` unchanged.
ECOSYSTEMS = {
    "pypi": Ecosystem(
        url="https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json",
        parser=parse_pypi,
    ),
    "npm": Ecosystem(
        url="https://packages.ecosyste.ms/api/v1/registries/npmjs.org/packages",
        parser=parse_npm,
        params={"per_page": 1000, "sort": "downloads"},
        pages=15,
    ),
}
3858
3959
4060@click .group ()
@@ -48,30 +68,51 @@ def entry_point() -> None:
4868 type = str ,
4969 required = True ,
5070)
def download(ecosystem: str) -> None:
    """Fetch the top packages for *ecosystem* and save them to disk.

    Paginates when the ecosystem's config defines a page count; otherwise
    issues a single request. The result is written to
    ``dependencies/<ecosystem>.json``.
    """
    config = ECOSYSTEMS[ecosystem]

    if config.pages:
        all_packages: list[str] = []
        for page in range(1, config.pages + 1):
            # BUG FIX: copy before adding "page" — the original mutated the
            # shared params dict stored on the ecosystem config itself.
            params = dict(config.params or {})
            params["page"] = page
            all_packages.extend(get_packages(config.url, config.parser, params))
    else:
        all_packages = get_packages(config.url, config.parser, config.params)

    save_data_to_file(all_packages, Path("dependencies") / f"{ecosystem}.json")
88+
89+
def get_packages(
    base_url: str,
    parser: Callable[[dict[str, Any]], list[str]],
    params: Optional[dict[str, Any]] = None,
) -> list[str]:
    """GET *base_url* and return the package names extracted by *parser*.

    Retries transport errors, timeouts, and HTTP 5xx responses with jittered
    exponential backoff. Client (4xx) errors are raised immediately — they
    will not get better on retry.

    Raises:
        ServerError: after exhausting retries on repeated 5xx responses.
        httpx.HTTPStatusError: on a 4xx response.
    """
    for attempt in stamina.retry_context(
        on=(httpx.TransportError, httpx.TimeoutException, ServerError),
        attempts=5,
        wait_jitter=1,
        wait_exp_base=2,
        wait_max=8,
    ):
        with attempt, httpx.Client(timeout=30) as client:
            logger.info("Attempt #%d: GET %s", attempt.num, base_url)
            response = client.get(base_url, params=params)
            try:
                response.raise_for_status()
            except httpx.HTTPStatusError as e:
                if e.response.is_server_error:
                    # Wrap 5xx so stamina's retry_context retries it.
                    raise ServerError from e
                # BUG FIX: 4xx errors were silently swallowed here and the
                # error body was then parsed as data; re-raise client errors.
                raise
            return parser(response.json())
    # stamina either returns from inside the loop or re-raises on exhaustion.
    raise AssertionError("unreachable: retry_context returns or raises")
66108
67- fpath = Path ("dependencies" ) / f"{ ecosystem } .json"
68109
69- packages = ECOSYSTEMS [ ecosystem ][ "parser" ]( response . json ()) # type: ignore[operator]
70- data = {"date" : datetime .now (ZoneInfo ("UTC" )).isoformat (), "packages" : packages }
def save_data_to_file(all_packages: list[str], fpath: Path) -> None:
    """Write *all_packages* plus a UTC timestamp to *fpath* as JSON.

    The payload is ``{"date": <ISO-8601 UTC>, "packages": [...]}``. Duplicates
    are kept in the file; the log line reports the unique count.
    """
    data = {
        "date": datetime.now(ZoneInfo("UTC")).isoformat(),
        "packages": all_packages,
    }
    # Robustness: create the target directory if it does not exist yet —
    # previously this crashed with FileNotFoundError on a fresh checkout.
    fpath.parent.mkdir(parents=True, exist_ok=True)
    with fpath.open("w") as fp:
        json.dump(data, fp)

    logger.info("Saved %d packages to `%s` file.", len(set(all_packages)), fpath)
75116
76117
77118if __name__ == "__main__" :
0 commit comments