1919)
2020
2121
class ServerError(Exception):
    """Signal that the remote server replied with an HTTP 5xx status."""
24+
25+
class InvalidJSONError(Exception):
    """Signal that a received JSON payload does not have the expected format."""
28+
29+
# Directory the per-ecosystem package lists are written into
DEPENDENCIES_DIR = "dependencies"

# Upstream endpoints the package names are downloaded from
TOP_PYPI_SOURCE = "https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json"
TOP_NPM_SOURCE = "https://packages.ecosyste.ms/api/v1/registries/npmjs.org/packages"

# Retry policy: exception types that trigger a retry, the attempt budget,
# and the wait/backoff settings handed to stamina
RETRY_ON = (httpx.TransportError, httpx.TimeoutException, ServerError)
RETRY_ATTEMPTS = 15
RETRY_WAIT_JITTER = 1
RETRY_WAIT_EXP_BASE = 2
RETRY_WAIT_MAX = 8
# Timeout value passed to httpx.Client for every request
TIMEOUT = 90
2644
2745
def parse_npm(data: list[dict[str, Any]]) -> set[str]:
    """Extract the package names from a decoded npm listing.

    Args:
        data: Decoded JSON payload, expected to be a list of objects that
            each carry a ``"name"`` key.

    Returns:
        The set of distinct names found in ``data``.

    Raises:
        InvalidJSONError: If the payload does not have the expected shape,
            i.e. an entry lacks ``"name"``, an entry is not a mapping, or
            the payload itself cannot be iterated.
    """
    try:
        return {package["name"] for package in data}
    except (KeyError, TypeError) as e:
        # TypeError covers structurally wrong payloads (e.g. a list of
        # strings or ``None``) that would otherwise escape as a raw error.
        raise InvalidJSONError from e
3051
3152
def parse_pypi(data: dict[str, Any]) -> set[str]:
    """Extract the project names from a decoded top-PyPI-packages payload.

    Args:
        data: Decoded JSON payload, expected to be an object whose ``"rows"``
            entry is a list of objects that each carry a ``"project"`` key.

    Returns:
        The set of distinct project names found in ``data["rows"]``.

    Raises:
        InvalidJSONError: If the payload does not have the expected shape,
            i.e. ``"rows"`` or ``"project"`` is missing, or the payload or
            one of its rows is not a mapping.
    """
    try:
        return {row["project"] for row in data["rows"]}
    except (KeyError, TypeError) as e:
        # TypeError covers structurally wrong payloads (e.g. a top-level
        # list) that would otherwise escape as a raw error.
        raise InvalidJSONError from e
3858
3959
4060@dataclass (frozen = True )
@@ -61,6 +81,30 @@ class Ecosystem:
# Supported ecosystems, keyed by the name used to select them
ECOSYSTEMS = {"pypi": pypi_ecosystem, "npm": npm_ecosystem}
6282
6383
84+ def get_params (params : dict [str , Any ] | None , page : int | None ) -> dict [str , Any ]:
85+ new_params : dict [str , Any ] = {}
86+ if params :
87+ new_params |= params
88+
89+ if page :
90+ new_params ["page" ] = page
91+
92+ return new_params
93+
94+
def _run(ecosystem: str) -> None:
    """Download all package names for ``ecosystem`` and save them to disk.

    Every page of the ecosystem's source is fetched with ``get_packages``;
    the merged names are handed to ``save_data_to_file`` with the path
    ``DEPENDENCIES_DIR/<ecosystem>.json``.

    Args:
        ecosystem: Key into ``ECOSYSTEMS``.

    Raises:
        KeyError: If ``ecosystem`` is not a key of ``ECOSYSTEMS``.
    """
    selected_ecosystem = ECOSYSTEMS[ecosystem]
    all_packages: set[str] = set()

    # A source without pagination is fetched exactly once and gets no
    # "page" query parameter.
    paginated = bool(selected_ecosystem.pages)
    n_pages = selected_ecosystem.pages or 1
    for page in range(1, n_pages + 1):
        params = get_params(selected_ecosystem.params, page if paginated else None)
        all_packages.update(get_packages(selected_ecosystem.url, selected_ecosystem.parser, params))

    fpath = Path(DEPENDENCIES_DIR) / f"{ecosystem}.json"
    # Set iteration order is arbitrary; sorting keeps the saved list
    # reproducible from one run to the next.
    save_data_to_file(sorted(all_packages), fpath)
106+
107+
@click.group()
def entry_point() -> None:
    # Root command group; it only hosts subcommands and does no work itself.
    # Deliberately no docstring: click would show it as the group's help text.
    return None
@@ -78,39 +122,32 @@ def download(
78122 if ecosystem not in ECOSYSTEMS :
79123 raise click .BadParameter ("Not a valid ecosystem" )
80124
81- selected_ecosystem = ECOSYSTEMS [ecosystem ]
82- all_packages : set [str ] = set ()
125+ return _run (ecosystem )
83126
84- n_pages = selected_ecosystem .pages or 1
85- params = selected_ecosystem .params .copy ()
86- for page in range (1 , n_pages + 1 ):
87- if selected_ecosystem .pages :
88- params ["page" ] = page
89127
90- all_packages .update (get_packages (selected_ecosystem .url , selected_ecosystem .parser , params ))
91-
92- fpath = Path (DEPENDENCIES_DIR ) / f"{ ecosystem } .json"
93- save_data_to_file (list (all_packages ), fpath )
94-
95-
96- @stamina .retry (
97- on = (httpx .TransportError , httpx .TimeoutException , ServerError ),
98- attempts = 10 ,
99- wait_jitter = 1 ,
100- wait_exp_base = 2 ,
101- wait_max = 8 ,
102- )
def get_packages(
    base_url: str, parser: Callable[[dict[str, Any]], set[str]], params: dict[str, Any] | None = None
) -> set[str]:
    """Fetch ``base_url`` and return the package names extracted by ``parser``.

    The request is retried according to the module-level ``RETRY_*``
    constants whenever one of the exceptions in ``RETRY_ON`` is raised.

    Args:
        base_url: URL to send the GET request to.
        parser: Callable that turns the decoded JSON body into a set of names.
        params: Optional query parameters for the request.

    Returns:
        Whatever ``parser`` returns for the decoded response body.

    Raises:
        ServerError: If the server keeps answering with a 5xx status until
            the retry budget is exhausted.
        httpx.HTTPStatusError: If the response has any other error status;
            these are not retried.
        InvalidJSONError: If the response body is not valid JSON.
    """
    for attempt in stamina.retry_context(
        on=RETRY_ON,
        attempts=RETRY_ATTEMPTS,
        wait_jitter=RETRY_WAIT_JITTER,
        wait_exp_base=RETRY_WAIT_EXP_BASE,
        wait_max=RETRY_WAIT_MAX,
    ):
        with attempt, httpx.Client(timeout=TIMEOUT) as client:
            response = client.get(str(base_url), params=params)
            try:
                response.raise_for_status()
            except httpx.HTTPStatusError as e:
                if e.response.is_server_error:
                    # 5xx is treated as transient: convert so the retry
                    # context picks it up.
                    raise ServerError from e
                # Any other error status is not retryable; propagate it
                # rather than parsing an error body as package data.
                raise
            try:
                json_data = response.json()
            except json.JSONDecodeError as e:
                raise InvalidJSONError from e

    # Parse only after the retry loop has finished with a successful attempt.
    return parser(json_data)
114151
115152
116153def save_data_to_file (all_packages : list [str ], fpath : Path ) -> None :
0 commit comments