Skip to content

Commit 7413dfd

Browse files
authored
ci: improve NPM download parameters (#380)
The API from which we retrieve the top NPM packages was sometimes returning 5xx errors, so I've lowered the number of items per page and increased the number of retries and the timeout.
1 parent bdac02f commit 7413dfd

File tree

3 files changed

+56
-51
lines changed

3 files changed

+56
-51
lines changed

dependencies/npm.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

dependencies/pypi.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

dependencies/scripts/download_packages.py

Lines changed: 54 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import json
22
import logging
33
from collections.abc import Callable
4-
from dataclasses import dataclass
4+
from dataclasses import dataclass, field
55
from datetime import datetime
66
from pathlib import Path
77
from typing import Any
@@ -19,12 +19,18 @@
1919
)
2020

2121

22-
def parse_npm(data: list[dict[str, Any]]) -> list[str]:
23-
return [x["name"] for x in data]
22+
DEPENDENCIES_DIR = "dependencies"
23+
TOP_PYPI_SOURCE = "https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json"
24+
TOP_NPM_SOURCE = "https://packages.ecosyste.ms/api/v1/registries/npmjs.org/packages"
25+
TIMEOUT = 90
2426

2527

26-
def parse_pypi(data: dict[str, Any]) -> list[str]:
27-
return [row["project"] for row in data["rows"]]
28+
def parse_npm(data: list[dict[str, Any]]) -> set[str]:
29+
return {x["name"] for x in data}
30+
31+
32+
def parse_pypi(data: dict[str, Any]) -> set[str]:
33+
return {row["project"] for row in data["rows"]}
2834

2935

3036
class ServerError(Exception):
@@ -34,28 +40,25 @@ class ServerError(Exception):
3440
@dataclass(frozen=True)
3541
class Ecosystem:
3642
url: str
37-
params: dict[str, Any] | None
38-
pages: int | None
39-
parser: Callable[[dict[str, Any]], list[str]]
40-
43+
parser: Callable[[Any], set[str]]
44+
params: dict[str, Any] = field(default_factory=dict)
45+
pages: int | None = None
4146

42-
@dataclass(frozen=True)
43-
class PypiEcosystem(Ecosystem):
44-
url = "https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json"
45-
params = None
46-
pages = None
47-
parser = parse_pypi
4847

48+
pypi_ecosystem = Ecosystem(
49+
url=TOP_PYPI_SOURCE,
50+
parser=parse_pypi,
51+
)
4952

50-
@dataclass(frozen=True)
51-
class NpmEcosystem(Ecosystem):
52-
url = "https://packages.ecosyste.ms/api/v1/registries/npmjs.org/packages"
53-
params = {"per_page": 1000, "sort": "downloads"}
54-
pages = 15
55-
parser = parse_npm
53+
npm_ecosystem = Ecosystem(
54+
url=TOP_NPM_SOURCE,
55+
parser=parse_npm,
56+
params={"per_page": 100, "sort": "downloads"},
57+
pages=150,
58+
)
5659

5760

58-
ECOSYSTEMS = {"pypi": PypiEcosystem, "npm": NpmEcosystem}
61+
ECOSYSTEMS = {"pypi": pypi_ecosystem, "npm": npm_ecosystem}
5962

6063

6164
@click.group()
@@ -72,39 +75,41 @@ def entry_point() -> None:
7275
def download(
7376
ecosystem: str,
7477
) -> None:
75-
selected_ecosystem = ECOSYSTEMS[ecosystem]
78+
if ecosystem not in ECOSYSTEMS:
79+
raise click.BadParameter("Not a valid ecosystem")
7680

77-
if pages := selected_ecosystem.pages:
78-
all_packages: list[str] = []
81+
selected_ecosystem = ECOSYSTEMS[ecosystem]
82+
all_packages: set[str] = set()
7983

80-
for page in range(1, pages + 1):
81-
params = selected_ecosystem.params or {}
84+
n_pages = selected_ecosystem.pages or 1
85+
params = selected_ecosystem.params.copy()
86+
for page in range(1, n_pages + 1):
87+
if selected_ecosystem.pages:
8288
params["page"] = page
83-
all_packages.extend(get_packages(selected_ecosystem.url, selected_ecosystem.parser, params))
84-
else:
85-
all_packages = get_packages(selected_ecosystem.url, selected_ecosystem.parser, selected_ecosystem.params)
8689

87-
fpath = Path("dependencies") / f"{ecosystem}.json"
88-
save_data_to_file(all_packages, fpath)
90+
all_packages.update(get_packages(selected_ecosystem.url, selected_ecosystem.parser, params))
8991

92+
fpath = Path(DEPENDENCIES_DIR) / f"{ecosystem}.json"
93+
save_data_to_file(list(all_packages), fpath)
9094

95+
96+
@stamina.retry(
97+
on=(httpx.TransportError, httpx.TimeoutException, ServerError),
98+
attempts=10,
99+
wait_jitter=1,
100+
wait_exp_base=2,
101+
wait_max=8,
102+
)
91103
def get_packages(
92-
base_url: str, parser: Callable[[dict[str, Any]], list[str]], params: dict[str, Any] | None = None
93-
) -> list[str]:
94-
for attempt in stamina.retry_context(
95-
on=(httpx.TransportError, httpx.TimeoutException, ServerError),
96-
attempts=5,
97-
wait_jitter=1,
98-
wait_exp_base=2,
99-
wait_max=8,
100-
):
101-
with attempt, httpx.Client(timeout=30) as client:
102-
response = client.get(str(base_url), params=params)
103-
try:
104-
response.raise_for_status()
105-
except httpx.HTTPStatusError as e:
106-
if e.response.is_server_error:
107-
raise ServerError from e
104+
base_url: str, parser: Callable[[dict[str, Any]], set[str]], params: dict[str, Any] | None = None
105+
) -> set[str]:
106+
with httpx.Client(timeout=TIMEOUT) as client:
107+
response = client.get(str(base_url), params=params)
108+
try:
109+
response.raise_for_status()
110+
except httpx.HTTPStatusError as e:
111+
if e.response.is_server_error:
112+
raise ServerError from e
108113
return parser(response.json())
109114

110115

@@ -113,7 +118,7 @@ def save_data_to_file(all_packages: list[str], fpath: Path) -> None:
113118
with open(str(fpath), "w") as fp:
114119
json.dump(data, fp)
115120

116-
logger.info("Saved %d packages to `%s` file.", len(set(all_packages)), fpath)
121+
logger.info("Saved %d packages to `%s` file.", len(all_packages), fpath)
117122

118123

119124
if __name__ == "__main__":

0 commit comments

Comments
 (0)