Skip to content

Commit 6772bde

Browse files
committed
ci: improve NPM download parameters
1 parent bdac02f commit 6772bde

File tree

3 files changed

+48
-43
lines changed

3 files changed

+48
-43
lines changed

dependencies/npm.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

dependencies/pypi.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

dependencies/scripts/download_packages.py

Lines changed: 46 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,12 @@
1919
)
2020

2121

22+
DEPENDENCIES_DIR = "dependencies"
23+
TOP_PYPI_SOURCE = "https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json"
24+
TOP_NPM_SOURCE = "https://packages.ecosyste.ms/api/v1/registries/npmjs.org/packages"
25+
TIMEOUT = 90
26+
27+
2228
def parse_npm(data: list[dict[str, Any]]) -> list[str]:
2329
return [x["name"] for x in data]
2430

@@ -34,28 +40,25 @@ class ServerError(Exception):
3440
@dataclass(frozen=True)
3541
class Ecosystem:
3642
url: str
37-
params: dict[str, Any] | None
38-
pages: int | None
39-
parser: Callable[[dict[str, Any]], list[str]]
40-
43+
parser: Callable[[Any], list[str]]
44+
params: dict[str, Any] | None = None
45+
pages: int | None = None
4146

42-
@dataclass(frozen=True)
43-
class PypiEcosystem(Ecosystem):
44-
url = "https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json"
45-
params = None
46-
pages = None
47-
parser = parse_pypi
4847

48+
pypi_ecosystem = Ecosystem(
49+
url=TOP_PYPI_SOURCE,
50+
parser=parse_pypi,
51+
)
4952

50-
@dataclass(frozen=True)
51-
class NpmEcosystem(Ecosystem):
52-
url = "https://packages.ecosyste.ms/api/v1/registries/npmjs.org/packages"
53-
params = {"per_page": 1000, "sort": "downloads"}
54-
pages = 15
55-
parser = parse_npm
53+
npm_ecosystem = Ecosystem(
54+
url=TOP_NPM_SOURCE,
55+
parser=parse_npm,
56+
params={"per_page": 100, "sort": "downloads"},
57+
pages=150,
58+
)
5659

5760

58-
ECOSYSTEMS = {"pypi": PypiEcosystem, "npm": NpmEcosystem}
61+
ECOSYSTEMS = {"pypi": pypi_ecosystem, "npm": npm_ecosystem}
5962

6063

6164
@click.group()
@@ -72,39 +75,41 @@ def entry_point() -> None:
7275
def download(
7376
ecosystem: str,
7477
) -> None:
75-
selected_ecosystem = ECOSYSTEMS[ecosystem]
78+
if ecosystem not in ECOSYSTEMS:
79+
raise click.BadParameter("Not a valid ecosystem")
7680

77-
if pages := selected_ecosystem.pages:
78-
all_packages: list[str] = []
81+
selected_ecosystem = ECOSYSTEMS[ecosystem]
82+
all_packages: list[str] = []
7983

80-
for page in range(1, pages + 1):
81-
params = selected_ecosystem.params or {}
84+
n_pages = selected_ecosystem.pages or 1
85+
for page in range(1, n_pages + 1):
86+
params = selected_ecosystem.params or {}
87+
if selected_ecosystem.pages:
8288
params["page"] = page
83-
all_packages.extend(get_packages(selected_ecosystem.url, selected_ecosystem.parser, params))
84-
else:
85-
all_packages = get_packages(selected_ecosystem.url, selected_ecosystem.parser, selected_ecosystem.params)
8689

87-
fpath = Path("dependencies") / f"{ecosystem}.json"
90+
all_packages.extend(get_packages(selected_ecosystem.url, selected_ecosystem.parser, params))
91+
92+
fpath = Path(DEPENDENCIES_DIR) / f"{ecosystem}.json"
8893
save_data_to_file(all_packages, fpath)
8994

9095

96+
@stamina.retry(
97+
on=(httpx.TransportError, httpx.TimeoutException, ServerError),
98+
attempts=10,
99+
wait_jitter=1,
100+
wait_exp_base=2,
101+
wait_max=8,
102+
)
91103
def get_packages(
92104
base_url: str, parser: Callable[[dict[str, Any]], list[str]], params: dict[str, Any] | None = None
93105
) -> list[str]:
94-
for attempt in stamina.retry_context(
95-
on=(httpx.TransportError, httpx.TimeoutException, ServerError),
96-
attempts=5,
97-
wait_jitter=1,
98-
wait_exp_base=2,
99-
wait_max=8,
100-
):
101-
with attempt, httpx.Client(timeout=30) as client:
102-
response = client.get(str(base_url), params=params)
103-
try:
104-
response.raise_for_status()
105-
except httpx.HTTPStatusError as e:
106-
if e.response.is_server_error:
107-
raise ServerError from e
106+
with httpx.Client(timeout=TIMEOUT) as client:
107+
response = client.get(str(base_url), params=params)
108+
try:
109+
response.raise_for_status()
110+
except httpx.HTTPStatusError as e:
111+
if e.response.is_server_error:
112+
raise ServerError from e
108113
return parser(response.json())
109114

110115

@@ -113,7 +118,7 @@ def save_data_to_file(all_packages: list[str], fpath: Path) -> None:
113118
with open(str(fpath), "w") as fp:
114119
json.dump(data, fp)
115120

116-
logger.info("Saved %d packages to `%s` file.", len(set(all_packages)), fpath)
121+
logger.info("Saved %d packages to `%s` file.", len(all_packages), fpath)
117122

118123

119124
if __name__ == "__main__":

0 commit comments

Comments
 (0)