Skip to content

Commit a516ec5

Browse files
committed
feat: Improve NPM download parameters and provide computed version
1 parent b73f7af commit a516ec5

File tree

8 files changed

+138
-68
lines changed

8 files changed

+138
-68
lines changed

.github/workflows/test.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ jobs:
1616
fail-fast: false
1717
matrix:
1818
python-version: ["3.10", "3.14"]
19-
runs-on: [ubuntu-latest]
2019
steps:
2120
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
2221

.github/workflows/weekly_download.yml

Lines changed: 35 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2,42 +2,44 @@ name: Weekly download
22

33
on:
44
schedule:
5-
- cron: "0 0 * * 1" # every Monday at 00:00 UTC
5+
- cron: "0 0 * * 1" # every Monday at 00:00 UTC
66
workflow_dispatch:
77

8-
98
jobs:
109
download:
1110
runs-on: ubuntu-latest
1211
steps:
13-
- uses: actions/create-github-app-token@67018539274d69449ef7c02e8e71183d1719ab42 # v2.1.4
14-
id: app-token
15-
with:
16-
app-id: ${{ vars.ELEMENTSINTERACTIVE_BOT_APP_ID }}
17-
private-key: ${{ secrets.ELEMENTSINTERACTIVE_BOT_PRIVATE_KEY }}
18-
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
19-
with:
20-
fetch-depth: 0
21-
token: ${{ steps.app-token.outputs.token }}
22-
ref: ${{ github.head_ref }}
23-
- name: Install uv
24-
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
25-
26-
- name: Install the project
27-
run: uv sync --locked --only-group download
28-
29-
- name: Download packages from trusted sources
30-
run: |
31-
uv run --no-project dependencies/scripts/download_packages.py download pypi || echo 'Failed to download trusted pypi packages'
32-
uv run --no-project dependencies/scripts/download_packages.py download npm || echo 'Failed to download trusted npm packages'
33-
34-
- name: Configure git
35-
run: |
36-
git config user.name "github-actions[bot]"
37-
git config user.email "github-actions[bot]@users.noreply.github.com"
38-
39-
- name: Push changes to repo
40-
run: |
41-
git add .
42-
git commit -m "chore: Weekly update of trusted packages"
43-
git push origin HEAD:main
12+
- uses: actions/create-github-app-token@67018539274d69449ef7c02e8e71183d1719ab42 # v2.1.4
13+
id: app-token
14+
with:
15+
app-id: ${{ vars.ELEMENTSINTERACTIVE_BOT_APP_ID }}
16+
private-key: ${{ secrets.ELEMENTSINTERACTIVE_BOT_PRIVATE_KEY }}
17+
18+
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
19+
with:
20+
fetch-depth: 0
21+
token: ${{ steps.app-token.outputs.token }}
22+
ref: ${{ github.head_ref }}
23+
24+
- name: Install uv
25+
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
26+
27+
- name: Install the project
28+
run: uv sync --locked --only-group download
29+
30+
- name: Download packages from trusted sources
  run: |
    # Every invocation must target dependencies/scripts/download_packages.py. A wrong path
    # is masked by the `|| echo` fallback, so the step would still finish successfully.
    uv run --no-project dependencies/scripts/download_packages.py download pypi || echo 'Failed to download trusted pypi packages'
    uv run --no-project dependencies/scripts/download_packages.py download npm || echo 'Failed to download trusted npm packages' # Kept for backwards compatibility
    uv run --no-project dependencies/scripts/download_packages.py download npm_formatted || echo 'Failed to download trusted npm_formatted packages'
35+
36+
- name: Configure git
37+
run: |
38+
git config user.name "github-actions[bot]"
39+
git config user.email "github-actions[bot]@users.noreply.github.com"
40+
41+
- name: Push changes to repo
42+
run: |
43+
git add .
44+
git commit -m "chore: Weekly update of trusted packages"
45+
git push origin HEAD:main

dependencies/npm.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

dependencies/npm_formatted.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

dependencies/pypi.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

dependencies/scripts/download_packages.py

Lines changed: 96 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
import logging
3+
from abc import ABC, abstractmethod
34
from collections.abc import Callable
45
from dataclasses import dataclass
56
from datetime import datetime
@@ -10,6 +11,8 @@
1011
import click
import httpx
import stamina
from pydantic import BaseModel, Field
from typing_extensions import Self, override
1316

1417
logger = logging.getLogger("weekly_download")
1518
logging.basicConfig(
@@ -19,6 +22,47 @@
1922
)
2023

2124

25+
DEPENDENCIES_DIR = "dependencies"
26+
TOP_PYPI_SOURCE = "https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json"
27+
TOP_NPM_SOURCE = "https://packages.ecosyste.ms/api/v1/registries/npmjs.org/packages"
28+
29+
30+
class BaseDataInterface(BaseModel, ABC):
    """Common schema for the data that ``save_data_to_file`` serialises.

    Concrete subclasses decide how a flat list of package names is mapped onto
    the model by implementing ``from_packages_list``.
    """

    # Package names to persist.
    packages: list[str]
    # ISO-8601 UTC timestamp. A factory is used so the value is computed each time a
    # model is instantiated; a plain default expression would be evaluated only once,
    # when the class body runs at import time.
    date: str = Field(default_factory=lambda: datetime.now(ZoneInfo("UTC")).isoformat())

    @classmethod
    @abstractmethod
    def from_packages_list(cls, packages: list[str]) -> Self:
        """Build an instance from a flat list of package names."""
        ...
38+
39+
class SimpleDataInterface(BaseDataInterface):
40+
@override
41+
@classmethod
42+
def from_packages_list(cls, packages) -> Self:
43+
return cls(packages=packages)
44+
45+
46+
class NpmFormattedDataInterface(BaseDataInterface):
47+
namespaces: dict[str, list[str]] # contains `namespace` as key, `packages` as strings in a list.
48+
49+
@override
50+
@classmethod
51+
def from_packages_list(cls, packages: list[str]) -> Self:
52+
namespaces: dict[str, list[str]] = {}
53+
non_namespace_packages = []
54+
55+
for package in packages:
56+
if package.startswith("@"):
57+
namespace, package_name = package.split("/")
58+
if namespace not in namespaces:
59+
namespaces[namespace] = []
60+
namespaces[namespace].append(package_name)
61+
else:
62+
non_namespace_packages.append(package)
63+
return cls(packages=non_namespace_packages, namespaces=namespaces)
64+
65+
2266
def parse_npm(data: list[dict[str, Any]]) -> list[str]:
    """Return the ``name`` value of every entry in ``data``, in order."""
    names = [entry["name"] for entry in data]
    return names
2468

@@ -36,26 +80,39 @@ class Ecosystem:
3680
url: str
3781
params: dict[str, Any] | None
3882
pages: int | None
39-
parser: Callable[[dict[str, Any]], list[str]]
83+
parser: Callable[[Any], list[str]]
84+
data_interface: type[BaseDataInterface]
4085

4186

42-
@dataclass(frozen=True)
43-
class PypiEcosystem(Ecosystem):
44-
url = "https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json"
45-
params = None
46-
pages = None
47-
parser = parse_pypi
48-
87+
pypi_ecosystem = Ecosystem(
88+
url=TOP_PYPI_SOURCE,
89+
params=None,
90+
pages=None,
91+
parser=parse_pypi,
92+
data_interface=SimpleDataInterface,
93+
)
4994

50-
@dataclass(frozen=True)
51-
class NpmEcosystem(Ecosystem):
52-
url = "https://packages.ecosyste.ms/api/v1/registries/npmjs.org/packages"
53-
params = {"per_page": 1000, "sort": "downloads"}
54-
pages = 15
55-
parser = parse_npm
95+
npm_ecosystem = Ecosystem(
96+
url=TOP_NPM_SOURCE,
97+
params={"per_page": 100, "sort": "downloads"},
98+
pages=150,
99+
parser=parse_npm,
100+
data_interface=SimpleDataInterface,
101+
)
56102

103+
npm_formatted_ecosystem = Ecosystem(
104+
url=TOP_NPM_SOURCE,
105+
params={"per_page": 100, "sort": "downloads"},
106+
pages=150,
107+
parser=parse_npm,
108+
data_interface=NpmFormattedDataInterface,
109+
)
57110

58-
ECOSYSTEMS = {"pypi": PypiEcosystem, "npm": NpmEcosystem}
111+
ECOSYSTEMS: dict[str, Ecosystem] = {
112+
"pypi": pypi_ecosystem,
113+
"npm": npm_ecosystem,
114+
"npm_formatted": npm_formatted_ecosystem,
115+
}
59116

60117

61118
@click.group()
@@ -72,33 +129,38 @@ def entry_point() -> None:
72129
def download(
    ecosystem: str,
) -> None:
    """Fetch the package list for ``ecosystem`` and save it as ``<ecosystem>.json``.

    The file is written inside ``DEPENDENCIES_DIR`` using the data interface
    configured for the selected ecosystem.

    Raises:
        click.BadParameter: If ``ecosystem`` is not a key of ``ECOSYSTEMS``.
    """
    if ecosystem not in ECOSYSTEMS:
        raise click.BadParameter("Not a valid ecosystem")

    selected_ecosystem = ECOSYSTEMS[ecosystem]
    all_packages: list[str] = []

    # Sources without pagination are fetched with a single request.
    n_pages = selected_ecosystem.pages or 1
    for page in range(1, n_pages + 1):
        # Work on a copy: writing "page" into the dict stored on the Ecosystem
        # instance would mutate configuration shared through ``ECOSYSTEMS``.
        params = dict(selected_ecosystem.params or {})
        if selected_ecosystem.pages:
            params["page"] = page

        all_packages.extend(get_packages(selected_ecosystem.url, selected_ecosystem.parser, params))

    fpath = Path(DEPENDENCIES_DIR) / f"{ecosystem}.json"
    data = selected_ecosystem.data_interface.from_packages_list(all_packages)
    save_data_to_file(data, fpath)
89149

90150

91151
def get_packages(
92-
base_url: str, parser: Callable[[dict[str, Any]], list[str]], params: dict[str, Any] | None = None
152+
base_url: str,
153+
parser: Callable[[dict[str, Any]], list[str]],
154+
params: dict[str, Any] | None = None,
93155
) -> list[str]:
94156
for attempt in stamina.retry_context(
95157
on=(httpx.TransportError, httpx.TimeoutException, ServerError),
96-
attempts=5,
158+
attempts=10,
97159
wait_jitter=1,
98160
wait_exp_base=2,
99161
wait_max=8,
100162
):
101-
with attempt, httpx.Client(timeout=30) as client:
163+
with attempt, httpx.Client(timeout=90) as client:
102164
response = client.get(str(base_url), params=params)
103165
try:
104166
response.raise_for_status()
@@ -108,12 +170,14 @@ def get_packages(
108170
return parser(response.json())
109171

110172

111-
def save_data_to_file(all_packages: list[str], fpath: Path) -> None:
112-
data = {"date": datetime.now(ZoneInfo("UTC")).isoformat(), "packages": all_packages}
173+
def save_data_to_file(
    data: BaseDataInterface,
    fpath: Path,
) -> None:
    """Serialise ``data`` as JSON into ``fpath``, overwriting any existing file.

    Args:
        data: Model whose ``model_dump()`` output is written.
        fpath: Destination path of the JSON file.
    """
    # Explicit encoding so the written bytes do not depend on the platform's locale default.
    with open(fpath, "w", encoding="utf-8") as fp:
        json.dump(data.model_dump(), fp)

    logger.info("Saved packages to `%s` file.", fpath)
117181

118182

119183
if __name__ == "__main__":

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ download = [
5757
"click>=8.1.8",
5858
"httpx>=0.28.1",
5959
"stamina>=25.1.0",
60+
"pydantic>=2.11.7,<3.0.0",
61+
6062
]
6163
local = ["ipdb<1.0.0,>=0.13.9", "commitizen<5.0,>=2.38", "pdbpp<1.0.0,>=0.11.6"]
6264

uv.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)