Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ jobs:
fail-fast: false
matrix:
python-version: ["3.10", "3.14"]
runs-on: [ubuntu-latest]
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

Expand Down
68 changes: 35 additions & 33 deletions .github/workflows/weekly_download.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,44 @@ name: Weekly download

on:
schedule:
- cron: "0 0 * * 1" # every Monday at 00:00 UTC
- cron: "0 0 * * 1" # every Monday at 00:00 UTC
workflow_dispatch:


jobs:
download:
runs-on: ubuntu-latest
steps:
- uses: actions/create-github-app-token@67018539274d69449ef7c02e8e71183d1719ab42 # v2.1.4
id: app-token
with:
app-id: ${{ vars.ELEMENTSINTERACTIVE_BOT_APP_ID }}
private-key: ${{ secrets.ELEMENTSINTERACTIVE_BOT_PRIVATE_KEY }}
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
fetch-depth: 0
token: ${{ steps.app-token.outputs.token }}
ref: ${{ github.head_ref }}
- name: Install uv
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2

- name: Install the project
run: uv sync --locked --only-group download

- name: Download packages from trusted sources
run: |
uv run --no-project dependencies/scripts/download_packages.py download pypi || echo 'Failed to download trusted pypi packages'
uv run --no-project dependencies/scripts/download_packages.py download npm || echo 'Failed to download trusted npm packages'

- name: Configure git
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"

- name: Push changes to repo
run: |
git add .
git commit -m "chore: Weekly update of trusted packages"
git push origin HEAD:main
- uses: actions/create-github-app-token@67018539274d69449ef7c02e8e71183d1719ab42 # v2.1.4
id: app-token
with:
app-id: ${{ vars.ELEMENTSINTERACTIVE_BOT_APP_ID }}
private-key: ${{ secrets.ELEMENTSINTERACTIVE_BOT_PRIVATE_KEY }}

- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
fetch-depth: 0
token: ${{ steps.app-token.outputs.token }}
ref: ${{ github.head_ref }}

- name: Install uv
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2

- name: Install the project
run: uv sync --locked --only-group download

- name: Download packages from trusted sources
run: |
uv run --no-project dependencies/scripts/download_packages.py download pypi || echo 'Failed to download trusted pypi packages'
uv run --no-project dependencies/scripts/download_packages.py download npm || echo 'Failed to download trusted npm packages' # Kept for backwards compatibility
uv run --no-project dependencies/scripts/download_packages.py download npm_formatted || echo 'Failed to download trusted npm_formatted packages'

- name: Configure git
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"

- name: Push changes to repo
run: |
git add .
git commit -m "chore: Weekly update of trusted packages"
git push origin HEAD:main
2 changes: 1 addition & 1 deletion dependencies/npm.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions dependencies/npm_formatted.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dependencies/pypi.json

Large diffs are not rendered by default.

128 changes: 96 additions & 32 deletions dependencies/scripts/download_packages.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import logging
from abc import ABC, abstractmethod
from collections.abc import Callable
from dataclasses import dataclass
from datetime import datetime
Expand All @@ -10,6 +11,8 @@
import click
import httpx
import stamina
from pydantic import BaseModel, Field
from typing_extensions import Self, override

logger = logging.getLogger("weekly_download")
logging.basicConfig(
Expand All @@ -19,6 +22,47 @@
)


# Directory (relative to the current working directory) that receives the
# per-ecosystem `<name>.json` output files.
DEPENDENCIES_DIR = "dependencies"
# URL the PyPI ecosystem configuration downloads its package list from.
TOP_PYPI_SOURCE = "https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json"
# URL the npm ecosystem configurations query for their package lists.
TOP_NPM_SOURCE = "https://packages.ecosyste.ms/api/v1/registries/npmjs.org/packages"


def _utc_now_iso() -> str:
    """Return the current UTC time as an ISO 8601 string."""
    return datetime.now(ZoneInfo("UTC")).isoformat()


class BaseDataInterface(BaseModel, ABC):
    """Base model for the data written to an ecosystem's JSON file.

    Every concrete layout carries a list of package names and the timestamp
    at which the instance was created, and knows how to build itself from a
    flat list of package names.
    """

    packages: list[str]
    # A factory is required here: a plain `datetime.now(...)` default would be
    # evaluated once, when the class body runs at import time, so every
    # instance would share that stale timestamp instead of its creation time.
    date: str = Field(default_factory=_utc_now_iso)

    @classmethod
    @abstractmethod
    def from_packages_list(cls, packages: list[str]) -> Self:
        """Build an instance from a flat list of package names."""


class SimpleDataInterface(BaseDataInterface):
    """Flat layout: every package name is stored unchanged in ``packages``."""

    @override
    @classmethod
    def from_packages_list(cls, packages: list[str]) -> Self:
        """Wrap *packages* as-is in a new instance."""
        return cls(packages=packages)


class NpmFormattedDataInterface(BaseDataInterface):
    """Layout that groups npm scoped packages by their scope.

    ``packages`` holds the names that are not scoped; ``namespaces`` maps
    each ``@scope`` to the list of package names found under it.
    """

    # contains `namespace` as key, `packages` as strings in a list.
    namespaces: dict[str, list[str]]

    @override
    @classmethod
    def from_packages_list(cls, packages: list[str]) -> Self:
        """Split *packages* into scoped and unscoped names.

        A name counts as scoped when it starts with ``@`` and contains a
        ``/``; it is split at the first ``/`` only, so any further slashes
        stay in the package part. A name that starts with ``@`` but has no
        ``/`` cannot be split and is kept in ``packages`` rather than
        aborting the whole run.
        """
        namespaces: dict[str, list[str]] = {}
        non_namespace_packages: list[str] = []

        for package in packages:
            # partition never raises, unlike unpacking the result of split("/").
            namespace, separator, package_name = package.partition("/")
            if package.startswith("@") and separator:
                namespaces.setdefault(namespace, []).append(package_name)
            else:
                non_namespace_packages.append(package)
        return cls(packages=non_namespace_packages, namespaces=namespaces)


def parse_npm(data: list[dict[str, Any]]) -> list[str]:
    """Return the ``name`` value of every record in *data*, in order."""
    names: list[str] = []
    for record in data:
        names.append(record["name"])
    return names

Expand All @@ -36,26 +80,39 @@ class Ecosystem:
url: str
params: dict[str, Any] | None
pages: int | None
parser: Callable[[dict[str, Any]], list[str]]
parser: Callable[[Any], list[str]]
data_interface: type[BaseDataInterface]


@dataclass(frozen=True)
class PypiEcosystem(Ecosystem):
url = "https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json"
params = None
pages = None
parser = parse_pypi

# PyPI configuration: fetched from TOP_PYPI_SOURCE with no query parameters
# and no pagination, parsed by `parse_pypi`, and stored with the flat
# `SimpleDataInterface` layout.
pypi_ecosystem = Ecosystem(
url=TOP_PYPI_SOURCE,
params=None,
pages=None,
parser=parse_pypi,
data_interface=SimpleDataInterface,
)

@dataclass(frozen=True)
class NpmEcosystem(Ecosystem):
url = "https://packages.ecosyste.ms/api/v1/registries/npmjs.org/packages"
params = {"per_page": 1000, "sort": "downloads"}
pages = 15
parser = parse_npm
# npm configuration: TOP_NPM_SOURCE is queried 150 times (one request per
# page) with 100 results per page sorted by downloads, i.e. up to 15,000
# names, parsed by `parse_npm` and stored with the flat `SimpleDataInterface`
# layout.
npm_ecosystem = Ecosystem(
url=TOP_NPM_SOURCE,
params={"per_page": 100, "sort": "downloads"},
pages=150,
parser=parse_npm,
data_interface=SimpleDataInterface,
)

# Same source, query parameters, page count and parser as `npm_ecosystem`;
# the only difference is the output layout, `NpmFormattedDataInterface`,
# which groups scoped packages by namespace.
npm_formatted_ecosystem = Ecosystem(
url=TOP_NPM_SOURCE,
params={"per_page": 100, "sort": "downloads"},
pages=150,
parser=parse_npm,
data_interface=NpmFormattedDataInterface,
)

ECOSYSTEMS = {"pypi": PypiEcosystem, "npm": NpmEcosystem}
# Registry of supported ecosystems, keyed by the name passed to the
# `download` command; the same name is used as the stem of the output file
# (`<name>.json`).
ECOSYSTEMS: dict[str, Ecosystem] = {
"pypi": pypi_ecosystem,
"npm": npm_ecosystem,
"npm_formatted": npm_formatted_ecosystem,
}


@click.group()
Expand All @@ -72,33 +129,38 @@ def entry_point() -> None:
def download(
ecosystem: str,
) -> None:
selected_ecosystem = ECOSYSTEMS[ecosystem]
if ecosystem not in ECOSYSTEMS:
raise click.BadParameter("Not a valid ecosystem")

if pages := selected_ecosystem.pages:
all_packages: list[str] = []
selected_ecosystem = ECOSYSTEMS[ecosystem]
all_packages: list[str] = []

for page in range(1, pages + 1):
params = selected_ecosystem.params or {}
n_pages = selected_ecosystem.pages or 1
for page in range(1, n_pages + 1):
params = selected_ecosystem.params or {}
if selected_ecosystem.pages:
params["page"] = page
all_packages.extend(get_packages(selected_ecosystem.url, selected_ecosystem.parser, params))
else:
all_packages = get_packages(selected_ecosystem.url, selected_ecosystem.parser, selected_ecosystem.params)

fpath = Path("dependencies") / f"{ecosystem}.json"
save_data_to_file(all_packages, fpath)
all_packages.extend(get_packages(selected_ecosystem.url, selected_ecosystem.parser, params))

fpath = Path(DEPENDENCIES_DIR) / f"{ecosystem}.json"
data = selected_ecosystem.data_interface.from_packages_list(all_packages)
save_data_to_file(data, fpath)


def get_packages(
base_url: str, parser: Callable[[dict[str, Any]], list[str]], params: dict[str, Any] | None = None
base_url: str,
parser: Callable[[dict[str, Any]], list[str]],
params: dict[str, Any] | None = None,
) -> list[str]:
for attempt in stamina.retry_context(
on=(httpx.TransportError, httpx.TimeoutException, ServerError),
attempts=5,
attempts=10,
wait_jitter=1,
wait_exp_base=2,
wait_max=8,
):
with attempt, httpx.Client(timeout=30) as client:
with attempt, httpx.Client(timeout=90) as client:
response = client.get(str(base_url), params=params)
try:
response.raise_for_status()
Expand All @@ -108,12 +170,14 @@ def get_packages(
return parser(response.json())


def save_data_to_file(all_packages: list[str], fpath: Path) -> None:
data = {"date": datetime.now(ZoneInfo("UTC")).isoformat(), "packages": all_packages}
def save_data_to_file(
data: BaseDataInterface,
fpath: Path,
) -> None:
with open(str(fpath), "w") as fp:
json.dump(data, fp)
json.dump(data.model_dump(), fp)

logger.info("Saved %d packages to `%s` file.", len(set(all_packages)), fpath)
logger.info("Saved packages to `%s` file.", fpath)


if __name__ == "__main__":
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ download = [
"click>=8.1.8",
"httpx>=0.28.1",
"stamina>=25.1.0",
"pydantic>=2.11.7,<3.0.0",

]
local = ["ipdb<1.0.0,>=0.13.9", "commitizen<5.0,>=2.38", "pdbpp<1.0.0,>=0.11.6"]

Expand Down
2 changes: 2 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading