Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,18 @@ jobs:

- name: Run tests
run: uv run pytest tests

test-weekly-download:
runs-on: ubuntu-latest
if: "!startsWith(github.event.head_commit.message, 'bump:')"
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

- name: Install uv
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2

- name: Install the dependencies
run: uv sync --locked --group download --group dev --python 3.14

- name: Run tests
run: uv run pytest dependencies/tests
66 changes: 35 additions & 31 deletions .github/workflows/weekly_download.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,46 @@ name: Weekly download

on:
schedule:
- cron: "0 0 * * 1" # every Monday at 00:00 UTC
- cron: "0 0 * * 1" # every Monday at 00:00 UTC
workflow_dispatch:


jobs:
download:
runs-on: ubuntu-latest
steps:
- uses: actions/create-github-app-token@67018539274d69449ef7c02e8e71183d1719ab42 # v2.1.4
id: app-token
with:
app-id: ${{ vars.ELEMENTSINTERACTIVE_BOT_APP_ID }}
private-key: ${{ secrets.ELEMENTSINTERACTIVE_BOT_PRIVATE_KEY }}
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
fetch-depth: 0
token: ${{ steps.app-token.outputs.token }}
ref: ${{ github.head_ref }}
- name: Install uv
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
- uses: actions/create-github-app-token@67018539274d69449ef7c02e8e71183d1719ab42 # v2.1.4
id: app-token
with:
app-id: ${{ vars.ELEMENTSINTERACTIVE_BOT_APP_ID }}
private-key: ${{ secrets.ELEMENTSINTERACTIVE_BOT_PRIVATE_KEY }}
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
fetch-depth: 0
token: ${{ steps.app-token.outputs.token }}
ref: ${{ github.head_ref }}
- name: Install uv
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2

- name: Install the project
run: uv sync --locked --only-group download --python 3.14

- name: Download Pypi packages
continue-on-error: true
run: |
uv run --no-project dependencies/scripts/download_packages.py download pypi

- name: Download NPM packages
continue-on-error: true
run: |
uv run --no-project dependencies/scripts/download_packages.py download npm

- name: Install the project
run: uv sync --locked --only-group download
- name: Configure git
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"

- name: Download packages from trusted sources
run: |
uv run --no-project dependencies/scripts/download_packages.py download pypi || echo 'Failed to download trusted pypi packages'
uv run --no-project dependencies/scripts/download_packages.py download npm || echo 'Failed to download trusted npm packages'

- name: Configure git
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"

- name: Push changes to repo
run: |
git add .
git commit -m "chore: Weekly update of trusted packages"
git push origin HEAD:main
- name: Push changes to repo
run: |
git add .
git commit -m "chore: Weekly update of trusted packages"
git push origin HEAD:main
Empty file.
139 changes: 104 additions & 35 deletions dependencies/scripts/download_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,65 @@
)


class ServerError(Exception):
    """Raised when a response carries an HTTP 5xx (server-side) status."""


class InvalidJSONError(Exception):
    """Raised when received JSON cannot be decoded or lacks the expected structure."""


# Directory name
DEPENDENCIES_DIR = "dependencies"
"""Directory name where dependency files will be saved."""

# Sources
TOP_PYPI_SOURCE = "https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json"
"""URL for fetching top PyPI packages data."""

TOP_NPM_SOURCE = "https://packages.ecosyste.ms/api/v1/registries/npmjs.org/packages"
"""URL for fetching top npm packages data from ecosyste.ms."""

# Retry constants
# RETRY_ON and the RETRY_* values below are passed to stamina.retry_context()
# inside get_packages().
RETRY_ON = (httpx.TransportError, httpx.TimeoutException, ServerError)
"""Tuple of exceptions that should trigger retry attempts."""

RETRY_ATTEMPTS = 15
"""Maximum number of retry attempts for failed requests."""

RETRY_WAIT_JITTER = 1
"""Random jitter factor for retry wait times."""

RETRY_WAIT_EXP_BASE = 2
"""Exponential backoff base multiplier for retry wait times."""

RETRY_WAIT_MAX = 8
"""Maximum wait time between retry attempts in seconds."""

# Used as the timeout of the shared httpx.Client created in _run().
TIMEOUT = 90
"""HTTP request timeout in seconds."""


def parse_npm(data: list[dict[str, Any]]) -> set[str]:
    """Extract the package names from npm package data.

    Args:
        data: Decoded JSON payload, expected to be a list of objects that each
            carry a ``"name"`` key.

    Returns:
        The set of package names found in ``data``.

    Raises:
        InvalidJSONError: If ``data`` does not have the expected shape, i.e. an
            entry lacks ``"name"`` or the payload is not a list of mappings.
    """
    try:
        return {x["name"] for x in data}
    except (KeyError, TypeError) as e:
        # TypeError covers payloads that are not a list of mappings at all
        # (for example an error object, ``None`` or a list of strings).
        raise InvalidJSONError from e


def parse_pypi(data: dict[str, Any]) -> set[str]:
    """Extract the package names from PyPI package data.

    Args:
        data: Decoded JSON payload, expected to be an object whose ``"rows"``
            entry is a list of objects that each carry a ``"project"`` key.

    Returns:
        The set of project names found in ``data["rows"]``.

    Raises:
        InvalidJSONError: If ``data`` does not have the expected shape, i.e.
            ``"rows"`` or ``"project"`` is missing, or the payload/rows are
            not mappings.
    """
    try:
        return {row["project"] for row in data["rows"]}
    except (KeyError, TypeError) as e:
        # TypeError covers payloads that are not mappings at all (for example
        # a top-level list, ``None`` or rows that are plain strings).
        raise InvalidJSONError from e


@dataclass(frozen=True)
class Ecosystem:
"""Configuration for a package ecosystem (PyPI, npm, etc.)."""

url: str
parser: Callable[[Any], set[str]]
params: dict[str, Any] = field(default_factory=dict)
Expand All @@ -49,20 +88,51 @@ class Ecosystem:
url=TOP_PYPI_SOURCE,
parser=parse_pypi,
)
"""Ecosystem configuration for PyPI packages."""

# Paginated source: _run() requests pages 1..150, asking for 100 entries per
# page via `per_page`, i.e. up to 15,000 package entries in total.
npm_ecosystem = Ecosystem(
url=TOP_NPM_SOURCE,
parser=parse_npm,
params={"per_page": 100, "sort": "downloads"},
pages=150,
)
"""Ecosystem configuration for npm packages with pagination."""


ECOSYSTEMS = {"pypi": pypi_ecosystem, "npm": npm_ecosystem}
"""Dictionary mapping ecosystem names to their configurations."""


def get_params(params: dict[str, Any] | None, page: int | None) -> dict[str, Any]:
"""Combine base parameters with page parameter if provided."""
new_params: dict[str, Any] = {}
if params:
new_params |= params

if page:
new_params["page"] = page

return new_params


def _run(ecosystem: str) -> None:
    """Download the package names for ``ecosystem`` and save them to a JSON file.

    Fetches every configured page of the ecosystem through one shared HTTP
    client, merges the parsed names into a set, and writes the result to
    ``<DEPENDENCIES_DIR>/<ecosystem>.json``.

    Args:
        ecosystem: Key into ``ECOSYSTEMS`` selecting the source to download.

    Raises:
        KeyError: If ``ecosystem`` is not a key of ``ECOSYSTEMS``.
    """
    selected_ecosystem = ECOSYSTEMS[ecosystem]
    all_packages: set[str] = set()

    # Ecosystems without a ``pages`` value are fetched with exactly one request.
    n_pages = selected_ecosystem.pages or 1
    with httpx.Client(timeout=TIMEOUT) as client:
        for page in range(1, n_pages + 1):
            # Send a page number only to sources that are actually paginated.
            params = get_params(selected_ecosystem.params, page if selected_ecosystem.pages else None)
            all_packages.update(get_packages(client, selected_ecosystem.url, selected_ecosystem.parser, params))

    fpath = Path(DEPENDENCIES_DIR) / f"{ecosystem}.json"
    # Sort before saving: iteration order of a set of strings changes between
    # interpreter runs, which would otherwise reorder the whole file each time.
    save_data_to_file(sorted(all_packages), fpath)


@click.group()
def entry_point() -> None:
    """Entry point for the CLI application."""
    # Intentionally empty: the function only exists to be wrapped by click.group().


Expand All @@ -75,45 +145,44 @@ def entry_point() -> None:
def download(
ecosystem: str,
) -> None:
"""Download packages for the specified ecosystem."""
if ecosystem not in ECOSYSTEMS:
raise click.BadParameter("Not a valid ecosystem")

selected_ecosystem = ECOSYSTEMS[ecosystem]
all_packages: set[str] = set()

n_pages = selected_ecosystem.pages or 1
params = selected_ecosystem.params.copy()
for page in range(1, n_pages + 1):
if selected_ecosystem.pages:
params["page"] = page

all_packages.update(get_packages(selected_ecosystem.url, selected_ecosystem.parser, params))

fpath = Path(DEPENDENCIES_DIR) / f"{ecosystem}.json"
save_data_to_file(list(all_packages), fpath)
return _run(ecosystem)


def get_packages(
    client: httpx.Client,
    base_url: str,
    parser: Callable[[dict[str, Any]], set[str]],
    params: dict[str, Any] | None = None,
) -> set[str]:
    """Fetch ``base_url`` and return the package names parsed from its JSON body.

    Exceptions listed in ``RETRY_ON`` (transport errors, timeouts and HTTP 5xx
    responses) are retried through ``stamina.retry_context`` using the
    ``RETRY_*`` settings. Every other failure propagates immediately.

    Args:
        client: HTTP client used to perform the request.
        base_url: URL to request.
        parser: Callable that turns the decoded JSON into a set of names.
        params: Optional query parameters sent with the request.

    Returns:
        The set of package names produced by ``parser``.

    Raises:
        ServerError: If the server answered with a 5xx status (retried first).
        httpx.HTTPStatusError: If the response has any other error status,
            e.g. 4xx; these are not retried.
        InvalidJSONError: If the response body is not valid JSON (``parser``
            may raise it as well).
    """
    for attempt in stamina.retry_context(
        on=RETRY_ON,
        attempts=RETRY_ATTEMPTS,
        wait_jitter=RETRY_WAIT_JITTER,
        wait_exp_base=RETRY_WAIT_EXP_BASE,
        wait_max=RETRY_WAIT_MAX,
    ):
        with attempt:
            response = client.get(str(base_url), params=params)
            try:
                response.raise_for_status()
            except httpx.HTTPStatusError as e:
                if e.response.is_server_error:
                    # Translate 5xx into the retryable ServerError.
                    raise ServerError from e
                # Any other error status is not retryable. Re-raise instead of
                # falling through and parsing an error body as package data.
                raise
            try:
                json_data = response.json()
            except json.JSONDecodeError as e:
                raise InvalidJSONError from e

            return parser(json_data)


def save_data_to_file(all_packages: list[str], fpath: Path) -> None:
    """Write the package list, stamped with the current UTC time, to ``fpath`` as JSON."""
    timestamp = datetime.now(ZoneInfo("UTC")).isoformat()
    payload = {"date": timestamp, "packages": all_packages}
    with open(str(fpath), "w") as out_file:
        json.dump(payload, out_file)
Expand Down
Empty file.
Loading
Loading