Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
enable-cache: true
- run: uv python install # Version from pyproject.toml project.requires-python
- run: uvx --with=beautifulsoup4,dateparser,httpx,pydantic,pynntp pytest
- run: uv run scripts/nntp_io.py || true # TODO(@cclauss): Remove `|| true` after fixing nntp_io.py
- run: uv run scripts/nntp_io.py || true # TODO(@cclauss): Remove `|| true` when that script is fixed

front-end:
strategy:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/sphinx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
# - run: shopt -s globstar && rst2myst convert docs/**/*.rst
- run: uv sync --group docs
- run: uv run sphinx-build -c docs . docs/_build/html
- uses: actions/upload-pages-artifact@v3
- uses: actions/upload-pages-artifact@v4
with:
path: docs/_build/html
- run: echo ${{ github.event_name }}
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ repos:
- tomli

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.12.9
rev: v0.12.10
hooks:
- id: ruff-check
- id: ruff-format
Expand Down
77 changes: 77 additions & 0 deletions server/plugins/collage_photos/get_photos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "beautifulsoup4",
# "httpx",
# ]
# ///

import asyncio
from collections.abc import AsyncGenerator
from pathlib import Path

import httpx
from bs4 import BeautifulSoup

# NOTE(review): the plugin directory is "collage_photos" but every constant
# says "college" -- presumably a typo in one or the other; confirm which
# spelling is intended before shipping.
PLUGIN_NAME = "college_photos"
IMAGE_DIR = Path(__file__).parent / "college_photos_images"  # Download target dir.
SEARCH_URL = "https://completecollegephotolibrary.org/?s=laptop"  # Results page scraped.
NUM_IMAGES = 10  # Default cap on images fetched per run.


async def fetch_image_urls(
    query_url: str = SEARCH_URL, max_images: int = NUM_IMAGES
) -> AsyncGenerator[str, None]:
    """Yield up to *max_images* image URLs scraped from the query results page.

    Only ``<img>`` tags whose ``src`` points at collegephotolibrary.org and
    ends in a common raster extension (.jpg/.jpeg/.png) are yielded.

    Args:
        query_url: Search-results page to scrape.
        max_images: Maximum number of URLs to yield; values <= 0 yield nothing.

    Raises:
        httpx.HTTPStatusError: If the results page returns an error status.
    """
    # Guard first: the original post-yield check emitted one URL even for
    # max_images == 0.
    if max_images <= 0:
        return
    async with httpx.AsyncClient() as client:
        resp = await client.get(query_url)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")
        count = 0
        for img in soup.find_all("img"):
            src = img.get("src")
            if (
                src
                and "collegephotolibrary.org" in src
                and src.lower().endswith((".jpg", ".jpeg", ".png"))
            ):
                yield src
                count += 1
                if count >= max_images:
                    return


async def download_images(
    image_urls: list[str], dest_dir: Path = IMAGE_DIR
) -> list[str]:
    """Download *image_urls* concurrently into *dest_dir*.

    Creates *dest_dir* if needed. Each file is named
    ``college_photo_<index>.<ext>`` where the extension is taken from the URL
    (falling back to ``.jpg``). Failures are printed and skipped.

    Args:
        image_urls: URLs to fetch.
        dest_dir: Directory to write the files into.

    Returns:
        Paths (as strings) of the files that downloaded successfully.
    """
    dest_dir.mkdir(parents=True, exist_ok=True)

    async def download_one(i: int, url: str) -> str | None:
        """Fetch one URL to disk; return its path, or None on any failure."""
        try:
            (resp := await client.get(url, timeout=10)).raise_for_status()
            ext = Path(url).suffix or ".jpg"
            # "07_" format spec: zero-padded to width 7 with underscore
            # grouping, e.g. index 0 -> "000_001".
            filename = f"college_photo_{i + 1:07_}{ext}"
            path = dest_dir / filename
            path.write_bytes(resp.content)
            return str(path)
        except Exception as ex:  # noqa: BLE001  # Best-effort: skip bad URLs.
            print(f"Failed to download {url}: {ex}")
            return None

    async with httpx.AsyncClient() as client:
        tasks = [download_one(i, url) for i, url in enumerate(image_urls)]
        results = await asyncio.gather(*tasks)
        # NOTE: the original had an unreachable `return None` after this
        # return (dead code contradicting the list[str] annotation); removed.
        return [result for result in results if result]


async def create_college_photos_from_laptop_query() -> list[str]:
    """Fetch and save up to 10 images from the default laptop search query.

    Returns the string paths of every image saved to disk.
    """
    found_urls = [image_url async for image_url in fetch_image_urls()]
    saved_paths = await download_images(found_urls)
    return saved_paths


if __name__ == "__main__":
    # Run the async entry point and report what landed on disk.
    downloaded_paths = asyncio.run(create_college_photos_from_laptop_query())
    print("Downloaded images:", downloaded_paths)