Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[flake8]
# Maximum line length accepted by the linter.
max-line-length = 100
# Additional checks to skip: E203 and W503.
extend-ignore = E203, W503
31 changes: 31 additions & 0 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Builds the package with Poetry and uploads the contents of dist/ to PyPI.
name: Publish to PyPI

on:
  release:
    types: [published]
  workflow_dispatch: # Allow manual trigger

jobs:
  publish:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      id-token: write # For PyPI trusted publishing

    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install Poetry
        run: pipx install poetry

      # Writes the built distributions into dist/, which the next step uploads.
      - name: Build package
        run: poetry build

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          packages-dir: dist/
54 changes: 48 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,20 @@ Inspired by the historical figure [Juan Hispalense (siglo XII)](https://es.wikip

### Installation

**For users (from PyPI):**

```bash
pip install avendehut
```

Or with pipx (recommended for CLI tools):

```bash
pipx install avendehut
```

**For development:**

1. Clone the repository.
```bash
git clone https://github.com/khnumdev/avendehut.git
Expand All @@ -38,10 +52,11 @@ Inspired by the historical figure [Juan Hispalense (siglo XII)](https://es.wikip
```bash
poetry install
```
3. Set environment variables for OneDrive (if needed) by copying `.env.example` to `.env` and filling in your values:
- `ONEDRIVE_CLIENT_ID`
- `ONEDRIVE_CLIENT_SECRET`
- `SRC_FOLDER` (OneDrive folder path)
- `ONEDRIVE_TENANT_ID`
- `SRC_FOLDER` (OneDrive folder path or local path)
- `OUT_FOLDER` (local output path)

### OneDrive Setup
Expand All @@ -68,18 +83,45 @@ Inspired by the historical figure [Juan Hispalense (siglo XII)](https://es.wikip

### Usage

**From local folder:**

```bash
# Build a catalog from a local folder
poetry run avendehut build --src ./books --out ./dist
avendehut build --src ./books --out ./dist

# Open the generated HTML
poetry run avendehut open --out ./dist
avendehut open --out ./dist

# Search from the CLI
poetry run avendehut search --out ./dist --query "Dune" --tags "sci-fi,epub"
avendehut search --out ./dist --query "Dune" --tags "sci-fi,epub"

# Export catalog
poetry run avendehut export --src-out ./dist --format csv --out ./dist/catalog.csv
avendehut export --src-out ./dist --format csv --out ./dist/catalog.csv
```

**From OneDrive:**

First, set up your environment variables (see `.env.example`):
```bash
export ONEDRIVE_CLIENT_ID="your-client-id"
export ONEDRIVE_CLIENT_SECRET="your-client-secret"
export ONEDRIVE_TENANT_ID="consumers" # or your tenant ID
```

Then use OneDrive paths with the `onedrive:/` scheme:
```bash
# Build a catalog from OneDrive folder
avendehut build --src "onedrive:/Documents/Books" --out ./dist

# Watch OneDrive folder for changes
avendehut watch --src "onedrive:/Documents/Books" --out ./dist --interval 10
```

**Development mode (with Poetry):**

When developing, prefix commands with `poetry run`:
```bash
poetry run avendehut build --src ./books --out ./dist
```

### HTML Catalog Features
Expand Down
3 changes: 1 addition & 2 deletions avendehut/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
"""

# Public names of the package; each name is listed exactly once.
__all__ = [
    "__version__",
]

# Version string of the package.
__version__ = "0.1.0"

17 changes: 8 additions & 9 deletions avendehut/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@
# Top-level command group; "-h" and "-help" are accepted as aliases for "--help".
@click.group(context_settings={"help_option_names": ["-h", "-help", "--help"]})
# Adds click's version option, configured for the "avendehut" package.
@click.version_option(package_name="avendehut")
def main() -> None:
    """avendehut - build and search a local HTML catalog of books.

    Inspired by Juan Hispalense (Avendehut Hispanus).
    """
    # The group has no body of its own; subcommands are registered on it below.


# Register subcommands
Expand All @@ -35,9 +35,8 @@ def main() -> None:


if __name__ == "__main__":  # pragma: no cover
    # Direct execution: run the CLI group; a click error that reaches this
    # point is printed in red and its exit code (1 if it has none) is used as
    # the process exit status.
    try:
        main()
    except click.ClickException as exc:  # pragma: no cover
        console.print(f"[red]Error:[/red] {exc}")
        sys.exit(getattr(exc, "exit_code", 1))
173 changes: 104 additions & 69 deletions avendehut/commands/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from ..utils.metadata import extract_catalog_item
from ..utils.manifest import Manifest, ManifestFile, load_manifest, write_manifest
from ..utils.htmlgen import copy_template_and_write_data
from ..utils.onedrive import is_onedrive_path, list_onedrive_files


console = Console()
Expand All @@ -22,80 +23,114 @@


def iter_source_files(src: Path) -> Iterable[Path]:
    """Yield every supported book file found under ``src``.

    ``src`` is either a local directory or an ``onedrive:/...`` location
    wrapped in a ``Path``. OneDrive sources are enumerated with
    ``list_onedrive_files``; local sources are walked recursively with
    ``os.walk``. In both cases only paths whose lower-cased suffix is in
    ``SUPPORTED_EXTENSIONS`` are yielded.
    """
    # as_posix() keeps forward slashes on every platform. str() on a Windows
    # path would turn "onedrive:/Books" into "onedrive:\Books", which may no
    # longer be recognised as a OneDrive location.
    src_str = src.as_posix()
    if is_onedrive_path(src_str):
        candidates: Iterable[Path] = list_onedrive_files(src_str)
    else:
        candidates = (
            Path(root) / name for root, _dirs, files in os.walk(src) for name in files
        )

    # One shared extension filter for both kinds of source.
    for path in candidates:
        if path.suffix.lower() in SUPPORTED_EXTENSIONS:
            yield path


def compute_file_hash(path: Path, *, chunk_size: int = 1024 * 1024) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is opened in binary mode and read ``chunk_size`` bytes at a
    time, so the whole file is never held in memory at once.

    Args:
        path: File to hash.
        chunk_size: Number of bytes to read per iteration (default 1 MiB).

    Returns:
        The digest as a hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
        OSError: If the file cannot be opened or read.
    """
    # A zero-sized read returns b"" immediately and would silently produce the
    # digest of an empty file, so reject it up front.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    sha = hashlib.sha256()
    with path.open("rb") as f:
        while chunk := f.read(chunk_size):
            sha.update(chunk)
    return sha.hexdigest()


@click.command(context_settings={"help_option_names": ["-h", "-help", "--help"]})
@click.option("--src", type=str, required=True, help="Source folder (local path or onedrive:/path)")
@click.option(
    "--out", type=click.Path(file_okay=False, path_type=Path), required=True, help="Output folder"
)
@click.option(
    "--format",
    "format_",
    type=click.Choice(["html"], case_sensitive=False),
    default="html",
    show_default=True,
)
@click.option("--force", is_flag=True, help="Reprocess all files, ignoring manifest")
def build_command(src: str, out: Path, format_: str, force: bool) -> None:
    """Scan source, process new/updated books, and generate HTML site."""
    # Parameters (bound by click from the options above):
    #   src     -- local directory or an "onedrive:/..." location.
    #   out     -- output folder; created when missing.
    #   format_ -- output format; "html" is the only choice and it is not read here.
    #   force   -- when set, the previous manifest is ignored and every file is reprocessed.
    # Raises click.ClickException when a local ``src`` is missing or is not a directory.
    src_path = Path(src)
    # Only local sources can be checked on disk; OneDrive locations skip validation.
    if not is_onedrive_path(src):
        if not src_path.exists():
            raise click.ClickException(f"Source path does not exist: {src}")
        if not src_path.is_dir():
            raise click.ClickException(f"Source path must be a directory: {src}")

    out.mkdir(parents=True, exist_ok=True)

    manifest_path = out / ".manifest.json"
    previous_manifest = None if force else load_manifest(manifest_path)
    previous_index = {f.path_rel: f for f in (previous_manifest.files if previous_manifest else [])}

    source_files = list(iter_source_files(src_path))
    to_process: List[Path] = []
    manifest_files: List[ManifestFile] = []

    # A file whose size and mtime match its previous manifest entry is treated
    # as unchanged and keeps that entry; everything else is queued for processing.
    # NOTE(review): this stats every path and takes it relative to ``src_path``;
    # confirm that the paths produced for OneDrive sources support both.
    for file_path in source_files:
        stat = file_path.stat()
        path_rel = str(file_path.relative_to(src_path))
        prev = previous_index.get(path_rel)
        if prev and prev.size_bytes == stat.st_size and prev.mtime_ns == stat.st_mtime_ns:
            manifest_files.append(prev)
            continue
        to_process.append(file_path)

    # Items parsed in this run, keyed by relative path, so the catalog step
    # below does not have to parse the same file a second time.
    fresh_items = {}
    with Progress() as progress:
        task = progress.add_task("Processing files", total=len(to_process))
        for file_path in to_process:
            try:
                path_rel = str(file_path.relative_to(src_path))
                item = extract_catalog_item(src_path, file_path)

                stat = file_path.stat()
                sha256 = compute_file_hash(file_path)
                manifest_files.append(
                    ManifestFile(
                        path_rel=path_rel,
                        size_bytes=stat.st_size,
                        mtime_ns=stat.st_mtime_ns,
                        sha256=sha256,
                    )
                )
                fresh_items[path_rel] = item
            except Exception as exc:  # pragma: no cover - rare edge cases
                # Best effort: a file that fails is reported and left out of the manifest.
                console.print(f"[yellow]Warning[/yellow]: Failed to process {file_path}: {exc}")
            finally:
                progress.advance(task)

    # Build the catalog in manifest order. Files processed above reuse their
    # freshly extracted item; unchanged files are re-read from the source.
    catalog = []
    for mf in manifest_files:
        item = fresh_items.get(mf.path_rel)
        if item is not None:
            catalog.append(item)
            continue
        file_path = src_path / mf.path_rel
        try:
            catalog.append(extract_catalog_item(src_path, file_path))
        except Exception as exc:  # pragma: no cover
            console.print(f"[yellow]Warning[/yellow]: Failed to refresh {file_path}: {exc}")

    copy_template_and_write_data(out, catalog)

    manifest = Manifest(
        version="1", generated_at=datetime.now(timezone.utc).isoformat(), files=manifest_files
    )
    write_manifest(manifest_path, manifest)

    console.print(f"[green]Build complete[/green]: {out}")
25 changes: 12 additions & 13 deletions avendehut/commands/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,15 @@
@click.command(context_settings={"help_option_names": ["-h", "-help", "--help"]})
@click.option("--out", type=click.Path(file_okay=False, path_type=Path), required=True)
def clean_command(out: Path) -> None:
    """Delete generated output and manifest."""
    # Empties ``out`` but keeps the directory itself and any ".gitkeep" entry.
    if not out.exists():
        console.print(f"[yellow]Nothing to clean:[/yellow] {out}")
        return
    for child in out.iterdir():
        if child.name == ".gitkeep":
            continue
        # is_dir() follows symlinks, but shutil.rmtree() refuses a symlink to a
        # directory; such links are unlinked so their target is left untouched.
        if child.is_dir() and not child.is_symlink():
            shutil.rmtree(child)
        else:
            child.unlink(missing_ok=True)
    console.print(f"[green]Cleaned:[/green] {out}")
Loading