Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
# OneDrive / Microsoft Graph configuration
ONEDRIVE_CLIENT_ID= # TODO: set your Azure app client ID
ONEDRIVE_CLIENT_SECRET= # TODO: set your Azure app client secret (if used)
ONEDRIVE_TENANT_ID=consumers # TODO: 'consumers' for personal Microsoft accounts
ONEDRIVE_CLIENT_ID=
ONEDRIVE_CLIENT_SECRET=
ONEDRIVE_TENANT_ID=consumers

# When using OneDrive source
SRC_FOLDER=onedrive:/Documents/Books # TODO: OneDrive path or local folder

# Output folder for build artifacts
OUT_FOLDER=dist # TODO: output directory
# Example variables when using OneDrive or local sources
# SRC_FOLDER can be either a local path or a OneDrive path like `onedrive:/Documents/Books`
SRC_FOLDER=onedrive:/Documents/Books
OUT_FOLDER=dist

14 changes: 6 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,21 +38,17 @@ Inspired by the historical figure [Juan Hispalense (siglo XII)](https://es.wikip
```bash
poetry install
```
3. Set environment variables for OneDrive (if needed).
3. (Optional) Copy `.env.example` to `.env` and fill in values if you plan to use OneDrive.
- `ONEDRIVE_CLIENT_ID`
- `ONEDRIVE_CLIENT_SECRET`
- `SRC_FOLDER` (OneDrive folder path)
- `OUT_FOLDER` (local output path)
- `ONEDRIVE_TENANT_ID`

### OneDrive Setup

- If `--src` points to a OneDrive path, the tool uses the Microsoft Graph API.
- Required environment variables:
- OneDrive integration uses the Microsoft Graph API. See `.env.example` for required variables:
- `ONEDRIVE_CLIENT_ID`
- `ONEDRIVE_CLIENT_SECRET`
- `ONEDRIVE_TENANT_ID`
- `SRC_FOLDER` (OneDrive folder path)
- `OUT_FOLDER` (local output path)
- `ONEDRIVE_TENANT_ID` (use `consumers` for personal accounts)
- Register an app and configure permissions by following Microsoft's official docs: [Register an application](https://learn.microsoft.com/en-us/graph/auth-register-app-v2).

### CLI Commands
Expand All @@ -66,6 +62,8 @@ Inspired by the historical figure [Juan Hispalense (siglo XII)](https://es.wikip
| `avendehut search` | CLI search in local index. | `--query "<text>"`, `--tags "<tag1,tag2>"` |
| `avendehut export` | Exports catalog to CSV or JSON. | `--out <file>`, `--format csv\|json` |

All commands support `-h`, `-help`, and `--help` for usage and options.

### Usage

```bash
Expand Down
28 changes: 20 additions & 8 deletions avendehut/commands/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,25 +38,37 @@ def compute_file_hash(path: Path) -> str:


@click.command(context_settings={"help_option_names": ["-h", "-help", "--help"]})
@click.option("--src", type=click.Path(exists=True, file_okay=False, path_type=Path), required=True, help="Source folder (local path)")
@click.option("--src", type=str, required=True, help="Source folder (local path or onedrive:/path)")
@click.option("--out", type=click.Path(file_okay=False, path_type=Path), required=True, help="Output folder")
@click.option("--format", "format_", type=click.Choice(["html"], case_sensitive=False), default="html", show_default=True)
@click.option("--force", is_flag=True, help="Reprocess all files, ignoring manifest")
def build_command(src: Path, out: Path, format_: str, force: bool) -> None:
def build_command(src: str, out: Path, format_: str, force: bool) -> None:
"""Scan source, process new/updated books, and generate HTML site."""
out.mkdir(parents=True, exist_ok=True)

manifest_path = out / ".manifest.json"
previous_manifest = None if force else load_manifest(manifest_path)
previous_index = {f.path_rel: f for f in (previous_manifest.files if previous_manifest else [])}

source_files = list(iter_source_files(src))
# Resolve source files
from ..utils.onedrive import is_onedrive_path # local import to avoid heavy deps during tests

if is_onedrive_path(src): # pragma: no cover - network path handling not covered by tests
# For now, instruct users to sync/copy files locally before building
raise click.ClickException(
"OneDrive sources are not processed directly yet. Sync or copy files locally and pass a local --src path."
)
src_path = Path(src)
if not src_path.exists() or not src_path.is_dir():
raise click.ClickException(f"--src must be an existing directory: {src}")

source_files = list(iter_source_files(src_path))
to_process: List[Path] = []
manifest_files: List[ManifestFile] = []

for file_path in source_files:
stat = file_path.stat()
path_rel = str(file_path.relative_to(src))
path_rel = str(file_path.relative_to(src_path))
prev = previous_index.get(path_rel)
if prev and prev.size_bytes == stat.st_size and prev.mtime_ns == stat.st_mtime_ns:
manifest_files.append(prev)
Expand All @@ -68,13 +80,13 @@ def build_command(src: Path, out: Path, format_: str, force: bool) -> None:
task = progress.add_task("Processing files", total=len(to_process))
for file_path in to_process:
try:
item = extract_catalog_item(src, file_path)
item = extract_catalog_item(src_path, file_path)
items.append(item)

stat = file_path.stat()
sha256 = compute_file_hash(file_path)
manifest_files.append(
ManifestFile(path_rel=str(file_path.relative_to(src)), size_bytes=stat.st_size, mtime_ns=stat.st_mtime_ns, sha256=sha256)
ManifestFile(path_rel=str(file_path.relative_to(src_path)), size_bytes=stat.st_size, mtime_ns=stat.st_mtime_ns, sha256=sha256)
)
except Exception as exc: # pragma: no cover - rare edge cases
console.print(f"[yellow]Warning[/yellow]: Failed to process {file_path}: {exc}")
Expand All @@ -86,9 +98,9 @@ def build_command(src: Path, out: Path, format_: str, force: bool) -> None:
# This keeps logic deterministic while still skipping heavy parsing of unchanged files.
catalog = []
for mf in manifest_files:
file_path = src / mf.path_rel
file_path = src_path / mf.path_rel
try:
catalog.append(extract_catalog_item(src, file_path))
catalog.append(extract_catalog_item(src_path, file_path))
except Exception as exc: # pragma: no cover
console.print(f"[yellow]Warning[/yellow]: Failed to refresh {file_path}: {exc}")

Expand Down
4 changes: 3 additions & 1 deletion avendehut/utils/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from pathlib import Path
from typing import List, Optional

from ebooklib import epub # type: ignore
from pypdf import PdfReader # type: ignore


Expand All @@ -24,6 +23,9 @@ def _iso(ts: float) -> str:


def _extract_epub(path: Path) -> tuple[str, List[str], Optional[int], Optional[str]]:
# Lazy import to avoid requiring ebooklib/lxml unless actually processing EPUB files
from ebooklib import epub # type: ignore

book = epub.read_epub(str(path))
title = (book.get_metadata("DC", "title") or [["", {}]])[0][0] or path.stem
authors = [a[0] for a in (book.get_metadata("DC", "creator") or []) if a and a[0]] or []
Expand Down
16 changes: 12 additions & 4 deletions avendehut/utils/onedrive.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@

import os
from pathlib import Path
from typing import Generator, Iterable, List
from typing import Iterable

from msgraph import GraphServiceClient # type: ignore
from azure.identity import ClientSecretCredential # type: ignore
try: # Optional imports; only needed when actually listing OneDrive files
from msgraph import GraphServiceClient # type: ignore
from azure.identity import ClientSecretCredential # type: ignore
except Exception: # pragma: no cover - allow module import without heavy deps
GraphServiceClient = object # type: ignore[assignment]
ClientSecretCredential = object # type: ignore[assignment]


def is_onedrive_path(path: str) -> bool:
Expand All @@ -19,7 +23,11 @@ def ensure_onedrive_env() -> None:
raise RuntimeError(f"Missing OneDrive environment variables: {', '.join(missing)}")


def _get_graph_client() -> GraphServiceClient:
def _get_graph_client() -> "GraphServiceClient": # type: ignore[name-defined]
# Lazy import inside function to avoid requiring these deps unless needed
from msgraph import GraphServiceClient # type: ignore
from azure.identity import ClientSecretCredential # type: ignore

tenant_id = os.environ["ONEDRIVE_TENANT_ID"]
client_id = os.environ["ONEDRIVE_CLIENT_ID"]
client_secret = os.environ["ONEDRIVE_CLIENT_SECRET"]
Expand Down