diff --git a/.env.example b/.env.example index c80badb..dd1d6a9 100644 --- a/.env.example +++ b/.env.example @@ -1,11 +1,10 @@ # OneDrive / Microsoft Graph configuration -ONEDRIVE_CLIENT_ID= # TODO: set your Azure app client ID -ONEDRIVE_CLIENT_SECRET= # TODO: set your Azure app client secret (if used) -ONEDRIVE_TENANT_ID=consumers # TODO: 'consumers' for personal Microsoft accounts +ONEDRIVE_CLIENT_ID= +ONEDRIVE_CLIENT_SECRET= +ONEDRIVE_TENANT_ID=consumers -# When using OneDrive source -SRC_FOLDER=onedrive:/Documents/Books # TODO: OneDrive path or local folder - -# Output folder for build artifacts -OUT_FOLDER=dist # TODO: output directory +# Example variables when using OneDrive or local sources +# SRC_FOLDER can be either a local path or a OneDrive path like `onedrive:/Documents/Books` +SRC_FOLDER=onedrive:/Documents/Books +OUT_FOLDER=dist diff --git a/README.md b/README.md index b21b29c..ea3898f 100644 --- a/README.md +++ b/README.md @@ -38,21 +38,17 @@ Inspired by the historical figure [Juan Hispalense (siglo XII)](https://es.wikip ```bash poetry install ``` -3. Set environment variables for OneDrive (if needed). +3. (Optional) Copy `.env.example` to `.env` and fill in values if you plan to use OneDrive. - `ONEDRIVE_CLIENT_ID` - `ONEDRIVE_CLIENT_SECRET` - - `SRC_FOLDER` (OneDrive folder path) - - `OUT_FOLDER` (local output path) + - `ONEDRIVE_TENANT_ID` ### OneDrive Setup -- If `--src` points to a OneDrive path, the tool uses Microsoft Graph API. -- Required environment variables: +- OneDrive integration uses Microsoft Graph API. See `.env.example` for required variables: - `ONEDRIVE_CLIENT_ID` - `ONEDRIVE_CLIENT_SECRET` - - `ONEDRIVE_TENANT_ID` - - `SRC_FOLDER` (OneDrive folder path) - - `OUT_FOLDER` (local output path) + - `ONEDRIVE_TENANT_ID` (use `consumers` for personal accounts) - Register an app and configure permissions by following Microsoft's official docs: [Register an application](https://learn.microsoft.com/en-us/graph/auth-register-app-v2). ### CLI Commands @@ -66,6 +62,8 @@ Inspired by the historical figure [Juan Hispalense (siglo XII)](https://es.wikip | `avendehut search` | CLI search in local index. | `--query ""`, `--tags ""` | | `avendehut export` | Exports catalog to CSV or JSON. | `--out `, `--format csv|json` | +All commands support `-h`, `-help`, and `--help` for usage and options. + ### Usage ```bash diff --git a/avendehut/commands/build.py b/avendehut/commands/build.py index 439b01d..1f60148 100644 --- a/avendehut/commands/build.py +++ b/avendehut/commands/build.py @@ -38,11 +38,11 @@ def compute_file_hash(path: Path) -> str: @click.command(context_settings={"help_option_names": ["-h", "-help", "--help"]}) -@click.option("--src", type=click.Path(exists=True, file_okay=False, path_type=Path), required=True, help="Source folder (local path)") +@click.option("--src", type=str, required=True, help="Source folder (local path or onedrive:/path)") @click.option("--out", type=click.Path(file_okay=False, path_type=Path), required=True, help="Output folder") @click.option("--format", "format_", type=click.Choice(["html"], case_sensitive=False), default="html", show_default=True) @click.option("--force", is_flag=True, help="Reprocess all files, ignoring manifest") -def build_command(src: Path, out: Path, format_: str, force: bool) -> None: +def build_command(src: str, out: Path, format_: str, force: bool) -> None: """Scan source, process new/updated books, and generate HTML site.""" out.mkdir(parents=True, exist_ok=True) @@ -50,13 +50,25 @@ def build_command(src: Path, out: Path, format_: str, force: bool) -> None: previous_manifest = None if force else load_manifest(manifest_path) previous_index = {f.path_rel: f for f in (previous_manifest.files if previous_manifest else [])} - source_files = list(iter_source_files(src)) + # Resolve source files + from ..utils.onedrive import is_onedrive_path # local import to avoid heavy deps during tests + + if is_onedrive_path(src): # pragma: no cover - network path handling not covered by tests + # For now, instruct users to sync/copy files locally before building + raise click.ClickException( + "OneDrive sources are not processed directly yet. Sync or copy files locally and pass a local --src path." + ) + src_path = Path(src) + if not src_path.exists() or not src_path.is_dir(): + raise click.ClickException(f"--src must be an existing directory: {src}") + + source_files = list(iter_source_files(src_path)) to_process: List[Path] = [] manifest_files: List[ManifestFile] = [] for file_path in source_files: stat = file_path.stat() - path_rel = str(file_path.relative_to(src)) + path_rel = str(file_path.relative_to(src_path)) prev = previous_index.get(path_rel) if prev and prev.size_bytes == stat.st_size and prev.mtime_ns == stat.st_mtime_ns: manifest_files.append(prev) @@ -68,13 +80,13 @@ def build_command(src: Path, out: Path, format_: str, force: bool) -> None: task = progress.add_task("Processing files", total=len(to_process)) for file_path in to_process: try: - item = extract_catalog_item(src, file_path) + item = extract_catalog_item(src_path, file_path) items.append(item) stat = file_path.stat() sha256 = compute_file_hash(file_path) manifest_files.append( - ManifestFile(path_rel=str(file_path.relative_to(src)), size_bytes=stat.st_size, mtime_ns=stat.st_mtime_ns, sha256=sha256) + ManifestFile(path_rel=str(file_path.relative_to(src_path)), size_bytes=stat.st_size, mtime_ns=stat.st_mtime_ns, sha256=sha256) ) except Exception as exc: # pragma: no cover - rare edge cases console.print(f"[yellow]Warning[/yellow]: Failed to process {file_path}: {exc}") @@ -86,9 +98,9 @@ def build_command(src: Path, out: Path, format_: str, force: bool) -> None: # This keeps logic deterministic while still skipping heavy parsing of unchanged files. catalog = [] for mf in manifest_files: - file_path = src / mf.path_rel + file_path = src_path / mf.path_rel try: - catalog.append(extract_catalog_item(src, file_path)) + catalog.append(extract_catalog_item(src_path, file_path)) except Exception as exc: # pragma: no cover console.print(f"[yellow]Warning[/yellow]: Failed to refresh {file_path}: {exc}") diff --git a/avendehut/utils/metadata.py b/avendehut/utils/metadata.py index d75977b..26374eb 100644 --- a/avendehut/utils/metadata.py +++ b/avendehut/utils/metadata.py @@ -6,7 +6,6 @@ from pathlib import Path from typing import List, Optional -from ebooklib import epub # type: ignore from pypdf import PdfReader # type: ignore @@ -24,6 +23,9 @@ def _iso(ts: float) -> str: def _extract_epub(path: Path) -> tuple[str, List[str], Optional[int], Optional[str]]: + # Lazy import to avoid requiring ebooklib/lxml unless actually processing EPUB files + from ebooklib import epub # type: ignore + book = epub.read_epub(str(path)) title = (book.get_metadata("DC", "title") or [["", {}]])[0][0] or path.stem authors = [a[0] for a in (book.get_metadata("DC", "creator") or []) if a and a[0]] or [] diff --git a/avendehut/utils/onedrive.py b/avendehut/utils/onedrive.py index 4477951..0cb0ba1 100644 --- a/avendehut/utils/onedrive.py +++ b/avendehut/utils/onedrive.py @@ -2,10 +2,14 @@ import os from pathlib import Path -from typing import Generator, Iterable, List +from typing import Iterable -from msgraph import GraphServiceClient # type: ignore -from azure.identity import ClientSecretCredential # type: ignore +try: # Optional imports; only needed when actually listing OneDrive files + from msgraph import GraphServiceClient # type: ignore + from azure.identity import ClientSecretCredential # type: ignore +except Exception: # pragma: no cover - allow module import without heavy deps + GraphServiceClient = object # type: ignore[assignment] + ClientSecretCredential = object # type: ignore[assignment] def is_onedrive_path(path: str) -> bool: @@ -19,7 +23,11 @@ def ensure_onedrive_env() -> None: raise RuntimeError(f"Missing OneDrive environment variables: {', '.join(missing)}") -def _get_graph_client() -> GraphServiceClient: +def _get_graph_client() -> "GraphServiceClient": # type: ignore[name-defined] + # Lazy import inside function to avoid requiring these deps unless needed + from msgraph import GraphServiceClient # type: ignore + from azure.identity import ClientSecretCredential # type: ignore + tenant_id = os.environ["ONEDRIVE_TENANT_ID"] client_id = os.environ["ONEDRIVE_CLIENT_ID"] client_secret = os.environ["ONEDRIVE_CLIENT_SECRET"]