diff --git a/README.md b/README.md
index c80f9a0..f0efbdb 100644
--- a/README.md
+++ b/README.md
@@ -235,6 +235,14 @@ items:
 
 Paths in the manifest are resolved relative to the manifest file's directory.
 
+**Generate a human-readable report** from an existing batch run (Markdown or HTML):
+
+```bash
+paperbanana batch-report --batch-dir outputs/batch_20250109_123456_abc --format markdown
+# or by batch ID (under default output dir)
+paperbanana batch-report --batch-id batch_20250109_123456_abc --format html --output report.html
+```
+
 | Flag | Short | Description |
 |------|-------|-------------|
 | `--manifest` | `-m` | Path to manifest file (required) |
diff --git a/paperbanana/cli.py b/paperbanana/cli.py
index fde4d69..44ea471 100644
--- a/paperbanana/cli.py
+++ b/paperbanana/cli.py
@@ -676,6 +676,65 @@ def batch(
     console.print(f"  Report: [bold]{report_path}[/bold]")
 
 
+@app.command("batch-report")
+def batch_report(
+    batch_dir: Optional[str] = typer.Option(
+        None,
+        "--batch-dir",
+        "-b",
+        help="Path to batch run directory (e.g. outputs/batch_20250109_123456_abc)",
+    ),
+    batch_id: Optional[str] = typer.Option(
+        None,
+        "--batch-id",
+        help="Batch ID (e.g. batch_20250109_123456_abc); resolved under --output-dir",
+    ),
+    output_dir: str = typer.Option(
+        "outputs",
+        "--output-dir",
+        "-o",
+        help="Parent directory for batch runs (used with --batch-id)",
+    ),
+    output: Optional[str] = typer.Option(
+        None,
+        "--output",
+        help="Output path for the report file (default: <batch-dir>/batch_report.<md|html>)",
+    ),
+    format: str = typer.Option(
+        "markdown",
+        "--format",
+        "-f",
+        help="Report format: markdown (alias: md) or html",
+    ),
+):
+    """Generate a human-readable report from an existing batch run (batch_report.json)."""
+    # Normalise once: accept any letter case and resolve the "md" alias before validating.
+    fmt = format.strip().lower()
+    if fmt == "md":
+        fmt = "markdown"
+    if fmt not in ("markdown", "html"):
+        console.print(f"[red]Error: Format must be markdown or html. Got: {format}[/red]")
+        raise typer.Exit(1)
+    if batch_dir is None and batch_id is None:
+        console.print("[red]Error: Provide either --batch-dir or --batch-id[/red]")
+        raise typer.Exit(1)
+    if batch_dir is not None and batch_id is not None:
+        console.print("[red]Error: Provide only one of --batch-dir or --batch-id[/red]")
+        raise typer.Exit(1)
+
+    from paperbanana.core.batch import write_batch_report
+
+    path = Path(batch_dir) if batch_dir is not None else Path(output_dir) / batch_id
+    output_path = Path(output) if output else None
+    try:
+        written = write_batch_report(path, output_path=output_path, format=fmt)
+    except (FileNotFoundError, ValueError) as e:
+        # Both mean the batch run cannot be reported on: print the message, not a traceback.
+        console.print(f"[red]Error: {e}[/red]")
+        raise typer.Exit(1)
+    console.print(f"[green]Report written to:[/green] [bold]{written}[/bold]")
+
+
 @app.command()
 def plot(
     data: str = typer.Option(..., "--data", "-d", help="Path to data file (CSV or JSON)"),
diff --git a/paperbanana/core/batch.py b/paperbanana/core/batch.py
index b50e9a8..6390bf1 100644
--- a/paperbanana/core/batch.py
+++ b/paperbanana/core/batch.py
@@ -1,16 +1,20 @@
-"""Batch generation: manifest loading and batch run id."""
+"""Batch generation: manifest loading, batch run id, and report generation."""
 
 from __future__ import annotations
 
 import datetime
+import html
+import json
 import uuid
 from pathlib import Path
-from typing import Any
+from typing import Any, Literal
 
 import structlog
 
 logger = structlog.get_logger()
 
+REPORT_FILENAME = "batch_report.json"
+
 
 def generate_batch_id() -> str:
     """Generate a unique batch run ID."""
@@ -82,3 +86,213 @@ def load_batch_manifest(manifest_path: Path) -> list[dict[str, Any]]:
             }
         )
     return result
+
+
+def load_batch_report(batch_dir: Path) -> dict[str, Any]:
+    """Load batch_report.json from a batch output directory.
+
+    Args:
+        batch_dir: Path to the batch run directory (e.g. outputs/batch_20250109_123456_abc).
+
+    Returns:
+        The report dict (batch_id, manifest, items, total_seconds).
+
+    Raises:
+        FileNotFoundError: If batch_dir or batch_report.json does not exist.
+        ValueError: If the JSON is invalid (json.JSONDecodeError is a ValueError subclass)
+            or the top-level value is not a dict holding an "items" list.
+    """
+    batch_dir = Path(batch_dir).resolve()
+    report_path = batch_dir / REPORT_FILENAME
+    if not batch_dir.is_dir():
+        raise FileNotFoundError(f"Batch directory not found: {batch_dir}")
+    if not report_path.is_file():
+        raise FileNotFoundError(f"No {REPORT_FILENAME} in {batch_dir}. Run a batch first.")
+    data = json.loads(report_path.read_text(encoding="utf-8"))
+    if not isinstance(data, dict) or not isinstance(data.get("items"), list):
+        raise ValueError(
+            f"Invalid report: expected dict with an 'items' list. Got: {type(data).__name__}"
+        )
+    return data
+
+
+def _report_summary(report: dict[str, Any]) -> tuple[int, int, float]:
+    """Return (succeeded, total, total_seconds) from a batch report.
+
+    An item counts as succeeded when it carries a truthy "output_path".
+    """
+    items = report.get("items", [])
+    succeeded = sum(1 for item in items if item.get("output_path"))
+    return succeeded, len(items), float(report.get("total_seconds") or 0.0)
+
+
+def _display_path(output_path: Any, batch_dir: Path) -> str:
+    """Return output_path relative to batch_dir when it lies inside it, else unchanged.
+
+    Containment is decided per path component by relative_to(), not by string prefix,
+    so a sibling directory such as ".../batch_10" is never mistaken for ".../batch_1".
+    """
+    text = str(output_path)
+    candidate = Path(text)
+    if not candidate.is_absolute():
+        return text
+    try:
+        return candidate.relative_to(batch_dir).as_posix()
+    except ValueError:
+        return text
+
+
+def _md_cell(value: Any, limit: int | None = None) -> str:
+    """Render value as the text of a single Markdown table cell.
+
+    Whitespace runs (including newlines, which would end the table row) collapse to one
+    space; the text is then cut to limit characters plus an ellipsis; pipes are escaped
+    last so truncation can never split an escaped pipe.
+    """
+    text = " ".join(str(value).split())
+    if limit is not None and len(text) > limit:
+        text = text[:limit] + "…"
+    return text.replace("|", "\\|")
+
+
+def generate_batch_report_md(report: dict[str, Any], batch_dir: Path) -> str:
+    """Generate a Markdown report from a batch report dict.
+
+    Args:
+        report: Report dict as returned by load_batch_report().
+        batch_dir: Batch run directory; output paths inside it are shown relative to it.
+
+    Returns:
+        Markdown text: a heading, a two-line summary, and one table row per item.
+    """
+    batch_dir = Path(batch_dir).resolve()
+    batch_id = report.get("batch_id", "batch")
+    manifest = report.get("manifest", "")
+    succeeded, total, total_seconds = _report_summary(report)
+    lines = [
+        f"# Batch Report: {batch_id}",
+        "",
+        f"- **Manifest:** `{manifest}`",
+        f"- **Summary:** {succeeded}/{total} succeeded in {total_seconds:.1f}s",
+        "",
+        "| ID | Caption | Status | Output / Error | Iterations |",
+        "|----|--------|--------|-----------------|------------|",
+    ]
+    for item in report.get("items", []):
+        item_id = _md_cell(item.get("id", "—"))
+        caption = _md_cell(item.get("caption") or "", limit=60)
+        if item.get("output_path"):
+            out = _md_cell(_display_path(item["output_path"], batch_dir))
+            iters = _md_cell(item.get("iterations", "—"))
+            lines.append(f"| {item_id} | {caption} | ✓ Success | `{out}` | {iters} |")
+        else:
+            err = _md_cell(item.get("error") or "unknown", limit=80)
+            lines.append(f"| {item_id} | {caption} | ✗ Failed | {err} | — |")
+    return "\n".join(lines)
+
+
+def generate_batch_report_html(report: dict[str, Any], batch_dir: Path) -> str:
+    """Generate a standalone HTML report from a batch report dict.
+
+    Every value taken from the report is HTML-escaped before it is interpolated.
+
+    Args:
+        report: Report dict as returned by load_batch_report().
+        batch_dir: Batch run directory; output paths inside it are shown relative to it.
+
+    Returns:
+        A complete HTML document as a string.
+    """
+    batch_dir = Path(batch_dir).resolve()
+    batch_id = report.get("batch_id", "batch")
+    manifest = report.get("manifest", "")
+    succeeded, total, total_seconds = _report_summary(report)
+
+    def escape(value: Any) -> str:
+        # Report values come from JSON and may be numbers, so coerce before escaping.
+        return html.escape(str(value), quote=True)
+
+    rows = []
+    for item in report.get("items", []):
+        item_id = escape(item.get("id", "—"))
+        caption = escape(str(item.get("caption") or "")[:80])
+        if item.get("output_path"):
+            out = escape(_display_path(item["output_path"], batch_dir))
+            iters = escape(item.get("iterations", "—"))
+            rows.append(
+                f"<tr><td>{item_id}</td><td>{caption}</td>"
+                f'<td class="ok">Success</td><td><code>{out}</code></td><td>{iters}</td></tr>'
+            )
+        else:
+            err = escape(str(item.get("error") or "unknown")[:200])
+            rows.append(
+                f"<tr><td>{item_id}</td><td>{caption}</td>"
+                f'<td class="fail">Failed</td><td>{err}</td><td>—</td></tr>'
+            )
+
+    body_rows = "\n".join(rows)
+    return f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <title>Batch Report — {escape(batch_id)}</title>
+  <style>
+    table {{ border-collapse: collapse; }}
+    th, td {{ border: 1px solid #ccc; padding: 4px 8px; text-align: left; }}
+    .ok {{ color: #1a7f37; }}
+    .fail {{ color: #b42318; }}
+  </style>
+</head>
+<body>
+  <h1>Batch Report: {escape(batch_id)}</h1>
+  <p>Manifest: <code>{escape(manifest)}</code></p>
+  <p>Summary: {succeeded}/{total} succeeded in {total_seconds:.1f}s</p>
+  <table>
+    <thead>
+      <tr>
+        <th>ID</th><th>Caption</th><th>Status</th><th>Output / Error</th><th>Iterations</th>
+      </tr>
+    </thead>
+    <tbody>
+{body_rows}
+    </tbody>
+  </table>
+</body>
+</html>
+"""
+
+
+def write_batch_report(
+    batch_dir: Path,
+    output_path: Path | None = None,
+    format: Literal["markdown", "html", "md"] = "markdown",
+) -> Path:
+    """Load the batch report from batch_dir, render it, and write it to disk.
+
+    Args:
+        batch_dir: Path to the batch run directory.
+        output_path: Where to write the report. If None, writes to batch_dir/batch_report.{md|html}.
+        format: Report format: markdown, html, or md (alias for markdown).
+
+    Returns:
+        The resolved path where the report was written.
+
+    Raises:
+        FileNotFoundError: If batch_dir or its batch_report.json does not exist.
+        ValueError: If the report is malformed or format is not a supported value.
+    """
+    if format not in ("markdown", "md", "html"):
+        raise ValueError(f"Unsupported report format: {format!r}")
+    batch_dir = Path(batch_dir).resolve()
+    report = load_batch_report(batch_dir)
+    if format == "html":
+        ext, content = "html", generate_batch_report_html(report, batch_dir)
+    else:
+        ext, content = "md", generate_batch_report_md(report, batch_dir)
+    if output_path is None:
+        output_path = batch_dir / f"batch_report.{ext}"
+    output_path = Path(output_path).resolve()
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    output_path.write_text(content, encoding="utf-8")
+    logger.info("Wrote batch report", path=str(output_path), format=format)
+    return output_path
diff --git a/tests/test_batch.py b/tests/test_batch.py
new file mode 100644
index 0000000..3c43e46
--- /dev/null
+++ b/tests/test_batch.py
@@ -0,0 +1,174 @@
+"""Tests for paperbanana.core.batch — manifest loading and report generation."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+
+from paperbanana.core.batch import (
+    REPORT_FILENAME,
+    generate_batch_report_html,
+    generate_batch_report_md,
+    load_batch_report,
+    write_batch_report,
+)
+
+# ---------------------------------------------------------------------------
+# load_batch_report
+# ---------------------------------------------------------------------------
+
+
+def test_load_batch_report_success(tmp_path: Path):
+    report_data = {
+        "batch_id": "batch_20250109_120000_abc",
+        "manifest": "/path/to/manifest.yaml",
+        "items": [
+            {"id": "fig1", "caption": "Overview", "output_path": "/out/fig1.png", "iterations": 3},
+            {"id": "fig2", "caption": "Pipeline", "error": "API error"},
+        ],
+        "total_seconds": 42.5,
+    }
+    (tmp_path / REPORT_FILENAME).write_text(json.dumps(report_data), encoding="utf-8")
+    loaded = load_batch_report(tmp_path)
+    assert loaded["batch_id"] == "batch_20250109_120000_abc"
+    assert len(loaded["items"]) == 2
+    assert loaded["total_seconds"] == 42.5
+
+
+def test_load_batch_report_dir_not_found():
+    with pytest.raises(FileNotFoundError, match="Batch directory not found"):
+        load_batch_report(Path("/nonexistent/batch_dir"))
+
+
+def test_load_batch_report_json_not_found(tmp_path: Path):
+    with pytest.raises(FileNotFoundError, match="No batch_report.json"):
+        load_batch_report(tmp_path)
+
+
+def test_load_batch_report_invalid_json(tmp_path: Path):
+    (tmp_path / REPORT_FILENAME).write_text("not json", encoding="utf-8")
+    with pytest.raises(json.JSONDecodeError):
+        load_batch_report(tmp_path)
+
+
+def test_load_batch_report_missing_items_key(tmp_path: Path):
+    (tmp_path / REPORT_FILENAME).write_text('{"batch_id": "x"}', encoding="utf-8")
+    with pytest.raises(ValueError, match="Invalid report"):
+        load_batch_report(tmp_path)
+
+
+# ---------------------------------------------------------------------------
+# generate_batch_report_md
+# ---------------------------------------------------------------------------
+
+
+def test_generate_batch_report_md_contains_summary(tmp_path: Path):
+    report = {
+        "batch_id": "batch_123",
+        "manifest": "manifest.yaml",
+        "items": [
+            {"id": "a", "caption": "Cap A", "output_path": "/out/a.png", "iterations": 2},
+            {"id": "b", "caption": "Cap B", "error": "Failed"},
+        ],
+        "total_seconds": 10.0,
+    }
+    md = generate_batch_report_md(report, tmp_path)
+    assert "# Batch Report: batch_123" in md
+    assert "1/2 succeeded" in md
+    assert "10.0s" in md
+    assert "| a |" in md
+    assert "| b |" in md
+    assert "Success" in md
+    assert "Failed" in md
+
+
+def test_generate_batch_report_md_sibling_dir_and_table_safety(tmp_path: Path):
+    batch_dir = tmp_path / "batch_1"
+    batch_dir.mkdir()
+    # "batch_10" shares a string prefix with "batch_1" but is not inside it.
+    sibling_output = str((tmp_path / "batch_10" / "fig.png").resolve())
+    report = {
+        "items": [
+            {"id": "a|b", "caption": "line one\nline two", "output_path": sibling_output},
+        ],
+    }
+    md = generate_batch_report_md(report, batch_dir)
+    row = md.splitlines()[-1]
+    assert sibling_output in row
+    assert "a\\|b" in row
+    assert "line one line two" in row
+
+
+# ---------------------------------------------------------------------------
+# generate_batch_report_html
+# ---------------------------------------------------------------------------
+
+
+def test_generate_batch_report_html_contains_summary(tmp_path: Path):
+    report = {
+        "batch_id": "batch_456",
+        "manifest": "m.yaml",
+        "items": [
+            {"id": "x", "caption": "X", "output_path": str(tmp_path / "x.png"), "iterations": 1},
+        ],
+        "total_seconds": 5.0,
+    }
+    html = generate_batch_report_html(report, tmp_path)
+    assert "Batch Report: batch_456" in html
+    assert "1/1" in html
+    assert "5.0s" in html
+    assert "<table>" in html
+    assert "Success" in html
+    assert "x.png" in html
+
+
+def test_generate_batch_report_html_escapes_report_values(tmp_path: Path):
+    report = {"batch_id": "<b>id</b>", "items": [{"id": 1, "error": "a < b & c"}]}
+    html = generate_batch_report_html(report, tmp_path)
+    assert "<b>id</b>" not in html
+    assert "&lt;b&gt;id&lt;/b&gt;" in html
+    assert "a &lt; b &amp; c" in html
+
+
+# ---------------------------------------------------------------------------
+# write_batch_report
+# ---------------------------------------------------------------------------
+
+
+def test_write_batch_report_markdown(tmp_path: Path):
+    report_data = {
+        "batch_id": "batch_write",
+        "manifest": "manifest.yaml",
+        "items": [
+            {
+                "id": "i1",
+                "caption": "C",
+                "output_path": str(tmp_path / "out.png"),
+                "iterations": 1,
+            }
+        ],
+        "total_seconds": 1.0,
+    }
+    (tmp_path / REPORT_FILENAME).write_text(json.dumps(report_data), encoding="utf-8")
+    out_path = tmp_path / "report.md"
+    written = write_batch_report(tmp_path, output_path=out_path, format="markdown")
+    # write_batch_report returns a resolved path; tmp_path itself may contain symlinks.
+    assert written == out_path.resolve()
+    assert out_path.exists()
+    assert "Batch Report: batch_write" in out_path.read_text(encoding="utf-8")
+
+
+def test_write_batch_report_html_default_path(tmp_path: Path):
+    report_data = {
+        "batch_id": "b",
+        "manifest": "m",
+        "items": [],
+        "total_seconds": 0,
+    }
+    (tmp_path / REPORT_FILENAME).write_text(json.dumps(report_data), encoding="utf-8")
+    written = write_batch_report(tmp_path, format="html")
+    assert written == (tmp_path / "batch_report.html").resolve()
+    assert written.exists()
+    assert "<!DOCTYPE html>" in written.read_text(encoding="utf-8")