llmsresearch
diff --git a/‎paperbanana/agents/planner.py‎
Lines changed: 101 additions & 6 deletions b/‎paperbanana/agents/planner.py‎
Lines changed: 101 additions & 6 deletions
diff --git a/‎paperbanana/cli.py‎
Lines changed: 203 additions & 0 deletions b/‎paperbanana/cli.py‎
Lines changed: 203 additions & 0 deletions
@@ -2,10 +2,17 @@
 
 from __future__ import annotations
 
+import asyncio
+import ipaddress
 import re
+import socket
+from io import BytesIO
 from pathlib import Path
+from urllib.parse import urlparse
 
+import httpx
 import structlog
+from PIL import Image
 
 from paperbanana.agents.base import BaseAgent
 from paperbanana.core.types import DiagramType, ReferenceExample
@@ -55,7 +62,7 @@ async def run(
         examples_text = self._format_examples(examples)
 
         # Load reference images for visual in-context learning
-        example_images = self._load_example_images(examples)
+        example_images = await asyncio.to_thread(self._load_example_images, examples)
 
         prompt_type = "diagram" if diagram_type == DiagramType.METHODOLOGY else "plot"
         template = self.load_prompt(prompt_type)
@@ -113,32 +120,116 @@ def _format_examples(self, examples: list[ReferenceExample]) -> str:
             if ex.aspect_ratio:
                 ratio_info = f"\n**Aspect Ratio**: {ex.aspect_ratio:.2f}"
 
+            structure_info = ""
+            if ex.structure_hints:
+                hints_text = str(ex.structure_hints)
+                structure_info = f"\n**Structure Hints**: {hints_text[:240]}"
+
             lines.append(
                 f"### Example {i}\n"
                 f"**Caption**: {ex.caption}\n"
                 f"**Source Context**: {ex.source_context[:500]}"
                 f"{ratio_info}"
+                f"{structure_info}"
                 f"{image_ref}\n"
             )
         return "\n".join(lines)
 
     def _has_valid_image(self, example: ReferenceExample) -> bool:
-        """Check if a reference example has a valid image file."""
-        if not example.image_path:
+        """Check whether a reference example points at an image worth loading.
+
+        A missing or whitespace-only ``image_path`` is rejected. Paths with an
+        ``http://`` or ``https://`` prefix are accepted only when
+        ``_is_safe_remote_image_url`` approves them, which in particular
+        rejects plain ``http://`` URLs; no network request is made here. Any
+        other value is treated as a local path and must exist on disk.
+        """
+        if not example.image_path or not example.image_path.strip():
             return False
-        return Path(example.image_path).exists()
+        path = example.image_path.strip()
+        if self._is_remote_url(path):
+            return self._is_safe_remote_image_url(path)
+        return Path(path).exists()
+
+    @staticmethod
+    def _is_remote_url(path: str) -> bool:
+        """Return True when *path* starts with an ``http://`` or ``https://`` prefix."""
+        remote_prefixes = ("http://", "https://")
+        return path.startswith(remote_prefixes)
+
+    @classmethod
+    def _is_safe_remote_image_url(cls, image_url: str) -> bool:
+        """Decide, from the URL text alone, whether a remote image may be fetched.
+
+        No DNS lookup or network request happens here. A URL is accepted only
+        when it parses cleanly, uses the ``https`` scheme, has a hostname,
+        carries no embedded credentials, does not name a known local host or a
+        ``.local`` domain, and -- if the host is an IP literal -- that address
+        is globally routable.
+
+        Args:
+            image_url: Candidate URL of a reference image.
+
+        Returns:
+            True if the URL passes every check, False otherwise (including for
+            malformed URLs).
+        """
+        try:
+            parsed = urlparse(image_url)
+            hostname = parsed.hostname
+            has_credentials = bool(parsed.username or parsed.password)
+        except ValueError:
+            # urlparse raises ValueError for malformed hosts (e.g. an unclosed
+            # IPv6 bracket). Callers treat this method as a plain predicate,
+            # so a bad URL must read as "unsafe" rather than propagate.
+            return False
+
+        if parsed.scheme != "https" or not hostname or has_credentials:
+            return False
+
+        # A fully-qualified name may end in "." ("localhost.", "127.0.0.1.");
+        # drop it so the checks below see the same host the resolver would.
+        host = hostname.lower().rstrip(".")
+        if not host:
+            return False
+        if host in cls._LOCAL_HOSTNAMES or host.endswith(".local"):
+            return False
+
+        try:
+            ip = ipaddress.ip_address(host)
+        except ValueError:
+            # Not an IP literal: an ordinary DNS name, accepted at this stage.
+            return True
+        return ip.is_global
+
+    @staticmethod
+    def _hostname_resolves_to_global_addresses(hostname: str) -> bool:
+        """Return True only if *hostname* resolves and every result is a global IP.
+
+        Resolution is done with ``socket.getaddrinfo`` for port 443 / TCP. A
+        resolution failure, an empty result, an unparsable address, or any
+        single non-global address makes the answer False.
+        """
+        try:
+            resolved = socket.getaddrinfo(hostname, 443, type=socket.SOCK_STREAM)
+        except socket.gaierror:
+            return False
+
+        def _is_public(sockaddr) -> bool:
+            # sockaddr[0] is the textual address of one getaddrinfo result.
+            try:
+                candidate = ipaddress.ip_address(sockaddr[0])
+            except ValueError:
+                return False
+            return candidate.is_global
+
+        # bool(resolved) keeps the "no results means unsafe" rule; all() stops
+        # at the first address that is not public.
+        return bool(resolved) and all(_is_public(entry[4]) for entry in resolved)
+
+    def _fetch_remote_image(self, image_url: str) -> Image.Image:
+        """Download a remote reference image and decode it as an RGB PIL image.
+
+        The hostname must resolve exclusively to global addresses, redirects
+        are refused, the response must declare an ``image/*`` content type, and
+        the body is read as a stream so the download is abandoned as soon as it
+        grows past ``_MAX_REMOTE_IMAGE_BYTES`` instead of being buffered whole.
+
+        Args:
+            image_url: URL of the image to fetch.
+
+        Returns:
+            The decoded image converted to RGB.
+
+        Raises:
+            ValueError: If the URL has no hostname, resolves to a non-public
+                address, redirects, returns a non-image content type, or
+                exceeds the size limit.
+            httpx.HTTPStatusError: If the server answers with an error status.
+        """
+        parsed = urlparse(image_url)
+        hostname = parsed.hostname
+        if not hostname:
+            raise ValueError("remote image URL is missing hostname")
+        # NOTE(review): httpx resolves the hostname again when it connects, so
+        # this check and the actual connection can see different DNS answers.
+        if not self._hostname_resolves_to_global_addresses(hostname):
+            raise ValueError("remote image hostname resolves to non-public address")
+
+        max_bytes = self._MAX_REMOTE_IMAGE_BYTES
+        payload = bytearray()
+        with httpx.Client(
+            timeout=self._REMOTE_IMAGE_TIMEOUT_SECONDS,
+            follow_redirects=False,
+        ) as client, client.stream("GET", image_url) as response:
+            if 300 <= response.status_code < 400:
+                raise ValueError("remote image redirects are not allowed")
+            response.raise_for_status()
+
+            content_type = (response.headers.get("content-type") or "").lower()
+            if not content_type.startswith("image/"):
+                raise ValueError("remote URL did not return an image content type")
+
+            # Enforce the cap while reading: stop pulling data as soon as the
+            # limit is crossed rather than after the whole body is in memory.
+            for chunk in response.iter_bytes():
+                payload.extend(chunk)
+                if len(payload) > max_bytes:
+                    raise ValueError(f"remote image exceeds {max_bytes} byte limit")
+
+        return Image.open(BytesIO(bytes(payload))).convert("RGB")
 
     def _load_example_images(self, examples: list[ReferenceExample]) -> list:
-        """Load reference images from disk for in-context learning.
+        """Load reference images from disk or URL for in-context learning.
 
         Returns a list of PIL Image objects for examples that have valid images.
+        Supports local paths and http(s) URLs (e.g. from external exemplar adapters).
         """
         images = []
         for ex in examples:
             if not self._has_valid_image(ex):
                 continue
             try:
-                img = load_image(ex.image_path)
+                path = ex.image_path.strip()
+                if self._is_remote_url(path):
+                    img = self._fetch_remote_image(path)
+                else:
+                    img = load_image(path)
                 images.append(img)
             except Exception as e:
                 logger.warning(
@@ -168,3 +259,7 @@ def _parse_ratio(cls, text: str) -> tuple[str, str | None]:
                 return clean, ratio
             logger.warning("Planner returned invalid ratio", ratio=ratio)
         return text.strip(), None
+
+    # Timeout, in seconds, handed to the httpx client that fetches remote images.
+    _REMOTE_IMAGE_TIMEOUT_SECONDS = 10.0
+    # Largest remote image body accepted, in bytes (5 MiB).
+    _MAX_REMOTE_IMAGE_BYTES = 5 * 1024 * 1024
+    # Hostnames rejected outright by the remote-URL safety check.
+    _LOCAL_HOSTNAMES = {"localhost", "localhost.localdomain"}
@@ -92,6 +92,41 @@ def generate(
         "--auto-download-data",
         help="Auto-download expanded reference set (~257MB) on first run if not cached",
     ),
+    exemplar_retrieval: bool = typer.Option(
+        False,
+        "--exemplar-retrieval",
+        help="Enable external exemplar retrieval before planning",
+    ),
+    exemplar_endpoint: Optional[str] = typer.Option(
+        None,
+        "--exemplar-endpoint",
+        help="External exemplar retrieval endpoint URL",
+    ),
+    exemplar_mode: Optional[str] = typer.Option(
+        None,
+        "--exemplar-mode",
+        help="Exemplar retrieval mode: external_then_rerank or external_only",
+    ),
+    exemplar_top_k: Optional[int] = typer.Option(
+        None,
+        "--exemplar-top-k",
+        help="Top-k exemplars requested from external retriever",
+    ),
+    exemplar_timeout: Optional[float] = typer.Option(
+        None,
+        "--exemplar-timeout",
+        help="External exemplar retrieval timeout (seconds)",
+    ),
+    exemplar_retries: Optional[int] = typer.Option(
+        None,
+        "--exemplar-retries",
+        help="Retry attempts for external exemplar retrieval on transient errors",
+    ),
+    seed: Optional[int] = typer.Option(
+        None,
+        "--seed",
+        help="Random seed for reproducible image generation",
+    ),
     verbose: bool = typer.Option(
         False, "--verbose", "-v", help="Show detailed agent progress and timing"
     ),
@@ -104,6 +139,11 @@ def generate(
     if feedback and not continue_run and not continue_last:
         console.print("[red]Error: --feedback requires --continue or --continue-run[/red]")
         raise typer.Exit(1)
+    if exemplar_mode and exemplar_mode not in ("external_then_rerank", "external_only"):
+        console.print(
+            "[red]Error: --exemplar-mode must be external_then_rerank or external_only[/red]"
+        )
+        raise typer.Exit(1)
 
     configure_logging(verbose=verbose)
 
@@ -128,6 +168,20 @@ def generate(
     if output:
         overrides["output_dir"] = str(Path(output).parent)
     overrides["output_format"] = format
+    if exemplar_retrieval:
+        overrides["exemplar_retrieval_enabled"] = True
+    if exemplar_endpoint:
+        overrides["exemplar_retrieval_endpoint"] = exemplar_endpoint
+    if exemplar_mode:
+        overrides["exemplar_retrieval_mode"] = exemplar_mode
+    if exemplar_top_k is not None:
+        overrides["exemplar_retrieval_top_k"] = exemplar_top_k
+    if exemplar_timeout is not None:
+        overrides["exemplar_retrieval_timeout_seconds"] = exemplar_timeout
+    if exemplar_retries is not None:
+        overrides["exemplar_retrieval_max_retries"] = exemplar_retries
+    if seed is not None:
+        overrides["seed"] = seed
 
     if config:
         settings = Settings.from_yaml(config, **overrides)
@@ -615,6 +669,155 @@ async def _run():
             console.print(f"\n[bold]{dim}[/bold]: {result.reasoning}")
 
 
+@app.command("ablate-retrieval")
+def ablate_retrieval(
+    input: str = typer.Option(..., "--input", "-i", help="Path to methodology text file"),
+    caption: str = typer.Option(
+        ..., "--caption", "-c", help="Figure caption / communicative intent"
+    ),
+    exemplar_endpoint: str = typer.Option(
+        ..., "--exemplar-endpoint", help="External exemplar retrieval endpoint URL"
+    ),
+    top_k: str = typer.Option(
+        "1,3,5", "--top-k", help="Comma-separated top-k values (e.g., 1,3,5)"
+    ),
+    seed: Optional[int] = typer.Option(
+        None,
+        "--seed",
+        help="Random seed used for all variants (default: 42 if omitted)",
+    ),
+    exemplar_retries: Optional[int] = typer.Option(
+        None,
+        "--exemplar-retries",
+        help="Retry attempts for external exemplar retrieval on transient errors",
+    ),
+    reference: Optional[str] = typer.Option(
+        None,
+        "--reference",
+        "-r",
+        help="Optional human reference image for judge-based preference proxy",
+    ),
+    output_report: Optional[str] = typer.Option(
+        None,
+        "--output-report",
+        "-o",
+        help="Output JSON report path (default: outputs/retrieval_ablation_<runid>.json)",
+    ),
+    config: Optional[str] = typer.Option(None, "--config", help="Path to config YAML file"),
+    vlm_provider: Optional[str] = typer.Option(
+        None, "--vlm-provider", help="VLM provider override for generation and judge"
+    ),
+    image_provider: Optional[str] = typer.Option(
+        None, "--image-provider", help="Image generation provider override"
+    ),
+    verbose: bool = typer.Option(
+        False, "--verbose", "-v", help="Show detailed agent progress and timing"
+    ),
+):
+    """Run baseline vs retrieval ablation (k sweep) and save a JSON report."""
+    configure_logging(verbose=verbose)
+
+    # Fail fast on a missing methodology file before any heavier setup.
+    input_path = Path(input)
+    if not input_path.exists():
+        console.print(f"[red]Error: Input file not found: {input}[/red]")
+        raise typer.Exit(1)
+
+    # The reference image is optional, but a path that was given must exist.
+    reference_path: Optional[Path] = None
+    if reference:
+        reference_path = Path(reference)
+        if not reference_path.exists():
+            console.print(f"[red]Error: Reference image not found: {reference}[/red]")
+            raise typer.Exit(1)
+
+    # load_dotenv() runs before the paperbanana imports below -- presumably so
+    # values from a .env file are in the environment by the time settings and
+    # providers read it; TODO confirm.
+    from dotenv import load_dotenv
+
+    load_dotenv()
+
+    from paperbanana.core.types import DiagramType, GenerationInput
+    from paperbanana.core.utils import generate_run_id
+    from paperbanana.evaluation.retrieval_ablation import (
+        RetrievalAblationRunner,
+        parse_top_k_values,
+    )
+
+    # A ValueError from parse_top_k_values is reported as a CLI error (exit 1).
+    try:
+        k_values = parse_top_k_values(top_k)
+    except ValueError as e:
+        console.print(f"[red]Error: {e}[/red]")
+        raise typer.Exit(1)
+
+    # Retrieval is always switched on for this command. The optional flags are
+    # added to the overrides only when the user actually supplied them.
+    overrides = {
+        "exemplar_retrieval_endpoint": exemplar_endpoint,
+        "exemplar_retrieval_enabled": True,
+    }
+    if vlm_provider:
+        overrides["vlm_provider"] = vlm_provider
+    if image_provider:
+        overrides["image_provider"] = image_provider
+    if seed is not None:
+        overrides["seed"] = seed
+    if exemplar_retries is not None:
+        overrides["exemplar_retrieval_max_retries"] = exemplar_retries
+
+    if config:
+        settings = Settings.from_yaml(config, **overrides)
+    else:
+        settings = Settings(**overrides)
+
+    # This command always builds a methodology-diagram request.
+    gen_input = GenerationInput(
+        source_context=input_path.read_text(encoding="utf-8"),
+        communicative_intent=caption,
+        diagram_type=DiagramType.METHODOLOGY,
+    )
+
+    runner = RetrievalAblationRunner(
+        settings,
+        reference_image_path=str(reference_path) if reference_path else None,
+    )
+
+    # Thin coroutine wrapper so the async runner can be driven by asyncio.run().
+    async def _run():
+        return await runner.run(gen_input, top_k_values=k_values)
+
+    # NOTE(review): the 42 shown below is only what gets displayed when no seed
+    # is configured; this function never writes 42 into the overrides. Confirm
+    # that the runner applies the same default.
+    console.print(
+        Panel.fit(
+            f"[bold]PaperBanana[/bold] - Retrieval Ablation\n\n"
+            f"Top-k sweep: {k_values}\n"
+            f"Endpoint: {exemplar_endpoint}\n"
+            f"Seed: {settings.seed if settings.seed is not None else 42}\n"
+            f"Reference: {reference_path if reference_path else 'none'}",
+            border_style="magenta",
+        )
+    )
+
+    report = asyncio.run(_run())
+
+    # An explicit --output-report path wins; otherwise the report is written
+    # under settings.output_dir with a freshly generated run id in its name.
+    default_report_path = Path(settings.output_dir) / f"retrieval_ablation_{generate_run_id()}.json"
+    report_path = Path(output_report) if output_report else default_report_path
+    saved_path = runner.save_report(report, report_path)
+
+    # The human-preference line is shown only when the summary carries a
+    # best_human_preference_variant entry.
+    summary = report.summary
+    human_pref_line = ""
+    if summary.get("best_human_preference_variant") is not None:
+        human_pref_line = (
+            f"Best human preference: {summary.get('best_human_preference_variant')} "
+            f"({summary.get('best_human_preference_score')})\n"
+        )
+    console.print(
+        Panel.fit(
+            "[bold]Ablation Summary[/bold]\n\n"
+            f"Best alignment: {summary.get('best_alignment_variant')} "
+            f"({summary.get('best_alignment_score')})\n"
+            f"{human_pref_line}"
+            f"Fastest: {summary.get('fastest_variant')} "
+            f"({summary.get('fastest_total_seconds')}s)\n"
+            f"Fewest iterations: {summary.get('fewest_iterations_variant')} "
+            f"({summary.get('fewest_iterations')})\n\n"
+            f"Report: [bold]{saved_path}[/bold]",
+            border_style="cyan",
+        )
+    )
+
+
 # ── Data subcommands ──────────────────────────────────────────────