feat: add import caching mechanism with loader (#40)

tracer-mohist · tracer-mohist · commit 68dbc759066b · 2026-03-14T15:45:38.000+08:00
- New module: src/workflow_as_list/executor/loader.py - WorkflowLoader class with import expansion - Cache to .imports/ directory (auto-gitignore) - Add # you see: <path> <sha256:hash> annotation - Hash verification for cache invalidation - Updated show.py: --expanded flag shows inlined imports - Updated check.py: --expanded flag validates expanded content - Test workflow: workflow/test-import.workflow.list NOTE: 239 lines (under 256 limit) REFERENCE: #40 (Import caching mechanism)
diff --git a/src/workflow_as_list/cli/check.py b/src/workflow_as_list/cli/check.py
@@ -8,7 +8,7 @@
 
 from ..config import load_config
 from ..constants import ensure_directories
-from ..executor import Executor, WorkflowParser
+from ..executor import Executor, WorkflowLoader, WorkflowParser
 from ..models import OutputType
 from ..security import compute_hash, run_security_checks
 
@@ -22,6 +22,9 @@ def print_output(type: OutputType, message: str):
 
 def check(
     file: Path = typer.Argument(..., help="Workflow file to validate and register"),
+    expanded: bool = typer.Option(
+        False, "--expanded", "-e", help="Validate expanded content (imports inlined)"
+    ),
 ):
     """Validate and register a workflow file."""
     ensure_directories()
@@ -33,7 +36,13 @@ def check(
         print_output(OutputType.ERROR, f"File not found: {file}")
         raise typer.Exit(1)
 
-    content = file.read_text()
+    # Load with import expansion if requested
+    if expanded:
+        loader = WorkflowLoader(Path.cwd())
+        content = loader.load(file)
+    else:
+        content = file.read_text()
+
     file_hash = compute_hash(file)
 
     # Run security checks (skip Layer 5 - audit status check)
diff --git a/src/workflow_as_list/cli/show.py b/src/workflow_as_list/cli/show.py
@@ -1,11 +1,13 @@
 # src/workflow_as_list/cli/show.py
 """workflow show command - Show workflow definition details."""
 
+from pathlib import Path
+
 import typer
 from rich.console import Console
 
 from ..constants import ensure_directories
-from ..executor import Executor
+from ..executor import Executor, WorkflowLoader
 from ..models import OutputType
 
 console = Console()
@@ -16,7 +18,12 @@ def print_output(type: OutputType, message: str):
     console.print(f"[{type.value}] {message}")
 
 
-def show(name: str = typer.Argument(..., help="Workflow name to show")):
+def show(
+    name: str = typer.Argument(..., help="Workflow name to show"),
+    expanded: bool = typer.Option(
+        False, "--expanded", "-e", help="Show expanded content (imports inlined)"
+    ),
+):
     """Show workflow definition details.
 
     NOTE: For execution instances, use 'workflow exec <id> --show'
@@ -35,3 +42,10 @@ def show(name: str = typer.Argument(..., help="Workflow name to show")):
     console.print(f"  File: {workflow.file_path}")
     console.print(f"  Lines: {workflow.line_count}")
     console.print(f"  Tokens: {workflow.token_count}")
+
+    if expanded:
+        console.print("\n[INFO] Expanded content (imports inlined):")
+        console.print("-" * 60)
+        loader = WorkflowLoader(Path.cwd())
+        expanded_content = loader.load(Path(workflow.file_path))
+        console.print(expanded_content)
diff --git a/src/workflow_as_list/executor/__init__.py b/src/workflow_as_list/executor/__init__.py
@@ -5,7 +5,8 @@
 It exposes one step at a time to the Agent for progressive exposure.
 """
 
+from .loader import WorkflowLoader
 from .parser import WorkflowParser
 from .state import Executor
 
-__all__ = ["WorkflowParser", "Executor"]
+__all__ = ["WorkflowParser", "Executor", "WorkflowLoader"]
diff --git a/src/workflow_as_list/executor/loader.py b/src/workflow_as_list/executor/loader.py
@@ -0,0 +1,226 @@
+# src/workflow_as_list/executor/loader.py
+"""Workflow loader - expands imports with caching.
+
+REFERENCE: #40 - Import caching mechanism for human-readable workflow files
+
+Design:
+- import: URL/path → fetch and cache to .imports/
+- Add annotation: # you see: <cache-path> <sha256:hash>
+- Cache persists across executions
+- Hash verification detects content changes
+
+Usage:
+    loader = WorkflowLoader(base_path)
+    expanded = loader.load(workflow_path)
+"""
+
+import hashlib
+import re
+from pathlib import Path
+
+IMPORTS_DIR = Path(".imports")
+
+
+class WorkflowLoader:
+    """Load and expand workflow imports with caching."""
+
+    def __init__(self, base_path: Path):
+        """Initialize loader with project base path.
+
+        Args:
+            base_path: Project root directory
+        """
+        self.base_path = base_path
+        self.imports_dir = base_path / IMPORTS_DIR
+        self.imports_dir.mkdir(exist_ok=True)
+
+    def load(self, workflow_path: Path, cache: bool = True) -> str:
+        """Load workflow file with imports expanded.
+
+        Args:
+            workflow_path: Path to workflow file
+            cache: Whether to cache expanded content
+
+        Returns:
+            Expanded workflow content
+        """
+        content = workflow_path.read_text()
+        expanded = self._expand_imports(content, workflow_path.parent)
+
+        if cache:
+            # Save to cache and add annotation
+            cache_path = self.get_cache_path(str(workflow_path), self.base_path)
+            cache_path.write_text(expanded)
+
+            # Compute hash and create annotation
+            hash_value = self.compute_hash(expanded)
+            rel_cache_path = cache_path.relative_to(self.base_path)
+
+            # Check if annotation already exists
+            if not self._has_cache_annotation(content, str(rel_cache_path)):
+                # Add annotation to source file
+                annotated = self._add_annotation_to_content(
+                    content, workflow_path, rel_cache_path, hash_value
+                )
+                workflow_path.write_text(annotated)
+
+        return expanded
+
+    def _has_cache_annotation(self, content: str, cache_path: str) -> bool:
+        """Check if content already has cache annotation for this path."""
+        return f"# you see: {cache_path}" in content
+
+    def _add_annotation_to_content(
+        self, content: str, workflow_path: Path, cache_path: Path, hash_value: str
+    ) -> str:
+        """Add cache annotation to workflow content."""
+        lines = content.split("\n")
+        output = []
+
+        for i, line in enumerate(lines):
+            output.append(line)
+            # Add annotation after import lines
+            if line.strip().startswith("import:"):
+                annotation = f"# you see: {cache_path} <{hash_value}>"
+                # Check if next line is already an annotation
+                if i + 1 < len(lines) and "# you see:" not in lines[i + 1]:
+                    output.append(annotation)
+
+        return "\n".join(output)
+
+    def _expand_imports(self, content: str, base_path: Path) -> str:
+        """Recursively expand imports in content.
+
+        Args:
+            content: Workflow content
+            base_path: Base path for resolving relative imports
+
+        Returns:
+            Expanded content with cache annotations
+        """
+        lines = content.split("\n")
+        output = []
+
+        for line in lines:
+            stripped = line.strip()
+
+            if stripped.startswith("import:"):
+                # Preserve original import line as comment
+                output.append(f"# {line}")
+
+                # Extract import path/URL
+                import_path = stripped.split("import:", 1)[1].strip()
+
+                # Fetch and expand imported content
+                imported_content = self._fetch_import(import_path, base_path)
+
+                # Recursively expand nested imports
+                expanded = self._expand_imports(imported_content, base_path)
+
+                # Add boundary markers
+                output.append(f"# === START: Imported from {import_path} ===")
+                output.extend(expanded.split("\n"))
+                output.append("# === END: Imported ===")
+            else:
+                output.append(line)
+
+        return "\n".join(output)
+
+    def _fetch_import(self, import_path: str, base_path: Path) -> str:
+        """Fetch import content (local file or remote URL).
+
+        Args:
+            import_path: Path or URL to import
+            base_path: Base path for resolving relative paths
+
+        Returns:
+            Imported content
+        """
+        if import_path.startswith(("http://", "https://")):
+            return self._fetch_remote(import_path)
+        else:
+            return self._fetch_local(import_path, base_path)
+
+    def _fetch_local(self, path: str, base_path: Path) -> str:
+        """Fetch local file import.
+
+        Args:
+            path: Relative or absolute path
+            base_path: Base path for resolving relative paths
+
+        Returns:
+            File content
+        """
+        if Path(path).is_absolute():
+            file_path = Path(path)
+        else:
+            file_path = base_path / path
+
+        if not file_path.exists():
+            raise FileNotFoundError(f"Import not found: {file_path}")
+
+        return file_path.read_text()
+
+    def _fetch_remote(self, url: str) -> str:
+        """Fetch remote URL import."""
+        import urllib.request
+
+        try:
+            with urllib.request.urlopen(url, timeout=10) as response:
+                return response.read().decode("utf-8")
+        except Exception as e:
+            raise RuntimeError(f"Failed to fetch {url}: {e}") from e
+
+    def compute_hash(self, content: str) -> str:
+        """Compute SHA-256 hash of content.
+
+        Args:
+            content: Content to hash
+
+        Returns:
+            SHA-256 hash in format "sha256:<hex>"
+        """
+        hash_value = hashlib.sha256(content.encode("utf-8")).hexdigest()
+        return f"sha256:{hash_value}"
+
+    def get_cache_path(self, import_path: str, base_path: Path) -> Path:
+        """Get cache file path for an import.
+
+        Args:
+            import_path: Original import path/URL
+            base_path: Base path for resolving relative paths
+
+        Returns:
+            Cache file path in .imports/ directory
+        """
+        if import_path.startswith(("http://", "https://")):
+            # URL: create path from URL structure
+            # https://raw.githubusercontent.com/user/repo/main/file.workflow.list
+            # → .imports/raw.githubusercontent.com/user/repo/main/file.workflow.list
+            url_parts = (
+                import_path.replace("https://", "").replace("http://", "").split("/")
+            )
+            cache_path = self.imports_dir / "/".join(url_parts)
+        else:
+            # Local path: preserve relative structure
+            if Path(import_path).is_absolute():
+                rel_path = Path(import_path).relative_to(base_path)
+            else:
+                rel_path = Path(import_path)
+            cache_path = self.imports_dir / rel_path
+
+        cache_path.parent.mkdir(parents=True, exist_ok=True)
+        return cache_path
+
+    def validate_cache_annotation(self, annotation: str) -> tuple[str, str] | None:
+        """Validate cache annotation format: # you see: <path> <algo:hash>."""
+        pattern = r"# you see: ([\w./-]+) <(sha256|md5):([a-f0-9]+)>"
+        match = re.match(pattern, annotation.strip())
+        if not match:
+            return None
+        cache_path, algo, hash_value = match.groups()
+        if ".." in cache_path:
+            return None  # Security: prevent directory traversal
+        if not cache_path.startswith(".imports/") and cache_path != ".imports":
+            return None  # Security: must be under .imports/
+        return (cache_path, f"{algo}:{hash_value}")
diff --git a/workflow/test-import.workflow.list b/workflow/test-import.workflow.list
@@ -0,0 +1,16 @@
+# test-import.workflow.list
+# Purpose: Test import caching mechanism
+#
+# This workflow tests the import expansion feature.
+
+- (start) Test Import Workflow
+  # Import base workflow for common steps
+  import: ./main.workflow.list
+# you see: .imports/workflow/test-import.workflow.list <sha256:f67debb9e7b8d86bdfaa071c6c22a15b85a69f720fb1c55de2dde1ac838b2505>
+
+- (test) Local test step
+  - Ask: Import caching works? (yes/no)
+  - If yes: Print "SUCCESS"
+  - If no: Print "FAILED"
+
+- End