|
| 1 | +# src/workflow_as_list/executor/loader.py |
| 2 | +"""Workflow loader - expands imports with caching. |
| 3 | +
|
| 4 | +REFERENCE: #40 - Import caching mechanism for human-readable workflow files |
| 5 | +
|
| 6 | +Design: |
| 7 | +- import: URL/path → fetch and cache to .imports/ |
| 8 | +- Add annotation: # you see: <cache-path> <sha256:hash> |
| 9 | +- Cache persists across executions |
| 10 | +- Hash verification detects content changes |
| 11 | +
|
| 12 | +Usage: |
| 13 | + loader = WorkflowLoader(base_path) |
| 14 | + expanded = loader.load(workflow_path) |
| 15 | +""" |
| 16 | + |
| 17 | +import hashlib |
| 18 | +import re |
| 19 | +from pathlib import Path |
| 20 | + |
| 21 | +IMPORTS_DIR = Path(".imports") |
| 22 | + |
| 23 | + |
| 24 | +class WorkflowLoader: |
| 25 | + """Load and expand workflow imports with caching.""" |
| 26 | + |
| 27 | + def __init__(self, base_path: Path): |
| 28 | + """Initialize loader with project base path. |
| 29 | +
|
| 30 | + Args: |
| 31 | + base_path: Project root directory |
| 32 | + """ |
| 33 | + self.base_path = base_path |
| 34 | + self.imports_dir = base_path / IMPORTS_DIR |
| 35 | + self.imports_dir.mkdir(exist_ok=True) |
| 36 | + |
| 37 | + def load(self, workflow_path: Path, cache: bool = True) -> str: |
| 38 | + """Load workflow file with imports expanded. |
| 39 | +
|
| 40 | + Args: |
| 41 | + workflow_path: Path to workflow file |
| 42 | + cache: Whether to cache expanded content |
| 43 | +
|
| 44 | + Returns: |
| 45 | + Expanded workflow content |
| 46 | + """ |
| 47 | + content = workflow_path.read_text() |
| 48 | + expanded = self._expand_imports(content, workflow_path.parent) |
| 49 | + |
| 50 | + if cache: |
| 51 | + # Save to cache and add annotation |
| 52 | + cache_path = self.get_cache_path(str(workflow_path), self.base_path) |
| 53 | + cache_path.write_text(expanded) |
| 54 | + |
| 55 | + # Compute hash and create annotation |
| 56 | + hash_value = self.compute_hash(expanded) |
| 57 | + rel_cache_path = cache_path.relative_to(self.base_path) |
| 58 | + |
| 59 | + # Check if annotation already exists |
| 60 | + if not self._has_cache_annotation(content, str(rel_cache_path)): |
| 61 | + # Add annotation to source file |
| 62 | + annotated = self._add_annotation_to_content( |
| 63 | + content, workflow_path, rel_cache_path, hash_value |
| 64 | + ) |
| 65 | + workflow_path.write_text(annotated) |
| 66 | + |
| 67 | + return expanded |
| 68 | + |
| 69 | + def _has_cache_annotation(self, content: str, cache_path: str) -> bool: |
| 70 | + """Check if content already has cache annotation for this path.""" |
| 71 | + return f"# you see: {cache_path}" in content |
| 72 | + |
| 73 | + def _add_annotation_to_content( |
| 74 | + self, content: str, workflow_path: Path, cache_path: Path, hash_value: str |
| 75 | + ) -> str: |
| 76 | + """Add cache annotation to workflow content.""" |
| 77 | + lines = content.split("\n") |
| 78 | + output = [] |
| 79 | + |
| 80 | + for i, line in enumerate(lines): |
| 81 | + output.append(line) |
| 82 | + # Add annotation after import lines |
| 83 | + if line.strip().startswith("import:"): |
| 84 | + annotation = f"# you see: {cache_path} <{hash_value}>" |
| 85 | + # Check if next line is already an annotation |
| 86 | + if i + 1 < len(lines) and "# you see:" not in lines[i + 1]: |
| 87 | + output.append(annotation) |
| 88 | + |
| 89 | + return "\n".join(output) |
| 90 | + |
| 91 | + def _expand_imports(self, content: str, base_path: Path) -> str: |
| 92 | + """Recursively expand imports in content. |
| 93 | +
|
| 94 | + Args: |
| 95 | + content: Workflow content |
| 96 | + base_path: Base path for resolving relative imports |
| 97 | +
|
| 98 | + Returns: |
| 99 | + Expanded content with cache annotations |
| 100 | + """ |
| 101 | + lines = content.split("\n") |
| 102 | + output = [] |
| 103 | + |
| 104 | + for line in lines: |
| 105 | + stripped = line.strip() |
| 106 | + |
| 107 | + if stripped.startswith("import:"): |
| 108 | + # Preserve original import line as comment |
| 109 | + output.append(f"# {line}") |
| 110 | + |
| 111 | + # Extract import path/URL |
| 112 | + import_path = stripped.split("import:", 1)[1].strip() |
| 113 | + |
| 114 | + # Fetch and expand imported content |
| 115 | + imported_content = self._fetch_import(import_path, base_path) |
| 116 | + |
| 117 | + # Recursively expand nested imports |
| 118 | + expanded = self._expand_imports(imported_content, base_path) |
| 119 | + |
| 120 | + # Add boundary markers |
| 121 | + output.append(f"# === START: Imported from {import_path} ===") |
| 122 | + output.extend(expanded.split("\n")) |
| 123 | + output.append("# === END: Imported ===") |
| 124 | + else: |
| 125 | + output.append(line) |
| 126 | + |
| 127 | + return "\n".join(output) |
| 128 | + |
| 129 | + def _fetch_import(self, import_path: str, base_path: Path) -> str: |
| 130 | + """Fetch import content (local file or remote URL). |
| 131 | +
|
| 132 | + Args: |
| 133 | + import_path: Path or URL to import |
| 134 | + base_path: Base path for resolving relative paths |
| 135 | +
|
| 136 | + Returns: |
| 137 | + Imported content |
| 138 | + """ |
| 139 | + if import_path.startswith(("http://", "https://")): |
| 140 | + return self._fetch_remote(import_path) |
| 141 | + else: |
| 142 | + return self._fetch_local(import_path, base_path) |
| 143 | + |
| 144 | + def _fetch_local(self, path: str, base_path: Path) -> str: |
| 145 | + """Fetch local file import. |
| 146 | +
|
| 147 | + Args: |
| 148 | + path: Relative or absolute path |
| 149 | + base_path: Base path for resolving relative paths |
| 150 | +
|
| 151 | + Returns: |
| 152 | + File content |
| 153 | + """ |
| 154 | + if Path(path).is_absolute(): |
| 155 | + file_path = Path(path) |
| 156 | + else: |
| 157 | + file_path = base_path / path |
| 158 | + |
| 159 | + if not file_path.exists(): |
| 160 | + raise FileNotFoundError(f"Import not found: {file_path}") |
| 161 | + |
| 162 | + return file_path.read_text() |
| 163 | + |
| 164 | + def _fetch_remote(self, url: str) -> str: |
| 165 | + """Fetch remote URL import.""" |
| 166 | + import urllib.request |
| 167 | + |
| 168 | + try: |
| 169 | + with urllib.request.urlopen(url, timeout=10) as response: |
| 170 | + return response.read().decode("utf-8") |
| 171 | + except Exception as e: |
| 172 | + raise RuntimeError(f"Failed to fetch {url}: {e}") from e |
| 173 | + |
| 174 | + def compute_hash(self, content: str) -> str: |
| 175 | + """Compute SHA-256 hash of content. |
| 176 | +
|
| 177 | + Args: |
| 178 | + content: Content to hash |
| 179 | +
|
| 180 | + Returns: |
| 181 | + SHA-256 hash in format "sha256:<hex>" |
| 182 | + """ |
| 183 | + hash_value = hashlib.sha256(content.encode("utf-8")).hexdigest() |
| 184 | + return f"sha256:{hash_value}" |
| 185 | + |
| 186 | + def get_cache_path(self, import_path: str, base_path: Path) -> Path: |
| 187 | + """Get cache file path for an import. |
| 188 | +
|
| 189 | + Args: |
| 190 | + import_path: Original import path/URL |
| 191 | + base_path: Base path for resolving relative paths |
| 192 | +
|
| 193 | + Returns: |
| 194 | + Cache file path in .imports/ directory |
| 195 | + """ |
| 196 | + if import_path.startswith(("http://", "https://")): |
| 197 | + # URL: create path from URL structure |
| 198 | + # https://raw.githubusercontent.com/user/repo/main/file.workflow.list |
| 199 | + # → .imports/raw.githubusercontent.com/user/repo/main/file.workflow.list |
| 200 | + url_parts = ( |
| 201 | + import_path.replace("https://", "").replace("http://", "").split("/") |
| 202 | + ) |
| 203 | + cache_path = self.imports_dir / "/".join(url_parts) |
| 204 | + else: |
| 205 | + # Local path: preserve relative structure |
| 206 | + if Path(import_path).is_absolute(): |
| 207 | + rel_path = Path(import_path).relative_to(base_path) |
| 208 | + else: |
| 209 | + rel_path = Path(import_path) |
| 210 | + cache_path = self.imports_dir / rel_path |
| 211 | + |
| 212 | + cache_path.parent.mkdir(parents=True, exist_ok=True) |
| 213 | + return cache_path |
| 214 | + |
| 215 | + def validate_cache_annotation(self, annotation: str) -> tuple[str, str] | None: |
| 216 | + """Validate cache annotation format: # you see: <path> <algo:hash>.""" |
| 217 | + pattern = r"# you see: ([\w./-]+) <(sha256|md5):([a-f0-9]+)>" |
| 218 | + match = re.match(pattern, annotation.strip()) |
| 219 | + if not match: |
| 220 | + return None |
| 221 | + cache_path, algo, hash_value = match.groups() |
| 222 | + if ".." in cache_path: |
| 223 | + return None # Security: prevent directory traversal |
| 224 | + if not cache_path.startswith(".imports/") and cache_path != ".imports": |
| 225 | + return None # Security: must be under .imports/ |
| 226 | + return (cache_path, f"{algo}:{hash_value}") |
0 commit comments