Skip to content

Commit 73ea7d1

Browse files
committed
fix: per-import caching with correct annotations (#40)
- Each import cached independently. - Annotation points to the import's cache. - Works for local and remote imports. REFERENCE: #40
1 parent 4055b2a commit 73ea7d1

File tree

4 files changed

+102
-109
lines changed

4 files changed

+102
-109
lines changed

src/workflow_as_list/executor/loader.py

Lines changed: 84 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
# src/workflow_as_list/executor/loader.py
2-
"""Workflow loader - expands imports with caching.
2+
"""Workflow loader - expands imports with per-import caching.
33
44
REFERENCE: #40 - Import caching mechanism for human-readable workflow files
55
66
Design:
7-
- import: URL/path → fetch and cache to .imports/
8-
- Add annotation: # you see: <cache-path> <sha256:hash>
9-
- Cache persists across executions
10-
- Hash verification detects content changes
7+
- Each import: URL/path → cached independently to .imports/<name>
8+
- Each import line gets annotation: # you see: <cache-path> <sha256:hash>
9+
- Annotation points to the IMPORT's cache, not parent file
10+
- Users can directly open cache file to read imported content
1111
1212
Usage:
1313
loader = WorkflowLoader(base_path)
@@ -22,14 +22,10 @@
2222

2323

2424
class WorkflowLoader:
25-
"""Load and expand workflow imports with caching."""
25+
"""Load and expand workflow imports with per-import caching."""
2626

2727
def __init__(self, base_path: Path):
28-
"""Initialize loader with project base path.
29-
30-
Args:
31-
base_path: Project root directory
32-
"""
28+
"""Initialize loader with project base path."""
3329
self.base_path = base_path
3430
self.imports_dir = base_path / IMPORTS_DIR
3531
self.imports_dir.mkdir(exist_ok=True)
@@ -39,73 +35,69 @@ def load(self, workflow_path: Path, cache: bool = True) -> str:
3935
4036
Args:
4137
workflow_path: Path to workflow file
42-
cache: Whether to cache expanded content
38+
cache: Whether to cache imported content
4339
4440
Returns:
4541
Expanded workflow content
4642
"""
4743
content = workflow_path.read_text()
48-
expanded = self._expand_imports(content, workflow_path.parent)
44+
expanded = self._expand_imports(content, workflow_path.parent, cache)
4945

5046
if cache:
51-
# Save to cache and add annotation
52-
cache_path = self.get_cache_path(str(workflow_path), self.base_path)
53-
cache_path.write_text(expanded)
54-
55-
# Compute hash and create annotation
56-
hash_value = self.compute_hash(expanded)
57-
rel_cache_path = cache_path.relative_to(self.base_path)
58-
59-
# Check if annotation already exists
60-
if not self._has_cache_annotation(content, str(rel_cache_path)):
61-
# Add annotation to source file
62-
annotated = self._add_annotation_to_content(
63-
content, workflow_path, rel_cache_path, hash_value
64-
)
65-
workflow_path.write_text(annotated)
47+
# Add annotations to source file
48+
self._add_annotations_to_source(workflow_path, content)
6649

6750
return expanded
6851

69-
def _has_cache_annotation(self, content: str, cache_path: str) -> bool:
70-
"""Check if content already has cache annotation for this path."""
71-
return f"# you see: {cache_path}" in content
72-
73-
def _add_annotation_to_content(
74-
self, content: str, workflow_path: Path, cache_path: Path, hash_value: str
75-
) -> str:
76-
"""Add cache annotation BEFORE import line with matching indentation."""
52+
def _add_annotations_to_source(self, workflow_path: Path, content: str) -> None:
53+
"""Add cache annotations to source file for each import."""
7754
lines = content.split("\n")
7855
output = []
79-
added = set()
56+
added_annotations = {} # import_path -> annotation
57+
58+
for line in lines:
59+
stripped = line.strip()
60+
61+
if stripped.startswith("import:"):
62+
import_path = stripped.split("import:", 1)[1].strip()
63+
64+
# Check if annotation already exists
65+
if import_path not in added_annotations:
66+
# Fetch and cache this import
67+
imported_content = self._fetch_import(
68+
import_path, workflow_path.parent
69+
)
70+
expanded = self._expand_imports(
71+
imported_content, workflow_path.parent, False
72+
)
73+
74+
cache_path = self._get_import_cache_path(
75+
import_path, workflow_path.parent
76+
)
77+
cache_path.write_text(expanded)
8078

81-
for i, line in enumerate(lines):
82-
if line.strip().startswith("import:"):
83-
has_annotation = False
84-
if i + 1 < len(lines) and "# you see:" in lines[i + 1]:
85-
has_annotation = True
79+
hash_value = self.compute_hash(expanded)
80+
rel_cache_path = cache_path.relative_to(self.base_path)
8681

87-
if not has_annotation and str(workflow_path) not in added:
88-
# Match import line indentation
82+
# Create annotation
8983
indent = len(line) - len(line.lstrip())
9084
annotation = (
91-
" " * indent + f"# you see: <{cache_path}> <{hash_value}>"
85+
" " * indent + f"# you see: <{rel_cache_path}> <{hash_value}>"
9286
)
93-
output.append(annotation)
94-
added.add(str(workflow_path))
87+
added_annotations[import_path] = annotation
9588

96-
output.append(line)
89+
# Add annotation before import line
90+
output.append(added_annotations[import_path])
9791

98-
return "\n".join(output)
92+
output.append(line)
9993

100-
def _expand_imports(self, content: str, base_path: Path) -> str:
101-
"""Recursively expand imports in content.
94+
# Write back to source file
95+
workflow_path.write_text("\n".join(output))
10296

103-
Args:
104-
content: Workflow content
105-
base_path: Base path for resolving relative imports
97+
def _expand_imports(self, content: str, base_path: Path, cache: bool = True) -> str:
98+
"""Recursively expand imports with per-import caching.
10699
107-
Returns:
108-
Expanded content with cache annotations
100+
Each import is cached independently and annotated.
109101
"""
110102
lines = content.split("\n")
111103
output = []
@@ -114,19 +106,37 @@ def _expand_imports(self, content: str, base_path: Path) -> str:
114106
stripped = line.strip()
115107

116108
if stripped.startswith("import:"):
117-
# Preserve original import line as comment
118-
output.append(f"# {line}")
119-
120109
# Extract import path/URL
121110
import_path = stripped.split("import:", 1)[1].strip()
122111

123-
# Fetch and expand imported content
112+
# Fetch imported content
124113
imported_content = self._fetch_import(import_path, base_path)
125114

126115
# Recursively expand nested imports
127-
expanded = self._expand_imports(imported_content, base_path)
116+
expanded = self._expand_imports(imported_content, base_path, cache)
117+
118+
if cache:
119+
# Cache this import independently
120+
cache_path = self._get_import_cache_path(import_path, base_path)
121+
cache_path.write_text(expanded)
122+
123+
# Compute hash for this import
124+
hash_value = self.compute_hash(expanded)
125+
126+
# Get relative cache path for annotation
127+
rel_cache_path = cache_path.relative_to(self.base_path)
128+
129+
# Add annotation BEFORE import line (with matching indent)
130+
indent = len(line) - len(line.lstrip())
131+
annotation = (
132+
" " * indent + f"# you see: <{rel_cache_path}> <{hash_value}>"
133+
)
134+
output.append(annotation)
128135

129-
# Add boundary markers
136+
# Preserve original import as comment
137+
output.append(f"# {line}")
138+
139+
# Add boundary markers with expanded content
130140
output.append(f"# === START: Imported from {import_path} ===")
131141
output.extend(expanded.split("\n"))
132142
output.append("# === END: Imported ===")
@@ -136,38 +146,17 @@ def _expand_imports(self, content: str, base_path: Path) -> str:
136146
return "\n".join(output)
137147

138148
def _fetch_import(self, import_path: str, base_path: Path) -> str:
139-
"""Fetch import content (local file or remote URL).
140-
141-
Args:
142-
import_path: Path or URL to import
143-
base_path: Base path for resolving relative paths
144-
145-
Returns:
146-
Imported content
147-
"""
149+
"""Fetch import content (local file or remote URL)."""
148150
if import_path.startswith(("http://", "https://")):
149151
return self._fetch_remote(import_path)
150152
else:
151153
return self._fetch_local(import_path, base_path)
152154

153155
def _fetch_local(self, path: str, base_path: Path) -> str:
154-
"""Fetch local file import.
155-
156-
Args:
157-
path: Relative or absolute path
158-
base_path: Base path for resolving relative paths
159-
160-
Returns:
161-
File content
162-
"""
163-
if Path(path).is_absolute():
164-
file_path = Path(path)
165-
else:
166-
file_path = base_path / path
167-
156+
"""Fetch local file import."""
157+
file_path = Path(path) if Path(path).is_absolute() else base_path / path
168158
if not file_path.exists():
169159
raise FileNotFoundError(f"Import not found: {file_path}")
170-
171160
return file_path.read_text()
172161

173162
def _fetch_remote(self, url: str) -> str:
@@ -180,27 +169,10 @@ def _fetch_remote(self, url: str) -> str:
180169
except Exception as e:
181170
raise RuntimeError(f"Failed to fetch {url}: {e}") from e
182171

183-
def compute_hash(self, content: str) -> str:
184-
"""Compute SHA-256 hash of content.
172+
def _get_import_cache_path(self, import_path: str, base_path: Path) -> Path:
173+
"""Get cache file path for a single import.
185174
186-
Args:
187-
content: Content to hash
188-
189-
Returns:
190-
SHA-256 hash in format "sha256:<hex>"
191-
"""
192-
hash_value = hashlib.sha256(content.encode("utf-8")).hexdigest()
193-
return f"sha256:{hash_value}"
194-
195-
def get_cache_path(self, import_path: str, base_path: Path) -> Path:
196-
"""Get cache file path for an import.
197-
198-
Args:
199-
import_path: Original import path/URL
200-
base_path: Base path for resolving relative paths
201-
202-
Returns:
203-
Cache file path in .imports/ directory
175+
Each import is cached independently with a clear name.
204176
"""
205177
if import_path.startswith(("http://", "https://")):
206178
# URL: create path from URL structure
@@ -221,9 +193,14 @@ def get_cache_path(self, import_path: str, base_path: Path) -> Path:
221193
cache_path.parent.mkdir(parents=True, exist_ok=True)
222194
return cache_path
223195

196+
def compute_hash(self, content: str) -> str:
197+
"""Compute SHA-256 hash of content."""
198+
hash_value = hashlib.sha256(content.encode("utf-8")).hexdigest()
199+
return f"sha256:{hash_value}"
200+
224201
def validate_cache_annotation(self, annotation: str) -> tuple[str, str] | None:
225202
"""Validate cache annotation format: # you see: <path> <algo:hash>."""
226-
pattern = r"# you see: ([\w./-]+) <(sha256|md5):([a-f0-9]+)>"
203+
pattern = r"# you see: <([\w./-]+)> <(sha256|md5):([a-f0-9]+)>"
227204
match = re.match(pattern, annotation.strip())
228205
if not match:
229206
return None

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

workflow/test-import.workflow.list

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
- (start) Test Import Workflow
77
# Import base workflow for common steps
8-
# you see: <.imports/workflow/test-import.workflow.list> <sha256:f67debb9e7b8d86bdfaa071c6c22a15b85a69f720fb1c55de2dde1ac838b2505>
8+
# you see: <.imports/main.workflow.list> <sha256:6b30743e0e2cbec36c0c98c0fd27814a8ba6fd4a73aed0e40d020111cc199994>
99
import: ./main.workflow.list
1010

1111
- (test) Local test step
workflow/test-remote-import.workflow.list

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# test-remote-import.workflow.list
2+
# Purpose: Test remote URL import caching
3+
#
4+
# This workflow tests remote import expansion.
5+
6+
- (start) Test Remote Import
7+
# Import from remote URL
8+
# you see: <.imports/raw.githubusercontent.com/tracer-mohist/workflow-as-list/refs/heads/main/examples/git/commit.workflow.list> <sha256:61726152b038af779e07b68db164187111b6d02a6427db25c03fd987f4ac0c30>
9+
import: https://raw.githubusercontent.com/tracer-mohist/workflow-as-list/refs/heads/main/examples/git/commit.workflow.list
10+
11+
- (test) Verify import
12+
- Ask: Remote import cached? (yes/no)
13+
- If yes: Print "SUCCESS"
14+
- If no: Print "FAILED"
15+
16+
- End

0 commit comments

Comments (0)