# src/workflow_as_list/executor/loader.py
"""Workflow loader - expands imports with per-import caching.

REFERENCE: #40 - Import caching mechanism for human-readable workflow files

Design:
- Each import: URL/path → cached independently to .imports/<name>
- Each import line gets annotation: # you see: <cache-path> <sha256:hash>
- Annotation points to the IMPORT's cache, not parent file
- Users can directly open cache file to read imported content

Usage:
    loader = WorkflowLoader(base_path)
2222
2323
class WorkflowLoader:
    """Load and expand workflow imports with per-import caching."""

    def __init__(self, base_path: Path):
        """Initialize loader with project base path."""
        self.base_path = base_path
        self.imports_dir = base_path / IMPORTS_DIR
        # Create the cache directory eagerly so later cache writes never
        # fail on a missing parent directory.
        self.imports_dir.mkdir(exist_ok=True)
@@ -39,73 +35,69 @@ def load(self, workflow_path: Path, cache: bool = True) -> str:
3935
4036 Args:
4137 workflow_path: Path to workflow file
42- cache: Whether to cache expanded content
38+ cache: Whether to cache imported content
4339
4440 Returns:
4541 Expanded workflow content
4642 """
4743 content = workflow_path .read_text ()
48- expanded = self ._expand_imports (content , workflow_path .parent )
44+ expanded = self ._expand_imports (content , workflow_path .parent , cache )
4945
5046 if cache :
51- # Save to cache and add annotation
52- cache_path = self .get_cache_path (str (workflow_path ), self .base_path )
53- cache_path .write_text (expanded )
54-
55- # Compute hash and create annotation
56- hash_value = self .compute_hash (expanded )
57- rel_cache_path = cache_path .relative_to (self .base_path )
58-
59- # Check if annotation already exists
60- if not self ._has_cache_annotation (content , str (rel_cache_path )):
61- # Add annotation to source file
62- annotated = self ._add_annotation_to_content (
63- content , workflow_path , rel_cache_path , hash_value
64- )
65- workflow_path .write_text (annotated )
47+ # Add annotations to source file
48+ self ._add_annotations_to_source (workflow_path , content )
6649
6750 return expanded
6851
69- def _has_cache_annotation (self , content : str , cache_path : str ) -> bool :
70- """Check if content already has cache annotation for this path."""
71- return f"# you see: { cache_path } " in content
72-
73- def _add_annotation_to_content (
74- self , content : str , workflow_path : Path , cache_path : Path , hash_value : str
75- ) -> str :
76- """Add cache annotation BEFORE import line with matching indentation."""
52+ def _add_annotations_to_source (self , workflow_path : Path , content : str ) -> None :
53+ """Add cache annotations to source file for each import."""
7754 lines = content .split ("\n " )
7855 output = []
79- added = set ()
56+ added_annotations = {} # import_path -> annotation
57+
58+ for line in lines :
59+ stripped = line .strip ()
60+
61+ if stripped .startswith ("import:" ):
62+ import_path = stripped .split ("import:" , 1 )[1 ].strip ()
63+
64+ # Check if annotation already exists
65+ if import_path not in added_annotations :
66+ # Fetch and cache this import
67+ imported_content = self ._fetch_import (
68+ import_path , workflow_path .parent
69+ )
70+ expanded = self ._expand_imports (
71+ imported_content , workflow_path .parent , False
72+ )
73+
74+ cache_path = self ._get_import_cache_path (
75+ import_path , workflow_path .parent
76+ )
77+ cache_path .write_text (expanded )
8078
81- for i , line in enumerate (lines ):
82- if line .strip ().startswith ("import:" ):
83- has_annotation = False
84- if i + 1 < len (lines ) and "# you see:" in lines [i + 1 ]:
85- has_annotation = True
79+ hash_value = self .compute_hash (expanded )
80+ rel_cache_path = cache_path .relative_to (self .base_path )
8681
87- if not has_annotation and str (workflow_path ) not in added :
88- # Match import line indentation
82+ # Create annotation
8983 indent = len (line ) - len (line .lstrip ())
9084 annotation = (
91- " " * indent + f"# you see: <{ cache_path } > <{ hash_value } >"
85+ " " * indent + f"# you see: <{ rel_cache_path } > <{ hash_value } >"
9286 )
93- output .append (annotation )
94- added .add (str (workflow_path ))
87+ added_annotations [import_path ] = annotation
9588
96- output .append (line )
89+ # Add annotation before import line
90+ output .append (added_annotations [import_path ])
9791
98- return " \n " . join ( output )
92+ output . append ( line )
9993
100- def _expand_imports ( self , content : str , base_path : Path ) -> str :
101- """Recursively expand imports in content.
94+ # Write back to source file
95+ workflow_path . write_text ( " \n " . join ( output ))
10296
103- Args:
104- content: Workflow content
105- base_path: Base path for resolving relative imports
97+ def _expand_imports (self , content : str , base_path : Path , cache : bool = True ) -> str :
98+ """Recursively expand imports with per-import caching.
10699
107- Returns:
108- Expanded content with cache annotations
100+ Each import is cached independently and annotated.
109101 """
110102 lines = content .split ("\n " )
111103 output = []
@@ -114,19 +106,37 @@ def _expand_imports(self, content: str, base_path: Path) -> str:
114106 stripped = line .strip ()
115107
116108 if stripped .startswith ("import:" ):
117- # Preserve original import line as comment
118- output .append (f"# { line } " )
119-
120109 # Extract import path/URL
121110 import_path = stripped .split ("import:" , 1 )[1 ].strip ()
122111
123- # Fetch and expand imported content
112+ # Fetch imported content
124113 imported_content = self ._fetch_import (import_path , base_path )
125114
126115 # Recursively expand nested imports
127- expanded = self ._expand_imports (imported_content , base_path )
116+ expanded = self ._expand_imports (imported_content , base_path , cache )
117+
118+ if cache :
119+ # Cache this import independently
120+ cache_path = self ._get_import_cache_path (import_path , base_path )
121+ cache_path .write_text (expanded )
122+
123+ # Compute hash for this import
124+ hash_value = self .compute_hash (expanded )
125+
126+ # Get relative cache path for annotation
127+ rel_cache_path = cache_path .relative_to (self .base_path )
128+
129+ # Add annotation BEFORE import line (with matching indent)
130+ indent = len (line ) - len (line .lstrip ())
131+ annotation = (
132+ " " * indent + f"# you see: <{ rel_cache_path } > <{ hash_value } >"
133+ )
134+ output .append (annotation )
128135
129- # Add boundary markers
136+ # Preserve original import as comment
137+ output .append (f"# { line } " )
138+
139+ # Add boundary markers with expanded content
130140 output .append (f"# === START: Imported from { import_path } ===" )
131141 output .extend (expanded .split ("\n " ))
132142 output .append ("# === END: Imported ===" )
@@ -136,38 +146,17 @@ def _expand_imports(self, content: str, base_path: Path) -> str:
136146 return "\n " .join (output )
137147
138148 def _fetch_import (self , import_path : str , base_path : Path ) -> str :
139- """Fetch import content (local file or remote URL).
140-
141- Args:
142- import_path: Path or URL to import
143- base_path: Base path for resolving relative paths
144-
145- Returns:
146- Imported content
147- """
149+ """Fetch import content (local file or remote URL)."""
148150 if import_path .startswith (("http://" , "https://" )):
149151 return self ._fetch_remote (import_path )
150152 else :
151153 return self ._fetch_local (import_path , base_path )
152154
153155 def _fetch_local (self , path : str , base_path : Path ) -> str :
154- """Fetch local file import.
155-
156- Args:
157- path: Relative or absolute path
158- base_path: Base path for resolving relative paths
159-
160- Returns:
161- File content
162- """
163- if Path (path ).is_absolute ():
164- file_path = Path (path )
165- else :
166- file_path = base_path / path
167-
156+ """Fetch local file import."""
157+ file_path = Path (path ) if Path (path ).is_absolute () else base_path / path
168158 if not file_path .exists ():
169159 raise FileNotFoundError (f"Import not found: { file_path } " )
170-
171160 return file_path .read_text ()
172161
173162 def _fetch_remote (self , url : str ) -> str :
@@ -180,27 +169,10 @@ def _fetch_remote(self, url: str) -> str:
180169 except Exception as e :
181170 raise RuntimeError (f"Failed to fetch { url } : { e } " ) from e
182171
183- def compute_hash (self , content : str ) -> str :
184- """Compute SHA-256 hash of content .
172+ def _get_import_cache_path (self , import_path : str , base_path : Path ) -> Path :
173+ """Get cache file path for a single import .
185174
186- Args:
187- content: Content to hash
188-
189- Returns:
190- SHA-256 hash in format "sha256:<hex>"
191- """
192- hash_value = hashlib .sha256 (content .encode ("utf-8" )).hexdigest ()
193- return f"sha256:{ hash_value } "
194-
195- def get_cache_path (self , import_path : str , base_path : Path ) -> Path :
196- """Get cache file path for an import.
197-
198- Args:
199- import_path: Original import path/URL
200- base_path: Base path for resolving relative paths
201-
202- Returns:
203- Cache file path in .imports/ directory
175+ Each import is cached independently with a clear name.
204176 """
205177 if import_path .startswith (("http://" , "https://" )):
206178 # URL: create path from URL structure
@@ -221,9 +193,14 @@ def get_cache_path(self, import_path: str, base_path: Path) -> Path:
221193 cache_path .parent .mkdir (parents = True , exist_ok = True )
222194 return cache_path
223195
196+ def compute_hash (self , content : str ) -> str :
197+ """Compute SHA-256 hash of content."""
198+ hash_value = hashlib .sha256 (content .encode ("utf-8" )).hexdigest ()
199+ return f"sha256:{ hash_value } "
200+
224201 def validate_cache_annotation (self , annotation : str ) -> tuple [str , str ] | None :
225202 """Validate cache annotation format: # you see: <path> <algo:hash>."""
226- pattern = r"# you see: ([\w./-]+) <(sha256|md5):([a-f0-9]+)>"
203+ pattern = r"# you see: < ([\w./-]+)> <(sha256|md5):([a-f0-9]+)>"
227204 match = re .match (pattern , annotation .strip ())
228205 if not match :
229206 return None
0 commit comments