Skip to content

Commit 11fbc66

Browse files
authored
Fix #522: Store include deps in fingerprint so changes survive auto-deps tag stripping (#527)
The original calculate_prompt_hash() only worked on the first sync, because auto-deps rewrites the .prompt file and strips its <include> tags. On subsequent syncs, the hash function could no longer see the dependencies.

Fix: Store include_deps (path -> hash) in the fingerprint JSON. On subsequent syncs, calculate_prompt_hash() falls back to the stored deps when the tags are gone. Deps are captured BEFORE auto-deps runs in sync_orchestration.py, so they survive the tag stripping.

- Add include_deps field to the Fingerprint dataclass (backward compatible)
- Add extract_include_deps() and calculate_prompt_hash() with a stored_deps fallback
- Capture deps before auto-deps in sync_orchestration.py and thread them through the fingerprint saves
- Update operation_log.py save_fingerprint() to preserve stored deps
- 11 unit tests + 3 E2E tests covering Greg's exact scenario
1 parent 1b9f59a commit 11fbc66

File tree

5 files changed

+720
-14
lines changed

5 files changed

+720
-14
lines changed

pdd/operation_log.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -231,13 +231,15 @@ def save_fingerprint(
231231
"""
232232
from dataclasses import asdict
233233
from datetime import timezone
234-
from .sync_determine_operation import calculate_current_hashes, Fingerprint
234+
from .sync_determine_operation import calculate_current_hashes, Fingerprint, read_fingerprint
235235
from . import __version__
236236

237237
path = get_fingerprint_path(basename, language)
238238

239-
# Calculate file hashes from paths (if provided)
240-
current_hashes = calculate_current_hashes(paths) if paths else {}
239+
# Issue #522: Pass stored include deps for prompt hash calculation
240+
prev_fp = read_fingerprint(basename, language)
241+
stored_deps = prev_fp.include_deps if prev_fp else None
242+
current_hashes = calculate_current_hashes(paths, stored_include_deps=stored_deps) if paths else {}
241243

242244
# Create Fingerprint with same format as _save_fingerprint_atomic
243245
fingerprint = Fingerprint(
@@ -249,6 +251,7 @@ def save_fingerprint(
249251
example_hash=current_hashes.get('example_hash'),
250252
test_hash=current_hashes.get('test_hash'),
251253
test_files=current_hashes.get('test_files'),
254+
include_deps=current_hashes.get('include_deps'), # Issue #522
252255
)
253256

254257
try:

pdd/sync_determine_operation.py

Lines changed: 149 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
"""
88

99
import os
10+
import re
1011
import sys
1112
import json
1213
import hashlib
@@ -109,6 +110,7 @@ class Fingerprint:
109110
example_hash: Optional[str]
110111
test_hash: Optional[str] # Keep for backward compat (primary test file)
111112
test_files: Optional[Dict[str, str]] = None # Bug #156: {"test_foo.py": "hash1", ...}
113+
include_deps: Optional[Dict[str, str]] = None # Issue #522: {"path": "hash", ...}
112114

113115

114116
@dataclass
@@ -836,6 +838,121 @@ def calculate_sha256(file_path: Path) -> Optional[str]:
836838
return None
837839

838840

841+
_INCLUDE_PATTERN = re.compile(r'<include>(.*?)</include>')
842+
_BACKTICK_INCLUDE_PATTERN = re.compile(r'```<([^>]*?)>```')
843+
844+
845+
def _resolve_include_path(include_ref: str, prompt_dir: Path) -> Optional[Path]:
846+
"""Resolve an <include> reference to an absolute Path."""
847+
p = Path(include_ref)
848+
if p.is_absolute() and p.exists():
849+
return p
850+
candidate = prompt_dir / include_ref
851+
if candidate.exists():
852+
return candidate
853+
candidate = Path.cwd() / include_ref
854+
if candidate.exists():
855+
return candidate
856+
return None
857+
858+
859+
def extract_include_deps(prompt_path: Path) -> Dict[str, str]:
    """Collect the include dependencies referenced by a prompt file.

    The prompt text is scanned with both include patterns; each distinct,
    whitespace-stripped reference is resolved against the prompt's directory
    and hashed.

    Args:
        prompt_path: Path to the prompt file to scan.

    Returns:
        A dict mapping each resolved dependency path (as a string) to the
        hash returned by ``calculate_sha256``. References that cannot be
        resolved or hashed are omitted. Empty when the prompt is missing,
        unreadable, or contains no references.
    """
    if not prompt_path.exists():
        return {}

    try:
        text = prompt_path.read_text(encoding='utf-8', errors='ignore')
    except (IOError, OSError):
        return {}

    references = _INCLUDE_PATTERN.findall(text) + _BACKTICK_INCLUDE_PATTERN.findall(text)

    base_dir = prompt_path.parent
    found: Dict[str, str] = {}
    # Sorted, de-duplicated references keep the resulting dict order stable.
    for reference in sorted({raw.strip() for raw in references}):
        target = _resolve_include_path(reference, base_dir)
        if not target or not target.exists():
            continue
        digest = calculate_sha256(target)
        if digest:
            found[str(target)] = digest

    return found
889+
890+
891+
def calculate_prompt_hash(prompt_path: Path, stored_deps: Optional[Dict[str, str]] = None) -> Optional[str]:
    """Hash a prompt file together with the contents of its include dependencies.

    Dependencies come from the include references found in the prompt text.
    When the prompt contains no references but ``stored_deps`` is given, the
    keys of ``stored_deps`` are used as the dependency paths instead; this
    covers a prompt whose include tags were stripped by the auto-deps step
    (issue #522).

    Args:
        prompt_path: Path to the prompt file.
        stored_deps: Dependency path -> hash mapping from a previous
            fingerprint. Only the keys (paths) are read here.

    Returns:
        The SHA256 hex digest of the prompt bytes followed by the bytes of
        each existing dependency, in sorted order. When no dependency exists
        on disk, the result of ``calculate_sha256(prompt_path)``. None when
        the prompt is missing or cannot be read.
    """
    if not prompt_path.exists():
        return None

    try:
        text = prompt_path.read_text(encoding='utf-8', errors='ignore')
    except (IOError, OSError):
        return None

    references = _INCLUDE_PATTERN.findall(text) + _BACKTICK_INCLUDE_PATTERN.findall(text)

    if references:
        base_dir = prompt_path.parent
        resolved = (
            _resolve_include_path(reference, base_dir)
            for reference in sorted({raw.strip() for raw in references})
        )
        dependencies = [path for path in resolved if path and path.exists()]
    elif stored_deps:
        # No include tags in the prompt: fall back to the paths recorded in
        # the previous fingerprint.
        dependencies = [path for path in map(Path, sorted(stored_deps)) if path.exists()]
    else:
        dependencies = []

    if not dependencies:
        return calculate_sha256(prompt_path)

    digest = hashlib.sha256()

    def _feed(path: Path) -> None:
        """Stream one file into the running digest in 4096-byte chunks."""
        with open(path, 'rb') as handle:
            while True:
                block = handle.read(4096)
                if not block:
                    break
                digest.update(block)

    # The prompt itself must be readable; a failure here means there is no hash.
    try:
        _feed(prompt_path)
    except (IOError, OSError):
        return None

    # Dependencies are best-effort: an unreadable one is skipped rather than
    # failing the whole hash.
    for dependency in dependencies:
        try:
            _feed(dependency)
        except (IOError, OSError):
            pass

    return digest.hexdigest()
954+
955+
839956
def read_fingerprint(basename: str, language: str) -> Optional[Fingerprint]:
840957
"""Reads and validates the JSON fingerprint file."""
841958
meta_dir = get_meta_dir()
@@ -857,7 +974,8 @@ def read_fingerprint(basename: str, language: str) -> Optional[Fingerprint]:
857974
code_hash=data.get('code_hash'),
858975
example_hash=data.get('example_hash'),
859976
test_hash=data.get('test_hash'),
860-
test_files=data.get('test_files') # Bug #156
977+
test_files=data.get('test_files'), # Bug #156
978+
include_deps=data.get('include_deps'), # Issue #522
861979
)
862980
except (json.JSONDecodeError, KeyError, IOError):
863981
return None
@@ -889,9 +1007,14 @@ def read_run_report(basename: str, language: str) -> Optional[RunReport]:
8891007
return None
8901008

8911009

892-
def calculate_current_hashes(paths: Dict[str, Any]) -> Dict[str, Any]:
893-
"""Computes the hashes for all current files on disk."""
894-
# Return hash keys that match what the fingerprint expects
1010+
def calculate_current_hashes(paths: Dict[str, Any], stored_include_deps: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
1011+
"""Computes the hashes for all current files on disk.
1012+
1013+
Args:
1014+
paths: Dictionary of PDD file paths.
1015+
stored_include_deps: Previously stored include dependency paths from fingerprint.
1016+
Used when the prompt no longer has <include> tags (issue #522).
1017+
"""
8951018
hashes = {}
8961019
for file_type, file_path in paths.items():
8971020
if file_type == 'test_files':
@@ -901,6 +1024,22 @@ def calculate_current_hashes(paths: Dict[str, Any]) -> Dict[str, Any]:
9011024
for f in file_path
9021025
if isinstance(f, Path) and f.exists()
9031026
}
1027+
elif file_type == 'prompt' and isinstance(file_path, Path):
1028+
# Issue #522: Hash prompt with <include> dependencies
1029+
hashes['prompt_hash'] = calculate_prompt_hash(file_path, stored_deps=stored_include_deps)
1030+
# Also extract current include deps for persistence
1031+
hashes['include_deps'] = extract_include_deps(file_path)
1032+
# If no deps found in prompt but we have stored deps, preserve them
1033+
if not hashes['include_deps'] and stored_include_deps:
1034+
# Re-hash stored deps to check for changes
1035+
updated_deps = {}
1036+
for dep_path_str, old_hash in stored_include_deps.items():
1037+
dep_path = Path(dep_path_str)
1038+
if dep_path.exists():
1039+
new_hash = calculate_sha256(dep_path)
1040+
if new_hash:
1041+
updated_deps[dep_path_str] = new_hash
1042+
hashes['include_deps'] = updated_deps
9041043
elif isinstance(file_path, Path):
9051044
hashes[f"{file_type}_hash"] = calculate_sha256(file_path)
9061045
return hashes
@@ -1361,7 +1500,9 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
13611500
# If the user modified the prompt, we need to regenerate regardless of runtime state
13621501
if fingerprint:
13631502
paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
1364-
current_prompt_hash = calculate_sha256(paths['prompt'])
1503+
# Issue #522: Use stored include deps so changes to included files are detected
1504+
# even when auto-deps has stripped <include> tags from the prompt
1505+
current_prompt_hash = calculate_prompt_hash(paths['prompt'], stored_deps=fingerprint.include_deps)
13651506
if current_prompt_hash and current_prompt_hash != fingerprint.prompt_hash:
13661507
prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore') if paths['prompt'].exists() else ""
13671508
has_deps = check_for_dependencies(prompt_content)
@@ -1610,7 +1751,9 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
16101751

16111752
# 2. Analyze File State
16121753
paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
1613-
current_hashes = calculate_current_hashes(paths)
1754+
# Issue #522: Pass stored include deps so prompt hash accounts for dependency changes
1755+
stored_deps = fingerprint.include_deps if fingerprint else None
1756+
current_hashes = calculate_current_hashes(paths, stored_include_deps=stored_deps)
16141757

16151758
# 3. Implement the Decision Tree
16161759
if not fingerprint:

pdd/sync_orchestration.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,8 @@ def _save_run_report_atomic(report: Dict[str, Any], basename: str, language: str
236236

237237
def _save_fingerprint_atomic(basename: str, language: str, operation: str,
238238
paths: Dict[str, Path], cost: float, model: str,
239-
atomic_state: Optional['AtomicStateUpdate'] = None):
239+
atomic_state: Optional['AtomicStateUpdate'] = None,
240+
include_deps_override: Optional[Dict[str, str]] = None):
240241
"""Save fingerprint state after successful operation, supporting atomic updates.
241242
242243
Args:
@@ -247,14 +248,26 @@ def _save_fingerprint_atomic(basename: str, language: str, operation: str,
247248
cost: The cost of the operation.
248249
model: The model used.
249250
atomic_state: Optional AtomicStateUpdate for atomic writes (Issue #159 fix).
251+
include_deps_override: Pre-captured include deps (Issue #522). Used when
252+
auto-deps may have stripped <include> tags before fingerprint save.
250253
"""
251254
if atomic_state:
252255
# Buffer for atomic write
253256
from datetime import datetime, timezone
254-
from .sync_determine_operation import calculate_current_hashes, Fingerprint
257+
from .sync_determine_operation import calculate_current_hashes, Fingerprint, read_fingerprint
255258
from . import __version__
256259

257-
current_hashes = calculate_current_hashes(paths)
260+
# Issue #522: Use override deps if provided (captured before auto-deps),
261+
# otherwise fall back to stored deps from previous fingerprint
262+
if include_deps_override is not None:
263+
stored_deps = include_deps_override
264+
else:
265+
prev_fp = read_fingerprint(basename, language)
266+
stored_deps = prev_fp.include_deps if prev_fp else None
267+
current_hashes = calculate_current_hashes(paths, stored_include_deps=stored_deps)
268+
# If override provided and current extraction found nothing, use the override
269+
if include_deps_override and not current_hashes.get('include_deps'):
270+
current_hashes['include_deps'] = include_deps_override
258271
fingerprint = Fingerprint(
259272
pdd_version=__version__,
260273
timestamp=datetime.now(timezone.utc).isoformat(),
@@ -264,6 +277,7 @@ def _save_fingerprint_atomic(basename: str, language: str, operation: str,
264277
example_hash=current_hashes.get('example_hash'),
265278
test_hash=current_hashes.get('test_hash'),
266279
test_files=current_hashes.get('test_files'), # Bug #156
280+
include_deps=current_hashes.get('include_deps'), # Issue #522
267281
)
268282

269283
fingerprint_file = META_DIR / f"{_safe_basename(basename)}_{language.lower()}.json"
@@ -1471,6 +1485,7 @@ def sync_worker_logic():
14711485
result = {}
14721486
success = False
14731487
op_start_time = time.time()
1488+
include_deps_override = None # Issue #522: Captured before auto-deps strips tags
14741489

14751490
# Issue #159 fix: Use atomic state for consistent run_report + fingerprint writes
14761491
with AtomicStateUpdate(basename, language) as atomic_state:
@@ -1480,6 +1495,9 @@ def sync_worker_logic():
14801495
if operation == 'auto-deps':
14811496
temp_output = str(pdd_files['prompt']).replace('.prompt', '_with_deps.prompt')
14821497
original_content = pdd_files['prompt'].read_text(encoding='utf-8')
1498+
# Issue #522: Capture include deps BEFORE auto-deps may strip tags
1499+
from .sync_determine_operation import extract_include_deps
1500+
include_deps_override = extract_include_deps(pdd_files['prompt'])
14831501
result = auto_deps_main(
14841502
ctx,
14851503
prompt_file=str(pdd_files['prompt']),
@@ -1622,7 +1640,7 @@ def __init__(self, rc, out, err):
16221640
crash_log_content = f"Auto-fixed: {auto_fix_msg}"
16231641
# Fix for issue #430: Save fingerprint and track operation completion before continuing
16241642
operations_completed.append('crash')
1625-
_save_fingerprint_atomic(basename, language, 'crash', pdd_files, 0.0, 'auto-fix', atomic_state=atomic_state)
1643+
_save_fingerprint_atomic(basename, language, 'crash', pdd_files, 0.0, 'auto-fix', atomic_state=atomic_state, include_deps_override=include_deps_override)
16261644
continue # Skip crash_main, move to next operation
16271645
else:
16281646
# Auto-fix didn't fully work, update error log and proceed
@@ -1920,7 +1938,7 @@ def __init__(self, rc, out, err):
19201938
model_name = _extract_model_from_result(operation, result)
19211939
last_model_name = str(model_name)
19221940
operations_completed.append(operation)
1923-
_save_fingerprint_atomic(basename, language, operation, pdd_files, actual_cost, str(model_name), atomic_state=atomic_state)
1941+
_save_fingerprint_atomic(basename, language, operation, pdd_files, actual_cost, str(model_name), atomic_state=atomic_state, include_deps_override=include_deps_override)
19241942

19251943
update_log_entry(log_entry, success=success, cost=actual_cost, model=model_name, duration=duration, error=errors[-1] if errors and not success else None)
19261944
append_log_entry(basename, language, log_entry)

0 commit comments

Comments
 (0)