Skip to content

Commit 57cd218

Browse files
committed
feat(sync): auto-update tests when prompt changes (#203)
Add a test_prompt_hash field to Fingerprint to track which prompt version the tests were generated from. When the prompt changes and the code is regenerated, sync now detects stale tests and triggers test regeneration.

- Add test_prompt_hash field to the Fingerprint dataclass
- Update read_fingerprint() to load test_prompt_hash from JSON
- Add stale-test detection in _perform_sync_analysis()
- Update _save_operation_fingerprint() to set test_prompt_hash based on the operation:
  - generate: sets it to None (tests are now stale)
  - test: sets it to the current prompt hash
  - other ops: preserves the existing value
- Add 12 unit tests covering the new functionality
1 parent 69bb542 commit 57cd218

File tree

6 files changed

+779
-6
lines changed

6 files changed

+779
-6
lines changed

pdd/operation_log.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,25 +211,47 @@ def save_fingerprint(
211211
operation: str,
212212
paths: Optional[Dict[str, Path]] = None,
213213
cost: float = 0.0,
214-
model: str = "unknown"
214+
model: str = "unknown",
215+
test_prompt_hash: Optional[str] = None
215216
) -> None:
216217
"""
217218
Save the current fingerprint/state to the state file.
218219
219220
Writes the full Fingerprint dataclass format compatible with read_fingerprint()
220221
in sync_determine_operation.py. This ensures manual commands (generate, example)
221222
don't break sync's fingerprint tracking.
223+
224+
Args:
225+
test_prompt_hash: Issue #203 - Hash of prompt when tests were generated.
226+
If None, automatically determined based on operation:
227+
- generate: None (tests now stale)
228+
- test: current prompt hash (tests updated)
229+
- other: preserved from existing fingerprint
222230
"""
223231
from dataclasses import asdict
224232
from datetime import timezone
225-
from .sync_determine_operation import calculate_current_hashes, Fingerprint
233+
from .sync_determine_operation import calculate_current_hashes, Fingerprint, read_fingerprint
226234
from . import __version__
227235

228236
path = get_fingerprint_path(basename, language)
229237

230238
# Calculate file hashes from paths (if provided)
231239
current_hashes = calculate_current_hashes(paths) if paths else {}
232240

241+
# Issue #203: Determine test_prompt_hash based on operation type
242+
# This mirrors the logic in sync_orchestration._save_fingerprint_atomic
243+
if test_prompt_hash is None:
244+
if operation == 'generate':
245+
# Code regenerated, tests are now stale
246+
test_prompt_hash = None
247+
elif operation == 'test':
248+
# Tests regenerated, link to current prompt
249+
test_prompt_hash = current_hashes.get('prompt_hash')
250+
else:
251+
# Other operations: preserve existing value
252+
existing_fp = read_fingerprint(basename, language)
253+
test_prompt_hash = existing_fp.test_prompt_hash if existing_fp else None
254+
233255
# Create Fingerprint with same format as _save_fingerprint_atomic
234256
fingerprint = Fingerprint(
235257
pdd_version=__version__,
@@ -240,6 +262,7 @@ def save_fingerprint(
240262
example_hash=current_hashes.get('example_hash'),
241263
test_hash=current_hashes.get('test_hash'),
242264
test_files=current_hashes.get('test_files'),
265+
test_prompt_hash=test_prompt_hash, # Issue #203
243266
)
244267

245268
try:

pdd/sync_determine_operation.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ class Fingerprint:
109109
example_hash: Optional[str]
110110
test_hash: Optional[str] # Keep for backward compat (primary test file)
111111
test_files: Optional[Dict[str, str]] = None # Bug #156: {"test_foo.py": "hash1", ...}
112+
test_prompt_hash: Optional[str] = None # Issue #203: Hash of prompt when tests were generated
112113

113114

114115
@dataclass
@@ -782,7 +783,8 @@ def read_fingerprint(basename: str, language: str) -> Optional[Fingerprint]:
782783
code_hash=data.get('code_hash'),
783784
example_hash=data.get('example_hash'),
784785
test_hash=data.get('test_hash'),
785-
test_files=data.get('test_files') # Bug #156
786+
test_files=data.get('test_files'), # Bug #156
787+
test_prompt_hash=data.get('test_prompt_hash') # Issue #203
786788
)
787789
except (json.JSONDecodeError, KeyError, IOError):
788790
return None
@@ -1521,6 +1523,26 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
15211523

15221524
if not changes:
15231525
# No Changes (Hashes Match Fingerprint) - Progress workflow with skip awareness
1526+
1527+
# Issue #203: Check if tests are stale (generated from old prompt version)
1528+
# Even if workflow appears complete, tests may need regeneration if prompt changed
1529+
if (not skip_tests and fingerprint and paths['test'].exists() and
1530+
fingerprint.test_prompt_hash is not None and
1531+
fingerprint.test_prompt_hash != current_hashes.get('prompt_hash')):
1532+
return SyncDecision(
1533+
operation='test',
1534+
reason='Tests outdated - generated from old prompt version, need regeneration',
1535+
confidence=0.90,
1536+
estimated_cost=estimate_operation_cost('test'),
1537+
details={
1538+
'decision_type': 'heuristic',
1539+
'test_prompt_hash': fingerprint.test_prompt_hash,
1540+
'current_prompt_hash': current_hashes.get('prompt_hash'),
1541+
'tests_stale': True,
1542+
'workflow_stage': 'test_regeneration_for_prompt_change'
1543+
}
1544+
)
1545+
15241546
if _is_workflow_complete(paths, skip_tests, skip_verify, basename, language):
15251547
return SyncDecision(
15261548
operation='nothing',

pdd/sync_orchestration.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,10 +199,26 @@ def _save_fingerprint_atomic(basename: str, language: str, operation: str,
199199
if atomic_state:
200200
# Buffer for atomic write
201201
from datetime import datetime, timezone
202-
from .sync_determine_operation import calculate_current_hashes, Fingerprint
202+
from .sync_determine_operation import calculate_current_hashes, Fingerprint, read_fingerprint
203203
from . import __version__
204204

205205
current_hashes = calculate_current_hashes(paths)
206+
207+
# Issue #203: Determine test_prompt_hash based on operation
208+
# - 'generate': Reset to None (tests become stale since code changed)
209+
# - 'test': Set to current prompt_hash (tests are now up-to-date with prompt)
210+
# - Other operations: Preserve existing test_prompt_hash
211+
existing_fingerprint = read_fingerprint(basename, language)
212+
if operation == 'generate':
213+
# Code regenerated - tests are now stale
214+
test_prompt_hash = None
215+
elif operation == 'test':
216+
# Tests regenerated - link them to current prompt version
217+
test_prompt_hash = current_hashes.get('prompt_hash')
218+
else:
219+
# Preserve existing test_prompt_hash for other operations
220+
test_prompt_hash = existing_fingerprint.test_prompt_hash if existing_fingerprint else None
221+
206222
fingerprint = Fingerprint(
207223
pdd_version=__version__,
208224
timestamp=datetime.now(timezone.utc).isoformat(),
@@ -212,13 +228,19 @@ def _save_fingerprint_atomic(basename: str, language: str, operation: str,
212228
example_hash=current_hashes.get('example_hash'),
213229
test_hash=current_hashes.get('test_hash'),
214230
test_files=current_hashes.get('test_files'), # Bug #156
231+
test_prompt_hash=test_prompt_hash, # Issue #203
215232
)
216233

217234
fingerprint_file = META_DIR / f"{_safe_basename(basename)}_{language}.json"
218235
atomic_state.set_fingerprint(asdict(fingerprint), fingerprint_file)
219236
else:
220237
# Direct write using operation_log
221-
save_fingerprint(basename, language, operation, paths, cost, model)
238+
# Issue #203: Preserve test_prompt_hash from existing fingerprint for skip operations
239+
from .sync_determine_operation import read_fingerprint as read_fp
240+
existing_fp = read_fp(basename, language)
241+
existing_test_prompt_hash = existing_fp.test_prompt_hash if existing_fp else None
242+
save_fingerprint(basename, language, operation, paths, cost, model,
243+
test_prompt_hash=existing_test_prompt_hash)
222244

223245
def _python_cov_target_for_code_file(code_file: Path) -> str:
224246
"""Return a `pytest-cov` `--cov` target for a Python code file.

tests/test_operation_log.py

Lines changed: 233 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -623,4 +623,236 @@ def test_fingerprint_hash_compatibility_with_sync(tmp_path):
623623
assert result.command == "generate"
624624

625625
# Verify pdd_version is set
626-
assert result.pdd_version is not None, "pdd_version should be set"
626+
assert result.pdd_version is not None, "pdd_version should be set"
627+
628+
629+
# --------------------------------------------------------------------------------
630+
# ISSUE #203: test_prompt_hash auto-management in save_fingerprint
631+
# --------------------------------------------------------------------------------
632+
633+
class TestIssue203SaveFingerprintTestPromptHash:
634+
"""Test that save_fingerprint automatically manages test_prompt_hash based on operation type."""
635+
636+
def test_generate_operation_sets_test_prompt_hash_to_none(self, tmp_path):
637+
"""
638+
Issue #203: When operation='generate', test_prompt_hash should be None
639+
because code was regenerated and tests are now stale.
640+
"""
641+
from pdd.operation_log import save_fingerprint
642+
from pdd.sync_determine_operation import read_fingerprint
643+
644+
basename = "gen_test"
645+
language = "python"
646+
647+
meta_dir = tmp_path / ".pdd" / "meta"
648+
meta_dir.mkdir(parents=True)
649+
650+
# Create existing fingerprint with test_prompt_hash set
651+
existing_fp = meta_dir / f"{basename}_{language}.json"
652+
existing_fp.write_text(json.dumps({
653+
"pdd_version": "0.0.1",
654+
"timestamp": "2024-01-01T00:00:00",
655+
"command": "test",
656+
"prompt_hash": "old_prompt_hash",
657+
"code_hash": None,
658+
"example_hash": None,
659+
"test_hash": None,
660+
"test_files": None,
661+
"test_prompt_hash": "existing_test_prompt_hash"
662+
}))
663+
664+
with patch("pdd.operation_log.META_DIR", str(meta_dir)), \
665+
patch("pdd.sync_determine_operation.get_meta_dir", return_value=meta_dir):
666+
667+
# Call save_fingerprint with operation='generate' (no explicit test_prompt_hash)
668+
save_fingerprint(
669+
basename=basename,
670+
language=language,
671+
operation="generate",
672+
paths={},
673+
cost=0.1,
674+
model="test"
675+
)
676+
677+
# Read back and verify test_prompt_hash is None
678+
result = read_fingerprint(basename, language)
679+
assert result is not None
680+
assert result.test_prompt_hash is None, (
681+
"generate operation should set test_prompt_hash to None (tests now stale)"
682+
)
683+
684+
def test_test_operation_sets_test_prompt_hash_to_current(self, tmp_path):
685+
"""
686+
Issue #203: When operation='test', test_prompt_hash should be set to
687+
the current prompt hash (tests regenerated, linked to current prompt).
688+
"""
689+
from pdd.operation_log import save_fingerprint
690+
from pdd.sync_determine_operation import read_fingerprint
691+
692+
basename = "test_op_test"
693+
language = "python"
694+
695+
meta_dir = tmp_path / ".pdd" / "meta"
696+
prompts_dir = tmp_path / "prompts"
697+
meta_dir.mkdir(parents=True)
698+
prompts_dir.mkdir(parents=True)
699+
700+
# Create a prompt file with known content
701+
prompt_file = prompts_dir / f"{basename}_{language}.prompt"
702+
prompt_file.write_text("% Test prompt content\n")
703+
704+
paths = {"prompt": prompt_file}
705+
706+
with patch("pdd.operation_log.META_DIR", str(meta_dir)), \
707+
patch("pdd.sync_determine_operation.get_meta_dir", return_value=meta_dir):
708+
709+
# Call save_fingerprint with operation='test'
710+
save_fingerprint(
711+
basename=basename,
712+
language=language,
713+
operation="test",
714+
paths=paths,
715+
cost=0.1,
716+
model="test"
717+
)
718+
719+
# Read back and verify test_prompt_hash equals prompt_hash
720+
result = read_fingerprint(basename, language)
721+
assert result is not None
722+
assert result.prompt_hash is not None, "prompt_hash should be calculated"
723+
assert result.test_prompt_hash == result.prompt_hash, (
724+
"test operation should set test_prompt_hash to current prompt_hash"
725+
)
726+
727+
def test_example_operation_preserves_test_prompt_hash(self, tmp_path):
728+
"""
729+
Issue #203: When operation is not 'generate' or 'test', the existing
730+
test_prompt_hash should be preserved.
731+
"""
732+
from pdd.operation_log import save_fingerprint
733+
from pdd.sync_determine_operation import read_fingerprint
734+
735+
basename = "example_test"
736+
language = "python"
737+
738+
meta_dir = tmp_path / ".pdd" / "meta"
739+
meta_dir.mkdir(parents=True)
740+
741+
existing_test_prompt_hash = "preserved_hash_value"
742+
743+
# Create existing fingerprint with test_prompt_hash set
744+
existing_fp = meta_dir / f"{basename}_{language}.json"
745+
existing_fp.write_text(json.dumps({
746+
"pdd_version": "0.0.1",
747+
"timestamp": "2024-01-01T00:00:00",
748+
"command": "test",
749+
"prompt_hash": "some_hash",
750+
"code_hash": None,
751+
"example_hash": None,
752+
"test_hash": None,
753+
"test_files": None,
754+
"test_prompt_hash": existing_test_prompt_hash
755+
}))
756+
757+
with patch("pdd.operation_log.META_DIR", str(meta_dir)), \
758+
patch("pdd.sync_determine_operation.get_meta_dir", return_value=meta_dir):
759+
760+
# Call save_fingerprint with operation='example'
761+
save_fingerprint(
762+
basename=basename,
763+
language=language,
764+
operation="example",
765+
paths={},
766+
cost=0.1,
767+
model="test"
768+
)
769+
770+
# Read back and verify test_prompt_hash is preserved
771+
result = read_fingerprint(basename, language)
772+
assert result is not None
773+
assert result.test_prompt_hash == existing_test_prompt_hash, (
774+
"example operation should preserve existing test_prompt_hash"
775+
)
776+
777+
def test_fix_operation_preserves_test_prompt_hash(self, tmp_path):
778+
"""
779+
Issue #203: Fix operation should also preserve existing test_prompt_hash.
780+
"""
781+
from pdd.operation_log import save_fingerprint
782+
from pdd.sync_determine_operation import read_fingerprint
783+
784+
basename = "fix_test"
785+
language = "python"
786+
787+
meta_dir = tmp_path / ".pdd" / "meta"
788+
meta_dir.mkdir(parents=True)
789+
790+
existing_test_prompt_hash = "fix_preserved_hash"
791+
792+
# Create existing fingerprint
793+
existing_fp = meta_dir / f"{basename}_{language}.json"
794+
existing_fp.write_text(json.dumps({
795+
"pdd_version": "0.0.1",
796+
"timestamp": "2024-01-01T00:00:00",
797+
"command": "test",
798+
"prompt_hash": "some_hash",
799+
"code_hash": None,
800+
"example_hash": None,
801+
"test_hash": None,
802+
"test_files": None,
803+
"test_prompt_hash": existing_test_prompt_hash
804+
}))
805+
806+
with patch("pdd.operation_log.META_DIR", str(meta_dir)), \
807+
patch("pdd.sync_determine_operation.get_meta_dir", return_value=meta_dir):
808+
809+
save_fingerprint(
810+
basename=basename,
811+
language=language,
812+
operation="fix",
813+
paths={},
814+
cost=0.1,
815+
model="test"
816+
)
817+
818+
result = read_fingerprint(basename, language)
819+
assert result is not None
820+
assert result.test_prompt_hash == existing_test_prompt_hash, (
821+
"fix operation should preserve existing test_prompt_hash"
822+
)
823+
824+
def test_explicit_test_prompt_hash_overrides_auto_logic(self, tmp_path):
825+
"""
826+
Issue #203: When test_prompt_hash is explicitly passed, it should override
827+
the automatic logic.
828+
"""
829+
from pdd.operation_log import save_fingerprint
830+
from pdd.sync_determine_operation import read_fingerprint
831+
832+
basename = "explicit_test"
833+
language = "python"
834+
835+
meta_dir = tmp_path / ".pdd" / "meta"
836+
meta_dir.mkdir(parents=True)
837+
838+
explicit_hash = "explicitly_passed_hash"
839+
840+
with patch("pdd.operation_log.META_DIR", str(meta_dir)), \
841+
patch("pdd.sync_determine_operation.get_meta_dir", return_value=meta_dir):
842+
843+
# Even for 'generate' operation, explicit test_prompt_hash should be used
844+
save_fingerprint(
845+
basename=basename,
846+
language=language,
847+
operation="generate",
848+
paths={},
849+
cost=0.1,
850+
model="test",
851+
test_prompt_hash=explicit_hash
852+
)
853+
854+
result = read_fingerprint(basename, language)
855+
assert result is not None
856+
assert result.test_prompt_hash == explicit_hash, (
857+
"Explicit test_prompt_hash should override automatic logic"
858+
)

0 commit comments

Comments
 (0)