feat(sync): auto-update tests when prompt changes (#203)

Enfoirer · Enfoirer · commit 57cd2181dade · 2026-01-19T00:29:56.000+08:00
Add test_prompt_hash field to Fingerprint to track which prompt version
tests were generated from. When prompt changes and code is regenerated,
sync now detects stale tests and triggers test regeneration.

- Add test_prompt_hash field to Fingerprint dataclass
- Update read_fingerprint() to load test_prompt_hash from JSON
- Add stale test detection in _perform_sync_analysis()
- Update _save_operation_fingerprint() to set test_prompt_hash based on operation:
  - generate: sets to None (tests now stale)
  - test: sets to current prompt hash
  - other ops: preserves existing value
- Add 12 unit tests covering the new functionality
diff --git a/pdd/operation_log.py b/pdd/operation_log.py
@@ -211,25 +211,47 @@ def save_fingerprint(
     operation: str,
     paths: Optional[Dict[str, Path]] = None,
     cost: float = 0.0,
-    model: str = "unknown"
+    model: str = "unknown",
+    test_prompt_hash: Optional[str] = None
 ) -> None:
     """
     Save the current fingerprint/state to the state file.
 
     Writes the full Fingerprint dataclass format compatible with read_fingerprint()
     in sync_determine_operation.py. This ensures manual commands (generate, example)
     don't break sync's fingerprint tracking.
+
+    Args:
+        test_prompt_hash: Issue #203 - Hash of prompt when tests were generated.
+                         If None, automatically determined based on operation:
+                         - generate: None (tests now stale)
+                         - test: current prompt hash (tests updated)
+                         - other: preserved from existing fingerprint
     """
     from dataclasses import asdict
     from datetime import timezone
-    from .sync_determine_operation import calculate_current_hashes, Fingerprint
+    from .sync_determine_operation import calculate_current_hashes, Fingerprint, read_fingerprint
     from . import __version__
 
     path = get_fingerprint_path(basename, language)
 
     # Calculate file hashes from paths (if provided)
     current_hashes = calculate_current_hashes(paths) if paths else {}
 
+    # Issue #203: Determine test_prompt_hash based on operation type
+    # This mirrors the logic in sync_orchestration._save_fingerprint_atomic
+    if test_prompt_hash is None:
+        if operation == 'generate':
+            # Code regenerated, tests are now stale
+            test_prompt_hash = None
+        elif operation == 'test':
+            # Tests regenerated, link to current prompt
+            test_prompt_hash = current_hashes.get('prompt_hash')
+        else:
+            # Other operations: preserve existing value
+            existing_fp = read_fingerprint(basename, language)
+            test_prompt_hash = existing_fp.test_prompt_hash if existing_fp else None
+
     # Create Fingerprint with same format as _save_fingerprint_atomic
     fingerprint = Fingerprint(
         pdd_version=__version__,
@@ -240,6 +262,7 @@ def save_fingerprint(
         example_hash=current_hashes.get('example_hash'),
         test_hash=current_hashes.get('test_hash'),
         test_files=current_hashes.get('test_files'),
+        test_prompt_hash=test_prompt_hash,  # Issue #203
     )
 
     try:
diff --git a/pdd/sync_determine_operation.py b/pdd/sync_determine_operation.py
@@ -109,6 +109,7 @@ class Fingerprint:
     example_hash: Optional[str]
     test_hash: Optional[str]  # Keep for backward compat (primary test file)
     test_files: Optional[Dict[str, str]] = None  # Bug #156: {"test_foo.py": "hash1", ...}
+    test_prompt_hash: Optional[str] = None  # Issue #203: Hash of prompt when tests were generated
 
 
 @dataclass
@@ -782,7 +783,8 @@ def read_fingerprint(basename: str, language: str) -> Optional[Fingerprint]:
             code_hash=data.get('code_hash'),
             example_hash=data.get('example_hash'),
             test_hash=data.get('test_hash'),
-            test_files=data.get('test_files')  # Bug #156
+            test_files=data.get('test_files'),  # Bug #156
+            test_prompt_hash=data.get('test_prompt_hash')  # Issue #203
         )
     except (json.JSONDecodeError, KeyError, IOError):
         return None
@@ -1521,6 +1523,26 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
     
     if not changes:
         # No Changes (Hashes Match Fingerprint) - Progress workflow with skip awareness
+
+        # Issue #203: Check if tests are stale (generated from old prompt version)
+        # Even if workflow appears complete, tests may need regeneration if prompt changed
+        if (not skip_tests and fingerprint and paths['test'].exists() and
+            fingerprint.test_prompt_hash is not None and
+            fingerprint.test_prompt_hash != current_hashes.get('prompt_hash')):
+            return SyncDecision(
+                operation='test',
+                reason='Tests outdated - generated from old prompt version, need regeneration',
+                confidence=0.90,
+                estimated_cost=estimate_operation_cost('test'),
+                details={
+                    'decision_type': 'heuristic',
+                    'test_prompt_hash': fingerprint.test_prompt_hash,
+                    'current_prompt_hash': current_hashes.get('prompt_hash'),
+                    'tests_stale': True,
+                    'workflow_stage': 'test_regeneration_for_prompt_change'
+                }
+            )
+
         if _is_workflow_complete(paths, skip_tests, skip_verify, basename, language):
             return SyncDecision(
                 operation='nothing',
diff --git a/pdd/sync_orchestration.py b/pdd/sync_orchestration.py
@@ -199,10 +199,26 @@ def _save_fingerprint_atomic(basename: str, language: str, operation: str,
     if atomic_state:
         # Buffer for atomic write
         from datetime import datetime, timezone
-        from .sync_determine_operation import calculate_current_hashes, Fingerprint
+        from .sync_determine_operation import calculate_current_hashes, Fingerprint, read_fingerprint
         from . import __version__
 
         current_hashes = calculate_current_hashes(paths)
+
+        # Issue #203: Determine test_prompt_hash based on operation
+        # - 'generate': Reset to None (tests become stale since code changed)
+        # - 'test': Set to current prompt_hash (tests are now up-to-date with prompt)
+        # - Other operations: Preserve existing test_prompt_hash
+        existing_fingerprint = read_fingerprint(basename, language)
+        if operation == 'generate':
+            # Code regenerated - tests are now stale
+            test_prompt_hash = None
+        elif operation == 'test':
+            # Tests regenerated - link them to current prompt version
+            test_prompt_hash = current_hashes.get('prompt_hash')
+        else:
+            # Preserve existing test_prompt_hash for other operations
+            test_prompt_hash = existing_fingerprint.test_prompt_hash if existing_fingerprint else None
+
         fingerprint = Fingerprint(
             pdd_version=__version__,
             timestamp=datetime.now(timezone.utc).isoformat(),
@@ -212,13 +228,19 @@ def _save_fingerprint_atomic(basename: str, language: str, operation: str,
             example_hash=current_hashes.get('example_hash'),
             test_hash=current_hashes.get('test_hash'),
             test_files=current_hashes.get('test_files'),  # Bug #156
+            test_prompt_hash=test_prompt_hash,  # Issue #203
         )
 
         fingerprint_file = META_DIR / f"{_safe_basename(basename)}_{language}.json"
         atomic_state.set_fingerprint(asdict(fingerprint), fingerprint_file)
     else:
         # Direct write using operation_log
-        save_fingerprint(basename, language, operation, paths, cost, model)
+        # Issue #203: Preserve test_prompt_hash from existing fingerprint for skip operations
+        from .sync_determine_operation import read_fingerprint as read_fp
+        existing_fp = read_fp(basename, language)
+        existing_test_prompt_hash = existing_fp.test_prompt_hash if existing_fp else None
+        save_fingerprint(basename, language, operation, paths, cost, model,
+                        test_prompt_hash=existing_test_prompt_hash)
 
 def _python_cov_target_for_code_file(code_file: Path) -> str:
     """Return a `pytest-cov` `--cov` target for a Python code file.
diff --git a/tests/test_operation_log.py b/tests/test_operation_log.py
@@ -623,4 +623,236 @@ def test_fingerprint_hash_compatibility_with_sync(tmp_path):
         assert result.command == "generate"
 
         # Verify pdd_version is set
-        assert result.pdd_version is not None, "pdd_version should be set"
+        assert result.pdd_version is not None, "pdd_version should be set"
+
+
+# --------------------------------------------------------------------------------
+# ISSUE #203: test_prompt_hash auto-management in save_fingerprint
+# --------------------------------------------------------------------------------
+
+class TestIssue203SaveFingerprintTestPromptHash:
+    """Test that save_fingerprint automatically manages test_prompt_hash based on operation type."""
+
+    def test_generate_operation_sets_test_prompt_hash_to_none(self, tmp_path):
+        """
+        Issue #203: When operation='generate', test_prompt_hash should be None
+        because code was regenerated and tests are now stale.
+        """
+        from pdd.operation_log import save_fingerprint
+        from pdd.sync_determine_operation import read_fingerprint
+
+        basename = "gen_test"
+        language = "python"
+
+        meta_dir = tmp_path / ".pdd" / "meta"
+        meta_dir.mkdir(parents=True)
+
+        # Create existing fingerprint with test_prompt_hash set
+        existing_fp = meta_dir / f"{basename}_{language}.json"
+        existing_fp.write_text(json.dumps({
+            "pdd_version": "0.0.1",
+            "timestamp": "2024-01-01T00:00:00",
+            "command": "test",
+            "prompt_hash": "old_prompt_hash",
+            "code_hash": None,
+            "example_hash": None,
+            "test_hash": None,
+            "test_files": None,
+            "test_prompt_hash": "existing_test_prompt_hash"
+        }))
+
+        with patch("pdd.operation_log.META_DIR", str(meta_dir)), \
+             patch("pdd.sync_determine_operation.get_meta_dir", return_value=meta_dir):
+
+            # Call save_fingerprint with operation='generate' (no explicit test_prompt_hash)
+            save_fingerprint(
+                basename=basename,
+                language=language,
+                operation="generate",
+                paths={},
+                cost=0.1,
+                model="test"
+            )
+
+            # Read back and verify test_prompt_hash is None
+            result = read_fingerprint(basename, language)
+            assert result is not None
+            assert result.test_prompt_hash is None, (
+                "generate operation should set test_prompt_hash to None (tests now stale)"
+            )
+
+    def test_test_operation_sets_test_prompt_hash_to_current(self, tmp_path):
+        """
+        Issue #203: When operation='test', test_prompt_hash should be set to
+        the current prompt hash (tests regenerated, linked to current prompt).
+        """
+        from pdd.operation_log import save_fingerprint
+        from pdd.sync_determine_operation import read_fingerprint
+
+        basename = "test_op_test"
+        language = "python"
+
+        meta_dir = tmp_path / ".pdd" / "meta"
+        prompts_dir = tmp_path / "prompts"
+        meta_dir.mkdir(parents=True)
+        prompts_dir.mkdir(parents=True)
+
+        # Create a prompt file with known content
+        prompt_file = prompts_dir / f"{basename}_{language}.prompt"
+        prompt_file.write_text("% Test prompt content\n")
+
+        paths = {"prompt": prompt_file}
+
+        with patch("pdd.operation_log.META_DIR", str(meta_dir)), \
+             patch("pdd.sync_determine_operation.get_meta_dir", return_value=meta_dir):
+
+            # Call save_fingerprint with operation='test'
+            save_fingerprint(
+                basename=basename,
+                language=language,
+                operation="test",
+                paths=paths,
+                cost=0.1,
+                model="test"
+            )
+
+            # Read back and verify test_prompt_hash equals prompt_hash
+            result = read_fingerprint(basename, language)
+            assert result is not None
+            assert result.prompt_hash is not None, "prompt_hash should be calculated"
+            assert result.test_prompt_hash == result.prompt_hash, (
+                "test operation should set test_prompt_hash to current prompt_hash"
+            )
+
+    def test_example_operation_preserves_test_prompt_hash(self, tmp_path):
+        """
+        Issue #203: When operation is not 'generate' or 'test', the existing
+        test_prompt_hash should be preserved.
+        """
+        from pdd.operation_log import save_fingerprint
+        from pdd.sync_determine_operation import read_fingerprint
+
+        basename = "example_test"
+        language = "python"
+
+        meta_dir = tmp_path / ".pdd" / "meta"
+        meta_dir.mkdir(parents=True)
+
+        existing_test_prompt_hash = "preserved_hash_value"
+
+        # Create existing fingerprint with test_prompt_hash set
+        existing_fp = meta_dir / f"{basename}_{language}.json"
+        existing_fp.write_text(json.dumps({
+            "pdd_version": "0.0.1",
+            "timestamp": "2024-01-01T00:00:00",
+            "command": "test",
+            "prompt_hash": "some_hash",
+            "code_hash": None,
+            "example_hash": None,
+            "test_hash": None,
+            "test_files": None,
+            "test_prompt_hash": existing_test_prompt_hash
+        }))
+
+        with patch("pdd.operation_log.META_DIR", str(meta_dir)), \
+             patch("pdd.sync_determine_operation.get_meta_dir", return_value=meta_dir):
+
+            # Call save_fingerprint with operation='example'
+            save_fingerprint(
+                basename=basename,
+                language=language,
+                operation="example",
+                paths={},
+                cost=0.1,
+                model="test"
+            )
+
+            # Read back and verify test_prompt_hash is preserved
+            result = read_fingerprint(basename, language)
+            assert result is not None
+            assert result.test_prompt_hash == existing_test_prompt_hash, (
+                "example operation should preserve existing test_prompt_hash"
+            )
+
+    def test_fix_operation_preserves_test_prompt_hash(self, tmp_path):
+        """
+        Issue #203: Fix operation should also preserve existing test_prompt_hash.
+        """
+        from pdd.operation_log import save_fingerprint
+        from pdd.sync_determine_operation import read_fingerprint
+
+        basename = "fix_test"
+        language = "python"
+
+        meta_dir = tmp_path / ".pdd" / "meta"
+        meta_dir.mkdir(parents=True)
+
+        existing_test_prompt_hash = "fix_preserved_hash"
+
+        # Create existing fingerprint
+        existing_fp = meta_dir / f"{basename}_{language}.json"
+        existing_fp.write_text(json.dumps({
+            "pdd_version": "0.0.1",
+            "timestamp": "2024-01-01T00:00:00",
+            "command": "test",
+            "prompt_hash": "some_hash",
+            "code_hash": None,
+            "example_hash": None,
+            "test_hash": None,
+            "test_files": None,
+            "test_prompt_hash": existing_test_prompt_hash
+        }))
+
+        with patch("pdd.operation_log.META_DIR", str(meta_dir)), \
+             patch("pdd.sync_determine_operation.get_meta_dir", return_value=meta_dir):
+
+            save_fingerprint(
+                basename=basename,
+                language=language,
+                operation="fix",
+                paths={},
+                cost=0.1,
+                model="test"
+            )
+
+            result = read_fingerprint(basename, language)
+            assert result is not None
+            assert result.test_prompt_hash == existing_test_prompt_hash, (
+                "fix operation should preserve existing test_prompt_hash"
+            )
+
+    def test_explicit_test_prompt_hash_overrides_auto_logic(self, tmp_path):
+        """
+        Issue #203: When test_prompt_hash is explicitly passed, it should override
+        the automatic logic.
+        """
+        from pdd.operation_log import save_fingerprint
+        from pdd.sync_determine_operation import read_fingerprint
+
+        basename = "explicit_test"
+        language = "python"
+
+        meta_dir = tmp_path / ".pdd" / "meta"
+        meta_dir.mkdir(parents=True)
+
+        explicit_hash = "explicitly_passed_hash"
+
+        with patch("pdd.operation_log.META_DIR", str(meta_dir)), \
+             patch("pdd.sync_determine_operation.get_meta_dir", return_value=meta_dir):
+
+            # Even for 'generate' operation, explicit test_prompt_hash should be used
+            save_fingerprint(
+                basename=basename,
+                language=language,
+                operation="generate",
+                paths={},
+                cost=0.1,
+                model="test",
+                test_prompt_hash=explicit_hash
+            )
+
+            result = read_fingerprint(basename, language)
+            assert result is not None
+            assert result.test_prompt_hash == explicit_hash, (
+                "Explicit test_prompt_hash should override automatic logic"
+            )
diff --git a/tests/test_sync_determine_operation.py b/tests/test_sync_determine_operation.py
diff --git a/tests/test_sync_orchestration.py b/tests/test_sync_orchestration.py