feat(sync): auto-update tests when prompt changes (#203)

Enfoirer · Enfoirer · commit 25f6ec3595cf · 2026-01-19T00:24:02.000+08:00
Add a test_prompt_hash field to Fingerprint to track which prompt version
the tests were generated from. When the prompt changes and the code is
regenerated, sync now detects stale tests and triggers test regeneration.

- Add test_prompt_hash field to Fingerprint dataclass
- Update read_fingerprint() to load test_prompt_hash from JSON
- Add stale test detection in _perform_sync_analysis()
- Update _save_fingerprint_atomic() to set test_prompt_hash based on operation:
  - generate: sets to None (tests now stale)
  - test: sets to current prompt hash
  - other ops: preserves existing value
- Add 12 unit tests covering the new functionality
diff --git a/pdd/operation_log.py b/pdd/operation_log.py
@@ -211,14 +211,19 @@ def save_fingerprint(
     operation: str,
     paths: Optional[Dict[str, Path]] = None,
     cost: float = 0.0,
-    model: str = "unknown"
+    model: str = "unknown",
+    test_prompt_hash: Optional[str] = None
 ) -> None:
     """
     Save the current fingerprint/state to the state file.
 
     Writes the full Fingerprint dataclass format compatible with read_fingerprint()
     in sync_determine_operation.py. This ensures manual commands (generate, example)
     don't break sync's fingerprint tracking.
+
+    Args:
+        test_prompt_hash: Issue #203 - Hash of prompt when tests were generated.
+                         Pass existing value to preserve during skip operations.
     """
     from dataclasses import asdict
     from datetime import timezone
@@ -240,6 +245,7 @@ def save_fingerprint(
         example_hash=current_hashes.get('example_hash'),
         test_hash=current_hashes.get('test_hash'),
         test_files=current_hashes.get('test_files'),
+        test_prompt_hash=test_prompt_hash,  # Issue #203
     )
 
     try:
diff --git a/pdd/sync_determine_operation.py b/pdd/sync_determine_operation.py
@@ -109,6 +109,7 @@ class Fingerprint:
     example_hash: Optional[str]
     test_hash: Optional[str]  # Keep for backward compat (primary test file)
     test_files: Optional[Dict[str, str]] = None  # Bug #156: {"test_foo.py": "hash1", ...}
+    test_prompt_hash: Optional[str] = None  # Issue #203: Hash of prompt when tests were generated
 
 
 @dataclass
@@ -782,7 +783,8 @@ def read_fingerprint(basename: str, language: str) -> Optional[Fingerprint]:
             code_hash=data.get('code_hash'),
             example_hash=data.get('example_hash'),
             test_hash=data.get('test_hash'),
-            test_files=data.get('test_files')  # Bug #156
+            test_files=data.get('test_files'),  # Bug #156
+            test_prompt_hash=data.get('test_prompt_hash')  # Issue #203
         )
     except (json.JSONDecodeError, KeyError, IOError):
         return None
@@ -1521,6 +1523,26 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
     
     if not changes:
         # No Changes (Hashes Match Fingerprint) - Progress workflow with skip awareness
+
+        # Issue #203: Check if tests are stale (generated from old prompt version)
+        # Even if workflow appears complete, tests may need regeneration if prompt changed
+        if (not skip_tests and fingerprint and paths['test'].exists() and
+            fingerprint.test_prompt_hash is not None and
+            fingerprint.test_prompt_hash != current_hashes.get('prompt_hash')):
+            return SyncDecision(
+                operation='test',
+                reason='Tests outdated - generated from old prompt version, need regeneration',
+                confidence=0.90,
+                estimated_cost=estimate_operation_cost('test'),
+                details={
+                    'decision_type': 'heuristic',
+                    'test_prompt_hash': fingerprint.test_prompt_hash,
+                    'current_prompt_hash': current_hashes.get('prompt_hash'),
+                    'tests_stale': True,
+                    'workflow_stage': 'test_regeneration_for_prompt_change'
+                }
+            )
+
         if _is_workflow_complete(paths, skip_tests, skip_verify, basename, language):
             return SyncDecision(
                 operation='nothing',
diff --git a/pdd/sync_orchestration.py b/pdd/sync_orchestration.py
@@ -199,10 +199,26 @@ def _save_fingerprint_atomic(basename: str, language: str, operation: str,
     if atomic_state:
         # Buffer for atomic write
         from datetime import datetime, timezone
-        from .sync_determine_operation import calculate_current_hashes, Fingerprint
+        from .sync_determine_operation import calculate_current_hashes, Fingerprint, read_fingerprint
         from . import __version__
 
         current_hashes = calculate_current_hashes(paths)
+
+        # Issue #203: Determine test_prompt_hash based on operation
+        # - 'generate': Reset to None (tests become stale since code changed)
+        # - 'test': Set to current prompt_hash (tests are now up-to-date with prompt)
+        # - Other operations: Preserve existing test_prompt_hash
+        existing_fingerprint = read_fingerprint(basename, language)
+        if operation == 'generate':
+            # Code regenerated - tests are now stale
+            test_prompt_hash = None
+        elif operation == 'test':
+            # Tests regenerated - link them to current prompt version
+            test_prompt_hash = current_hashes.get('prompt_hash')
+        else:
+            # Preserve existing test_prompt_hash for other operations
+            test_prompt_hash = existing_fingerprint.test_prompt_hash if existing_fingerprint else None
+
         fingerprint = Fingerprint(
             pdd_version=__version__,
             timestamp=datetime.now(timezone.utc).isoformat(),
@@ -212,13 +228,19 @@ def _save_fingerprint_atomic(basename: str, language: str, operation: str,
             example_hash=current_hashes.get('example_hash'),
             test_hash=current_hashes.get('test_hash'),
             test_files=current_hashes.get('test_files'),  # Bug #156
+            test_prompt_hash=test_prompt_hash,  # Issue #203
         )
 
         fingerprint_file = META_DIR / f"{_safe_basename(basename)}_{language}.json"
         atomic_state.set_fingerprint(asdict(fingerprint), fingerprint_file)
     else:
         # Direct write using operation_log
-        save_fingerprint(basename, language, operation, paths, cost, model)
+        # Issue #203: Preserve test_prompt_hash from existing fingerprint for skip operations
+        from .sync_determine_operation import read_fingerprint as read_fp
+        existing_fp = read_fp(basename, language)
+        existing_test_prompt_hash = existing_fp.test_prompt_hash if existing_fp else None
+        save_fingerprint(basename, language, operation, paths, cost, model,
+                        test_prompt_hash=existing_test_prompt_hash)
 
 def _python_cov_target_for_code_file(code_file: Path) -> str:
     """Return a `pytest-cov` `--cov` target for a Python code file.
diff --git a/tests/test_sync_determine_operation.py b/tests/test_sync_determine_operation.py
@@ -3041,3 +3041,254 @@ def test_prompt_change_detected_even_after_crash_workflow(pdd_test_environment):
         f"Expected 'generate' or 'auto-deps' due to prompt change, got '{decision.operation}'"
     assert 'prompt' in decision.reason.lower(), \
         f"Reason should mention prompt change: {decision.reason}"
+
+
+# --- Issue #203: Auto-update tests based on prompt changes ---
+
+class TestIssue203FingerprintTestPromptHash:
+    """Tests for the test_prompt_hash field in Fingerprint dataclass (Issue #203)."""
+
+    def test_fingerprint_has_test_prompt_hash_field(self):
+        """Fingerprint dataclass should have test_prompt_hash field."""
+        fp = Fingerprint(
+            pdd_version="1.0.0",
+            timestamp="2024-01-01T00:00:00Z",
+            command="test",
+            prompt_hash="prompt_hash_123",
+            code_hash="code_hash_456",
+            example_hash="example_hash_789",
+            test_hash="test_hash_abc",
+            test_files=None,
+            test_prompt_hash="prompt_hash_123",
+        )
+        assert hasattr(fp, 'test_prompt_hash')
+        assert fp.test_prompt_hash == "prompt_hash_123"
+
+    def test_fingerprint_test_prompt_hash_defaults_to_none(self):
+        """test_prompt_hash should default to None for backward compatibility."""
+        fp = Fingerprint(
+            pdd_version="1.0.0",
+            timestamp="2024-01-01T00:00:00Z",
+            command="generate",
+            prompt_hash="hash1",
+            code_hash="hash2",
+            example_hash="hash3",
+            test_hash="hash4",
+        )
+        assert fp.test_prompt_hash is None
+
+    def test_fingerprint_serialization_includes_test_prompt_hash(self):
+        """asdict should include test_prompt_hash in serialized output."""
+        from dataclasses import asdict
+        fp = Fingerprint(
+            pdd_version="1.0.0",
+            timestamp="2024-01-01T00:00:00Z",
+            command="test",
+            prompt_hash="p1",
+            code_hash="c1",
+            example_hash="e1",
+            test_hash="t1",
+            test_files=None,
+            test_prompt_hash="p1",
+        )
+        data = asdict(fp)
+        assert 'test_prompt_hash' in data
+        assert data['test_prompt_hash'] == "p1"
+
+
+class TestIssue203ReadFingerprintTestPromptHash:
+    """Tests for reading test_prompt_hash from fingerprint files (Issue #203)."""
+
+    def test_read_fingerprint_with_test_prompt_hash(self, pdd_test_environment):
+        """read_fingerprint should correctly read test_prompt_hash field."""
+        fingerprint_data = {
+            "pdd_version": "1.0.0",
+            "timestamp": "2024-01-01T00:00:00Z",
+            "command": "test",
+            "prompt_hash": "prompt_abc",
+            "code_hash": "code_def",
+            "example_hash": "example_ghi",
+            "test_hash": "test_jkl",
+            "test_files": None,
+            "test_prompt_hash": "prompt_abc",
+        }
+        fp_file = get_meta_dir() / "issue203_python.json"
+        create_fingerprint_file(fp_file, fingerprint_data)
+
+        fp = read_fingerprint("issue203", "python")
+
+        assert fp is not None
+        assert fp.test_prompt_hash == "prompt_abc"
+
+    def test_read_fingerprint_backward_compat_without_test_prompt_hash(self, pdd_test_environment):
+        """read_fingerprint should handle old fingerprints without test_prompt_hash."""
+        old_fingerprint_data = {
+            "pdd_version": "0.99.0",
+            "timestamp": "2024-01-01T00:00:00Z",
+            "command": "generate",
+            "prompt_hash": "old_prompt",
+            "code_hash": "old_code",
+            "example_hash": "old_example",
+            "test_hash": "old_test",
+            "test_files": None,
+            # No test_prompt_hash field - simulating old format
+        }
+        fp_file = get_meta_dir() / "oldmod203_python.json"
+        create_fingerprint_file(fp_file, old_fingerprint_data)
+
+        fp = read_fingerprint("oldmod203", "python")
+
+        assert fp is not None
+        assert fp.test_prompt_hash is None
+
+
+class TestIssue203StaleTestDetection:
+    """Tests for sync_determine_operation detecting stale tests (Issue #203)."""
+
+    @patch('sync_determine_operation.construct_paths')
+    def test_detects_stale_tests_when_test_prompt_hash_differs(self, mock_construct, pdd_test_environment):
+        """Should return 'test' operation when test_prompt_hash doesn't match current prompt."""
+        prompts_dir = pdd_test_environment / "prompts"
+
+        # Create all required files
+        p_hash = create_file(prompts_dir / f"{BASENAME}_{LANGUAGE}.prompt", "NEW prompt content for 203")
+        c_hash = create_file(pdd_test_environment / f"{BASENAME}.py", "# regenerated code")
+        e_hash = create_file(pdd_test_environment / f"{BASENAME}_example.py", "# example")
+        t_hash = create_file(pdd_test_environment / f"test_{BASENAME}.py", "# old tests")
+
+        mock_construct.return_value = (
+            {}, {},
+            {
+                'code_file': str(pdd_test_environment / f"{BASENAME}.py"),
+                'example_file': str(pdd_test_environment / f"{BASENAME}_example.py"),
+                'test_file': str(pdd_test_environment / f"test_{BASENAME}.py")
+            },
+            LANGUAGE
+        )
+
+        # Create fingerprint with OLD test_prompt_hash (different from current prompt)
+        old_prompt_hash = "old_prompt_hash_before_change_203"
+        fp_path = get_meta_dir() / f"{BASENAME}_{LANGUAGE}.json"
+        create_fingerprint_file(fp_path, {
+            "pdd_version": "1.0",
+            "timestamp": "t",
+            "command": "test",
+            "prompt_hash": p_hash,
+            "code_hash": c_hash,
+            "example_hash": e_hash,
+            "test_hash": t_hash,
+            "test_files": None,
+            "test_prompt_hash": old_prompt_hash,  # OLD - different from current!
+        })
+
+        # Create run_report (coverage above TARGET_COVERAGE=90.0 to avoid test_extend)
+        rr_path = get_meta_dir() / f"{BASENAME}_{LANGUAGE}_run.json"
+        create_run_report_file(rr_path, {
+            "timestamp": "t",
+            "exit_code": 0,
+            "tests_passed": 5,
+            "tests_failed": 0,
+            "coverage": 95.0,
+            "test_hash": t_hash,
+        })
+
+        decision = sync_determine_operation(BASENAME, LANGUAGE, TARGET_COVERAGE, prompts_dir=str(prompts_dir))
+
+        assert decision.operation == 'test'
+        assert 'outdated' in decision.reason.lower()
+        assert decision.details.get('tests_stale') is True
+
+    @patch('sync_determine_operation.construct_paths')
+    def test_no_stale_test_detection_when_test_prompt_hash_matches(self, mock_construct, pdd_test_environment):
+        """Should return 'nothing' when test_prompt_hash matches current prompt."""
+        prompts_dir = pdd_test_environment / "prompts"
+
+        p_hash = create_file(prompts_dir / f"{BASENAME}_{LANGUAGE}.prompt", "synced prompt 203")
+        c_hash = create_file(pdd_test_environment / f"{BASENAME}.py", "# synced code")
+        e_hash = create_file(pdd_test_environment / f"{BASENAME}_example.py", "# example")
+        t_hash = create_file(pdd_test_environment / f"test_{BASENAME}.py", "# synced tests")
+
+        mock_construct.return_value = (
+            {}, {},
+            {
+                'code_file': str(pdd_test_environment / f"{BASENAME}.py"),
+                'example_file': str(pdd_test_environment / f"{BASENAME}_example.py"),
+                'test_file': str(pdd_test_environment / f"test_{BASENAME}.py")
+            },
+            LANGUAGE
+        )
+
+        fp_path = get_meta_dir() / f"{BASENAME}_{LANGUAGE}.json"
+        create_fingerprint_file(fp_path, {
+            "pdd_version": "1.0",
+            "timestamp": "t",
+            "command": "test",
+            "prompt_hash": p_hash,
+            "code_hash": c_hash,
+            "example_hash": e_hash,
+            "test_hash": t_hash,
+            "test_files": None,
+            "test_prompt_hash": p_hash,  # MATCHES current prompt hash
+        })
+
+        rr_path = get_meta_dir() / f"{BASENAME}_{LANGUAGE}_run.json"
+        create_run_report_file(rr_path, {
+            "timestamp": "t",
+            "exit_code": 0,
+            "tests_passed": 10,
+            "tests_failed": 0,
+            "coverage": 95.0,  # Above TARGET_COVERAGE=90.0
+            "test_hash": t_hash,
+        })
+
+        decision = sync_determine_operation(BASENAME, LANGUAGE, TARGET_COVERAGE, prompts_dir=str(prompts_dir))
+
+        assert decision.operation == 'nothing'
+
+    @patch('sync_determine_operation.construct_paths')
+    def test_no_stale_test_detection_when_test_prompt_hash_is_none(self, mock_construct, pdd_test_environment):
+        """Should NOT trigger stale test detection when test_prompt_hash is None (backward compat)."""
+        prompts_dir = pdd_test_environment / "prompts"
+
+        p_hash = create_file(prompts_dir / f"{BASENAME}_{LANGUAGE}.prompt", "legacy prompt 203")
+        c_hash = create_file(pdd_test_environment / f"{BASENAME}.py", "# code")
+        e_hash = create_file(pdd_test_environment / f"{BASENAME}_example.py", "# example")
+        t_hash = create_file(pdd_test_environment / f"test_{BASENAME}.py", "# tests")
+
+        mock_construct.return_value = (
+            {}, {},
+            {
+                'code_file': str(pdd_test_environment / f"{BASENAME}.py"),
+                'example_file': str(pdd_test_environment / f"{BASENAME}_example.py"),
+                'test_file': str(pdd_test_environment / f"test_{BASENAME}.py")
+            },
+            LANGUAGE
+        )
+
+        fp_path = get_meta_dir() / f"{BASENAME}_{LANGUAGE}.json"
+        create_fingerprint_file(fp_path, {
+            "pdd_version": "0.99",
+            "timestamp": "t",
+            "command": "test",
+            "prompt_hash": p_hash,
+            "code_hash": c_hash,
+            "example_hash": e_hash,
+            "test_hash": t_hash,
+            # No test_prompt_hash - legacy fingerprint
+        })
+
+        rr_path = get_meta_dir() / f"{BASENAME}_{LANGUAGE}_run.json"
+        create_run_report_file(rr_path, {
+            "timestamp": "t",
+            "exit_code": 0,
+            "tests_passed": 5,
+            "tests_failed": 0,
+            "coverage": 95.0,  # Above TARGET_COVERAGE=90.0
+            "test_hash": t_hash,
+        })
+
+        decision = sync_determine_operation(BASENAME, LANGUAGE, TARGET_COVERAGE, prompts_dir=str(prompts_dir))
+
+        # Should NOT trigger stale test detection for legacy fingerprints
+        assert decision.operation == 'nothing'
+        assert decision.details.get('tests_stale') is not True
diff --git a/tests/test_sync_orchestration.py b/tests/test_sync_orchestration.py