Skip to content

Commit 25f6ec3

Browse files
committed
feat(sync): auto-update tests when prompt changes (#203)
Add a test_prompt_hash field to Fingerprint to track which prompt version the tests were generated from. When the prompt changes and the code is regenerated, sync now detects stale tests and triggers test regeneration.

- Add test_prompt_hash field to Fingerprint dataclass
- Update read_fingerprint() to load test_prompt_hash from JSON
- Add stale test detection in _perform_sync_analysis()
- Update _save_operation_fingerprint() to set test_prompt_hash based on operation:
  - generate: sets to None (tests now stale)
  - test: sets to current prompt hash
  - other ops: preserves existing value
- Add 12 unit tests covering the new functionality
1 parent 69bb542 commit 25f6ec3

File tree

5 files changed

+528
-4
lines changed

5 files changed

+528
-4
lines changed

pdd/operation_log.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,14 +211,19 @@ def save_fingerprint(
211211
operation: str,
212212
paths: Optional[Dict[str, Path]] = None,
213213
cost: float = 0.0,
214-
model: str = "unknown"
214+
model: str = "unknown",
215+
test_prompt_hash: Optional[str] = None
215216
) -> None:
216217
"""
217218
Save the current fingerprint/state to the state file.
218219
219220
Writes the full Fingerprint dataclass format compatible with read_fingerprint()
220221
in sync_determine_operation.py. This ensures manual commands (generate, example)
221222
don't break sync's fingerprint tracking.
223+
224+
Args:
225+
test_prompt_hash: Issue #203 - Hash of prompt when tests were generated.
226+
Pass existing value to preserve during skip operations.
222227
"""
223228
from dataclasses import asdict
224229
from datetime import timezone
@@ -240,6 +245,7 @@ def save_fingerprint(
240245
example_hash=current_hashes.get('example_hash'),
241246
test_hash=current_hashes.get('test_hash'),
242247
test_files=current_hashes.get('test_files'),
248+
test_prompt_hash=test_prompt_hash, # Issue #203
243249
)
244250

245251
try:

pdd/sync_determine_operation.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ class Fingerprint:
109109
example_hash: Optional[str]
110110
test_hash: Optional[str] # Keep for backward compat (primary test file)
111111
test_files: Optional[Dict[str, str]] = None # Bug #156: {"test_foo.py": "hash1", ...}
112+
test_prompt_hash: Optional[str] = None # Issue #203: Hash of prompt when tests were generated
112113

113114

114115
@dataclass
@@ -782,7 +783,8 @@ def read_fingerprint(basename: str, language: str) -> Optional[Fingerprint]:
782783
code_hash=data.get('code_hash'),
783784
example_hash=data.get('example_hash'),
784785
test_hash=data.get('test_hash'),
785-
test_files=data.get('test_files') # Bug #156
786+
test_files=data.get('test_files'), # Bug #156
787+
test_prompt_hash=data.get('test_prompt_hash') # Issue #203
786788
)
787789
except (json.JSONDecodeError, KeyError, IOError):
788790
return None
@@ -1521,6 +1523,26 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
15211523

15221524
if not changes:
15231525
# No Changes (Hashes Match Fingerprint) - Progress workflow with skip awareness
1526+
1527+
# Issue #203: Check if tests are stale (generated from old prompt version)
1528+
# Even if workflow appears complete, tests may need regeneration if prompt changed
1529+
if (not skip_tests and fingerprint and paths['test'].exists() and
1530+
fingerprint.test_prompt_hash is not None and
1531+
fingerprint.test_prompt_hash != current_hashes.get('prompt_hash')):
1532+
return SyncDecision(
1533+
operation='test',
1534+
reason='Tests outdated - generated from old prompt version, need regeneration',
1535+
confidence=0.90,
1536+
estimated_cost=estimate_operation_cost('test'),
1537+
details={
1538+
'decision_type': 'heuristic',
1539+
'test_prompt_hash': fingerprint.test_prompt_hash,
1540+
'current_prompt_hash': current_hashes.get('prompt_hash'),
1541+
'tests_stale': True,
1542+
'workflow_stage': 'test_regeneration_for_prompt_change'
1543+
}
1544+
)
1545+
15241546
if _is_workflow_complete(paths, skip_tests, skip_verify, basename, language):
15251547
return SyncDecision(
15261548
operation='nothing',

pdd/sync_orchestration.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,10 +199,26 @@ def _save_fingerprint_atomic(basename: str, language: str, operation: str,
199199
if atomic_state:
200200
# Buffer for atomic write
201201
from datetime import datetime, timezone
202-
from .sync_determine_operation import calculate_current_hashes, Fingerprint
202+
from .sync_determine_operation import calculate_current_hashes, Fingerprint, read_fingerprint
203203
from . import __version__
204204

205205
current_hashes = calculate_current_hashes(paths)
206+
207+
# Issue #203: Determine test_prompt_hash based on operation
208+
# - 'generate': Reset to None (tests become stale since code changed)
209+
# - 'test': Set to current prompt_hash (tests are now up-to-date with prompt)
210+
# - Other operations: Preserve existing test_prompt_hash
211+
existing_fingerprint = read_fingerprint(basename, language)
212+
if operation == 'generate':
213+
# Code regenerated - tests are now stale
214+
test_prompt_hash = None
215+
elif operation == 'test':
216+
# Tests regenerated - link them to current prompt version
217+
test_prompt_hash = current_hashes.get('prompt_hash')
218+
else:
219+
# Preserve existing test_prompt_hash for other operations
220+
test_prompt_hash = existing_fingerprint.test_prompt_hash if existing_fingerprint else None
221+
206222
fingerprint = Fingerprint(
207223
pdd_version=__version__,
208224
timestamp=datetime.now(timezone.utc).isoformat(),
@@ -212,13 +228,19 @@ def _save_fingerprint_atomic(basename: str, language: str, operation: str,
212228
example_hash=current_hashes.get('example_hash'),
213229
test_hash=current_hashes.get('test_hash'),
214230
test_files=current_hashes.get('test_files'), # Bug #156
231+
test_prompt_hash=test_prompt_hash, # Issue #203
215232
)
216233

217234
fingerprint_file = META_DIR / f"{_safe_basename(basename)}_{language}.json"
218235
atomic_state.set_fingerprint(asdict(fingerprint), fingerprint_file)
219236
else:
220237
# Direct write using operation_log
221-
save_fingerprint(basename, language, operation, paths, cost, model)
238+
# Issue #203: Preserve test_prompt_hash from existing fingerprint for skip operations
239+
from .sync_determine_operation import read_fingerprint as read_fp
240+
existing_fp = read_fp(basename, language)
241+
existing_test_prompt_hash = existing_fp.test_prompt_hash if existing_fp else None
242+
save_fingerprint(basename, language, operation, paths, cost, model,
243+
test_prompt_hash=existing_test_prompt_hash)
222244

223245
def _python_cov_target_for_code_file(code_file: Path) -> str:
224246
"""Return a `pytest-cov` `--cov` target for a Python code file.

tests/test_sync_determine_operation.py

Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3041,3 +3041,254 @@ def test_prompt_change_detected_even_after_crash_workflow(pdd_test_environment):
30413041
f"Expected 'generate' or 'auto-deps' due to prompt change, got '{decision.operation}'"
30423042
assert 'prompt' in decision.reason.lower(), \
30433043
f"Reason should mention prompt change: {decision.reason}"
3044+
3045+
3046+
# --- Issue #203: Auto-update tests based on prompt changes ---
3047+
3048+
class TestIssue203FingerprintTestPromptHash:
3049+
"""Tests for the test_prompt_hash field in Fingerprint dataclass (Issue #203)."""
3050+
3051+
def test_fingerprint_has_test_prompt_hash_field(self):
3052+
"""Fingerprint dataclass should have test_prompt_hash field."""
3053+
fp = Fingerprint(
3054+
pdd_version="1.0.0",
3055+
timestamp="2024-01-01T00:00:00Z",
3056+
command="test",
3057+
prompt_hash="prompt_hash_123",
3058+
code_hash="code_hash_456",
3059+
example_hash="example_hash_789",
3060+
test_hash="test_hash_abc",
3061+
test_files=None,
3062+
test_prompt_hash="prompt_hash_123",
3063+
)
3064+
assert hasattr(fp, 'test_prompt_hash')
3065+
assert fp.test_prompt_hash == "prompt_hash_123"
3066+
3067+
def test_fingerprint_test_prompt_hash_defaults_to_none(self):
3068+
"""test_prompt_hash should default to None for backward compatibility."""
3069+
fp = Fingerprint(
3070+
pdd_version="1.0.0",
3071+
timestamp="2024-01-01T00:00:00Z",
3072+
command="generate",
3073+
prompt_hash="hash1",
3074+
code_hash="hash2",
3075+
example_hash="hash3",
3076+
test_hash="hash4",
3077+
)
3078+
assert fp.test_prompt_hash is None
3079+
3080+
def test_fingerprint_serialization_includes_test_prompt_hash(self):
3081+
"""asdict should include test_prompt_hash in serialized output."""
3082+
from dataclasses import asdict
3083+
fp = Fingerprint(
3084+
pdd_version="1.0.0",
3085+
timestamp="2024-01-01T00:00:00Z",
3086+
command="test",
3087+
prompt_hash="p1",
3088+
code_hash="c1",
3089+
example_hash="e1",
3090+
test_hash="t1",
3091+
test_files=None,
3092+
test_prompt_hash="p1",
3093+
)
3094+
data = asdict(fp)
3095+
assert 'test_prompt_hash' in data
3096+
assert data['test_prompt_hash'] == "p1"
3097+
3098+
3099+
class TestIssue203ReadFingerprintTestPromptHash:
3100+
"""Tests for reading test_prompt_hash from fingerprint files (Issue #203)."""
3101+
3102+
def test_read_fingerprint_with_test_prompt_hash(self, pdd_test_environment):
3103+
"""read_fingerprint should correctly read test_prompt_hash field."""
3104+
fingerprint_data = {
3105+
"pdd_version": "1.0.0",
3106+
"timestamp": "2024-01-01T00:00:00Z",
3107+
"command": "test",
3108+
"prompt_hash": "prompt_abc",
3109+
"code_hash": "code_def",
3110+
"example_hash": "example_ghi",
3111+
"test_hash": "test_jkl",
3112+
"test_files": None,
3113+
"test_prompt_hash": "prompt_abc",
3114+
}
3115+
fp_file = get_meta_dir() / "issue203_python.json"
3116+
create_fingerprint_file(fp_file, fingerprint_data)
3117+
3118+
fp = read_fingerprint("issue203", "python")
3119+
3120+
assert fp is not None
3121+
assert fp.test_prompt_hash == "prompt_abc"
3122+
3123+
def test_read_fingerprint_backward_compat_without_test_prompt_hash(self, pdd_test_environment):
3124+
"""read_fingerprint should handle old fingerprints without test_prompt_hash."""
3125+
old_fingerprint_data = {
3126+
"pdd_version": "0.99.0",
3127+
"timestamp": "2024-01-01T00:00:00Z",
3128+
"command": "generate",
3129+
"prompt_hash": "old_prompt",
3130+
"code_hash": "old_code",
3131+
"example_hash": "old_example",
3132+
"test_hash": "old_test",
3133+
"test_files": None,
3134+
# No test_prompt_hash field - simulating old format
3135+
}
3136+
fp_file = get_meta_dir() / "oldmod203_python.json"
3137+
create_fingerprint_file(fp_file, old_fingerprint_data)
3138+
3139+
fp = read_fingerprint("oldmod203", "python")
3140+
3141+
assert fp is not None
3142+
assert fp.test_prompt_hash is None
3143+
3144+
3145+
class TestIssue203StaleTestDetection:
3146+
"""Tests for sync_determine_operation detecting stale tests (Issue #203)."""
3147+
3148+
@patch('sync_determine_operation.construct_paths')
3149+
def test_detects_stale_tests_when_test_prompt_hash_differs(self, mock_construct, pdd_test_environment):
3150+
"""Should return 'test' operation when test_prompt_hash doesn't match current prompt."""
3151+
prompts_dir = pdd_test_environment / "prompts"
3152+
3153+
# Create all required files
3154+
p_hash = create_file(prompts_dir / f"{BASENAME}_{LANGUAGE}.prompt", "NEW prompt content for 203")
3155+
c_hash = create_file(pdd_test_environment / f"{BASENAME}.py", "# regenerated code")
3156+
e_hash = create_file(pdd_test_environment / f"{BASENAME}_example.py", "# example")
3157+
t_hash = create_file(pdd_test_environment / f"test_{BASENAME}.py", "# old tests")
3158+
3159+
mock_construct.return_value = (
3160+
{}, {},
3161+
{
3162+
'code_file': str(pdd_test_environment / f"{BASENAME}.py"),
3163+
'example_file': str(pdd_test_environment / f"{BASENAME}_example.py"),
3164+
'test_file': str(pdd_test_environment / f"test_{BASENAME}.py")
3165+
},
3166+
LANGUAGE
3167+
)
3168+
3169+
# Create fingerprint with OLD test_prompt_hash (different from current prompt)
3170+
old_prompt_hash = "old_prompt_hash_before_change_203"
3171+
fp_path = get_meta_dir() / f"{BASENAME}_{LANGUAGE}.json"
3172+
create_fingerprint_file(fp_path, {
3173+
"pdd_version": "1.0",
3174+
"timestamp": "t",
3175+
"command": "test",
3176+
"prompt_hash": p_hash,
3177+
"code_hash": c_hash,
3178+
"example_hash": e_hash,
3179+
"test_hash": t_hash,
3180+
"test_files": None,
3181+
"test_prompt_hash": old_prompt_hash, # OLD - different from current!
3182+
})
3183+
3184+
# Create run_report (coverage above TARGET_COVERAGE=90.0 to avoid test_extend)
3185+
rr_path = get_meta_dir() / f"{BASENAME}_{LANGUAGE}_run.json"
3186+
create_run_report_file(rr_path, {
3187+
"timestamp": "t",
3188+
"exit_code": 0,
3189+
"tests_passed": 5,
3190+
"tests_failed": 0,
3191+
"coverage": 95.0,
3192+
"test_hash": t_hash,
3193+
})
3194+
3195+
decision = sync_determine_operation(BASENAME, LANGUAGE, TARGET_COVERAGE, prompts_dir=str(prompts_dir))
3196+
3197+
assert decision.operation == 'test'
3198+
assert 'outdated' in decision.reason.lower()
3199+
assert decision.details.get('tests_stale') is True
3200+
3201+
@patch('sync_determine_operation.construct_paths')
3202+
def test_no_stale_test_detection_when_test_prompt_hash_matches(self, mock_construct, pdd_test_environment):
3203+
"""Should return 'nothing' when test_prompt_hash matches current prompt."""
3204+
prompts_dir = pdd_test_environment / "prompts"
3205+
3206+
p_hash = create_file(prompts_dir / f"{BASENAME}_{LANGUAGE}.prompt", "synced prompt 203")
3207+
c_hash = create_file(pdd_test_environment / f"{BASENAME}.py", "# synced code")
3208+
e_hash = create_file(pdd_test_environment / f"{BASENAME}_example.py", "# example")
3209+
t_hash = create_file(pdd_test_environment / f"test_{BASENAME}.py", "# synced tests")
3210+
3211+
mock_construct.return_value = (
3212+
{}, {},
3213+
{
3214+
'code_file': str(pdd_test_environment / f"{BASENAME}.py"),
3215+
'example_file': str(pdd_test_environment / f"{BASENAME}_example.py"),
3216+
'test_file': str(pdd_test_environment / f"test_{BASENAME}.py")
3217+
},
3218+
LANGUAGE
3219+
)
3220+
3221+
fp_path = get_meta_dir() / f"{BASENAME}_{LANGUAGE}.json"
3222+
create_fingerprint_file(fp_path, {
3223+
"pdd_version": "1.0",
3224+
"timestamp": "t",
3225+
"command": "test",
3226+
"prompt_hash": p_hash,
3227+
"code_hash": c_hash,
3228+
"example_hash": e_hash,
3229+
"test_hash": t_hash,
3230+
"test_files": None,
3231+
"test_prompt_hash": p_hash, # MATCHES current prompt hash
3232+
})
3233+
3234+
rr_path = get_meta_dir() / f"{BASENAME}_{LANGUAGE}_run.json"
3235+
create_run_report_file(rr_path, {
3236+
"timestamp": "t",
3237+
"exit_code": 0,
3238+
"tests_passed": 10,
3239+
"tests_failed": 0,
3240+
"coverage": 95.0, # Above TARGET_COVERAGE=90.0
3241+
"test_hash": t_hash,
3242+
})
3243+
3244+
decision = sync_determine_operation(BASENAME, LANGUAGE, TARGET_COVERAGE, prompts_dir=str(prompts_dir))
3245+
3246+
assert decision.operation == 'nothing'
3247+
3248+
@patch('sync_determine_operation.construct_paths')
3249+
def test_no_stale_test_detection_when_test_prompt_hash_is_none(self, mock_construct, pdd_test_environment):
3250+
"""Should NOT trigger stale test detection when test_prompt_hash is None (backward compat)."""
3251+
prompts_dir = pdd_test_environment / "prompts"
3252+
3253+
p_hash = create_file(prompts_dir / f"{BASENAME}_{LANGUAGE}.prompt", "legacy prompt 203")
3254+
c_hash = create_file(pdd_test_environment / f"{BASENAME}.py", "# code")
3255+
e_hash = create_file(pdd_test_environment / f"{BASENAME}_example.py", "# example")
3256+
t_hash = create_file(pdd_test_environment / f"test_{BASENAME}.py", "# tests")
3257+
3258+
mock_construct.return_value = (
3259+
{}, {},
3260+
{
3261+
'code_file': str(pdd_test_environment / f"{BASENAME}.py"),
3262+
'example_file': str(pdd_test_environment / f"{BASENAME}_example.py"),
3263+
'test_file': str(pdd_test_environment / f"test_{BASENAME}.py")
3264+
},
3265+
LANGUAGE
3266+
)
3267+
3268+
fp_path = get_meta_dir() / f"{BASENAME}_{LANGUAGE}.json"
3269+
create_fingerprint_file(fp_path, {
3270+
"pdd_version": "0.99",
3271+
"timestamp": "t",
3272+
"command": "test",
3273+
"prompt_hash": p_hash,
3274+
"code_hash": c_hash,
3275+
"example_hash": e_hash,
3276+
"test_hash": t_hash,
3277+
# No test_prompt_hash - legacy fingerprint
3278+
})
3279+
3280+
rr_path = get_meta_dir() / f"{BASENAME}_{LANGUAGE}_run.json"
3281+
create_run_report_file(rr_path, {
3282+
"timestamp": "t",
3283+
"exit_code": 0,
3284+
"tests_passed": 5,
3285+
"tests_failed": 0,
3286+
"coverage": 95.0, # Above TARGET_COVERAGE=90.0
3287+
"test_hash": t_hash,
3288+
})
3289+
3290+
decision = sync_determine_operation(BASENAME, LANGUAGE, TARGET_COVERAGE, prompts_dir=str(prompts_dir))
3291+
3292+
# Should NOT trigger stale test detection for legacy fingerprints
3293+
assert decision.operation == 'nothing'
3294+
assert decision.details.get('tests_stale') is not True

0 commit comments

Comments
 (0)