|
| 1 | +""" |
| 2 | +E2E Test for Issue #545: pdd fix exits 'no changes to commit' when fix exists |
| 3 | +as unstaged changes. |
| 4 | +
|
| 5 | +Bug: When pdd fix resumes after a prior interrupted run, the orchestrator |
| 6 | +captures initial_file_hashes AFTER the prior run's modifications already exist |
| 7 | +on disk. The hash delta is zero, so _commit_and_push() returns "No changes to |
| 8 | +commit" — leaving modified files orphaned as unstaged changes in the worktree. |
| 9 | +
|
| 10 | +These tests exercise the full orchestrator code path with real git operations. |
| 11 | +Only LLM execution (run_agentic_task) and state persistence are mocked. |
| 12 | +_get_file_hashes and _commit_and_push run unpatched against real git repos. |
| 13 | +""" |
| 14 | + |
| 15 | +import subprocess |
| 16 | +from pathlib import Path |
| 17 | +from unittest.mock import patch |
| 18 | + |
| 19 | +import pytest |
| 20 | + |
| 21 | + |
| 22 | +def _create_git_repo_with_remote(tmp_path: Path): |
| 23 | + """Create a git repo cloned from a bare remote, suitable for push testing.""" |
| 24 | + bare = tmp_path / "bare.git" |
| 25 | + worktree = tmp_path / "worktree" |
| 26 | + |
| 27 | + subprocess.run(["git", "init", "--bare", str(bare)], |
| 28 | + check=True, capture_output=True) |
| 29 | + subprocess.run(["git", "clone", str(bare), str(worktree)], |
| 30 | + check=True, capture_output=True) |
| 31 | + subprocess.run(["git", "config", "user.email", "test@test.com"], |
| 32 | + cwd=worktree, check=True, capture_output=True) |
| 33 | + subprocess.run(["git", "config", "user.name", "Test"], |
| 34 | + cwd=worktree, check=True, capture_output=True) |
| 35 | + |
| 36 | + # Create initial committed file inside a pdd/ subdirectory |
| 37 | + pdd_dir = worktree / "pdd" |
| 38 | + pdd_dir.mkdir() |
| 39 | + module = pdd_dir / "module.py" |
| 40 | + module.write_text("x = 1\n") |
| 41 | + |
| 42 | + subprocess.run(["git", "add", "."], |
| 43 | + cwd=worktree, check=True, capture_output=True) |
| 44 | + subprocess.run(["git", "commit", "-m", "initial commit"], |
| 45 | + cwd=worktree, check=True, capture_output=True) |
| 46 | + |
| 47 | + branch = subprocess.run( |
| 48 | + ["git", "rev-parse", "--abbrev-ref", "HEAD"], |
| 49 | + cwd=worktree, capture_output=True, text=True, check=True |
| 50 | + ).stdout.strip() |
| 51 | + subprocess.run(["git", "push", "-u", "origin", branch], |
| 52 | + cwd=worktree, check=True, capture_output=True) |
| 53 | + |
| 54 | + return worktree, module |
| 55 | + |
| 56 | + |
| 57 | +def _run_orchestrator_with_all_tests_pass(worktree: Path): |
| 58 | + """Run the full orchestrator with Step 2 returning ALL_TESTS_PASS. |
| 59 | +
|
| 60 | + Only mocks LLM calls and state persistence. _get_file_hashes and |
| 61 | + _commit_and_push run unpatched against the real git repo. |
| 62 | + """ |
| 63 | + from pdd.agentic_e2e_fix_orchestrator import run_agentic_e2e_fix_orchestrator |
| 64 | + |
| 65 | + def step_side_effect(*args, **kwargs): |
| 66 | + """Mock LLM: Step 2 returns ALL_TESTS_PASS, everything else passes.""" |
| 67 | + label = kwargs.get("label", "") |
| 68 | + if "step2" in label: |
| 69 | + return (True, "ALL_TESTS_PASS", 0.1, "gpt-4") |
| 70 | + return (True, f"Output for {label}", 0.1, "gpt-4") |
| 71 | + |
| 72 | + with patch("pdd.agentic_e2e_fix_orchestrator.run_agentic_task") as mock_run, \ |
| 73 | + patch("pdd.agentic_e2e_fix_orchestrator.load_workflow_state") as mock_load, \ |
| 74 | + patch("pdd.agentic_e2e_fix_orchestrator.save_workflow_state") as mock_save, \ |
| 75 | + patch("pdd.agentic_e2e_fix_orchestrator.clear_workflow_state") as mock_clear, \ |
| 76 | + patch("pdd.agentic_e2e_fix_orchestrator.load_prompt_template") as mock_template, \ |
| 77 | + patch("pdd.agentic_e2e_fix_orchestrator.console"): |
| 78 | + |
| 79 | + mock_run.side_effect = step_side_effect |
| 80 | + mock_load.return_value = (None, None) |
| 81 | + mock_save.return_value = None |
| 82 | + mock_template.return_value = "Prompt for {issue_number}" |
| 83 | + |
| 84 | + success, message, cost, model, files = run_agentic_e2e_fix_orchestrator( |
| 85 | + issue_url="http://github.com/owner/repo/issues/545", |
| 86 | + issue_content="Bug: pdd fix exits 'no changes to commit'", |
| 87 | + repo_owner="owner", |
| 88 | + repo_name="repo", |
| 89 | + issue_number=545, |
| 90 | + issue_author="user", |
| 91 | + issue_title="Fix unstaged changes on resume", |
| 92 | + cwd=worktree, |
| 93 | + quiet=True, |
| 94 | + max_cycles=1, |
| 95 | + resume=False, |
| 96 | + use_github_state=False, |
| 97 | + ) |
| 98 | + |
| 99 | + return success, message, cost, model, files |
| 100 | + |
| 101 | + |
| 102 | +class TestIssue545OrphanedChangesCommittedE2E: |
| 103 | + """E2E tests for Issue #545: orchestrator must commit pre-existing unstaged |
| 104 | + modifications when ALL_TESTS_PASS at Step 2. |
| 105 | +
|
| 106 | + These tests simulate the resume scenario where a prior pdd fix run wrote |
| 107 | + code changes to disk but the commit step (Step 8) failed. On the next run, |
| 108 | + Step 2 detects ALL_TESTS_PASS (the code is already fixed), but the |
| 109 | + orchestrator must still detect and commit the orphaned unstaged changes. |
| 110 | + """ |
| 111 | + |
| 112 | + def test_orchestrator_commits_orphaned_changes_on_all_tests_pass(self, tmp_path): |
| 113 | + """Primary E2E bug test: orchestrator must commit a pre-existing |
| 114 | + unstaged modification when ALL_TESTS_PASS at Step 2. |
| 115 | +
|
| 116 | + Scenario: |
| 117 | + 1. Git repo has 'pdd/module.py' committed as "x = 1" |
| 118 | + 2. Prior run modified it to "x = 2" but commit failed (unstaged change) |
| 119 | + 3. Orchestrator starts, captures tainted hash snapshot |
| 120 | + 4. Step 2 returns ALL_TESTS_PASS -> _commit_and_push() is called |
| 121 | + 5. Bug: hash delta is zero -> "No changes to commit" -> file orphaned |
| 122 | + 6. Fix: fallback to _get_modified_and_untracked() detects the change |
| 123 | + """ |
| 124 | + worktree, module = _create_git_repo_with_remote(tmp_path) |
| 125 | + |
| 126 | + # Simulate prior run's modification (exists BEFORE orchestrator starts) |
| 127 | + module.write_text("x = 2 # fixed by prior interrupted run\n") |
| 128 | + |
| 129 | + success, message, cost, model, files = _run_orchestrator_with_all_tests_pass(worktree) |
| 130 | + |
| 131 | + assert success is True, f"Orchestrator should succeed, got message: {message}" |
| 132 | + |
| 133 | + # Assert on REAL git state — the orphaned file must be committed |
| 134 | + diff_result = subprocess.run( |
| 135 | + ["git", "diff", "--name-only", "HEAD"], |
| 136 | + cwd=worktree, capture_output=True, text=True |
| 137 | + ) |
| 138 | + assert diff_result.stdout.strip() == "", ( |
| 139 | + f"Issue #545 BUG DETECTED: Pre-existing unstaged modification from a " |
| 140 | + f"prior interrupted pdd fix run was NOT committed by the orchestrator.\n\n" |
| 141 | + f" Still unstaged: {diff_result.stdout.strip()!r}\n\n" |
| 142 | + f" The orchestrator called _commit_and_push() with a tainted hash " |
| 143 | + f"snapshot (captured AFTER the modification), so hash delta was zero " |
| 144 | + f"and it returned 'No changes to commit' without checking " |
| 145 | + f"git diff --name-only HEAD.\n\n" |
| 146 | + f" Fix: add a fallback to _get_modified_and_untracked(cwd) in " |
| 147 | + f"_commit_and_push() when hash comparison yields empty files_to_commit." |
| 148 | + ) |
| 149 | + |
| 150 | + # Verify the commit was pushed to the remote (use @{upstream} instead |
| 151 | + # of origin/HEAD — bare repos don't set a symbolic HEAD ref). |
| 152 | + unpushed = subprocess.run( |
| 153 | + ["git", "rev-list", "@{upstream}..HEAD"], |
| 154 | + cwd=worktree, capture_output=True, text=True |
| 155 | + ) |
| 156 | + assert unpushed.stdout.strip() == "", ( |
| 157 | + f"Issue #545: The fix commit should have been pushed to remote, " |
| 158 | + f"but unpushed commits exist: {unpushed.stdout.strip()!r}" |
| 159 | + ) |
| 160 | + |
| 161 | + def test_orchestrator_commits_multiple_orphaned_files(self, tmp_path): |
| 162 | + """E2E test: orchestrator commits ALL orphaned files, not just one. |
| 163 | +
|
| 164 | + Simulates the real-world scenario from the issue where 9+ files were |
| 165 | + left as unstaged changes after a prior interrupted run. |
| 166 | + """ |
| 167 | + worktree, module = _create_git_repo_with_remote(tmp_path) |
| 168 | + |
| 169 | + # Create additional committed files |
| 170 | + pdd_dir = worktree / "pdd" |
| 171 | + for name in ["code_generator.py", "llm_invoke.py", "preprocess.py"]: |
| 172 | + f = pdd_dir / name |
| 173 | + f.write_text(f"# original {name}\n") |
| 174 | + subprocess.run(["git", "add", "."], |
| 175 | + cwd=worktree, check=True, capture_output=True) |
| 176 | + subprocess.run(["git", "commit", "-m", "add more files"], |
| 177 | + cwd=worktree, check=True, capture_output=True) |
| 178 | + subprocess.run(["git", "push"], |
| 179 | + cwd=worktree, check=True, capture_output=True) |
| 180 | + |
| 181 | + # Simulate prior run modifying ALL files (orphaned unstaged changes) |
| 182 | + module.write_text("x = 2 # fixed\n") |
| 183 | + (pdd_dir / "code_generator.py").write_text("# fixed code_generator\n") |
| 184 | + (pdd_dir / "llm_invoke.py").write_text("# fixed llm_invoke\n") |
| 185 | + (pdd_dir / "preprocess.py").write_text("# fixed preprocess\n") |
| 186 | + |
| 187 | + success, message, cost, model, files = _run_orchestrator_with_all_tests_pass(worktree) |
| 188 | + |
| 189 | + assert success is True, f"Orchestrator should succeed, got: {message}" |
| 190 | + |
| 191 | + # All 4 orphaned files must be committed |
| 192 | + diff_result = subprocess.run( |
| 193 | + ["git", "diff", "--name-only", "HEAD"], |
| 194 | + cwd=worktree, capture_output=True, text=True |
| 195 | + ) |
| 196 | + remaining = diff_result.stdout.strip() |
| 197 | + assert remaining == "", ( |
| 198 | + f"Issue #545: All orphaned files should have been committed but " |
| 199 | + f"{remaining!r} remain unstaged. _commit_and_push() returned " |
| 200 | + f"'No changes to commit' because the hash snapshot was tainted." |
| 201 | + ) |
| 202 | + |
| 203 | + def test_orchestrator_reports_no_changes_when_worktree_is_clean(self, tmp_path): |
| 204 | + """Regression guard: clean worktree stays clean after orchestrator run. |
| 205 | +
|
| 206 | + When there are no pre-existing modifications, the orchestrator should |
| 207 | + NOT create spurious commits. This ensures the fallback doesn't introduce |
| 208 | + false positives. |
| 209 | + """ |
| 210 | + worktree, module = _create_git_repo_with_remote(tmp_path) |
| 211 | + |
| 212 | + # NO modifications — worktree is clean |
| 213 | + success, message, cost, model, files = _run_orchestrator_with_all_tests_pass(worktree) |
| 214 | + |
| 215 | + assert success is True |
| 216 | + |
| 217 | + # Verify no spurious commits were created |
| 218 | + log_result = subprocess.run( |
| 219 | + ["git", "log", "--oneline"], |
| 220 | + cwd=worktree, capture_output=True, text=True |
| 221 | + ) |
| 222 | + # Should only have the initial commit, no fix commit |
| 223 | + assert "fix:" not in log_result.stdout.lower(), ( |
| 224 | + f"Regression: No fix commit should be created when worktree is clean. " |
| 225 | + f"Git log: {log_result.stdout.strip()!r}" |
| 226 | + ) |
0 commit comments