diff --git a/pdd/operation_log.py b/pdd/operation_log.py
index 180af9351..dade81835 100644
--- a/pdd/operation_log.py
+++ b/pdd/operation_log.py
@@ -336,8 +336,9 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:
                 model = "unknown"
                 if success and result:
                     if isinstance(result, tuple) and len(result) >= 3:
-                        if isinstance(result[1], (int, float)): cost = float(result[1])
-                        if isinstance(result[2], str): model = str(result[2])
+                        from .sync_orchestration import _extract_cost_from_result, _extract_model_from_result
+                        cost = _extract_cost_from_result(operation, result)
+                        model = _extract_model_from_result(operation, result)
 
                 update_log_entry(entry, success=success, cost=cost, model=model, duration=duration, error=error_msg)
                 if basename and language:
diff --git a/pdd/sync_orchestration.py b/pdd/sync_orchestration.py
index d6eef6a4a..effd29d0a 100644
--- a/pdd/sync_orchestration.py
+++ b/pdd/sync_orchestration.py
@@ -70,6 +70,46 @@
 # Note: _safe_basename is imported from sync_determine_operation
 
 
+def _extract_cost_from_result(operation: str, result: tuple) -> float:
+    """Extract cost from an operation result tuple at the correct index.
+
+    Each operation returns a different tuple format:
+    - crash/fix/verify: 6-tuple (..., cost, model) — cost at index 4
+    - generate: 4-tuple (content, was_incremental, cost, model) — cost at index 2
+    - example/test/test_extend/auto-deps: 3-or-4-tuple (..., cost, model, ...) — cost at index 1
+    """
+    if operation in ('crash', 'fix', 'verify'):
+        idx = 4
+    elif operation == 'generate':
+        idx = 2
+    else:
+        idx = 1
+    if len(result) > idx:
+        val = result[idx]
+        if isinstance(val, (int, float)) and not isinstance(val, bool):
+            return float(val)
+    return 0.0
+
+
+def _extract_model_from_result(operation: str, result: tuple) -> str:
+    """Extract model name from an operation result tuple at the correct index.
+
+    Each operation returns a different tuple format:
+    - crash/fix/verify: 6-tuple (..., cost, model) — model at index 5
+    - generate: 4-tuple (content, was_incremental, cost, model) — model at index 3
+    - example/test/test_extend/auto-deps: 3-or-4-tuple (..., cost, model, ...) — model at index 2
+    """
+    if operation in ('crash', 'fix', 'verify'):
+        idx = 5
+    elif operation == 'generate':
+        idx = 3
+    else:
+        idx = 2
+    if len(result) > idx and isinstance(result[idx], str):
+        return result[idx]
+    return 'unknown'
+
+
 def _use_agentic_path(language: str, agentic_mode: bool) -> bool:
     """Returns True if we should use agentic path (non-Python OR agentic_mode for Python).
 
@@ -1854,8 +1894,7 @@ def __init__(self, rc, out, err):
                                         success = pdd_files['test'].exists()
                                 else:
                                     success = bool(result[0])
-                                # Cost is always at index 1 in both 3-tuple and 4-tuple returns
-                                cost = result[1] if len(result) >= 2 and isinstance(result[1], (int, float)) else 0.0
+                                cost = _extract_cost_from_result(operation, result)
                                 current_cost_ref[0] += cost
                             else:
                                 success = result is not None
@@ -1877,17 +1916,8 @@ def __init__(self, rc, out, err):
                                  actual_cost = result.get('cost', 0.0)
                                  model_name = result.get('model', 'unknown')
                             elif isinstance(result, tuple) and len(result) >= 3:
-                                 # cmd_test_main returns 4-tuple: (content, cost, model, agentic_success)
-                                 # Other commands may return either:
-                                 #   - 3-tuple: (content, cost, model), e.g. some context operations
-                                 #   - 4-tuple: (content, was_incremental, cost, model_name), e.g. code_generator_main
-                                 # For tests, cost is at index 1; for most other 4+ tuples, cost is at index -2 and model at -1.
-                                 if operation in ('test', 'test_extend') and len(result) >= 4:
-                                     actual_cost = result[1] if isinstance(result[1], (int, float)) else 0.0
-                                     model_name = result[2] if isinstance(result[2], str) else 'unknown'
-                                 else:
-                                     actual_cost = result[1] if isinstance(result[1], (int, float)) else 0.0
-                                     model_name = result[2] if len(result) >= 3 and isinstance(result[2], str) else 'unknown'
+                                 actual_cost = _extract_cost_from_result(operation, result)
+                                 model_name = _extract_model_from_result(operation, result)
                             last_model_name = str(model_name)
                             operations_completed.append(operation)
                             _save_fingerprint_atomic(basename, language, operation, pdd_files, actual_cost, str(model_name), atomic_state=atomic_state)
diff --git a/tests/test_e2e_issue_555_cost_extraction_wrong_index.py b/tests/test_e2e_issue_555_cost_extraction_wrong_index.py
new file mode 100644
index 000000000..de4cf140f
--- /dev/null
+++ b/tests/test_e2e_issue_555_cost_extraction_wrong_index.py
@@ -0,0 +1,335 @@
+"""Tests for issue #555: cost extraction reads wrong tuple index for generate/crash/fix/verify.
+
+The bug: sync_orchestration.py lines 1857-1858 and 1889-1890 always read result[1] as cost
+and result[2] as model, but each operation returns a different tuple format:
+
+- generate: (content, was_incremental, cost, model) — cost at index 2, model at index 3
+- crash: (success, final_code, final_program, attempts, cost, model) — cost at index 4, model at index 5
+- fix: (success, fixed_unit_test, fixed_code, attempts, total_cost, model_name) — cost at index 4, model at index 5
+- verify: (success, final_program, final_code, attempts, total_cost, model_name) — cost at index 4, model at index 5
+
+Secondary bug: operation_log.py lines 339-340 has the same incorrect extraction logic.
+
+Bool quirk: Python's bool subclasses int, so isinstance(False, (int, float)) returns True.
+For generate, result[1] is was_incremental (bool), which silently passes the isinstance check
+and cost becomes int(False)=0 or int(True)=1 instead of the actual cost at result[2].
+"""
+
+import pytest
+
+from pdd.sync_orchestration import _extract_cost_from_result, _extract_model_from_result
+
+
+class TestCostExtractionBudgetAccumulation:
+    """Test cost extraction via _extract_cost_from_result helper (fixes sync_orchestration.py:1857-1858).
+
+    The old buggy code always read result[1] as cost with no operation-specific branching.
+    The fix dispatches to the correct index per operation.
+    """
+
+    def test_generate_cost_extraction(self):
+        """generate returns (content, was_incremental, cost, model) — cost is at index 2, not 1.
+
+        Bug: result[1] is was_incremental (bool). Since bool subclasses int,
+        isinstance(False, (int, float)) is True → cost becomes 0 instead of 0.0421.
+        """
+        # code_generator_main return format: (content, was_incremental, cost, model)
+        result = ("generated code", False, 0.0421, "claude-sonnet-4-6")
+
+        cost = _extract_cost_from_result('generate', result)
+
+        assert cost == pytest.approx(0.0421), (
+            f"generate cost should be {result[2]} (index 2) but got {cost}. "
+            f"result[1] = {result[1]!r} (was_incremental) was read as cost due to bool⊂int."
+        )
+
+    def test_crash_cost_extraction(self):
+        """crash returns (success, final_code, final_program, attempts, cost, model) — cost at index 4.
+
+        Bug: result[1] is final_code (str), isinstance fails → cost defaults to 0.0.
+        """
+        # crash_main return format: (success, final_code, final_program, attempts, cost, model)
+        result = (True, "code content", "program content", 2, 0.0315, "gpt-4o-mini")
+
+        cost = _extract_cost_from_result('crash', result)
+
+        assert cost == pytest.approx(0.0315), (
+            f"crash cost should be {result[4]} (index 4) but got {cost}. "
+            f"result[1] = {result[1]!r} (final_code string) is not a number."
+        )
+
+    def test_fix_cost_extraction(self):
+        """fix returns (success, fixed_unit_test, fixed_code, attempts, total_cost, model) — cost at index 4.
+
+        Bug: result[1] is fixed_unit_test (str), isinstance fails → cost defaults to 0.0.
+        """
+        # fix_main return format: (success, fixed_unit_test, fixed_code, attempts, total_cost, model_name)
+        result = (True, "fixed test code", "fixed source code", 3, 0.0892, "claude-sonnet-4-6")
+
+        cost = _extract_cost_from_result('fix', result)
+
+        assert cost == pytest.approx(0.0892), (
+            f"fix cost should be {result[4]} (index 4) but got {cost}. "
+            f"result[1] = {result[1]!r} (fixed_unit_test string) is not a number."
+        )
+
+    def test_verify_cost_extraction(self):
+        """verify returns (success, final_program, final_code, attempts, total_cost, model) — cost at index 4.
+
+        Bug: result[1] is final_program (str), isinstance fails → cost defaults to 0.0.
+        """
+        # fix_verification_main return format: (success, final_program, final_code, attempts, total_cost, model_name)
+        result = (True, "program content", "code content", 1, 0.0267, "gpt-4o-mini")
+
+        cost = _extract_cost_from_result('verify', result)
+
+        assert cost == pytest.approx(0.0267), (
+            f"verify cost should be {result[4]} (index 4) but got {cost}. "
+            f"result[1] = {result[1]!r} (final_program string) is not a number."
+        )
+
+
+class TestCostExtractionLogging:
+    """Test cost/model extraction via helpers (fixes sync_orchestration.py:1885-1890 log entry).
+
+    The old buggy code read result[1] as cost and result[2] as model for all non-test operations,
+    which is wrong for generate (cost=[2], model=[3]) and crash/fix/verify (cost=[4], model=[5]).
+    """
+
+    def test_generate_logging_cost_and_model(self):
+        """generate: cost should be result[2], model should be result[3].
+
+        Bug: Reads result[1] (was_incremental=False) as cost → 0 (bool→int).
+        Reads result[2] (0.0421 float) as model → 'unknown' (not a string).
+        """
+        result = ("generated code", False, 0.0421, "claude-sonnet-4-6")
+
+        actual_cost = _extract_cost_from_result('generate', result)
+        model_name = _extract_model_from_result('generate', result)
+
+        assert actual_cost == pytest.approx(0.0421), (
+            f"generate log cost should be {result[2]} but got {actual_cost}. "
+            f"result[1] = {result[1]!r} (was_incremental bool) was misread as cost."
+        )
+        assert model_name == "claude-sonnet-4-6", (
+            f"generate log model should be '{result[3]}' but got '{model_name}'. "
+            f"result[2] = {result[2]!r} (cost float) was misread as model."
+        )
+
+    def test_crash_logging_cost_and_model(self):
+        """crash: cost should be result[4], model should be result[5].
+
+        Bug: Reads result[1] (final_code str) as cost → 0.0 (isinstance fails).
+        Reads result[2] (final_program str) as model → uses wrong string.
+        """
+        result = (True, "code content", "program content", 2, 0.0315, "gpt-4o-mini")
+
+        actual_cost = _extract_cost_from_result('crash', result)
+        model_name = _extract_model_from_result('crash', result)
+
+        assert actual_cost == pytest.approx(0.0315), (
+            f"crash log cost should be {result[4]} but got {actual_cost}."
+        )
+        assert model_name == "gpt-4o-mini", (
+            f"crash log model should be '{result[5]}' but got '{model_name}'. "
+            f"result[2] = {result[2]!r} (final_program string) was misread as model."
+        )
+
+    def test_fix_logging_cost_and_model(self):
+        """fix: cost should be result[4], model should be result[5].
+
+        Bug: Reads result[1] (fixed_unit_test str) as cost → 0.0 (isinstance fails).
+        Reads result[2] (fixed_code str) as model → uses wrong string.
+        """
+        result = (True, "fixed test code", "fixed source code", 3, 0.0892, "claude-sonnet-4-6")
+
+        actual_cost = _extract_cost_from_result('fix', result)
+        model_name = _extract_model_from_result('fix', result)
+
+        assert actual_cost == pytest.approx(0.0892), (
+            f"fix log cost should be {result[4]} but got {actual_cost}."
+        )
+        assert model_name == "claude-sonnet-4-6", (
+            f"fix log model should be '{result[5]}' but got '{model_name}'. "
+            f"result[2] = {result[2]!r} (fixed_code string) was misread as model."
+        )
+
+    def test_verify_logging_cost_and_model(self):
+        """verify: cost should be result[4], model should be result[5].
+
+        Bug: Reads result[1] (final_program str) as cost → 0.0 (isinstance fails).
+        Reads result[2] (final_code str) as model → uses wrong string.
+        """
+        result = (True, "program content", "code content", 1, 0.0267, "gpt-4o-mini")
+
+        actual_cost = _extract_cost_from_result('verify', result)
+        model_name = _extract_model_from_result('verify', result)
+
+        assert actual_cost == pytest.approx(0.0267), (
+            f"verify log cost should be {result[4]} but got {actual_cost}."
+        )
+        assert model_name == "gpt-4o-mini", (
+            f"verify log model should be '{result[5]}' but got '{model_name}'. "
+            f"result[2] = {result[2]!r} (final_code string) was misread as model."
+        )
+
+
+class TestBoolSubclassesIntQuirk:
+    """Test that bool values are not silently accepted as cost values.
+
+    Python's bool subclasses int: isinstance(False, int) is True.
+    For generate, result[1] = was_incremental (bool). The fixed helper
+    explicitly excludes bool with `not isinstance(val, bool)`.
+    """
+
+    def test_bool_false_not_accepted_as_cost(self):
+        """The fix must reject bool values — generate's was_incremental should not sneak through.
+
+        When was_incremental=False, the old code set cost to 0 (int value of False).
+        The fix reads index 2 instead.
+        """
+        # generate 4-tuple: (content, was_incremental, cost, model)
+        result = ("generated code", False, 0.0421, "claude-sonnet-4-6")
+
+        cost = _extract_cost_from_result('generate', result)
+
+        assert not isinstance(cost, bool), (
+            f"Cost = {cost!r} is a bool (was_incremental), not a real cost value."
+        )
+        assert cost == pytest.approx(0.0421), (
+            f"Cost should be {result[2]} but got {cost}."
+        )
+
+    def test_bool_true_not_misread_as_cost_one_dollar(self):
+        """When was_incremental=True, the fix must NOT record cost as $1.00.
+
+        True == 1 in Python, so float(True) = 1.0. The old code would record
+        an incremental generate that costs $0.0053 as costing $1.00.
+        """
+        # generate 4-tuple with was_incremental=True
+        result = ("generated code", True, 0.0053, "gpt-4o-mini")
+
+        cost = _extract_cost_from_result('generate', result)
+
+        assert cost == pytest.approx(0.0053), (
+            f"Cost should be {result[2]} but got {cost} (True → int 1). "
+            f"was_incremental=True was interpreted as cost=$1.00."
+        )
+
+
+class TestRegressionGuards:
+    """Ensure operations that already extract cost correctly continue to work.
+
+    These operations have cost at index 1, model at index 2:
+    - example: (content, cost, model) — 3-tuple
+    - test: (content, cost, model, agentic_success) — 4-tuple
+    - test_extend: (content, cost, model, agentic_success) — 4-tuple
+    """
+
+    def test_example_cost_extraction(self):
+        """example returns (content, cost, model) — cost at index 1 works correctly."""
+        result = ("example content", 0.0023, "gpt-4o-mini")
+
+        cost = _extract_cost_from_result('example', result)
+        model = _extract_model_from_result('example', result)
+
+        assert cost == pytest.approx(0.0023)
+        assert model == "gpt-4o-mini"
+
+    def test_test_cost_extraction(self):
+        """test returns (content, cost, model, agentic_success) — cost at index 1 works correctly."""
+        result = ("test content", 0.0078, "claude-sonnet-4-6", True)
+
+        cost = _extract_cost_from_result('test', result)
+        model = _extract_model_from_result('test', result)
+
+        assert cost == pytest.approx(0.0078)
+        assert model == "claude-sonnet-4-6"
+
+    def test_test_extend_cost_extraction(self):
+        """test_extend returns same format as test — cost at index 1 works correctly."""
+        result = ("test content", 0.0045, "gpt-4o-mini", False)
+
+        cost = _extract_cost_from_result('test_extend', result)
+        model = _extract_model_from_result('test_extend', result)
+
+        assert cost == pytest.approx(0.0045)
+        assert model == "gpt-4o-mini"
+
+
+class TestBudgetAccumulationAllOperations:
+    """E2E test: budget accumulation across all operation types in a sync loop.
+
+    With the bug, generate/crash/fix/verify costs are lost or wrong,
+    so the total accumulated cost is much lower than actual spend.
+    """
+
+    def test_total_budget_with_all_operations(self):
+        """Simulate sync loop with all 6 operations — total cost should sum correctly.
+
+        With the old bug:
+        - example: 0.01 (correct)
+        - generate: 0 (was_incremental=False → bool treated as 0)
+        - crash: 0.0 (string at [1] fails isinstance)
+        - test: 0.02 (correct)
+        - fix: 0.0 (string at [1] fails isinstance)
+        - verify: 0.0 (string at [1] fails isinstance)
+        Buggy total: ~0.03 instead of 0.23
+        """
+        operations_and_results = [
+            ('example', ("content", 0.01, "gpt-4o-mini")),
+            ('generate', ("code", False, 0.05, "claude-sonnet-4-6")),
+            ('crash', (True, "code", "program", 2, 0.03, "gpt-4o-mini")),
+            ('test', ("tests", 0.02, "gpt-4o-mini", True)),
+            ('fix', (True, "fixed test", "fixed code", 1, 0.08, "claude-sonnet-4-6")),
+            ('verify', (True, "program", "code", 1, 0.04, "gpt-4o-mini")),
+        ]
+
+        expected_total = 0.01 + 0.05 + 0.03 + 0.02 + 0.08 + 0.04  # = 0.23
+
+        current_cost = 0.0
+        for operation, result in operations_and_results:
+            cost = _extract_cost_from_result(operation, result)
+            current_cost += cost
+
+        assert current_cost == pytest.approx(expected_total), (
+            f"Total cost should be ${expected_total:.2f} but got ${current_cost:.4f}. "
+            f"generate/crash/fix/verify costs are lost due to wrong tuple index."
+        )
+
+
+class TestOperationLogDecoratorBug:
+    """Test the identical bug fix in operation_log.py:339-340.
+
+    The log_operation decorator now uses the same _extract_cost_from_result and
+    _extract_model_from_result helpers, fixing the operation-specific dispatch.
+    """
+
+    def test_generate_decorator_extraction(self):
+        """Decorator reads correct index for generate: cost at [2], model at [3]."""
+        result = ("generated code", False, 0.0421, "claude-sonnet-4-6")
+
+        cost = _extract_cost_from_result('generate', result)
+        model = _extract_model_from_result('generate', result)
+
+        assert cost == pytest.approx(0.0421), (
+            f"Decorator cost for generate should be {result[2]} but got {cost}. "
+            f"result[1] = {result[1]!r} (bool) was cast to float."
+        )
+        assert model == "claude-sonnet-4-6", (
+            f"Decorator model for generate should be '{result[3]}' but got '{model}'."
+        )
+
+    def test_fix_decorator_extraction(self):
+        """Decorator reads correct index for fix: cost at [4], model at [5]."""
+        result = (True, "fixed test code", "fixed source code", 3, 0.0892, "claude-sonnet-4-6")
+
+        cost = _extract_cost_from_result('fix', result)
+        model = _extract_model_from_result('fix', result)
+
+        assert cost == pytest.approx(0.0892), (
+            f"Decorator cost for fix should be {result[4]} but got {cost}."
+        )
+        assert model == "claude-sonnet-4-6", (
+            f"Decorator model for fix should be '{result[5]}' but got '{model}'."
+        )
diff --git a/tests/test_e2e_issue_555_sync_cost_extraction.py b/tests/test_e2e_issue_555_sync_cost_extraction.py
new file mode 100644
index 000000000..7ab8adcee
--- /dev/null
+++ b/tests/test_e2e_issue_555_sync_cost_extraction.py
@@ -0,0 +1,503 @@
+"""E2E test for issue #555: cost extraction reads wrong tuple index in sync_orchestration.
+
+Unlike the unit tests (test_e2e_issue_555_cost_extraction_wrong_index.py) which replicate
+the buggy extraction logic inline, these E2E tests call the REAL sync_orchestration function
+and verify that actual cost/model values are correctly extracted from operation results.
+
+The bug: sync_orchestration.py lines 1857-1858 and 1889-1890 always read result[1] as cost
+and result[2] as model. This is wrong for:
+  - generate: returns (content, was_incremental, cost, model) — cost at [2], model at [3]
+  - crash: returns (success, final_code, final_program, attempts, cost, model) — cost at [4], model at [5]
+  - fix: returns (success, fixed_test, fixed_code, attempts, cost, model) — cost at [4], model at [5]
+  - verify: returns (success, final_program, final_code, attempts, cost, model) — cost at [4], model at [5]
+
+The bool subclassing int quirk makes generate silently wrong — isinstance(False, (int, float))
+passes, so was_incremental=False is treated as cost $0.00.
+
+E2E approach: Call real sync_orchestration with mocked operation functions returning
+realistic tuples. Verify that returned total_cost and logged cost/model are correct.
+"""
+
+import pytest
+from pathlib import Path
+from unittest.mock import patch, MagicMock, call
+
+import pdd
+from pdd.sync_orchestration import sync_orchestration
+from pdd.sync_determine_operation import SyncDecision
+from pdd.sync_determine_operation import RunReport
+
+
+@pytest.fixture(autouse=True)
+def set_pdd_path(monkeypatch):
+    """Set PDD_PATH so language detection works."""
+    pdd_package_dir = Path(pdd.__file__).parent
+    monkeypatch.setenv("PDD_PATH", str(pdd_package_dir))
+
+
+@pytest.fixture
+def sync_workspace(tmp_path):
+    """Create minimal workspace files that pass sync_orchestration's file checks."""
+    prompt = tmp_path / "prompts" / "cost_test_python.prompt"
+    prompt.parent.mkdir(parents=True, exist_ok=True)
+    prompt.write_text("Test prompt for cost extraction")
+
+    code = tmp_path / "src" / "cost_test.py"
+    code.parent.mkdir(parents=True, exist_ok=True)
+    code.write_text("def hello(): pass\n")
+
+    example = tmp_path / "examples" / "cost_test_example.py"
+    example.parent.mkdir(parents=True, exist_ok=True)
+    example.write_text("from cost_test import hello\nhello()\n")
+
+    test = tmp_path / "tests" / "test_cost_test.py"
+    test.parent.mkdir(parents=True, exist_ok=True)
+    test.write_text("def test_hello(): pass\n")
+
+    return {
+        'prompt': prompt,
+        'code': code,
+        'example': example,
+        'test': test,
+    }
+
+
+def _make_decision(operation, reason='auto'):
+    return SyncDecision(operation=operation, reason=reason)
+
+
+def _build_patches(sync_workspace, op_results):
+    """Build the common patches dict for sync_orchestration, with operation mocks from op_results."""
+    call_count = [0]
+    decisions = list(op_results.keys())
+
+    def mock_determine(*args, **kwargs):
+        if call_count[0] < len(decisions):
+            op = decisions[call_count[0]]
+            call_count[0] += 1
+            return _make_decision(op, f'Need {op}')
+        return _make_decision('all_synced', 'Complete')
+
+    # Create a failed RunReport so crash/verify pre-checks detect a crash
+    # and proceed to call crash_main/fix_verification_main
+    failed_run_report = RunReport(
+        timestamp='2024-01-01T00:00:00Z',
+        exit_code=1,
+        tests_passed=0,
+        tests_failed=1,
+        coverage=0.0,
+        test_hash='abc123'
+    )
+
+    patches = {
+        'pdd.sync_orchestration.get_pdd_file_paths': MagicMock(return_value=sync_workspace),
+        'pdd.sync_orchestration.sync_determine_operation': mock_determine,
+        'pdd.sync_orchestration.SyncLock': MagicMock(),
+        'pdd.sync_orchestration.log_event': MagicMock(),
+        'pdd.sync_orchestration.append_log_entry': MagicMock(),
+        'pdd.sync_orchestration._save_fingerprint_atomic': MagicMock(),
+        'pdd.sync_orchestration._save_run_report_atomic': MagicMock(),
+        'pdd.sync_orchestration.calculate_sha256': MagicMock(return_value='abc123'),
+        'pdd.sync_orchestration.maybe_steer_operation': MagicMock(side_effect=lambda op, *a, **kw: (op, False)),
+        'pdd.sync_orchestration.create_log_entry': MagicMock(return_value={'details': {}}),
+        'pdd.sync_orchestration.load_operation_log': MagicMock(return_value=[]),
+        # Mock crash/verify pre-processing to ensure crash_main/fix_verification_main are called
+        'pdd.sync_orchestration.read_run_report': MagicMock(return_value=failed_run_report),
+        'pdd.sync_orchestration._try_auto_fix_import_error': MagicMock(return_value=(False, None)),
+        'pdd.sync_orchestration._try_auto_fix_env_var_error': MagicMock(return_value=(False, None)),
+        'pdd.sync_orchestration.clear_run_report': MagicMock(),
+    }
+
+    # Wire up the real update_log_entry so we can inspect its calls
+    update_log_entry_mock = MagicMock(side_effect=lambda entry, **kw: entry.update(kw) or entry)
+    patches['pdd.sync_orchestration.update_log_entry'] = update_log_entry_mock
+
+    # Map operation names to their function paths
+    op_function_map = {
+        'generate': 'pdd.sync_orchestration.code_generator_main',
+        'example': 'pdd.sync_orchestration.context_generator_main',
+        'crash': 'pdd.sync_orchestration.crash_main',
+        'verify': 'pdd.sync_orchestration.fix_verification_main',
+        'test': 'pdd.sync_orchestration.cmd_test_main',
+        'test_extend': 'pdd.sync_orchestration.cmd_test_main',
+        'fix': 'pdd.sync_orchestration.fix_main',
+    }
+
+    for op, result in op_results.items():
+        func_path = op_function_map.get(op)
+        if func_path:
+            patches[func_path] = MagicMock(return_value=result)
+
+    return patches, update_log_entry_mock
+
+
+def _run_sync(sync_workspace, tmp_path, monkeypatch, op_results, budget=10.0,
+              skip_verify=False, skip_tests=False):
+    """Run sync_orchestration with mocked operations returning specified results."""
+    monkeypatch.chdir(tmp_path)
+
+    patches, update_log_mock = _build_patches(sync_workspace, op_results)
+
+    ctx_managers = [patch(k, v) for k, v in patches.items()]
+    for cm in ctx_managers:
+        cm.start()
+    try:
+        result = sync_orchestration(
+            basename='cost_test',
+            language='python',
+            budget=budget,
+            max_attempts=1,
+            strength=0.5,
+            temperature=0.0,
+            skip_verify=skip_verify,
+            skip_tests=skip_tests,
+            quiet=True,
+            force=True,
+            no_steer=True,
+        )
+        return result, update_log_mock
+    finally:
+        for cm in ctx_managers:
+            cm.stop()
+
+
+class TestE2ESyncGenerateCostExtraction:
+    """E2E: sync_orchestration must correctly extract cost from generate's 4-tuple result."""
+
+    def test_generate_cost_in_total(self, sync_workspace, tmp_path, monkeypatch):
+        """generate returns (content, was_incremental, cost, model) — cost at index 2.
+
+        Bug: result[1] is was_incremental (bool). Since bool subclasses int,
+        isinstance(False, (int, float)) passes → cost recorded as 0 instead of 0.0421.
+        """
+        generate_cost = 0.0421
+        result, _ = _run_sync(
+            sync_workspace, tmp_path, monkeypatch,
+            op_results={
+                'generate': ("generated code", False, generate_cost, "claude-sonnet-4-6"),
+            },
+        )
+
+        total_cost = result.get('total_cost', 0.0)
+        assert total_cost >= generate_cost, (
+            f"sync total_cost should include generate cost ${generate_cost} "
+            f"but got ${total_cost:.6f}. Bug: result[1]=False (bool) treated as cost $0."
+        )
+
+    def test_generate_cost_logged_correctly(self, sync_workspace, tmp_path, monkeypatch):
+        """The logged actual_cost and model should come from the correct tuple indices."""
+        generate_cost = 0.0421
+        _, update_log_mock = _run_sync(
+            sync_workspace, tmp_path, monkeypatch,
+            op_results={
+                'generate': ("generated code", False, generate_cost, "claude-sonnet-4-6"),
+            },
+        )
+
+        # Find the update_log_entry call for the successful operation
+        for c in update_log_mock.call_args_list:
+            kwargs = c.kwargs if c.kwargs else {}
+            if kwargs.get('success', False):
+                actual_cost = kwargs.get('cost', 0.0)
+                model = kwargs.get('model', 'unknown')
+                assert actual_cost == pytest.approx(generate_cost), (
+                    f"Logged cost should be {generate_cost} but got {actual_cost}. "
+                    f"Bug: result[1]=False (was_incremental) was read as cost."
+                )
+                assert model == "claude-sonnet-4-6", (
+                    f"Logged model should be 'claude-sonnet-4-6' but got '{model}'. "
+                    f"Bug: result[2]=0.0421 (cost float) was read as model."
+                )
+                return
+
+        pytest.fail("No successful update_log_entry call found")
+
+    def test_generate_bool_true_not_treated_as_cost(self, sync_workspace, tmp_path, monkeypatch):
+        """When was_incremental=True, cost should NOT be 1 (int value of True)."""
+        actual_cost = 0.0053
+        result, _ = _run_sync(
+            sync_workspace, tmp_path, monkeypatch,
+            op_results={
+                'generate': ("generated code", True, actual_cost, "gpt-4o-mini"),
+            },
+        )
+
+        total_cost = result.get('total_cost', 0.0)
+        # With the bug: True → int 1 → cost = 1.0
+        # Without the bug: cost = 0.0053
+        assert total_cost == pytest.approx(actual_cost, abs=0.01), (
+            f"sync total_cost should be ~${actual_cost} but got ${total_cost:.4f}. "
+            f"Bug: was_incremental=True → int(True)=1 → cost recorded as $1.00."
+        )
+
+
+class TestE2ESyncCrashCostExtraction:
+    """E2E: sync_orchestration must correctly extract cost from crash's 6-tuple result."""
+
+    def test_crash_cost_in_total(self, sync_workspace, tmp_path, monkeypatch):
+        """crash returns (success, final_code, final_program, attempts, cost, model) — cost at index 4.
+
+        Bug: result[1] is final_code (str), isinstance fails → cost defaults to 0.0.
+        The actual cost at index 4 is completely ignored.
+        """
+        crash_cost = 0.0315
+        result, _ = _run_sync(
+            sync_workspace, tmp_path, monkeypatch,
+            op_results={
+                'crash': (True, "fixed code", "fixed program", 2, crash_cost, "gpt-4o-mini"),
+            },
+        )
+
+        total_cost = result.get('total_cost', 0.0)
+        assert total_cost >= crash_cost, (
+            f"sync total_cost should include crash cost ${crash_cost} "
+            f"but got ${total_cost:.6f}. Bug: result[1]='fixed code' (str) fails isinstance."
+        )
+
+    def test_crash_model_logged_correctly(self, sync_workspace, tmp_path, monkeypatch):
+        """The logged model should be 'gpt-4o-mini', not the final_program string."""
+        _, update_log_mock = _run_sync(
+            sync_workspace, tmp_path, monkeypatch,
+            op_results={
+                'crash': (True, "fixed code", "fixed program", 2, 0.0315, "gpt-4o-mini"),
+            },
+        )
+
+        for c in update_log_mock.call_args_list:
+            kwargs = c.kwargs if c.kwargs else {}
+            if kwargs.get('success', False):
+                model = kwargs.get('model', 'unknown')
+                assert model == "gpt-4o-mini", (
+                    f"Logged model should be 'gpt-4o-mini' but got '{model}'. "
+                    f"Bug: result[2]='fixed program' (str) was read as model name."
+                )
+                return
+
+        pytest.fail("No successful update_log_entry call found")
+
+
+class TestE2ESyncFixCostExtraction:
+    """E2E: sync_orchestration must correctly extract cost from fix's 6-tuple result."""
+
+    def test_fix_cost_in_total(self, sync_workspace, tmp_path, monkeypatch):
+        """fix returns (success, fixed_test, fixed_code, attempts, cost, model) — cost at index 4.
+
+        Bug: result[1] is fixed_unit_test (str), isinstance fails → cost defaults to 0.0.
+        """
+        fix_cost = 0.0892
+        result, _ = _run_sync(
+            sync_workspace, tmp_path, monkeypatch,
+            op_results={
+                'fix': (True, "fixed test code", "fixed source code", 3, fix_cost, "claude-sonnet-4-6"),
+            },
+        )
+
+        total_cost = result.get('total_cost', 0.0)
+        assert total_cost >= fix_cost, (
+            f"sync total_cost should include fix cost ${fix_cost} "
+            f"but got ${total_cost:.6f}. Bug: result[1]='fixed test code' fails isinstance."
+        )
+
+
+class TestE2ESyncVerifyCostExtraction:
+    """E2E: sync_orchestration must correctly extract cost from verify's 6-tuple result."""
+
+    def test_verify_cost_in_total(self, sync_workspace, tmp_path, monkeypatch):
+        """verify returns (success, final_program, final_code, attempts, cost, model) — cost at index 4.
+
+        Bug: result[1] is final_program (str), isinstance fails → cost defaults to 0.0.
+        """
+        verify_cost = 0.0267
+        result, _ = _run_sync(
+            sync_workspace, tmp_path, monkeypatch,
+            op_results={
+                'verify': (True, "program content", "code content", 1, verify_cost, "gpt-4o-mini"),
+            },
+        )
+
+        total_cost = result.get('total_cost', 0.0)
+        assert total_cost >= verify_cost, (
+            f"sync total_cost should include verify cost ${verify_cost} "
+            f"but got ${total_cost:.6f}. Bug: result[1]='program content' fails isinstance."
+        )
+
+
+class TestE2ERegressionGuards:
+    """E2E: Operations that already work (example, test, test_extend) must not regress."""
+
+    def test_example_cost_in_total(self, sync_workspace, tmp_path, monkeypatch):
+        """example returns (content, cost, model) — cost at index 1 works correctly."""
+        example_cost = 0.0023
+        result, _ = _run_sync(
+            sync_workspace, tmp_path, monkeypatch,
+            op_results={
+                'example': ("example content", example_cost, "gpt-4o-mini"),
+            },
+        )
+
+        total_cost = result.get('total_cost', 0.0)
+        assert total_cost >= example_cost, (
+            f"sync total_cost should include example cost ${example_cost} "
+            f"but got ${total_cost:.6f}."
+        )
+
+    def test_test_cost_in_total(self, sync_workspace, tmp_path, monkeypatch):
+        """test returns (content, cost, model, agentic_success) — cost at index 1 works correctly."""
+        test_cost = 0.0078
+        result, _ = _run_sync(
+            sync_workspace, tmp_path, monkeypatch,
+            op_results={
+                'test': ("test content", test_cost, "claude-sonnet-4-6", True),
+            },
+        )
+
+        total_cost = result.get('total_cost', 0.0)
+        assert total_cost >= test_cost, (
+            f"sync total_cost should include test cost ${test_cost} "
+            f"but got ${total_cost:.6f}."
+        )
+
+
+class TestE2EBudgetAccumulationAllOperations:
+    """E2E: full sync loop with all operation types — total cost must be accurate."""
+
+    def test_multi_operation_budget_accumulation(self, sync_workspace, tmp_path, monkeypatch):
+        """Run example + generate + test in sequence. All costs must accumulate.
+
+        Expected total: $0.01 + $0.05 + $0.02 = $0.08
+        With bug: $0.01 + $0.00 + $0.02 = $0.03 (generate cost lost due to bool→int)
+        """
+        result, _ = _run_sync(
+            sync_workspace, tmp_path, monkeypatch,
+            op_results={
+                'example': ("content", 0.01, "gpt-4o-mini"),
+                'generate': ("code", False, 0.05, "claude-sonnet-4-6"),
+                'test': ("tests", 0.02, "gpt-4o-mini", True),
+            },
+        )
+
+        total_cost = result.get('total_cost', 0.0)
+        expected_total = 0.01 + 0.05 + 0.02  # = 0.08
+        assert total_cost == pytest.approx(expected_total, abs=0.005), (
+            f"Total cost should be ~${expected_total:.2f} but got ${total_cost:.4f}. "
+            f"Bug: generate cost lost (was_incremental=False → $0.00)."
+        )
+
+    def test_budget_enforcement_with_generate_cost(self, sync_workspace, tmp_path, monkeypatch):
+        """If generate costs $0.08 and budget is $0.05, sync should stop due to budget exceeded.
+
+        With the bug: generate cost recorded as $0 (bool→int), so budget check never triggers
+        and expensive subsequent operations keep running.
+        """
+        result, _ = _run_sync(
+            sync_workspace, tmp_path, monkeypatch,
+            op_results={
+                'generate': ("code", False, 0.08, "claude-sonnet-4-6"),
+                'test': ("tests", 0.05, "gpt-4o-mini", True),
+            },
+            budget=0.05,
+        )
+
+        total_cost = result.get('total_cost', 0.0)
+        # With bug: total_cost ≈ 0.05 (only test cost counted, generate was $0)
+        # Without bug: total_cost ≈ 0.08 (generate exceeds budget, test may not run)
+        assert total_cost >= 0.05, (
+            f"Total cost should reflect generate cost (${0.08}) but got ${total_cost:.4f}. "
+            f"Bug: generate cost lost, budget check didn't fire."
+        )
+
+
+class TestE2EOperationLogDecorator:
+    """E2E: the log_operation decorator in operation_log.py must extract cost/model correctly.
+
+    The decorator at operation_log.py:338-340 has the same bug — always reads result[1] as
+    cost and result[2] as model, regardless of operation type.
+    """
+
+    def test_decorator_extracts_generate_cost(self, tmp_path):
+        """Calling a decorated function that returns a generate-style 4-tuple.
+
+        The decorator should log cost from index 2 and model from index 3.
+        Bug: logs cost from index 1 (was_incremental=False → 0.0) and model from index 2 (cost float).
+        """
+        from pdd.operation_log import log_operation, update_log_entry
+
+        captured_updates = []
+        original_update = update_log_entry
+
+        def tracking_update(entry, **kwargs):
+            result = original_update(entry, **kwargs)
+            captured_updates.append(dict(kwargs))
+            return result
+
+        # log_operation infers basename/language from prompt_file kwarg
+        prompt_file = tmp_path / "test_python.prompt"
+        prompt_file.write_text("test prompt")
+
+        @log_operation(operation='generate')
+        def fake_generate(prompt_file=None):
+            return ("generated code", False, 0.0421, "claude-sonnet-4-6")
+
+        with patch('pdd.operation_log.update_log_entry', side_effect=tracking_update), \
+             patch('pdd.operation_log.append_log_entry', MagicMock()), \
+             patch('pdd.operation_log.save_fingerprint', MagicMock()):
+            fake_generate(prompt_file=str(prompt_file))
+
+        assert len(captured_updates) >= 1, "Decorator should have called update_log_entry"
+
+        update_kwargs = captured_updates[-1]
+        logged_cost = update_kwargs.get('cost', 0.0)
+        logged_model = update_kwargs.get('model', 'unknown')
+
+        assert logged_cost == pytest.approx(0.0421), (
+            f"Decorator logged cost={logged_cost} for generate, "
+            f"expected 0.0421. Bug: result[1]=False (bool) → float(False)=0.0."
+        )
+        assert logged_model == "claude-sonnet-4-6", (
+            f"Decorator logged model='{logged_model}' for generate, "
+            f"expected 'claude-sonnet-4-6'. Bug: result[2]=0.0421 (float) is not a string."
+        )
+
+    def test_decorator_extracts_fix_cost(self, tmp_path):
+        """Calling a decorated function that returns a fix-style 6-tuple.
+
+        The decorator should log cost from index 4 and model from index 5.
+        Bug: logs cost=0.0 (result[1]='fixed test' str fails isinstance) and
+        model='fixed source code' (result[2] is a code string, not a model name).
+        """
+        from pdd.operation_log import log_operation, update_log_entry
+
+        captured_updates = []
+        original_update = update_log_entry
+
+        def tracking_update(entry, **kwargs):
+            result = original_update(entry, **kwargs)
+            captured_updates.append(dict(kwargs))
+            return result
+
+        prompt_file = tmp_path / "test_python.prompt"
+        prompt_file.write_text("test prompt")
+
+        @log_operation(operation='fix')
+        def fake_fix(prompt_file=None):
+            return (True, "fixed test code", "fixed source code", 3, 0.0892, "claude-sonnet-4-6")
+
+        with patch('pdd.operation_log.update_log_entry', side_effect=tracking_update), \
+             patch('pdd.operation_log.append_log_entry', MagicMock()), \
+             patch('pdd.operation_log.save_fingerprint', MagicMock()):
+            fake_fix(prompt_file=str(prompt_file))
+
+        assert len(captured_updates) >= 1, "Decorator should have called update_log_entry"
+
+        update_kwargs = captured_updates[-1]
+        logged_cost = update_kwargs.get('cost', 0.0)
+        logged_model = update_kwargs.get('model', 'unknown')
+
+        assert logged_cost == pytest.approx(0.0892), (
+            f"Decorator logged cost={logged_cost} for fix, expected 0.0892. "
+            f"Bug: result[1]='fixed test code' (str) fails isinstance → cost=0.0."
+        )
+        assert logged_model == "claude-sonnet-4-6", (
+            f"Decorator logged model='{logged_model}' for fix, expected 'claude-sonnet-4-6'. "
+            f"Bug: result[2]='fixed source code' was read as model name."
+        )
\ No newline at end of file