|
4 | 4 | """ |
5 | 5 | from __future__ import annotations |
6 | 6 |
|
| 7 | +import json |
| 8 | +import shutil |
7 | 9 | from pathlib import Path |
8 | 10 |
|
9 | 11 | import ladybug |
@@ -979,3 +981,141 @@ def test_incremental_preserves_incoming_edges_to_dependent(self, tmp_path: Path) |
979 | 981 | assert cb_after_count > 0, "out-of-scope C->B CALLS edge must be preserved" |
980 | 982 |
|
981 | 983 | conn.close() |
| 984 | + |
| 985 | + |
| 986 | +class TestIncrementalRegressions: |
| 987 | + """Regression tests for the loop where ``increment`` always fully reprocesses.
| 988 | +
|
| 989 | + Two bugs fed the loop: |
| 990 | + 1. ``_write_clients_producers_and_calls`` built a default ``MemberEntry`` |
| 991 | + missing the required ``node_id`` field. Because ``dict.get(k, default)`` |
| 992 | + evaluates ``default`` eagerly, the TypeError fired whenever ANY |
| 993 | + ``declares_client`` / ``declares_producer`` row existed — crashing every |
| 994 | + client-bearing incremental rebuild into a full-rebuild fallback. |
| 995 | + 2. ``_init_hash_tracker`` (run by every full reprocess AND by that fallback) |
| 996 | + did ``load()`` + ``update()`` and never pruned hashes for files no longer |
| 997 | + on disk, so ghost entries persisted and re-triggered the loop every run. |
| 998 | + """ |
| 999 | + |
| 1000 | + def test_init_hash_tracker_prunes_stale_entries(self, tmp_path: Path) -> None: |
| 1001 | + """A full rebuild drops hashes for files no longer on disk (ghost pruning). |
| 1002 | +
|
| 1003 | + Without pruning, a stale entry is re-detected as 'removed' on every |
| 1004 | + ``increment``, sustaining an endless full-rebuild loop. |
| 1005 | + """ |
| 1006 | + from build_ast_graph import write_ladybug |
| 1007 | + |
| 1008 | + source_root = tmp_path / "src" |
| 1009 | + source_root.mkdir() |
| 1010 | + (source_root / "A.java").write_text("package pkg; class A {}", encoding="utf-8") |
| 1011 | + index_dir = tmp_path / "index" |
| 1012 | + index_dir.mkdir() |
| 1013 | + ladybug_path = index_dir / "code_graph.lbug" |
| 1014 | + |
| 1015 | + tables = GraphTables() |
| 1016 | + pass1_parse(source_root, tables, verbose=False) |
| 1017 | + write_ladybug(ladybug_path, tables, source_root=source_root, verbose=False) |
| 1018 | + |
| 1019 | + # Inject a ghost hash for a file that does not exist on disk. |
| 1020 | + hash_file = index_dir / ".graph_hashes.json" |
| 1021 | + data = json.loads(hash_file.read_text(encoding="utf-8")) |
| 1022 | + data["ghost/Deleted.java"] = "0" * 64 |
| 1023 | + hash_file.write_text(json.dumps(data), encoding="utf-8") |
| 1024 | + |
| 1025 | + # A second full rebuild (what `reprocess --graph-only` does) re-runs |
| 1026 | + # _init_hash_tracker, which must drop the ghost. |
| 1027 | + tables2 = GraphTables() |
| 1028 | + pass1_parse(source_root, tables2, verbose=False) |
| 1029 | + write_ladybug(ladybug_path, tables2, source_root=source_root, verbose=False) |
| 1030 | + |
| 1031 | + after = json.loads(hash_file.read_text(encoding="utf-8")) |
| 1032 | + assert "ghost/Deleted.java" not in after |
| 1033 | + assert "A.java" in after |
| 1034 | + |
| 1035 | + def test_incremental_with_http_clients_does_not_fall_back(self, tmp_path: Path) -> None: |
| 1036 | + """A corpus with Feign/Kafka clients/producers rebuilds incrementally. |
| 1037 | +
|
| 1038 | + ``http_caller_smoke`` emits DECLARES_CLIENT / DECLARES_PRODUCER rows, so |
| 1039 | + the buggy eager ``MemberEntry`` default in |
| 1040 | + ``_write_clients_producers_and_calls`` crashed here before the fix |
| 1041 | + (forcing full_fallback). After the fix: mode is "incremental". |
| 1042 | + """ |
| 1043 | + from _builders import build_ladybug_full_into |
| 1044 | + from build_ast_graph import incremental_rebuild |
| 1045 | + |
| 1046 | + corpus = Path(__file__).parent / "fixtures" / "http_caller_smoke" |
| 1047 | + source_root = tmp_path / "src" |
| 1048 | + shutil.copytree(corpus, source_root) |
| 1049 | + index_dir = tmp_path / "index" |
| 1050 | + index_dir.mkdir() |
| 1051 | + ladybug_path = index_dir / "code_graph.lbug" |
| 1052 | + |
| 1053 | + # Full build seeds .graph_hashes.json via write_ladybug -> _init_hash_tracker. |
| 1054 | + build_ladybug_full_into(source_root, ladybug_path) |
| 1055 | + |
| 1056 | + # Mutate one file unrelated to the clients/producers. |
| 1057 | + target = source_root / "src" / "main" / "java" / "smoke" / "http" / "TopicNames.java" |
| 1058 | + target.write_text(target.read_text(encoding="utf-8") + "\n// edit\n", encoding="utf-8") |
| 1059 | + |
| 1060 | + result = incremental_rebuild(source_root, ladybug_path, verbose=False) |
| 1061 | + assert result.mode == "incremental", ( |
| 1062 | + f"expected incremental, got {result.mode!r} (the node_id crash in " |
| 1063 | + "_write_clients_producers_and_calls forces a full fallback)" |
| 1064 | + ) |
| 1065 | + assert result.files_changed == 1 |
| 1066 | + |
| 1067 | + def test_reprocess_graph_only_then_increment_is_noop(self, tmp_path: Path) -> None: |
| 1068 | + """The reported scenario at the builder level: a full graph rebuild (what |
| 1069 | + ``reprocess --graph-only`` does) followed by ``increment`` with no source |
| 1070 | + changes must be a no-op, not a second full rebuild.""" |
| 1071 | + from _builders import build_ladybug_full_into |
| 1072 | + from build_ast_graph import incremental_rebuild |
| 1073 | + |
| 1074 | + corpus = Path(__file__).parent / "fixtures" / "http_caller_smoke" |
| 1075 | + source_root = tmp_path / "src" |
| 1076 | + shutil.copytree(corpus, source_root) |
| 1077 | + index_dir = tmp_path / "index" |
| 1078 | + index_dir.mkdir() |
| 1079 | + ladybug_path = index_dir / "code_graph.lbug" |
| 1080 | + |
| 1081 | + # Simulate `reprocess --graph-only`: full rebuild seeds the hash store. |
| 1082 | + build_ladybug_full_into(source_root, ladybug_path) |
| 1083 | + |
| 1084 | + # `increment` with no source changes. |
| 1085 | + result = incremental_rebuild(source_root, ladybug_path, verbose=False) |
| 1086 | + assert result.mode == "incremental" |
| 1087 | + assert (result.files_changed, result.files_added, result.files_removed) == (0, 0, 0) |
| 1088 | + |
| 1089 | + def test_incremental_ghost_entry_then_next_run_is_noop(self, tmp_path: Path) -> None: |
| 1090 | + """A ghost hash entry is detected as 'removed' once, processed by the |
| 1091 | + scoped path (which prunes it), so the following run is a clean no-op. |
| 1092 | +
|
| 1093 | + Guards both fixes together: the node_id fix lets the scoped path
| 1094 | + complete, and that path deletes the hashes of `removed` files (the
| 1095 | + ghost). Before the fixes this fell back to full and preserved the ghost.
| 1096 | + """ |
| 1097 | + from _builders import build_ladybug_full_into |
| 1098 | + from build_ast_graph import incremental_rebuild |
| 1099 | + |
| 1100 | + corpus = Path(__file__).parent / "fixtures" / "http_caller_smoke" |
| 1101 | + source_root = tmp_path / "src" |
| 1102 | + shutil.copytree(corpus, source_root) |
| 1103 | + index_dir = tmp_path / "index" |
| 1104 | + index_dir.mkdir() |
| 1105 | + ladybug_path = index_dir / "code_graph.lbug" |
| 1106 | + build_ladybug_full_into(source_root, ladybug_path) |
| 1107 | + |
| 1108 | + # Inject a ghost (no source change). |
| 1109 | + hash_file = index_dir / ".graph_hashes.json" |
| 1110 | + data = json.loads(hash_file.read_text(encoding="utf-8")) |
| 1111 | + data["ghost/Gone.java"] = "0" * 64 |
| 1112 | + hash_file.write_text(json.dumps(data), encoding="utf-8") |
| 1113 | + |
| 1114 | + first = incremental_rebuild(source_root, ladybug_path, verbose=False) |
| 1115 | + assert first.mode == "incremental", f"expected incremental, got {first.mode!r}" |
| 1116 | + assert first.files_removed == 1 |
| 1117 | + |
| 1118 | + # The ghost must be gone, so the next run detects nothing. |
| 1119 | + second = incremental_rebuild(source_root, ladybug_path, verbose=False) |
| 1120 | + assert second.mode == "incremental" |
| 1121 | + assert (second.files_changed, second.files_added, second.files_removed) == (0, 0, 0) |
0 commit comments