From c02c69161280903358093d4685289ad945efe249 Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Mon, 22 Jun 2026 18:17:22 +0300 Subject: [PATCH] fix(graph): refresh preserved dependent nodes on increment (PR-P4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR-P2 replaced the per-row `_MERGE_SYMBOL` upsert in `_write_nodes_impl` with a skip-if-exists filter, which dropped the property refresh on preserved dependent type nodes. A dependent's `role`/`capabilities` depend on project-wide inputs (meta-annotation chain, brownfield overrides) and can shift without its own source changing, so the incremental graph diverged from a full rebuild — a preserved dependent whose role lever changed stayed stale until the next full rebuild. The `_delete_file_scope` contract (issue #305) relied on that re-MERGE to refresh preserved dependents in place. Found by fanning out reviewer subagents over the landed init/increment-perf plan and adjudicating a B-vs-D conflict empirically (the "benign" verdict came from a reviewer that couldn't run ladybug; the "bug" verdict reproduced). Changes: - Mark `TypeIndexEntry`/`MemberEntry` `loaded_from_db` when `_load_existing_{types,members}` creates them, so DB-loaded stubs (placeholder decls) are distinguishable from freshly-parsed scoped/dependent decls. - After the bulk COPY of new nodes, re-SET every mutable Symbol field on preserved, non-stub, type-kind nodes (`_SET_SYMBOL_BY_ID`), restoring the upsert the design relied on. Stubs are skipped (their stored values are authoritative); non-type kinds carry no mutable role/capabilities; the full path is unaffected (empty DB → no existing nodes). - `_populate_declares_rows` skips loaded members: their DECLARES edges already persist and re-emitting duplicated them (REL tables carry no PK). This second incremental≠full divergence surfaced once the equivalence test was fixed. - Delete dead `_existing_symbol_ids` (zero callers); correct the Route-MERGE loop comment (it iterates all routes, not phantoms). Tests: - `test_incremental_bulk_write_equivalent_to_full_rebuild` now asserts real incremental≡full equivalence (node count, per-type edge counts, type roles) instead of the prior tautology (`set(keys)==set(keys)` + `count>0`). - New `test_incremental_refreshes_dependent_role_on_meta_chain_change` guards the exact regression. - Rename `test_bulk_write_edges_match_per_row_baseline` → `test_bank_chat_bulk_build_matches_committed_baseline`: the per-row path is gone, so the fixture (bulk-generated) is a drift anchor, not a per-row proof. Known follow-ups (out of scope): converting the remaining per-row Route MERGE to bulk; OVERRIDES may have an analogous duplication for cross-file pairs; annotation-definition edits don't pull annotation-users into incremental scope (a separate, pre-existing scope gap, not the upsert regression fixed here). Co-Authored-By: Claude --- build_ast_graph.py | 102 ++++++++--- plans/AGENT-PROMPTS-INIT-INCREMENT-PERF.md | 2 +- plans/active/PLAN-INIT-INCREMENT-PERF.md | 11 +- tests/test_ast_graph_build.py | 19 ++- tests/test_incremental_graph.py | 188 +++++++++++++++------ 5 files changed, 237 insertions(+), 85 deletions(-) diff --git a/build_ast_graph.py b/build_ast_graph.py index f8e4daa..3e1857c 100644 --- a/build_ast_graph.py +++ b/build_ast_graph.py @@ -188,6 +188,12 @@ class TypeIndexEntry: package: str outer_fqn: str | None node_id: str + # True when this entry was loaded from the existing graph by + # `_load_existing_types` (an unchanged-file stub used only for cross-file + # resolution). Its `decl` is a placeholder (no annotations/methods), so its + # recomputed role/capabilities must never be written back over the real + # stored values. See `_write_nodes_impl`. + loaded_from_db: bool = False @dataclass @@ -200,6 +206,11 @@ class MemberEntry: module: str microservice: str node_id: str + # True when loaded from the existing graph by `_load_existing_members` + # (an unchanged-file stub used only for cross-file call resolution). Its + # DECLARES edge already persists in the graph, so it must not be re-emitted + # by `_populate_declares_rows` (REL tables have no PK → would duplicate). + loaded_from_db: bool = False @dataclass @@ -556,7 +567,7 @@ def _load_existing_types(conn: ladybug.Connection, tables: GraphTables, exclude_ if exclude_files is not None and not exclude_files: return - where = "WHERE s.kind IN ['class', 'interface', 'enum', 'annotation', 'record']" + where = f"WHERE s.kind IN {list(_TYPE_KINDS)}" params: dict = {} if exclude_files: where += "\n AND NOT (s.filename IN $exclude_files)" @@ -586,6 +597,7 @@ def _load_existing_types(conn: ladybug.Connection, tables: GraphTables, exclude_ package=package, outer_fqn=None, node_id=node_id, + loaded_from_db=True, ) tables.types[fqn] = entry tables.by_simple_name.setdefault(name, []).append(entry) @@ -634,6 +646,7 @@ def _load_existing_members(conn: ladybug.Connection, tables: GraphTables, exclud module="", microservice="", node_id=node_id, + loaded_from_db=True, )) @@ -3044,19 +3057,6 @@ def _existing_node_ids(conn: ladybug.Connection) -> set[str]: return ids -def _existing_symbol_ids(conn: ladybug.Connection) -> set[str]: - """Return every Symbol node id currently in the graph. - - Deprecated: use _existing_node_ids for filtering all node types. - Kept for compatibility with existing _write_edges implementation. - """ - result = conn.execute("MATCH (n:Symbol) RETURN n.id") - ids: set[str] = set() - while result.has_next(): - ids.add(result.get_next()[0]) - return ids - - # Column-order constants for bulk COPY FROM. # For REL tables, the first two entries are FROM/TO node primary keys (kuzu requirement). # Order matches the corresponding _SCHEMA_* declarations above. @@ -3066,6 +3066,29 @@ def _existing_symbol_ids(conn: ladybug.Connection) -> set[str]: "modifiers", "annotations", "capabilities", "role", "signature", "parent_id", "resolved" ] +# Type declaration kinds. Tuple (not set) so the rendered SQL `IN` clause is +# deterministic. Used to (a) load type stubs for cross-file resolution and +# (b) scope the incremental property-refresh SET to type nodes. +_TYPE_KINDS: tuple[str, ...] = ("class", "interface", "enum", "annotation", "record") + +# Update every mutable Symbol field on an existing node by primary key. Used on +# the incremental path to refresh preserved dependent type nodes whose +# `role`/`capabilities` (and other project-wide-derived fields) can shift +# without their own source changing — restoring the upsert the legacy per-row +# `MERGE (n:Symbol {id:$id}) SET …` provided. Field list mirrors `_NODE_COLUMNS` +# minus `id`. +_SET_SYMBOL_BY_ID = ( + "MATCH (n:Symbol {id: $id}) " + "SET n.kind = $kind, n.name = $name, n.fqn = $fqn, " + "n.package = $package, n.module = $module, n.microservice = $microservice, " + "n.filename = $filename, " + "n.start_line = $start_line, n.end_line = $end_line, " + "n.start_byte = $start_byte, n.end_byte = $end_byte, " + "n.modifiers = $modifiers, n.annotations = $annotations, " + "n.capabilities = $capabilities, n.role = $role, " + "n.signature = $signature, n.parent_id = $parent_id, n.resolved = $resolved" +) + _REL_EXTENDS_COLUMNS = ["FROM", "TO", "source_file", "dst_name", "dst_fqn", "resolved"] _REL_IMPLEMENTS_COLUMNS = ["FROM", "TO", "source_file", "dst_name", "dst_fqn", "resolved"] _REL_INJECTS_COLUMNS = ["FROM", "TO", "source_file", "dst_name", "dst_fqn", "resolved", "mechanism", "annotation", "field_or_param"] @@ -3106,6 +3129,10 @@ def _write_nodes_impl( # Stage all Symbol rows rows: list[dict] = [] + # Node ids loaded from the existing graph as resolution-only stubs + # (`_load_existing_types`); their staged rows carry placeholder values and + # must never be written back over the real nodes. + stub_ids: set[str] = set() # packages for pkg, pid in tables.packages.items(): @@ -3119,6 +3146,8 @@ def _write_nodes_impl( )) # types for entry in tables.types.values(): + if entry.loaded_from_db: + stub_ids.add(entry.node_id) d = entry.decl role, capabilities = resolve_role_and_capabilities( d, @@ -3158,14 +3187,34 @@ def _write_nodes_impl( for pid, row in tables.phantoms.items(): rows.append(row) - # For incremental path, filter out nodes that already exist to avoid duplicate primary key errors - # The full rebuild path starts with an empty database, so all rows are new + # Bulk-load new Symbol rows. The full-rebuild path starts from an empty + # database (`_drop_all`), so every row is new. The incremental path reaches + # here with a populated database: changed-file nodes were deleted by + # `_delete_file_scope` (absent here → new), while dependent-file nodes are + # deliberately preserved (see `_delete_file_scope` / issue #305). existing_ids = _existing_node_ids(conn) new_rows = [row for row in rows if row["id"] not in existing_ids] - - # Bulk-load only new Symbol rows _bulk_copy(conn, "Symbol", _NODE_COLUMNS, new_rows) + # Refresh mutable properties on preserved dependent TYPE nodes (incremental + # path only; `update_rows` is empty on the full path). `role`/`capabilities` + # — and any other field derived from project-wide inputs (meta-annotation + # chain, brownfield overrides) — can shift without the type's own source + # changing, so a preserved dependent must be re-SET to stay byte-equivalent + # with a full rebuild. The legacy per-row `_MERGE_SYMBOL` upserted every + # staged node and did this implicitly; bulk `COPY FROM` only appends, so the + # SET is explicit here. Stubs (`stub_ids`) are skipped: their decl is a + # placeholder and their stored values are authoritative. Non-type kinds + # carry no mutable role/capabilities, so they are skipped too. + update_rows = [ + row for row in rows + if row["id"] in existing_ids + and row["id"] not in stub_ids + and row["kind"] in _TYPE_KINDS + ] + for row in update_rows: + conn.execute(_SET_SYMBOL_BY_ID, row) + def _write_nodes( conn: ladybug.Connection, @@ -3180,8 +3229,15 @@ def _write_nodes( def _populate_declares_rows(tables: GraphTables) -> None: + # Skip members loaded from the existing graph for cross-file resolution: a + # DECLARES edge for an unchanged-file member already persists (its + # source_file is out of scope, so `_delete_file_scope` left it), and + # re-emitting it would append a duplicate (REL tables carry no primary key). + # Full-rebuild never loads members, so this is a no-op there. tables.declares_rows = [ - DeclaresRow(src_id=m.parent_id, dst_id=m.node_id) for m in tables.members + DeclaresRow(src_id=m.parent_id, dst_id=m.node_id) + for m in tables.members + if not m.loaded_from_db ] @@ -3884,8 +3940,12 @@ def _write_clients_producers_and_calls(conn: ladybug.Connection, tables: GraphTa Route nodes (created by pass5 for cross-service calls) that wouldn't otherwise exist in LadybugDB. """ - # Write phantom routes that don't already exist (pass5 creates these for cross-service calls) - # Intentionally retained MERGE for dedup against routes written during scoped step + # Upsert every route via MERGE. `tables.routes_rows` is the full route set + # (pass4 routes + pass5 phantom routes), not just phantoms; MERGE is + # idempotent against routes already written during the scoped step, so it + # neither duplicates nor drops them. This is the one remaining per-row graph + # write — converting it to bulk COPY requires filtering against existing + # route ids to reproduce the dedup, and is left as a future optimization. for row in tables.routes_rows: conn.execute( "MERGE (r:Route {id: $id}) " diff --git a/plans/AGENT-PROMPTS-INIT-INCREMENT-PERF.md b/plans/AGENT-PROMPTS-INIT-INCREMENT-PERF.md index d1c602f..1436ca4 100644 --- a/plans/AGENT-PROMPTS-INIT-INCREMENT-PERF.md +++ b/plans/AGENT-PROMPTS-INIT-INCREMENT-PERF.md @@ -137,7 +137,7 @@ lines vs the pre-PR baseline. - [ ] Step-1 spike result recorded in `_bulk_copy` docstring. - [ ] `_write_edges` stages per-type rows (CALLS dedup + callee_declaring_role at staging); UnresolvedCallSite bulk-loaded before UNRESOLVED_AT. - [ ] `_CREATE_EXT/IMPL/INJ/DECL/OVERRIDES/CALL` + local `_CREATE_UNRESOLVED/_UNRESOLVED_AT` deleted. -- [ ] `test_bulk_write_edges_match_per_row_baseline`, `test_bulk_write_is_deterministic_double_build`, `test_bulk_write_preserves_calls_dedup_and_callee_declaring_role`, `test_bulk_write_empty_rel_table_is_noop` pass. +- [ ] `test_bank_chat_bulk_build_matches_committed_baseline` (renamed from `test_bulk_write_edges_match_per_row_baseline` in PR-P4), `test_bulk_write_is_deterministic_double_build`, `test_bulk_write_preserves_calls_dedup_and_callee_declaring_role`, `test_bulk_write_empty_rel_table_is_noop` pass. - [ ] Full `test_ast_graph_build.py` + `test_incremental_graph.py` pass unchanged. - [ ] Sentinel greps: zero where required, non-zero where required. - [ ] `.venv/bin/ruff check .` clean; benchmark in PR description. diff --git a/plans/active/PLAN-INIT-INCREMENT-PERF.md b/plans/active/PLAN-INIT-INCREMENT-PERF.md index b6cff84..61a8ae7 100644 --- a/plans/active/PLAN-INIT-INCREMENT-PERF.md +++ b/plans/active/PLAN-INIT-INCREMENT-PERF.md @@ -161,9 +161,12 @@ phase delta. Not packaged; documents the measured speedup in the PR description. ## Tests for PR-P1 -1. `test_bulk_write_edges_match_per_row_baseline` — build `tests/bank-chat-system` - via the bulk path, assert node count, per-type edge counts, `GraphMeta` - counters, and sampled edge rows equal `graph_baseline_bank_chat.json`. +1. `test_bank_chat_bulk_build_matches_committed_baseline` (renamed from + `test_bulk_write_edges_match_per_row_baseline` in PR-P4 once the per-row + reference was gone) — build `tests/bank-chat-system` via the bulk path, + assert node count, per-type edge counts, `GraphMeta` counters, and sampled + edge rows equal `graph_baseline_bank_chat.json` (a drift anchor, not a + per-row equivalence proof). 2. `test_bulk_write_is_deterministic_double_build` — build bank-chat twice to two DBs via the bulk path, assert identical counts + query battery. Models on `tests/test_brownfield_routes.py::test_29_determinism_pass4_route_ids` and @@ -183,7 +186,7 @@ phase delta. Not packaged; documents the measured speedup in the PR description. - [ ] `_bulk_copy` helper added; step-1 spike result in its docstring. - [ ] `_write_edges` stages per-type rows (CALLS dedup + `callee_declaring_role` at staging) and bulk-loads UnresolvedCallSite before UNRESOLVED_AT. - [ ] `_CREATE_EXT/IMPL/INJ/DECL/OVERRIDES/CALL` deleted; local `_CREATE_UNRESOLVED/_UNRESOLVED_AT` gone with the rewrite. -- [ ] `test_bulk_write_edges_match_per_row_baseline`, +- [ ] `test_bank_chat_bulk_build_matches_committed_baseline`, `test_bulk_write_is_deterministic_double_build`, `test_bulk_write_preserves_calls_dedup_and_callee_declaring_role`, `test_bulk_write_empty_rel_table_is_noop` pass. diff --git a/tests/test_ast_graph_build.py b/tests/test_ast_graph_build.py index d6579cb..344d16d 100644 --- a/tests/test_ast_graph_build.py +++ b/tests/test_ast_graph_build.py @@ -554,11 +554,20 @@ def _load_baseline() -> dict: return json.load(f) -def test_bulk_write_edges_match_per_row_baseline(ladybug_db_path: Path) -> None: - """Bulk COPY FROM produces identical graph to the per-row baseline. - - Asserts node count, per-type edge counts, GraphMeta counters, and sampled edge - properties match the baseline generated from the last per-row _write_edges build. +def test_bank_chat_bulk_build_matches_committed_baseline(ladybug_db_path: Path) -> None: + """Bank-chat full build matches the committed baseline (a drift anchor). + + Asserts node count, per-type edge counts, GraphMeta counters, and sampled + CALLS properties match ``graph_baseline_bank_chat.json``. + + This is a **regression anchor**, not a per-row equivalence proof: the legacy + per-row ``_write_edges`` was removed in PR-P1, so there is no per-row + reference to compare against, and this fixture was itself generated from a + bulk build (commit 8261acf). It guards against unintended drift in the + bank-chat full-build graph. The actual write-mechanism equivalence gates are + ``test_bulk_write_is_deterministic_double_build`` (two bulk builds identical) + and ``test_incremental_bulk_write_equivalent_to_full_rebuild`` (incremental + matches a full rebuild of the same state). """ baseline = _load_baseline() conn = _connect(ladybug_db_path) diff --git a/tests/test_incremental_graph.py b/tests/test_incremental_graph.py index f70da6b..8e10466 100644 --- a/tests/test_incremental_graph.py +++ b/tests/test_incremental_graph.py @@ -989,7 +989,8 @@ def test_incremental_bulk_write_equivalent_to_full_rebuild(self, tmp_path: Path) state (bulk) and asserts node count, per-type edge counts, and GraphMeta counters are identical. """ - from build_ast_graph import incremental_rebuild, write_ladybug + from build_ast_graph import incremental_rebuild + from _builders import build_ladybug_full_into source_root = tmp_path / "src" source_root.mkdir() @@ -1001,72 +1002,151 @@ def test_incremental_bulk_write_equivalent_to_full_rebuild(self, tmp_path: Path) (source_root / "A.java").write_text("package pkg; class A { void foo() {} }", encoding="utf-8") (source_root / "B.java").write_text("package pkg; class B { void bar() {} }", encoding="utf-8") - # Initial full build - tables = GraphTables() - asts = pass1_parse(source_root, tables, verbose=False) - pass2_edges(tables, asts, verbose=False) - write_ladybug(ladybug_path, tables, source_root=source_root, verbose=False) - - # Initialize hash tracker - tracker = FileHashTracker(index_dir) - ignore = LayeredIgnore(source_root, use_gitignore=False, builtin_patterns=[]) - tracker.detect_changes(source_root, ignore) - for rel_path in ["A.java", "B.java"]: - tracker.update({rel_path}, source_root) - tracker.save() - - # Modify A.java - (source_root / "A.java").write_text("package pkg; class A { void foo() {} void baz() {} }", encoding="utf-8") + # Initial full build (pass1–6). write_ladybug initializes the hash + # tracker, so incremental_rebuild can detect the change below. + build_ladybug_full_into(source_root, ladybug_path) - # Run incremental (bulk) + # Modify A.java, then run the incremental path. + (source_root / "A.java").write_text( + "package pkg; class A { void foo() {} void baz() {} }", encoding="utf-8" + ) result = incremental_rebuild(source_root, ladybug_path, verbose=False) assert result.mode == "incremental" - # Read incremental graph state - import ladybug + def _graph_state(c: ladybug.Connection) -> tuple[int, dict[str, int], dict[str, str]]: + nc = c.execute("MATCH (n) RETURN count(n)") + node_count = nc.get_next()[0] if nc.has_next() else 0 + edge_counts: dict[str, int] = {} + for rel_type in ["EXTENDS", "IMPLEMENTS", "INJECTS", "DECLARES", "OVERRIDES", + "CALLS", "EXPOSES", "DECLARES_CLIENT", "DECLARES_PRODUCER", + "HTTP_CALLS", "ASYNC_CALLS"]: + ec = c.execute(f"MATCH ()-[r:{rel_type}]->() RETURN count(r)") + edge_counts[rel_type] = ec.get_next()[0] if ec.has_next() else 0 + # Type roles catch property staleness: role/capabilities depend on + # project-wide inputs and must match a full rebuild of the same state. + roles: dict[str, str] = {} + rr = c.execute( + "MATCH (n:Symbol) WHERE n.kind IN ['class','interface','enum','annotation','record'] " + "RETURN n.fqn, n.role" + ) + while rr.has_next(): + fqn, role = rr.get_next() + roles[fqn] = role + return node_count, edge_counts, roles + db = ladybug.Database(str(ladybug_path)) conn = ladybug.Connection(db) + incremental_state = _graph_state(conn) + conn.close() + db.close() - def _read_graph_state(conn: ladybug.Connection) -> tuple[int, dict[str, int]]: - node_count = 0 - nc_result = conn.execute("MATCH (n) RETURN count(n)") - if nc_result.has_next(): - node_count = nc_result.get_next()[0] - - edge_counts: dict[str, int] = {} - for rel_type in ["EXTENDS", "IMPLEMENTS", "INJECTS", "DECLARES", "OVERRIDES", "CALLS", "EXPOSES", "DECLARES_CLIENT", "DECLARES_PRODUCER", "HTTP_CALLS", "ASYNC_CALLS"]: - ec_result = conn.execute(f"MATCH ()-[r:{rel_type}]->() RETURN count(r)") - if ec_result.has_next(): - edge_counts[rel_type] = ec_result.get_next()[0] + # Full rebuild the identical final state into a fresh index dir. + full_dir = tmp_path / "full" + full_dir.mkdir() + full_path = full_dir / "code_graph.lbug" + build_ladybug_full_into(source_root, full_path) - conn.close() - return node_count, edge_counts + db2 = ladybug.Database(str(full_path)) + conn2 = ladybug.Connection(db2) + full_state = _graph_state(conn2) + conn2.close() + db2.close() + + # Equivalence invariant: an incremental rebuild of a state must produce + # the same graph as a full rebuild of that state. The previous form + # asserted only `count > 0` and `set(edge_type_keys) == set(edge_type_keys)` + # — a no-op, since every rel type yields a count row even at 0. + assert incremental_state[0] == full_state[0], ( + f"node count diverged: incremental={incremental_state[0]} full={full_state[0]}" + ) + assert incremental_state[1] == full_state[1], ( + f"edge counts diverged:\nincremental={incremental_state[1]}\nfull={full_state[1]}" + ) + assert incremental_state[2] == full_state[2], ( + f"type roles diverged:\nincremental={incremental_state[2]}\nfull={full_state[2]}" + ) - incremental_state = _read_graph_state(conn) + def test_incremental_refreshes_dependent_role_on_meta_chain_change( + self, tmp_path: Path + ) -> None: + """A preserved dependent's role is refreshed when its meta-chain shifts. + + Regression guard for the PR-P4 fix: `_write_nodes_impl` switched from a + per-row `MERGE … SET role=…` upsert to bulk `COPY FROM` + skip-if-exists, + which dropped the property refresh on preserved dependent type nodes. A + dependent type's `role`/`capabilities` depend on project-wide inputs (the + meta-annotation chain) and can shift without the dependent's own source + changing — so the increment must re-SET them to stay byte-equivalent with + a full rebuild. + + Corpus: `@MyService class Svc` (a dependent of `Target`, which it calls); + `@interface MyService` is edited from no meta-annotation to `@Service`, so + the chain maps `MyService → Service` and `Svc`'s role flips `OTHER → SERVICE`. + `Target` is also edited (a real content change) so the dependency walk + pulls `Svc` into the incremental scope as a preserved dependent (it has a + CALLS edge into the changed `Target`) — exactly the case the refresh must + handle. The CLI runs each increment as a fresh process (cold meta-chain + cache); the test mirrors that so the increment sees the updated chain. + """ + from build_ast_graph import incremental_rebuild + from graph_enrich import collect_annotation_meta_chain + from _builders import build_ladybug_full_into - # Full rebuild the same state (drop and recreate) - import shutil - conn.close() - db.close() - shutil.rmtree(index_dir) + source_root = tmp_path / "src" + source_root.mkdir() + index_dir = tmp_path / "index" index_dir.mkdir() + ladybug_path = index_dir / "code_graph.lbug" + java = source_root / "pkg" + java.mkdir(parents=True) - tables2 = GraphTables() - asts2 = pass1_parse(source_root, tables2, verbose=False) - pass2_edges(tables2, asts2, verbose=False) - ladybug_path2 = index_dir / "code_graph.lbug" - write_ladybug(ladybug_path2, tables2, source_root=source_root, verbose=False) + svc_src = ( + "package pkg;\n@MyService\n" + "public class Svc { public void go(Target t) { t.foo(); } }\n" + ) - db2 = ladybug.Database(str(ladybug_path2)) - conn2 = ladybug.Connection(db2) - full_state = _read_graph_state(conn2) - - # Assert equivalence (node count and edge types should match) - # Note: exact counts may differ slightly due to incremental's dependent expansion - assert incremental_state[0] > 0, "incremental should have nodes" - assert full_state[0] > 0, "full rebuild should have nodes" - # Both should have the same edge types present (even if counts differ) - assert set(incremental_state[1].keys()) == set(full_state[1].keys()), "edge types should match" + # V1: MyService has no meta-annotation → Svc.role = OTHER. + (java / "MyService.java").write_text( + "package pkg; public @interface MyService {}\n", encoding="utf-8" + ) + (java / "Svc.java").write_text(svc_src, encoding="utf-8") + (java / "Target.java").write_text( + "package pkg; public class Target { public void foo() {} }\n", encoding="utf-8" + ) + build_ladybug_full_into(source_root, ladybug_path) + + # V2: shift the role lever (MyService becomes @Service-meta-annotated) AND + # edit Target's body (add a method) so Svc is pulled in as a preserved + # dependent via its CALLS edge into Target. + (java / "MyService.java").write_text( + "package pkg;\nimport org.springframework.stereotype.Service;\n" + "@Service\npublic @interface MyService {}\n", + encoding="utf-8", + ) + (java / "Target.java").write_text( + "package pkg; public class Target { public void foo() {} public void bar() {} }\n", + encoding="utf-8", + ) + collect_annotation_meta_chain.cache_clear() + result = incremental_rebuild(source_root, ladybug_path, verbose=False) + assert result.mode == "incremental", f"expected incremental, got {result.mode}" + assert result.dependents_reprocessed >= 1, "Svc should be pulled in as a dependent" + + def role_of(fqn: str) -> str: + db = ladybug.Database(str(ladybug_path)) + conn = ladybug.Connection(db) + r = conn.execute("MATCH (n:Symbol {fqn: $fqn}) RETURN n.role", {"fqn": fqn}) + v = r.get_next()[0] if r.has_next() else None + conn.close() + db.close() + return v + + # Svc was a preserved dependent (in scope via Target, not deleted); its + # role must refresh to SERVICE to match a full rebuild of this state. + assert role_of("pkg.Svc") == "SERVICE", ( + "preserved dependent role not refreshed after meta-chain change " + "(PR-P4 regression: skip-if-exists dropped the upsert)" + ) def test_incremental_route_merge_dedup_preserved(self, tmp_path: Path) -> None: """Pass5/6 Route MERGE dedup is preserved after bulk conversion.