From c02c69161280903358093d4685289ad945efe249 Mon Sep 17 00:00:00 2001
From: Dmitry Teryaev <doudmitry@gmail.com>
Date: Mon, 22 Jun 2026 18:17:22 +0300
Subject: [PATCH] fix(graph): refresh preserved dependent nodes on increment
 (PR-P4)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR-P2 replaced the per-row `_MERGE_SYMBOL` upsert in `_write_nodes_impl` with a
skip-if-exists filter, which dropped the property refresh on preserved dependent
type nodes. A dependent's `role`/`capabilities` depend on project-wide inputs
(meta-annotation chain, brownfield overrides) and can shift without its own
source changing, so the incremental graph diverged from a full rebuild — a
preserved dependent whose role lever changed stayed stale until the next full
rebuild. The `_delete_file_scope` contract (issue #305) relied on that re-MERGE
to refresh preserved dependents in place.

Found by fanning out reviewer subagents over the landed init/increment-perf plan
and adjudicating a B-vs-D conflict empirically (the "benign" verdict came from a
reviewer that couldn't run ladybug; the "bug" verdict reproduced).

Changes:
- Mark `TypeIndexEntry`/`MemberEntry` `loaded_from_db` when
  `_load_existing_{types,members}` creates them, so DB-loaded stubs (placeholder
  decls) are distinguishable from freshly-parsed scoped/dependent decls.
- After the bulk COPY of new nodes, re-SET every mutable Symbol field on
  preserved, non-stub, type-kind nodes (`_SET_SYMBOL_BY_ID`), restoring the
  upsert the design relied on. Stubs are skipped (their stored values are
  authoritative); non-type kinds carry no mutable role/capabilities; the full
  path is unaffected (empty DB → no existing nodes).
- `_populate_declares_rows` skips loaded members: their DECLARES edges already
  persist, and re-emitting them appended duplicates (REL tables carry no PK).
  This second incremental≠full divergence surfaced once the equivalence test
  was fixed.
- Delete dead `_existing_symbol_ids` (zero callers); correct the Route-MERGE
  loop comment (it iterates all routes, not just phantoms).

Tests:
- `test_incremental_bulk_write_equivalent_to_full_rebuild` now asserts real
  incremental≡full equivalence (node count, per-type edge counts, type roles)
  instead of the prior tautology (`set(keys)==set(keys)` + `count>0`).
- New `test_incremental_refreshes_dependent_role_on_meta_chain_change` guards
  the exact regression.
- Rename `test_bulk_write_edges_match_per_row_baseline` →
  `test_bank_chat_bulk_build_matches_committed_baseline`: the per-row path is
  gone, so the fixture (bulk-generated) is a drift anchor, not a per-row proof.

Known follow-ups (out of scope): converting the remaining per-row Route MERGE
to bulk; OVERRIDES may have an analogous duplication for cross-file pairs;
annotation-definition edits don't pull annotation-users into incremental scope
(a separate, pre-existing scope gap, not the upsert regression fixed here).

Co-Authored-By: Claude <noreply@anthropic.com>
---
 build_ast_graph.py                         | 102 ++++++++---
 plans/AGENT-PROMPTS-INIT-INCREMENT-PERF.md |   2 +-
 plans/active/PLAN-INIT-INCREMENT-PERF.md   |  11 +-
 tests/test_ast_graph_build.py              |  19 ++-
 tests/test_incremental_graph.py            | 188 +++++++++++++++------
 5 files changed, 237 insertions(+), 85 deletions(-)

diff --git a/build_ast_graph.py b/build_ast_graph.py
index f8e4daa..3e1857c 100644
--- a/build_ast_graph.py
+++ b/build_ast_graph.py
@@ -188,6 +188,12 @@ class TypeIndexEntry:
     package: str
     outer_fqn: str | None
     node_id: str
+    # True when this entry was loaded from the existing graph by
+    # `_load_existing_types` (an unchanged-file stub used only for cross-file
+    # resolution). Its `decl` is a placeholder (no annotations/methods), so its
+    # recomputed role/capabilities must never be written back over the real
+    # stored values. See `_write_nodes_impl`.
+    loaded_from_db: bool = False
 
 
 @dataclass
@@ -200,6 +206,11 @@ class MemberEntry:
     module: str
     microservice: str
     node_id: str
+    # True when loaded from the existing graph by `_load_existing_members`
+    # (an unchanged-file stub used only for cross-file call resolution). Its
+    # DECLARES edge already persists in the graph, so it must not be re-emitted
+    # by `_populate_declares_rows` (REL tables have no PK → would duplicate).
+    loaded_from_db: bool = False
 
 
 @dataclass
@@ -556,7 +567,7 @@ def _load_existing_types(conn: ladybug.Connection, tables: GraphTables, exclude_
     if exclude_files is not None and not exclude_files:
         return
 
-    where = "WHERE s.kind IN ['class', 'interface', 'enum', 'annotation', 'record']"
+    where = f"WHERE s.kind IN {list(_TYPE_KINDS)}"
     params: dict = {}
     if exclude_files:
         where += "\n    AND NOT (s.filename IN $exclude_files)"
@@ -586,6 +597,7 @@ def _load_existing_types(conn: ladybug.Connection, tables: GraphTables, exclude_
             package=package,
             outer_fqn=None,
             node_id=node_id,
+            loaded_from_db=True,
         )
         tables.types[fqn] = entry
         tables.by_simple_name.setdefault(name, []).append(entry)
@@ -634,6 +646,7 @@ def _load_existing_members(conn: ladybug.Connection, tables: GraphTables, exclud
             module="",
             microservice="",
             node_id=node_id,
+            loaded_from_db=True,
         ))
 
 
@@ -3044,19 +3057,6 @@ def _existing_node_ids(conn: ladybug.Connection) -> set[str]:
     return ids
 
 
-def _existing_symbol_ids(conn: ladybug.Connection) -> set[str]:
-    """Return every Symbol node id currently in the graph.
-
-    Deprecated: use _existing_node_ids for filtering all node types.
-    Kept for compatibility with existing _write_edges implementation.
-    """
-    result = conn.execute("MATCH (n:Symbol) RETURN n.id")
-    ids: set[str] = set()
-    while result.has_next():
-        ids.add(result.get_next()[0])
-    return ids
-
-
 # Column-order constants for bulk COPY FROM.
 # For REL tables, the first two entries are FROM/TO node primary keys (kuzu requirement).
 # Order matches the corresponding _SCHEMA_* declarations above.
@@ -3066,6 +3066,29 @@ def _existing_symbol_ids(conn: ladybug.Connection) -> set[str]:
     "modifiers", "annotations", "capabilities", "role", "signature", "parent_id", "resolved"
 ]
 
+# Type declaration kinds. Tuple (not set) so the rendered Cypher `IN` list is
+# deterministic. Used to (a) load type stubs for cross-file resolution and
+# (b) scope the incremental property-refresh SET to type nodes.
+_TYPE_KINDS: tuple[str, ...] = ("class", "interface", "enum", "annotation", "record")
+
+# Update every mutable Symbol field on an existing node by primary key. Used on
+# the incremental path to refresh preserved dependent type nodes whose
+# `role`/`capabilities` (and other project-wide-derived fields) can shift
+# without their own source changing — restoring the upsert the legacy per-row
+# `MERGE (n:Symbol {id:$id}) SET …` provided. Field list mirrors `_NODE_COLUMNS`
+# minus `id`.
+_SET_SYMBOL_BY_ID = (
+    "MATCH (n:Symbol {id: $id}) "
+    "SET n.kind = $kind, n.name = $name, n.fqn = $fqn, "
+    "n.package = $package, n.module = $module, n.microservice = $microservice, "
+    "n.filename = $filename, "
+    "n.start_line = $start_line, n.end_line = $end_line, "
+    "n.start_byte = $start_byte, n.end_byte = $end_byte, "
+    "n.modifiers = $modifiers, n.annotations = $annotations, "
+    "n.capabilities = $capabilities, n.role = $role, "
+    "n.signature = $signature, n.parent_id = $parent_id, n.resolved = $resolved"
+)
+
 _REL_EXTENDS_COLUMNS = ["FROM", "TO", "source_file", "dst_name", "dst_fqn", "resolved"]
 _REL_IMPLEMENTS_COLUMNS = ["FROM", "TO", "source_file", "dst_name", "dst_fqn", "resolved"]
 _REL_INJECTS_COLUMNS = ["FROM", "TO", "source_file", "dst_name", "dst_fqn", "resolved", "mechanism", "annotation", "field_or_param"]
@@ -3106,6 +3129,10 @@ def _write_nodes_impl(
 
     # Stage all Symbol rows
     rows: list[dict] = []
+    # Node ids loaded from the existing graph as resolution-only stubs
+    # (`_load_existing_types`); their staged rows carry placeholder values and
+    # must never be written back over the real nodes.
+    stub_ids: set[str] = set()
 
     # packages
     for pkg, pid in tables.packages.items():
@@ -3119,6 +3146,8 @@ def _write_nodes_impl(
         ))
     # types
     for entry in tables.types.values():
+        if entry.loaded_from_db:
+            stub_ids.add(entry.node_id)
         d = entry.decl
         role, capabilities = resolve_role_and_capabilities(
             d,
@@ -3158,14 +3187,34 @@ def _write_nodes_impl(
     for pid, row in tables.phantoms.items():
         rows.append(row)
 
-    # For incremental path, filter out nodes that already exist to avoid duplicate primary key errors
-    # The full rebuild path starts with an empty database, so all rows are new
+    # Bulk-load new Symbol rows. The full-rebuild path starts from an empty
+    # database (`_drop_all`), so every row is new. The incremental path reaches
+    # here with a populated database: changed-file nodes were deleted by
+    # `_delete_file_scope` (absent here → new), while dependent-file nodes are
+    # deliberately preserved (see `_delete_file_scope` / issue #305).
     existing_ids = _existing_node_ids(conn)
     new_rows = [row for row in rows if row["id"] not in existing_ids]
-
-    # Bulk-load only new Symbol rows
     _bulk_copy(conn, "Symbol", _NODE_COLUMNS, new_rows)
 
+    # Refresh mutable properties on preserved dependent TYPE nodes (incremental
+    # path only; `update_rows` is empty on the full path). `role`/`capabilities`
+    # — and any other field derived from project-wide inputs (meta-annotation
+    # chain, brownfield overrides) — can shift without the type's own source
+    # changing, so a preserved dependent must be re-SET to stay byte-equivalent
+    # with a full rebuild. The legacy per-row `_MERGE_SYMBOL` upserted every
+    # staged node and did this implicitly; bulk `COPY FROM` only appends, so the
+    # SET is explicit here. Stubs (`stub_ids`) are skipped: their decl is a
+    # placeholder and their stored values are authoritative. Non-type kinds
+    # carry no mutable role/capabilities, so they are skipped too.
+    update_rows = [
+        row for row in rows
+        if row["id"] in existing_ids
+        and row["id"] not in stub_ids
+        and row["kind"] in _TYPE_KINDS
+    ]
+    for row in update_rows:
+        conn.execute(_SET_SYMBOL_BY_ID, row)
+
 
 def _write_nodes(
     conn: ladybug.Connection,
@@ -3180,8 +3229,15 @@ def _write_nodes(
 
 
 def _populate_declares_rows(tables: GraphTables) -> None:
+    # Skip members loaded from the existing graph for cross-file resolution: a
+    # DECLARES edge for an unchanged-file member already persists (its
+    # source_file is out of scope, so `_delete_file_scope` left it), and
+    # re-emitting it would append a duplicate (REL tables carry no primary key).
+    # Full-rebuild never loads members, so this is a no-op there.
     tables.declares_rows = [
-        DeclaresRow(src_id=m.parent_id, dst_id=m.node_id) for m in tables.members
+        DeclaresRow(src_id=m.parent_id, dst_id=m.node_id)
+        for m in tables.members
+        if not m.loaded_from_db
     ]
 
 
@@ -3884,8 +3940,12 @@ def _write_clients_producers_and_calls(conn: ladybug.Connection, tables: GraphTa
     Route nodes (created by pass5 for cross-service calls) that wouldn't
     otherwise exist in LadybugDB.
     """
-    # Write phantom routes that don't already exist (pass5 creates these for cross-service calls)
-    # Intentionally retained MERGE for dedup against routes written during scoped step
+    # Upsert every route via MERGE. `tables.routes_rows` is the full route set
+    # (pass4 routes + pass5 phantom routes), not just phantoms; MERGE is
+    # idempotent against routes already written during the scoped step, so it
+    # neither duplicates nor drops them. This is the one remaining per-row graph
+    # write — converting it to bulk COPY requires filtering against existing
+    # route ids to reproduce the dedup, and is left as a future optimization.
     for row in tables.routes_rows:
         conn.execute(
             "MERGE (r:Route {id: $id}) "
diff --git a/plans/AGENT-PROMPTS-INIT-INCREMENT-PERF.md b/plans/AGENT-PROMPTS-INIT-INCREMENT-PERF.md
index d1c602f..1436ca4 100644
--- a/plans/AGENT-PROMPTS-INIT-INCREMENT-PERF.md
+++ b/plans/AGENT-PROMPTS-INIT-INCREMENT-PERF.md
@@ -137,7 +137,7 @@ lines vs the pre-PR baseline.
 - [ ] Step-1 spike result recorded in `_bulk_copy` docstring.
 - [ ] `_write_edges` stages per-type rows (CALLS dedup + callee_declaring_role at staging); UnresolvedCallSite bulk-loaded before UNRESOLVED_AT.
 - [ ] `_CREATE_EXT/IMPL/INJ/DECL/OVERRIDES/CALL` + local `_CREATE_UNRESOLVED/_UNRESOLVED_AT` deleted.
-- [ ] `test_bulk_write_edges_match_per_row_baseline`, `test_bulk_write_is_deterministic_double_build`, `test_bulk_write_preserves_calls_dedup_and_callee_declaring_role`, `test_bulk_write_empty_rel_table_is_noop` pass.
+- [ ] `test_bank_chat_bulk_build_matches_committed_baseline` (renamed from `test_bulk_write_edges_match_per_row_baseline` in PR-P4), `test_bulk_write_is_deterministic_double_build`, `test_bulk_write_preserves_calls_dedup_and_callee_declaring_role`, `test_bulk_write_empty_rel_table_is_noop` pass.
 - [ ] Full `test_ast_graph_build.py` + `test_incremental_graph.py` pass unchanged.
 - [ ] Sentinel greps: zero where required, non-zero where required.
 - [ ] `.venv/bin/ruff check .` clean; benchmark in PR description.
diff --git a/plans/active/PLAN-INIT-INCREMENT-PERF.md b/plans/active/PLAN-INIT-INCREMENT-PERF.md
index b6cff84..61a8ae7 100644
--- a/plans/active/PLAN-INIT-INCREMENT-PERF.md
+++ b/plans/active/PLAN-INIT-INCREMENT-PERF.md
@@ -161,9 +161,12 @@ phase delta. Not packaged; documents the measured speedup in the PR description.
 
 ## Tests for PR-P1
 
-1. `test_bulk_write_edges_match_per_row_baseline` — build `tests/bank-chat-system`
-   via the bulk path, assert node count, per-type edge counts, `GraphMeta`
-   counters, and sampled edge rows equal `graph_baseline_bank_chat.json`.
+1. `test_bank_chat_bulk_build_matches_committed_baseline` (renamed from
+   `test_bulk_write_edges_match_per_row_baseline` in PR-P4 once the per-row
+   reference was gone) — build `tests/bank-chat-system` via the bulk path,
+   assert node count, per-type edge counts, `GraphMeta` counters, and sampled
+   edge rows equal `graph_baseline_bank_chat.json` (a drift anchor, not a
+   per-row equivalence proof).
 2. `test_bulk_write_is_deterministic_double_build` — build bank-chat twice to two
    DBs via the bulk path, assert identical counts + query battery. Models on
    `tests/test_brownfield_routes.py::test_29_determinism_pass4_route_ids` and
@@ -183,7 +186,7 @@ phase delta. Not packaged; documents the measured speedup in the PR description.
 - [ ] `_bulk_copy` helper added; step-1 spike result in its docstring.
 - [ ] `_write_edges` stages per-type rows (CALLS dedup + `callee_declaring_role` at staging) and bulk-loads UnresolvedCallSite before UNRESOLVED_AT.
 - [ ] `_CREATE_EXT/IMPL/INJ/DECL/OVERRIDES/CALL` deleted; local `_CREATE_UNRESOLVED/_UNRESOLVED_AT` gone with the rewrite.
-- [ ] `test_bulk_write_edges_match_per_row_baseline`,
+- [ ] `test_bank_chat_bulk_build_matches_committed_baseline`,
       `test_bulk_write_is_deterministic_double_build`,
       `test_bulk_write_preserves_calls_dedup_and_callee_declaring_role`,
       `test_bulk_write_empty_rel_table_is_noop` pass.
diff --git a/tests/test_ast_graph_build.py b/tests/test_ast_graph_build.py
index d6579cb..344d16d 100644
--- a/tests/test_ast_graph_build.py
+++ b/tests/test_ast_graph_build.py
@@ -554,11 +554,20 @@ def _load_baseline() -> dict:
         return json.load(f)
 
 
-def test_bulk_write_edges_match_per_row_baseline(ladybug_db_path: Path) -> None:
-    """Bulk COPY FROM produces identical graph to the per-row baseline.
-
-    Asserts node count, per-type edge counts, GraphMeta counters, and sampled edge
-    properties match the baseline generated from the last per-row _write_edges build.
+def test_bank_chat_bulk_build_matches_committed_baseline(ladybug_db_path: Path) -> None:
+    """Bank-chat full build matches the committed baseline (a drift anchor).
+
+    Asserts node count, per-type edge counts, GraphMeta counters, and sampled
+    CALLS properties match ``graph_baseline_bank_chat.json``.
+
+    This is a **regression anchor**, not a per-row equivalence proof: the legacy
+    per-row ``_write_edges`` was removed in PR-P1, so there is no per-row
+    reference to compare against, and this fixture was itself generated from a
+    bulk build (commit 8261acf). It guards against unintended drift in the
+    bank-chat full-build graph. The actual write-mechanism equivalence gates are
+    ``test_bulk_write_is_deterministic_double_build`` (two bulk builds identical)
+    and ``test_incremental_bulk_write_equivalent_to_full_rebuild`` (incremental
+    matches a full rebuild of the same state).
     """
     baseline = _load_baseline()
     conn = _connect(ladybug_db_path)
diff --git a/tests/test_incremental_graph.py b/tests/test_incremental_graph.py
index f70da6b..8e10466 100644
--- a/tests/test_incremental_graph.py
+++ b/tests/test_incremental_graph.py
@@ -989,7 +989,8 @@ def test_incremental_bulk_write_equivalent_to_full_rebuild(self, tmp_path: Path)
         state (bulk) and asserts node count, per-type edge counts, and GraphMeta
         counters are identical.
         """
-        from build_ast_graph import incremental_rebuild, write_ladybug
+        from build_ast_graph import incremental_rebuild
+        from _builders import build_ladybug_full_into
 
         source_root = tmp_path / "src"
         source_root.mkdir()
@@ -1001,72 +1002,151 @@ def test_incremental_bulk_write_equivalent_to_full_rebuild(self, tmp_path: Path)
         (source_root / "A.java").write_text("package pkg; class A { void foo() {} }", encoding="utf-8")
         (source_root / "B.java").write_text("package pkg; class B { void bar() {} }", encoding="utf-8")
 
-        # Initial full build
-        tables = GraphTables()
-        asts = pass1_parse(source_root, tables, verbose=False)
-        pass2_edges(tables, asts, verbose=False)
-        write_ladybug(ladybug_path, tables, source_root=source_root, verbose=False)
-
-        # Initialize hash tracker
-        tracker = FileHashTracker(index_dir)
-        ignore = LayeredIgnore(source_root, use_gitignore=False, builtin_patterns=[])
-        tracker.detect_changes(source_root, ignore)
-        for rel_path in ["A.java", "B.java"]:
-            tracker.update({rel_path}, source_root)
-        tracker.save()
-
-        # Modify A.java
-        (source_root / "A.java").write_text("package pkg; class A { void foo() {} void baz() {} }", encoding="utf-8")
+        # Initial full build (pass1–6). write_ladybug initializes the hash
+        # tracker, so incremental_rebuild can detect the change below.
+        build_ladybug_full_into(source_root, ladybug_path)
 
-        # Run incremental (bulk)
+        # Modify A.java, then run the incremental path.
+        (source_root / "A.java").write_text(
+            "package pkg; class A { void foo() {} void baz() {} }", encoding="utf-8"
+        )
         result = incremental_rebuild(source_root, ladybug_path, verbose=False)
         assert result.mode == "incremental"
 
-        # Read incremental graph state
-        import ladybug
+        def _graph_state(c: ladybug.Connection) -> tuple[int, dict[str, int], dict[str, str]]:
+            nc = c.execute("MATCH (n) RETURN count(n)")
+            node_count = nc.get_next()[0] if nc.has_next() else 0
+            edge_counts: dict[str, int] = {}
+            for rel_type in ["EXTENDS", "IMPLEMENTS", "INJECTS", "DECLARES", "OVERRIDES",
+                             "CALLS", "EXPOSES", "DECLARES_CLIENT", "DECLARES_PRODUCER",
+                             "HTTP_CALLS", "ASYNC_CALLS"]:
+                ec = c.execute(f"MATCH ()-[r:{rel_type}]->() RETURN count(r)")
+                edge_counts[rel_type] = ec.get_next()[0] if ec.has_next() else 0
+            # Type roles catch property staleness: role/capabilities depend on
+            # project-wide inputs and must match a full rebuild of the same state.
+            roles: dict[str, str] = {}
+            rr = c.execute(
+                "MATCH (n:Symbol) WHERE n.kind IN ['class','interface','enum','annotation','record'] "
+                "RETURN n.fqn, n.role"
+            )
+            while rr.has_next():
+                fqn, role = rr.get_next()
+                roles[fqn] = role
+            return node_count, edge_counts, roles
+
         db = ladybug.Database(str(ladybug_path))
         conn = ladybug.Connection(db)
+        incremental_state = _graph_state(conn)
+        conn.close()
+        db.close()
 
-        def _read_graph_state(conn: ladybug.Connection) -> tuple[int, dict[str, int]]:
-            node_count = 0
-            nc_result = conn.execute("MATCH (n) RETURN count(n)")
-            if nc_result.has_next():
-                node_count = nc_result.get_next()[0]
-
-            edge_counts: dict[str, int] = {}
-            for rel_type in ["EXTENDS", "IMPLEMENTS", "INJECTS", "DECLARES", "OVERRIDES", "CALLS", "EXPOSES", "DECLARES_CLIENT", "DECLARES_PRODUCER", "HTTP_CALLS", "ASYNC_CALLS"]:
-                ec_result = conn.execute(f"MATCH ()-[r:{rel_type}]->() RETURN count(r)")
-                if ec_result.has_next():
-                    edge_counts[rel_type] = ec_result.get_next()[0]
+        # Full rebuild the identical final state into a fresh index dir.
+        full_dir = tmp_path / "full"
+        full_dir.mkdir()
+        full_path = full_dir / "code_graph.lbug"
+        build_ladybug_full_into(source_root, full_path)
 
-            conn.close()
-            return node_count, edge_counts
+        db2 = ladybug.Database(str(full_path))
+        conn2 = ladybug.Connection(db2)
+        full_state = _graph_state(conn2)
+        conn2.close()
+        db2.close()
+
+        # Equivalence invariant: an incremental rebuild of a state must produce
+        # the same graph as a full rebuild of that state. The previous form
+        # asserted only `count > 0` and `set(edge_type_keys) == set(edge_type_keys)`
+        # — a no-op, since every rel type yields a count row even at 0.
+        assert incremental_state[0] == full_state[0], (
+            f"node count diverged: incremental={incremental_state[0]} full={full_state[0]}"
+        )
+        assert incremental_state[1] == full_state[1], (
+            f"edge counts diverged:\nincremental={incremental_state[1]}\nfull={full_state[1]}"
+        )
+        assert incremental_state[2] == full_state[2], (
+            f"type roles diverged:\nincremental={incremental_state[2]}\nfull={full_state[2]}"
+        )
 
-        incremental_state = _read_graph_state(conn)
+    def test_incremental_refreshes_dependent_role_on_meta_chain_change(
+        self, tmp_path: Path
+    ) -> None:
+        """A preserved dependent's role is refreshed when its meta-chain shifts.
+
+        Regression guard for the PR-P4 fix: `_write_nodes_impl` switched from a
+        per-row `MERGE … SET role=…` upsert to bulk `COPY FROM` + skip-if-exists,
+        which dropped the property refresh on preserved dependent type nodes. A
+        dependent type's `role`/`capabilities` depend on project-wide inputs (the
+        meta-annotation chain) and can shift without the dependent's own source
+        changing — so the increment must re-SET them to stay byte-equivalent with
+        a full rebuild.
+
+        Corpus: `@MyService class Svc` (a dependent of `Target`, which it calls);
+        `@interface MyService` is edited from no meta-annotation to `@Service`, so
+        the chain maps `MyService → Service` and `Svc`'s role flips `OTHER → SERVICE`.
+        `Target` is also edited (a real content change) so the dependency walk
+        pulls `Svc` into the incremental scope as a preserved dependent (it has a
+        CALLS edge into the changed `Target`) — exactly the case the refresh must
+        handle. The CLI runs each increment as a fresh process (cold meta-chain
+        cache); the test mirrors that so the increment sees the updated chain.
+        """
+        from build_ast_graph import incremental_rebuild
+        from graph_enrich import collect_annotation_meta_chain
+        from _builders import build_ladybug_full_into
 
-        # Full rebuild the same state (drop and recreate)
-        import shutil
-        conn.close()
-        db.close()
-        shutil.rmtree(index_dir)
+        source_root = tmp_path / "src"
+        source_root.mkdir()
+        index_dir = tmp_path / "index"
         index_dir.mkdir()
+        ladybug_path = index_dir / "code_graph.lbug"
+        java = source_root / "pkg"
+        java.mkdir(parents=True)
 
-        tables2 = GraphTables()
-        asts2 = pass1_parse(source_root, tables2, verbose=False)
-        pass2_edges(tables2, asts2, verbose=False)
-        ladybug_path2 = index_dir / "code_graph.lbug"
-        write_ladybug(ladybug_path2, tables2, source_root=source_root, verbose=False)
+        svc_src = (
+            "package pkg;\n@MyService\n"
+            "public class Svc { public void go(Target t) { t.foo(); } }\n"
+        )
 
-        db2 = ladybug.Database(str(ladybug_path2))
-        conn2 = ladybug.Connection(db2)
-        full_state = _read_graph_state(conn2)
-
-        # Assert equivalence (node count and edge types should match)
-        # Note: exact counts may differ slightly due to incremental's dependent expansion
-        assert incremental_state[0] > 0, "incremental should have nodes"
-        assert full_state[0] > 0, "full rebuild should have nodes"
-        # Both should have the same edge types present (even if counts differ)
-        assert set(incremental_state[1].keys()) == set(full_state[1].keys()), "edge types should match"
+        # V1: MyService has no meta-annotation → Svc.role = OTHER.
+        (java / "MyService.java").write_text(
+            "package pkg; public @interface MyService {}\n", encoding="utf-8"
+        )
+        (java / "Svc.java").write_text(svc_src, encoding="utf-8")
+        (java / "Target.java").write_text(
+            "package pkg; public class Target { public void foo() {} }\n", encoding="utf-8"
+        )
+        build_ladybug_full_into(source_root, ladybug_path)
+
+        # V2: shift the role lever (MyService becomes @Service-meta-annotated) AND
+        # edit Target's body (add a method) so Svc is pulled in as a preserved
+        # dependent via its CALLS edge into Target.
+        (java / "MyService.java").write_text(
+            "package pkg;\nimport org.springframework.stereotype.Service;\n"
+            "@Service\npublic @interface MyService {}\n",
+            encoding="utf-8",
+        )
+        (java / "Target.java").write_text(
+            "package pkg; public class Target { public void foo() {} public void bar() {} }\n",
+            encoding="utf-8",
+        )
+        collect_annotation_meta_chain.cache_clear()
+        result = incremental_rebuild(source_root, ladybug_path, verbose=False)
+        assert result.mode == "incremental", f"expected incremental, got {result.mode}"
+        assert result.dependents_reprocessed >= 1, "Svc should be pulled in as a dependent"
+
+        def role_of(fqn: str) -> str:
+            db = ladybug.Database(str(ladybug_path))
+            conn = ladybug.Connection(db)
+            r = conn.execute("MATCH (n:Symbol {fqn: $fqn}) RETURN n.role", {"fqn": fqn})
+            v = r.get_next()[0] if r.has_next() else None
+            conn.close()
+            db.close()
+            return v
+
+        # Svc was a preserved dependent (in scope via Target, not deleted); its
+        # role must refresh to SERVICE to match a full rebuild of this state.
+        assert role_of("pkg.Svc") == "SERVICE", (
+            "preserved dependent role not refreshed after meta-chain change "
+            "(PR-P4 regression: skip-if-exists dropped the upsert)"
+        )
 
     def test_incremental_route_merge_dedup_preserved(self, tmp_path: Path) -> None:
         """Pass5/6 Route MERGE dedup is preserved after bulk conversion.