Skip to content

Commit b21fcd4

Browse files
HumanBean17claude
andauthored
fix(graph): incremental correctness + perf follow-ups (PR-P5) (#344)
Three follow-ups to the init/increment-perf plan (PR-P1..P4): PR-P5b (correctness): pull annotation-definition users into incremental scope. Annotation usage is a node property (annotations STRING[]), not a Symbol->Symbol edge, so _find_dependents never pulled annotation users in — when an @interface definition changed (e.g. gained a meta-annotation shifting the Layer-A chain in resolve_role_and_capabilities), types carrying it went stale until the next full rebuild. Add _find_annotation_dependents; merge its results into dependent_files so users are re-parsed and role-re-SET (the expansion cap bounds scope). Direct usage only; transitive composition documented as a known limitation. PR-P5c (perf): convert the last per-row graph write — the global pass 5-6 Route MERGE upsert — to bulk, mirroring _write_nodes_impl's Symbol pattern (_SET_ROUTE_BY_ID + COPY-new/SET-existing). Behavior-preserving; routes_rows already carries unique ids. PR-P5a (investigation, no code): confirmed OVERRIDES does NOT duplicate on increment — loaded stub types carry no supertype edges, so _populate_overrides_rows can't re-derive their pairs (unlike DECLARES, fixed in PR-P4, which is derived from a member's own fields). No guard shipped; added an invariant guard test pinning the behavior. Co-authored-by: Claude <noreply@anthropic.com>
1 parent 82ea014 commit b21fcd4

2 files changed

Lines changed: 242 additions & 22 deletions

File tree

build_ast_graph.py

Lines changed: 82 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,54 @@ def _find_dependents(conn: ladybug.Connection, changed_node_ids: set[str]) -> se
684684
return dependent_files
685685

686686

687+
def _find_annotation_dependents(conn: ladybug.Connection, changed_node_ids: set[str]) -> set[str]:
688+
"""Find files that USE an annotation whose DEFINITION is among the changed nodes.
689+
690+
Annotation usage is a node property (``annotations`` STRING[]), not a
691+
Symbol->Symbol edge, so `_find_dependents` — which walks edges — never pulls
692+
annotation users into scope. When an annotation definition changes (e.g.
693+
``@interface Foo`` gains a meta-annotation that shifts the Layer-A chain in
694+
`resolve_role_and_capabilities`), every type carrying ``@Foo`` may need its
695+
``role``/``capabilities`` recomputed or it goes stale until the next full
696+
rebuild. Return those users' files so the orchestrator treats them as
697+
dependents (re-parsed, role re-SET); the expansion cap bounds the scope.
698+
699+
Scope is direct usage only: a user of an annotation that transitively
700+
composes the changed one (e.g. ``@A`` where ``@A`` is meta-annotated with the
701+
changed ``@B``) is NOT pulled in — that reverse-chain walk is left to a
702+
future hardening pass. The direct case covers the dominant real-world shape
703+
(a stereotype annotation applied directly to many types).
704+
"""
705+
if not changed_node_ids:
706+
return set()
707+
# Changed annotation definitions → the simple names users reference them by.
708+
# Runs before `_delete_file_scope`, so the def nodes still exist.
709+
name_result = conn.execute(
710+
"MATCH (s:Symbol) WHERE s.id IN $ids AND s.kind = 'annotation' RETURN s.name",
711+
{"ids": list(changed_node_ids)},
712+
)
713+
names: list[str] = []
714+
while name_result.has_next():
715+
nm = name_result.get_next()[0]
716+
if nm:
717+
names.append(nm)
718+
if not names:
719+
return set()
720+
dependent_files: set[str] = set()
721+
for nm in names:
722+
user_result = conn.execute(
723+
"MATCH (s:Symbol) "
724+
"WHERE list_contains(s.annotations, $nm) AND s.filename <> '' "
725+
"RETURN DISTINCT s.filename",
726+
{"nm": nm},
727+
)
728+
while user_result.has_next():
729+
fn = user_result.get_next()[0]
730+
if fn:
731+
dependent_files.add(fn)
732+
return dependent_files
733+
734+
687735
def _delete_file_scope(
688736
conn: ladybug.Connection,
689737
changed_files: set[str],
@@ -3089,6 +3137,19 @@ def _existing_node_ids(conn: ladybug.Connection) -> set[str]:
30893137
"n.signature = $signature, n.parent_id = $parent_id, n.resolved = $resolved"
30903138
)
30913139

3140+
# Refresh every mutable Route field on an existing Route node by id. Mirrors the
3141+
# `_write_nodes_impl` Symbol pattern (bulk COPY new rows + per-row SET existing
3142+
# ones) so the global pass 5-6 step no longer needs a per-row MERGE upsert.
3143+
# Field list mirrors `_ROUTE_COLUMNS` minus `id`.
3144+
_SET_ROUTE_BY_ID = (
3145+
"MATCH (r:Route {id: $id}) "
3146+
"SET r.kind = $kind, r.framework = $framework, r.method = $method, "
3147+
"r.path = $path, r.path_template = $path_template, r.path_regex = $path_regex, "
3148+
"r.topic = $topic, r.broker = $broker, r.feign_name = $feign_name, r.feign_url = $feign_url, "
3149+
"r.microservice = $microservice, r.module = $module, r.filename = $filename, "
3150+
"r.start_line = $start_line, r.end_line = $end_line, r.resolved = $resolved"
3151+
)
3152+
30923153
_REL_EXTENDS_COLUMNS = ["FROM", "TO", "source_file", "dst_name", "dst_fqn", "resolved"]
30933154
_REL_IMPLEMENTS_COLUMNS = ["FROM", "TO", "source_file", "dst_name", "dst_fqn", "resolved"]
30943155
_REL_INJECTS_COLUMNS = ["FROM", "TO", "source_file", "dst_name", "dst_fqn", "resolved", "mechanism", "annotation", "field_or_param"]
@@ -3776,6 +3837,12 @@ def incremental_rebuild(
37763837
# Find dependents
37773838
dependent_files = _find_dependents(conn, changed_node_ids)
37783839

3840+
# Annotation-definition change: also pull in files that USE the changed
3841+
# annotation. Annotation usage is a node property, not a Symbol->Symbol
3842+
# edge, so `_find_dependents` misses them and their role (derived from
3843+
# the project-wide meta-chain) would go stale. See PR-P5b.
3844+
dependent_files |= _find_annotation_dependents(conn, changed_node_ids)
3845+
37793846
# Union changed files with dependents
37803847
scope_files = changed_files | dependent_files
37813848

@@ -3940,22 +4007,21 @@ def _write_clients_producers_and_calls(conn: ladybug.Connection, tables: GraphTa
39404007
Route nodes (created by pass5 for cross-service calls) that wouldn't
39414008
otherwise exist in LadybugDB.
39424009
"""
3943-
# Upsert every route via MERGE. `tables.routes_rows` is the full route set
3944-
# (pass4 routes + pass5 phantom routes), not just phantoms; MERGE is
3945-
# idempotent against routes already written during the scoped step, so it
3946-
# neither duplicates nor drops them. This is the one remaining per-row graph
3947-
# write — converting it to bulk COPY requires filtering against existing
3948-
# route ids to reproduce the dedup, and is left as a future optimization.
3949-
for row in tables.routes_rows:
3950-
conn.execute(
3951-
"MERGE (r:Route {id: $id}) "
3952-
"SET r.kind = $kind, r.framework = $framework, r.method = $method, "
3953-
"r.path = $path, r.path_template = $path_template, r.path_regex = $path_regex, "
3954-
"r.topic = $topic, r.broker = $broker, r.feign_name = $feign_name, r.feign_url = $feign_url, "
3955-
"r.microservice = $microservice, r.module = $module, r.filename = $filename, "
3956-
"r.start_line = $start_line, r.end_line = $end_line, r.resolved = $resolved",
3957-
asdict(row),
3958-
)
4010+
# Bulk-write routes, mirroring `_write_nodes_impl`: COPY the rows that are new
4011+
# to the DB, and SET every mutable field on the routes already present (the
4012+
# caller does NOT delete routes, only Clients/Producers — see the global
4013+
# pass 5-6 block). `tables.routes_rows` is the full route set (pass4 routes +
4014+
# pass5 phantom routes), not just phantoms, so the SET keeps existing routes'
4015+
# properties in sync with pass5's cross-service enrichment while the COPY
4016+
# materializes phantoms that have no node yet. Replaces the last per-row
4017+
# graph write (MERGE upsert).
4018+
route_rows = [asdict(row) for row in tables.routes_rows]
4019+
existing_route_ids = _existing_node_ids(conn)
4020+
new_route_rows = [r for r in route_rows if r["id"] not in existing_route_ids]
4021+
_bulk_copy(conn, "Route", _ROUTE_COLUMNS, new_route_rows)
4022+
for r in route_rows:
4023+
if r["id"] in existing_route_ids:
4024+
conn.execute(_SET_ROUTE_BY_ID, r)
39594025

39604026
# Build node_id lookup for members and types
39614027
member_by_id = {m.node_id: m for m in tables.members}

tests/test_incremental_graph.py

Lines changed: 160 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1148,12 +1148,166 @@ def role_of(fqn: str) -> str:
11481148
"(PR-P4 regression: skip-if-exists dropped the upsert)"
11491149
)
11501150

1151-
def test_incremental_route_merge_dedup_preserved(self, tmp_path: Path) -> None:
1152-
"""Pass5/6 Route MERGE dedup is preserved after bulk conversion.
1151+
def test_incremental_pulls_in_annotation_users_on_def_change(
1152+
self, tmp_path: Path
1153+
) -> None:
1154+
"""Editing an annotation definition refreshes its direct users' roles.
1155+
1156+
Regression guard for the PR-P5b fix. Annotation usage is a node property
1157+
(``annotations`` STRING[]), not a Symbol->Symbol edge, so `_find_dependents`
1158+
— which walks edges — never pulls annotation users into scope. When an
1159+
annotation definition changes (e.g. ``@interface MyService`` gains a
1160+
``@Service`` meta-annotation that shifts the Layer-A chain), types
1161+
carrying ``@MyService`` need their ``role`` recomputed or they go stale.
1162+
1163+
Unlike `test_incremental_refreshes_dependent_role_on_meta_chain_change`
1164+
(PR-P4), the user here has NO edge to any changed node: it is pulled in
1165+
purely by the annotation-usage expansion (`_find_annotation_dependents`),
1166+
so this isolates the scope-tracking fix from the preserved-dependent
1167+
property refresh.
1168+
"""
1169+
from build_ast_graph import incremental_rebuild
1170+
from graph_enrich import collect_annotation_meta_chain
1171+
from _builders import build_ladybug_full_into
1172+
1173+
source_root = tmp_path / "src"
1174+
source_root.mkdir()
1175+
index_dir = tmp_path / "index"
1176+
index_dir.mkdir()
1177+
ladybug_path = index_dir / "code_graph.lbug"
1178+
java = source_root / "pkg"
1179+
java.mkdir(parents=True)
1180+
1181+
# Svc carries @MyService but calls/extends nothing — no edge to any other
1182+
# node, so `_find_dependents` cannot pull it in. Only annotation usage can.
1183+
(java / "MyService.java").write_text(
1184+
"package pkg; public @interface MyService {}\n", encoding="utf-8"
1185+
)
1186+
(java / "Svc.java").write_text(
1187+
"package pkg;\n@MyService\npublic class Svc {}\n", encoding="utf-8"
1188+
)
1189+
build_ladybug_full_into(source_root, ladybug_path)
1190+
1191+
# Edit ONLY the annotation definition: add a @Service meta-annotation so
1192+
# the chain maps MyService → Service and Svc's role flips OTHER → SERVICE.
1193+
(java / "MyService.java").write_text(
1194+
"package pkg;\nimport org.springframework.stereotype.Service;\n"
1195+
"@Service\npublic @interface MyService {}\n",
1196+
encoding="utf-8",
1197+
)
1198+
collect_annotation_meta_chain.cache_clear()
1199+
result = incremental_rebuild(source_root, ladybug_path, verbose=False)
1200+
assert result.mode == "incremental", f"expected incremental, got {result.mode}"
1201+
assert result.dependents_reprocessed >= 1, (
1202+
"Svc should be pulled in as an annotation-dependent, not just the def file"
1203+
)
1204+
1205+
def role_of(fqn: str) -> str:
1206+
db = ladybug.Database(str(ladybug_path))
1207+
conn = ladybug.Connection(db)
1208+
r = conn.execute("MATCH (n:Symbol {fqn: $fqn}) RETURN n.role", {"fqn": fqn})
1209+
v = r.get_next()[0] if r.has_next() else None
1210+
conn.close()
1211+
db.close()
1212+
return v
1213+
1214+
# Svc was pulled in by annotation usage (no edge to the changed def); its
1215+
# role must refresh to SERVICE to match a full rebuild of this state.
1216+
assert role_of("pkg.Svc") == "SERVICE", (
1217+
"annotation user's role not refreshed after annotation-def change "
1218+
"(PR-P5b regression: annotation users not pulled into incremental scope)"
1219+
)
11531220

1154-
Creates a corpus where pass5/6 re-emits an existing route and verifies
1155-
no duplicate Route rows after incremental rebuild (the retained MERGE
1156-
dedups against routes written during the scoped step).
1221+
def test_incremental_overrides_not_duplicated_for_non_scope_subtype(
1222+
self, tmp_path: Path
1223+
) -> None:
1224+
"""A non-scope subtype's OVERRIDES edge is not duplicated on increment.
1225+
1226+
Invariant guard for the OVERRIDES path. Unlike DECLARES (derived purely
1227+
from a member's own ``parent_id``/``node_id``, which is why a loaded
1228+
non-scope member could duplicate it — fixed in PR-P4), an OVERRIDES pair
1229+
needs the subtype's supertype via `_direct_supertype_ids`, which reads
1230+
`tables.extends_rows`/`implements_rows`. Those edge tables are populated
1231+
only by `pass2_edges` over *parsed* files; cross-file resolution stubs
1232+
(`loaded_from_db`) load nodes but NOT edges, so a loaded subtype has no
1233+
derivable supertype and `_populate_overrides_rows` never re-emits its
1234+
OVERRIDES — the edge (``source_file`` = its out-of-scope file) survives
1235+
untouched, matching a full rebuild.
1236+
1237+
This test pins that behavior down. If stub edge-loading is ever added as
1238+
an optimization, this guard will catch the resulting duplication (the
1239+
same class of bug PR-P4 fixed for DECLARES).
1240+
1241+
Corpus: `TImpl implements T` overrides `foo()` in non-scope files; an
1242+
unrelated `Other.java` change triggers the increment so `TImpl`/`T` are
1243+
loaded as stubs. The increment must keep exactly one OVERRIDES edge.
1244+
"""
1245+
from build_ast_graph import incremental_rebuild
1246+
from _builders import build_ladybug_full_into
1247+
1248+
source_root = tmp_path / "src"
1249+
source_root.mkdir()
1250+
index_dir = tmp_path / "index"
1251+
index_dir.mkdir()
1252+
ladybug_path = index_dir / "code_graph.lbug"
1253+
java = source_root / "pkg"
1254+
java.mkdir(parents=True)
1255+
1256+
(java / "T.java").write_text(
1257+
"package pkg; public interface T { void foo(); }\n", encoding="utf-8"
1258+
)
1259+
(java / "TImpl.java").write_text(
1260+
"package pkg; public class TImpl implements T { public void foo() {} }\n",
1261+
encoding="utf-8",
1262+
)
1263+
(java / "Other.java").write_text(
1264+
"package pkg; public class Other { public void go() {} }\n", encoding="utf-8"
1265+
)
1266+
build_ladybug_full_into(source_root, ladybug_path)
1267+
1268+
# Change Other.java only. Nothing references Other, so the scope is just
1269+
# {Other.java}; TImpl/T are non-scope and loaded as resolution stubs.
1270+
(java / "Other.java").write_text(
1271+
"package pkg; public class Other { public void go() {} public void more() {} }\n",
1272+
encoding="utf-8",
1273+
)
1274+
result = incremental_rebuild(source_root, ladybug_path, verbose=False)
1275+
assert result.mode == "incremental", f"expected incremental, got {result.mode}"
1276+
1277+
def overrides_count(path: Path) -> int:
1278+
db = ladybug.Database(str(path))
1279+
conn = ladybug.Connection(db)
1280+
r = conn.execute("MATCH ()-[o:OVERRIDES]->() RETURN count(o)")
1281+
n = r.get_next()[0] if r.has_next() else 0
1282+
conn.close()
1283+
db.close()
1284+
return n
1285+
1286+
# Exactly one override relationship exists (TImpl.foo → T.foo); a
1287+
# duplicate (2) is the bug. Cross-check against a fresh full rebuild of
1288+
# the identical final state — the increment must match it.
1289+
full_dir = tmp_path / "full"
1290+
full_dir.mkdir()
1291+
full_path = full_dir / "code_graph.lbug"
1292+
build_ladybug_full_into(source_root, full_path)
1293+
1294+
assert overrides_count(ladybug_path) == 1, (
1295+
"non-scope OVERRIDES edge duplicated on increment "
1296+
"(PR-P5a regression: loaded subtype method re-emitted)"
1297+
)
1298+
assert overrides_count(ladybug_path) == overrides_count(full_path), (
1299+
"incremental OVERRIDES count diverged from full rebuild"
1300+
)
1301+
1302+
def test_incremental_route_merge_dedup_preserved(self, tmp_path: Path) -> None:
1303+
"""Pass5/6 Route write does not duplicate existing routes.
1304+
1305+
Creates a corpus where pass5/6 re-emits an existing route and verifies
1306+
no duplicate Route rows after incremental rebuild. The global step now
1307+
bulk-writes routes (COPY new ids + SET existing ids — see PR-P5c),
1308+
replacing the per-row MERGE upsert this test was originally written for;
1309+
the name is kept for plan-reference continuity. The SET branch is what
1310+
dedups against routes written during the scoped step here.
11571311
"""
11581312
from build_ast_graph import incremental_rebuild, write_ladybug
11591313

@@ -1216,7 +1370,7 @@ def test_incremental_route_merge_dedup_preserved(self, tmp_path: Path) -> None:
12161370
route_count = route_count_result.get_next()[0]
12171371

12181372
# Should be exactly 1 route (no duplicates)
1219-
assert route_count == 1, f"Expected 1 route, found {route_count} (MERGE dedup failed)"
1373+
assert route_count == 1, f"Expected 1 route, found {route_count} (route dedup failed)"
12201374

12211375
conn.close()
12221376

0 commit comments

Comments
 (0)