Skip to content

Commit 053de82

Browse files
HumanBean17 and claude committed
fix(update): honor yaml source_root so update stops mass-deleting the index
run_update passed the discovered config dir as an explicit source_root to resolve_operator_config, routing it into the branch that SKIPS the YAML source_root field. With a config living in a subdir next to `source_root: ../`, update then indexed that subdir (no Java) against the real index one level up, so cocoindex treated every indexed file as removed and deleted them — the "Updating index (Lance + graph)..." hang, and the ever-growing Lance `_deletions` + 1000s+ increment after a ctrl+C left cocoindex.db mid-reconcile.

This is the same bug class #316 fixed for the MCP server (its docstring warns that a non-None source_root skips the YAML field); run_update was the last production caller still passing a discovered dir.

Pass source_root=None so the YAML source_root is honored exactly like increment/init/reprocess. run_install is unaffected (it passes the user-confirmed Java root).

Adds a regression test mirroring the reported layout (config in my-project-context/, source_root: ../, real index one level up) that captures the env handed to cocoindex and asserts SOURCE_ROOT resolves to the YAML root, not the config dir.

No schema, ontology, embedding, or env-var change. Existing indexes remain valid; no reindex required.

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent e866304 commit 053de82

2 files changed

Lines changed: 103 additions & 2 deletions

File tree

java_codebase_rag/installer.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,9 +1250,14 @@ def run_update(
12501250
print("Skipping index update.")
12511251
return EXIT_PARTIAL if has_artifact_failures else EXIT_SUCCESS
12521252

1253-
# Resolve configuration
1253+
# Resolve configuration. Pass source_root=None so the YAML ``source_root``
1254+
# field is honored exactly like increment/init/reprocess — passing the
1255+
# discovered config dir here routes resolve_operator_config into the
1256+
# explicit-override branch that SKIPS the YAML field, which made `update`
1257+
# point cocoindex at the config dir (no Java) against the real index and
1258+
# mass-delete it. Discovery still runs against the CLI's cwd.
12541259
try:
1255-
cfg = resolve_operator_config(source_root=project_root, cli_index_dir=None)
1260+
cfg = resolve_operator_config(source_root=None, cli_index_dir=None)
12561261
index_dir = cfg.index_dir
12571262
except Exception as e:
12581263
print(f"\nWarning: Failed to resolve configuration: {e}")

tests/test_installer.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,6 +1161,102 @@ def test_update_no_index_skips_increment(self, tmp_path, monkeypatch):
11611161
# Should succeed (no hosts is fatal, but no index is just a warning)
11621162
assert result == 0
11631163

1164+
def test_update_honors_yaml_source_root_for_nested_config_dir(
    self, tmp_path, monkeypatch
):
    """run_update must resolve source_root exactly like increment.

    Regression for the "update mass-deletes the index" bug. run_update passed
    the discovered config dir as an explicit source_root, routing
    resolve_operator_config into the branch that SKIPS the YAML source_root
    field. With a config living in my-project-context/ next to
    ``source_root: ../``, update then indexed my-project-context/ (no Java)
    against the real index one level up — so cocoindex saw every indexed
    file as removed and deleted it (the "_deletions keeps growing" symptom
    after the run was ctrl+C'd mid-delete).

    After the fix, the env handed to cocoindex carries the YAML-resolved
    source_root (one level above the config dir), NOT the config dir itself.
    """
    import json
    import shutil
    from subprocess import CompletedProcess
    from java_codebase_rag.installer import run_update

    # Layout mirroring the reported bug:
    #   tmp_path/
    #     my-project-context/          <- cwd; config lives here
    #       .java-codebase-rag.yml     <- source_root: ../ ; index_dir: ../.java-codebase-rag
    #     .java-codebase-rag/          <- real index, one level above the config
    #       code_graph.lbug            <- marker so "index exists"
    config_dir = tmp_path / "my-project-context"
    config_dir.mkdir()
    (config_dir / ".java-codebase-rag.yml").write_text(
        "source_root: ../\nindex_dir: ../.java-codebase-rag\n",
        encoding="utf-8",
    )
    index_dir = tmp_path / ".java-codebase-rag"
    index_dir.mkdir()
    (index_dir / "code_graph.lbug").write_text("", encoding="utf-8")

    # A configured host so run_update reaches the index phase. The encoding
    # is explicit, like every other fixture file written in this test, so
    # the bytes on disk never depend on the platform's locale default.
    (config_dir / ".mcp.json").write_text(
        json.dumps(
            {
                "mcpServers": {
                    "java-codebase-rag": {
                        "command": "/usr/local/bin/java-codebase-rag-mcp",
                        "type": "stdio",
                    }
                }
            }
        ),
        encoding="utf-8",
    )
    monkeypatch.setattr(shutil, "which", lambda x: "/usr/local/bin/java-codebase-rag-mcp")
    monkeypatch.setattr(
        "java_codebase_rag.installer._read_package_artifact",
        lambda path: "PACKAGE CONTENT",
    )

    # The CLI invokes update from the config dir, so the process cwd is the
    # config dir — resolve_operator_config(source_root=None) discovers the
    # config via Path.cwd(), exactly as increment/init/reprocess do.
    # delenv: resolve_operator_config honors JAVA_CODEBASE_RAG_SOURCE_ROOT /
    # _INDEX_DIR from os.environ first, and apply_to_os_environ() writes them
    # unscoped — a sibling test can leak a value that overrides discovery.
    monkeypatch.delenv("JAVA_CODEBASE_RAG_SOURCE_ROOT", raising=False)
    monkeypatch.delenv("JAVA_CODEBASE_RAG_INDEX_DIR", raising=False)
    monkeypatch.chdir(config_dir)

    # Capture the subprocess env run_update hands cocoindex: it carries the
    # resolved JAVA_CODEBASE_RAG_SOURCE_ROOT / _INDEX_DIR.
    captured: dict = {}

    def capture_coco(env, *, full_reprocess, quiet, verbose=True, lance_project_root=None):
        # Stand-in for the cocoindex update step: record the env, report success.
        captured["env"] = env
        return CompletedProcess(["cocoindex"], 0)

    def noop_graph(**kwargs):
        # Stand-in for the incremental graph build: always report success.
        return CompletedProcess(["build_ast_graph", "--incremental"], 0)

    monkeypatch.setattr("java_codebase_rag.pipeline.run_cocoindex_update", capture_coco)
    monkeypatch.setattr("java_codebase_rag.pipeline.run_incremental_graph", noop_graph)

    result = run_update(force=False, dry_run=False, cwd=config_dir)

    # The index phase must have run (env captured), not been skipped.
    assert "env" in captured, "run_update did not reach the cocoindex update step"
    env = captured["env"]
    # source_root: ../ must resolve ONE level above the config dir (the real
    # Java tree), NOT the config dir itself.
    assert env["JAVA_CODEBASE_RAG_SOURCE_ROOT"] == str(tmp_path.resolve())
    assert env["JAVA_CODEBASE_RAG_SOURCE_ROOT"] != str(config_dir.resolve())
    # index_dir lands on the real index one level above the config dir.
    assert env["JAVA_CODEBASE_RAG_INDEX_DIR"] == str(index_dir.resolve())
    # result is independent of the source_root assertion (artifact refresh
    # may report partial failure unrelated to this regression); tolerate it.
    assert result in (0, 1)
1259+
11641260
def test_install_then_update_cycle(self, tmp_path, monkeypatch):
11651261
"""install then update: artifacts refreshed, no errors"""
11661262
from java_codebase_rag.installer import run_install, run_update

0 commit comments

Comments
 (0)