Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/JAVA-CODEBASE-RAG-CLI.md
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ java-codebase-rag reprocess --source-root /path/to/java/repo --index-dir /path/t

### `erase`

Deletes cocoindex state, the LadybugDB directory, and Lance tables under the index dir. Requires **`--yes`** or interactive confirmation on a TTY. Non-TTY without `--yes` exits **2**.
Deletes cocoindex state, the LadybugDB graph (`code_graph.lbug`), the graph builder's content-hash store (`.graph_hashes.json`), and Lance tables under the index dir. Requires **`--yes`** or interactive confirmation on a TTY. Non-TTY without `--yes` exits **2**.

```bash
java-codebase-rag erase --source-root /path/to/java/repo --index-dir /path/to/.java-codebase-rag --yes
Expand Down
40 changes: 32 additions & 8 deletions java_codebase_rag/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,11 +580,33 @@ def _cmd_update(args: argparse.Namespace) -> int:
)


def _rm_any(path: Path) -> bool:
    """Remove ``path`` whether it is a regular file, directory, or symlink.

    ``code_graph.lbug`` is a single regular file in this repo, but kuzu may lay
    the graph out as a directory; ``cocoindex.db`` is always a directory.
    ``shutil.rmtree`` is a silent no-op on a regular file when errors are
    ignored, and ``Path.unlink`` raises ``IsADirectoryError`` on a directory,
    so a type-blind delete left index artifacts on disk (issue #346).

    A symlinked directory is unlinked, not recursed into, so the link target is
    never followed. Failures are warned to stderr rather than swallowed, and
    are also reported through the return value so a caller can avoid claiming
    success while an artifact is still on disk.

    Args:
        path: Filesystem entry to delete. It may already be absent.

    Returns:
        ``True`` when no error occurred (including when ``path`` did not
        exist), ``False`` when an ``OSError`` was caught and warned about.
    """
    try:
        if path.is_dir() and not path.is_symlink():
            # A real directory: remove the whole tree.
            shutil.rmtree(path)
        elif path.exists() or path.is_symlink():
            # Regular file, or any symlink (``exists()`` is False for a
            # dangling link, hence the extra ``is_symlink()`` probe).
            # ``missing_ok`` covers the entry vanishing between the probe
            # and the unlink.
            path.unlink(missing_ok=True)
    except OSError as exc:
        print(f"warning: failed to remove {path}: {exc}", file=sys.stderr)
        return False
    return True


def _cmd_erase(args: argparse.Namespace) -> int:
cfg = _resolved_from_ns(args)
_startup_hints(cfg)
cfg.apply_to_os_environ()
to_describe: list[Path] = [cfg.ladybug_path, cfg.cocoindex_db]
graph_hashes_path = cfg.ladybug_path.parent / ".graph_hashes.json"
to_describe: list[Path] = [cfg.ladybug_path, cfg.cocoindex_db, graph_hashes_path]
if cfg.index_dir.is_dir():
try:
import lancedb
Expand Down Expand Up @@ -621,13 +643,15 @@ def work(progress: "PipelineProgress | None") -> int:
)
elif drop.returncode != 0:
print(clip(drop.stderr, 4000), file=sys.stderr)
if cfg.ladybug_path.exists():
shutil.rmtree(cfg.ladybug_path, ignore_errors=True)
if cfg.cocoindex_db.exists():
try:
cfg.cocoindex_db.unlink()
except OSError:
pass
# Remove the LadybugDB graph, the cocoindex state store, and the graph
# builder's content-hash store. Each is removed by type (see _rm_any):
# code_graph.lbug is a file here but may be a dir under kuzu, while
# cocoindex.db is a directory — a type-blind delete silently no-oped on
# one or the other, and .graph_hashes.json was never targeted at all
# (issue #346).
_rm_any(cfg.ladybug_path)
_rm_any(cfg.cocoindex_db)
_rm_any(graph_hashes_path)
if cfg.index_dir.is_dir():
try:
import lancedb
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "java-codebase-rag"
version = "0.6.5"
version = "0.6.6"
description = "MCP server for semantic + structural search over Java codebases"
readme = "README.md"
requires-python = ">=3.11"
Expand Down
46 changes: 44 additions & 2 deletions tests/test_java_codebase_rag_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,35 @@ def test_cli_erase_succeeds_with_yes_flag(tmp_path: Path) -> None:
assert proc.returncode == 0, proc.stderr + proc.stdout


def test_erase_removes_graph_file_cocoindex_dir_and_hash_store(tmp_path: Path) -> None:
    """``erase`` must remove all three on-disk index artifacts.

    Regression for issue #346. The index directory is seeded with the layout
    described there: ``code_graph.lbug`` as a regular file, ``cocoindex.db`` as
    a non-empty directory, and ``.graph_hashes.json`` as a regular file. After
    ``erase --yes`` exits 0, none of them may remain; a delete that ignores the
    entry type would leave the file or the directory behind.
    """
    index_dir = tmp_path / "erase_artifacts"
    graph_file = index_dir / "code_graph.lbug"
    coco_dir = index_dir / "cocoindex.db"
    hash_store = index_dir / ".graph_hashes.json"

    # Seed the layout: graph is a single FILE, cocoindex state is a DIR.
    index_dir.mkdir()
    graph_file.write_bytes(b"fake-kuzu-db")
    coco_dir.mkdir()
    (coco_dir / "state.json").write_text("{}", encoding="utf-8")
    hash_store.write_text("{}", encoding="utf-8")

    cli_env = {
        **os.environ,
        "JAVA_CODEBASE_RAG_INDEX_DIR": str(index_dir),
        "JAVA_CODEBASE_RAG_SOURCE_ROOT": str(tmp_path),
    }
    result = _run_cli(
        ["erase", "--source-root", str(tmp_path), "--index-dir", str(index_dir), "--yes"],
        env=cli_env,
    )
    assert result.returncode == 0, result.stderr + result.stdout

    expected_gone = (
        (graph_file, "erase left code_graph.lbug on disk"),
        (coco_dir, "erase left cocoindex.db/ on disk"),
        (hash_store, "erase left .graph_hashes.json on disk"),
    )
    for artifact, complaint in expected_gone:
        assert not artifact.exists(), complaint


def test_embedding_model_precedence_cli_over_env_over_yaml_over_default(
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
Expand Down Expand Up @@ -377,21 +406,34 @@ def test_legacy_env_var_set_emits_stderr_hint(monkeypatch: pytest.MonkeyPatch, t

@pytest.mark.skipif(not _cocoindex_available(), reason="cocoindex not installed in venv")
def test_init_after_erase_succeeds(corpus_root: Path, tmp_path: Path) -> None:
    """Build a real index, erase it, then init again from a clean slate.

    Regression for issue #346: the previous body erased an *empty* index dir and
    then inited, so it never exercised "erase a real graph -> re-init" and stayed
    green while erase silently left code_graph.lbug on disk.
    """
    idx = tmp_path / "lifecycle_idx"
    idx.mkdir(parents=True)
    env = os.environ.copy()
    env["JAVA_CODEBASE_RAG_INDEX_DIR"] = str(idx)
    env["JAVA_CODEBASE_RAG_SOURCE_ROOT"] = str(corpus_root.resolve())

    # Step 1: build a real index so there is an actual graph file to erase.
    init1 = _run_cli(
        ["init", "--source-root", str(corpus_root), "--index-dir", str(idx), "--quiet"],
        env=env,
    )
    assert init1.returncode == 0, init1.stdout + init1.stderr
    assert (idx / "code_graph.lbug").exists(), "init did not build code_graph.lbug"

    # Step 2: erase must succeed AND actually remove the graph from disk.
    e1 = _run_cli(
        ["erase", "--source-root", str(corpus_root), "--index-dir", str(idx), "--yes"],
        env=env,
    )
    assert e1.returncode == 0, e1.stderr + e1.stdout
    assert not (idx / "code_graph.lbug").exists(), "erase left code_graph.lbug on disk"

    # Step 3: a second init over the erased directory must succeed.
    init2 = _run_cli(
        ["init", "--source-root", str(corpus_root), "--index-dir", str(idx), "--quiet"],
        env=env,
    )
    assert init2.returncode == 0, init2.stdout + init2.stderr


@pytest.mark.skipif(not _cocoindex_available(), reason="cocoindex not installed in venv")
Expand Down
Loading