diff --git a/docs/JAVA-CODEBASE-RAG-CLI.md b/docs/JAVA-CODEBASE-RAG-CLI.md
index 4e655f3..0ee1e4f 100644
--- a/docs/JAVA-CODEBASE-RAG-CLI.md
+++ b/docs/JAVA-CODEBASE-RAG-CLI.md
@@ -240,7 +240,7 @@ java-codebase-rag reprocess --source-root /path/to/java/repo --index-dir /path/t
 
 ### `erase`
 
-Deletes cocoindex state, the LadybugDB directory, and Lance tables under the index dir. Requires **`--yes`** or interactive confirmation on a TTY. Non-TTY without `--yes` exits **2**.
+Deletes cocoindex state, the LadybugDB graph (`code_graph.lbug`), the graph builder's content-hash store (`.graph_hashes.json`), and Lance tables under the index dir. Requires **`--yes`** or interactive confirmation on a TTY. Non-TTY without `--yes` exits **2**.
 
 ```bash
 java-codebase-rag erase --source-root /path/to/java/repo --index-dir /path/to/.java-codebase-rag --yes
diff --git a/java_codebase_rag/cli.py b/java_codebase_rag/cli.py
index 817cd08..47cf388 100644
--- a/java_codebase_rag/cli.py
+++ b/java_codebase_rag/cli.py
@@ -580,11 +580,33 @@ def _cmd_update(args: argparse.Namespace) -> int:
     )
 
 
+def _rm_any(path: Path) -> None:
+    """Remove ``path`` whether it is a regular file, directory, or symlink.
+
+    ``code_graph.lbug`` is a single regular file in this repo, but kuzu may lay
+    the graph out as a directory; ``cocoindex.db`` is always a directory.
+    ``shutil.rmtree(..., ignore_errors=True)`` silently does nothing on a regular
+    file and ``Path.unlink`` raises ``IsADirectoryError`` on a directory, so a
+    type-blind delete left index artifacts on disk (issue #346). A symlinked
+    directory is unlinked, not recursed into, so the link target is never
+    followed. An ``OSError`` is warned to stderr, not swallowed, but nothing is
+    returned or re-raised, so the caller's exit status cannot reflect it.
+    """
+    try:
+        if path.is_dir() and not path.is_symlink():
+            shutil.rmtree(path)
+        elif path.exists() or path.is_symlink():
+            path.unlink(missing_ok=True)
+    except OSError as exc:
+        print(f"warning: failed to remove {path}: {exc}", file=sys.stderr)
+
+
 def _cmd_erase(args: argparse.Namespace) -> int:
     cfg = _resolved_from_ns(args)
     _startup_hints(cfg)
     cfg.apply_to_os_environ()
-    to_describe: list[Path] = [cfg.ladybug_path, cfg.cocoindex_db]
+    graph_hashes_path = cfg.ladybug_path.parent / ".graph_hashes.json"
+    to_describe: list[Path] = [cfg.ladybug_path, cfg.cocoindex_db, graph_hashes_path]
     if cfg.index_dir.is_dir():
         try:
             import lancedb
@@ -621,13 +643,15 @@ def work(progress: "PipelineProgress | None") -> int:
             )
         elif drop.returncode != 0:
             print(clip(drop.stderr, 4000), file=sys.stderr)
-        if cfg.ladybug_path.exists():
-            shutil.rmtree(cfg.ladybug_path, ignore_errors=True)
-        if cfg.cocoindex_db.exists():
-            try:
-                cfg.cocoindex_db.unlink()
-            except OSError:
-                pass
+        # Remove the LadybugDB graph, the cocoindex state store, and the graph
+        # builder's content-hash store. Each is removed by type (see _rm_any):
+        # code_graph.lbug is a file here but may be a dir under kuzu, while
+        # cocoindex.db is a directory — a type-blind delete silently no-oped on
+        # one or the other, and .graph_hashes.json was never targeted at all
+        # (issue #346).
+        _rm_any(cfg.ladybug_path)
+        _rm_any(cfg.cocoindex_db)
+        _rm_any(graph_hashes_path)
         if cfg.index_dir.is_dir():
             try:
                 import lancedb
diff --git a/pyproject.toml b/pyproject.toml
index 07ef5e1..fb91162 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "java-codebase-rag"
-version = "0.6.5"
+version = "0.6.6"
 description = "MCP server for semantic + structural search over Java codebases"
 readme = "README.md"
 requires-python = ">=3.11"
diff --git a/tests/test_java_codebase_rag_cli.py b/tests/test_java_codebase_rag_cli.py
index cbac80d..5f0b6e7 100644
--- a/tests/test_java_codebase_rag_cli.py
+++ b/tests/test_java_codebase_rag_cli.py
@@ -109,6 +109,35 @@ def test_cli_erase_succeeds_with_yes_flag(tmp_path: Path) -> None:
     assert proc.returncode == 0, proc.stderr + proc.stdout
 
 
+def test_erase_removes_graph_file_cocoindex_dir_and_hash_store(tmp_path: Path) -> None:
+    """erase must delete code_graph.lbug (file), cocoindex.db (dir), .graph_hashes.json.
+
+    Regression for issue #346: a type-blind delete left the first two on disk.
+    shutil.rmtree(..., ignore_errors=True) does nothing on a regular file
+    (code_graph.lbug), and Path.unlink raises IsADirectoryError on cocoindex.db
+    (a directory) — both swallowed — and .graph_hashes.json was never targeted.
+    The follow-up init then refused because code_graph.lbug survived.
+    """
+    idx = tmp_path / "erase_artifacts"
+    idx.mkdir()
+    # Real on-disk layout: graph is a single FILE, cocoindex state is a DIR.
+    (idx / "code_graph.lbug").write_bytes(b"fake-kuzu-db")
+    (idx / "cocoindex.db").mkdir()
+    (idx / "cocoindex.db" / "state.json").write_text("{}", encoding="utf-8")
+    (idx / ".graph_hashes.json").write_text("{}", encoding="utf-8")
+    env = os.environ.copy()
+    env["JAVA_CODEBASE_RAG_INDEX_DIR"] = str(idx)
+    env["JAVA_CODEBASE_RAG_SOURCE_ROOT"] = str(tmp_path)
+    proc = _run_cli(
+        ["erase", "--source-root", str(tmp_path), "--index-dir", str(idx), "--yes"],
+        env=env,
+    )
+    assert proc.returncode == 0, proc.stderr + proc.stdout
+    assert not (idx / "code_graph.lbug").exists(), "erase left code_graph.lbug on disk"
+    assert not (idx / "cocoindex.db").exists(), "erase left cocoindex.db/ on disk"
+    assert not (idx / ".graph_hashes.json").exists(), "erase left .graph_hashes.json on disk"
+
+
 def test_embedding_model_precedence_cli_over_env_over_yaml_over_default(
     tmp_path: Path, monkeypatch: pytest.MonkeyPatch
 ) -> None:
@@ -377,21 +406,34 @@ def test_legacy_env_var_set_emits_stderr_hint(monkeypatch: pytest.MonkeyPatch, t
 
 @pytest.mark.skipif(not _cocoindex_available(), reason="cocoindex not installed in venv")
 def test_init_after_erase_succeeds(corpus_root: Path, tmp_path: Path) -> None:
+    """Build a real index, erase it, then init again from a clean slate.
+
+    Regression for issue #346: the previous body erased an *empty* index dir and
+    then inited, so it never exercised "erase a real graph -> re-init" and stayed
+    green while erase silently left code_graph.lbug on disk.
+    """
     idx = tmp_path / "lifecycle_idx"
     idx.mkdir(parents=True)
     env = os.environ.copy()
     env["JAVA_CODEBASE_RAG_INDEX_DIR"] = str(idx)
     env["JAVA_CODEBASE_RAG_SOURCE_ROOT"] = str(corpus_root.resolve())
+    init1 = _run_cli(
+        ["init", "--source-root", str(corpus_root), "--index-dir", str(idx), "--quiet"],
+        env=env,
+    )
+    assert init1.returncode == 0, init1.stdout + init1.stderr
+    assert (idx / "code_graph.lbug").exists(), "init did not build code_graph.lbug"
     e1 = _run_cli(
         ["erase", "--source-root", str(corpus_root), "--index-dir", str(idx), "--yes"],
         env=env,
     )
     assert e1.returncode == 0, e1.stderr
-    init = _run_cli(
+    assert not (idx / "code_graph.lbug").exists(), "erase left code_graph.lbug on disk"
+    init2 = _run_cli(
         ["init", "--source-root", str(corpus_root), "--index-dir", str(idx), "--quiet"],
         env=env,
     )
-    assert init.returncode == 0, init.stdout + init.stderr
+    assert init2.returncode == 0, init2.stdout + init2.stderr
 
 
 @pytest.mark.skipif(not _cocoindex_available(), reason="cocoindex not installed in venv")