Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 43 additions & 2 deletions src/semble/cli.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,26 @@
import argparse
import asyncio
import json
import re
import sys
import warnings
from importlib.util import find_spec
from shutil import rmtree
from typing import Literal

from model2vec.utils import get_package_extras

from semble.cache import find_index_from_cache_folder
from semble.cache import find_index_from_cache_folder, resolve_cache_folder
from semble.index import SembleIndex
from semble.index.types import PersistencePath
from semble.stats import format_savings_report
from semble.types import ContentType
from semble.utils import format_results, is_git_url, resolve_chunk

# Earlier definition of the dispatch set (without "clear"); the assignment on
# the next line rebinds the same name, so only that later value is in effect.
_CLI_DISPATCH_ARGS = frozenset({"search", "find-related", "install", "uninstall", "savings", "-h", "--help"})
# Same set of first-argument tokens with the "clear" subcommand added.
# NOTE(review): presumably consulted by the entry point to decide whether argv
# should be routed to the CLI parser -- confirm against the caller.
_CLI_DISPATCH_ARGS = frozenset({"search", "find-related", "install", "uninstall", "savings", "-h", "--help", "clear"})
# Values accepted by `_run_clear`; mirrors the `choices` of the `clear` subcommand's `type` argument.
_CLEAR_CHOICE = Literal["all", "index", "savings"]

# Pattern for a name made of 64 lowercase hexadecimal characters (the length of
# a SHA-256 hex digest); `_run_clear` uses it to recognise index folders in the cache.
_SHA_256_REGEX = re.compile(r"^[a-f0-9]{64}$")


def _build_index(path: str, content: list[ContentType]) -> SembleIndex:
Expand Down Expand Up @@ -131,6 +138,35 @@ def _run_find_related(path: str, file_path: str, line: int, top_k: int, content:
_maybe_save_index(index, path)


def _run_clear(clear_type: _CLEAR_CHOICE) -> None:
    """Delete cached data on behalf of the `clear` subcommand.

    Args:
        clear_type: ``"index"`` removes cached index folders, ``"savings"``
            removes the savings log, and ``"all"`` does both (indexes first).

    A one-line status message is printed for every folder or file removed, or
    a "nothing found" message when there was nothing to remove.
    """
    cache_root = resolve_cache_folder()

    if clear_type in ("index", "all"):
        # Gather every candidate before deleting anything. A candidate is an
        # `<name>/index` entry whose parent folder name matches the SHA-256
        # pattern and whose PersistencePath does not report `non_existing()`.
        doomed_folders = [
            candidate.parent
            for candidate in cache_root.glob("*/index")
            if _SHA_256_REGEX.match(candidate.parent.name)
            and not PersistencePath.from_path(candidate).non_existing()
        ]

        if doomed_folders:
            for folder in doomed_folders:
                # The whole hash-named folder goes, not just its `index` child.
                rmtree(folder)
                print(f"Cleared index at `{folder}`")
        else:
            print(f"No indexes found to clear in `{cache_root}`")

    if clear_type in ("savings", "all"):
        savings_file = cache_root / "savings.jsonl"
        if savings_file.exists():
            savings_file.unlink()
            print(f"Cleared savings at `{savings_file}`")
        else:
            print(f"No savings file found at `{savings_file}`")


def _cli_main() -> None:
parser = argparse.ArgumentParser(prog="semble")
sub = parser.add_subparsers(dest="command")
Expand All @@ -141,6 +177,9 @@ def _cli_main() -> None:
search_p.add_argument("-k", "--top-k", type=int, default=5, help="Number of results (default: 5).")
_add_content_args(search_p)

clear_p = sub.add_parser("clear", help="Clear the index cache.")
clear_p.add_argument("type", choices=["all", "index", "savings"], help="Type of cache to clear.")

related_p = sub.add_parser("find-related", help="Find code similar to a specific location.")
related_p.add_argument("file_path", help="File path as shown in search results.")
related_p.add_argument("line", type=int, help="Line number (1-indexed).")
Expand All @@ -162,6 +201,8 @@ def _cli_main() -> None:
from semble.installer import run

run(args.command)
elif args.command == "clear":
_run_clear(args.type)
elif args.command == "search":
_run_search(args.path, args.query, args.top_k, _resolve_content(args.content, args.include_text_files))
elif args.command == "find-related":
Expand Down
12 changes: 8 additions & 4 deletions tests/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,18 @@ def test_save_index_to_cache(tmp_path: Path) -> None:
[
("win32", "semble.cache._windows_cache_dir", Path("/win")),
("linux", "semble.cache._linux_cache_dir", Path("/linux")),
("darwin", "semble.cache._macos_cache_dir", Path("/macos")),
],
)
def test_resolve_cache_folder(platform: str, mock_target: str, expected: Path) -> None:
"""resolve_cache_folder calls the correct platform helper."""
with patch.object(sys, "platform", platform):
with patch(mock_target, return_value=expected) as mock_fn:
with patch("pathlib.Path.mkdir"):
result = resolve_cache_folder()
with (
patch.object(sys, "platform", platform),
patch.dict("os.environ", {}, clear=True),
patch(mock_target, return_value=expected) as mock_fn,
patch("pathlib.Path.mkdir"),
):
result = resolve_cache_folder()
mock_fn.assert_called_once_with("semble")
assert result == expected

Expand Down
146 changes: 143 additions & 3 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import sys
import warnings
from importlib.resources import files
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest

from semble.cli import _cli_main, _maybe_save_index, main
from semble.cli import _cli_main, _maybe_save_index, _run_clear, main
from semble.types import ContentType, SearchResult
from tests.conftest import make_chunk

Expand Down Expand Up @@ -172,8 +173,6 @@ def test_include_text_files_cli_deprecated(
capsys: pytest.CaptureFixture[str],
) -> None:
"""--include-text-files on CLI raises DeprecationWarning."""
import warnings

chunk = make_chunk("def foo(): pass", "src/foo.py")
fake_index = MagicMock()
fake_index.search.return_value = [SearchResult(chunk=chunk, score=0.9)]
Expand Down Expand Up @@ -229,3 +228,144 @@ def test_agent_file_tools_are_bash_only() -> None:
tools = [t.strip() for t in tools_line.removeprefix("tools:").split(",")]
assert set(tools) == {"Bash", "Read"}, f"Unexpected tools in agent file: {tools}"
assert not any("mcp__" in t for t in tools)


def _make_valid_index_dir(cache_folder: Path, sha: str = "a" * 64) -> Path:
    """Build a fake index directory at ``<cache_folder>/<sha>/index`` and return it.

    The directory is populated with the four placeholder files that, per the
    original test helper, `PersistencePath.non_existing` looks for.
    """
    target = cache_folder / sha / "index"
    target.mkdir(parents=True)

    # File name -> minimal content; written in this order.
    placeholder_files = {
        "chunks.json": "[]",
        "bm25_index": "",
        "semantic_index": "",
        "metadata.json": "{}",
    }
    for filename, body in placeholder_files.items():
        (target / filename).write_text(body)

    return target


@pytest.mark.parametrize(
    ("scenario", "expected_in_output"),
    [
        ("valid", ["Cleared index", "a" * 64, "b" * 64]),
        ("empty", ["No indexes found"]),
        ("non_sha", ["No indexes found"]),
        ("incomplete", ["No indexes found"]),
    ],
)
def test_run_clear_index(
    scenario: str, expected_in_output: list[str], tmp_path: Path, capsys: pytest.CaptureFixture[str]
) -> None:
    """`_run_clear("index")` removes valid indexes and ignores everything else.

    Scenarios: two valid indexes, an empty cache, a complete index under a
    folder whose name is not a SHA-256 digest, and a digest-named folder whose
    `index` directory is empty.
    """
    # Only the "valid" scenario creates folders that are expected to be removed.
    removable_shas = ["a" * 64, "b" * 64] if scenario == "valid" else []
    for sha in removable_shas:
        _make_valid_index_dir(tmp_path, sha)

    if scenario == "non_sha":
        # Complete set of index files, but under a folder name that is not a digest.
        _make_valid_index_dir(tmp_path, "not-a-sha")
    elif scenario == "incomplete":
        # Digest-named folder with an `index` directory that holds no files.
        (tmp_path / ("c" * 64) / "index").mkdir(parents=True)

    with patch("semble.cli.resolve_cache_folder", return_value=tmp_path):
        _run_clear("index")

    captured = capsys.readouterr().out
    assert all(fragment in captured for fragment in expected_in_output)

    for sha in removable_shas:
        assert not (tmp_path / sha).exists()


@pytest.mark.parametrize(
    ("create_file", "expected"),
    [
        (True, "Cleared savings"),
        (False, "No savings file found"),
    ],
)
def test_run_clear_savings(
    create_file: bool, expected: str, tmp_path: Path, capsys: pytest.CaptureFixture[str]
) -> None:
    """`_run_clear("savings")` removes an existing savings log, or reports that none exists."""
    savings_log = tmp_path / "savings.jsonl"
    if create_file:
        savings_log.write_text('{"tokens": 100}\n')

    with patch("semble.cli.resolve_cache_folder", return_value=tmp_path):
        _run_clear("savings")

    # The file only has to be checked when this test put it there.
    if create_file:
        assert not savings_log.exists()

    captured = capsys.readouterr().out
    assert expected in captured


@pytest.mark.parametrize(
    ("populate", "expected_fragments"),
    [
        (True, ["Cleared index", "d" * 64, "Cleared savings"]),
        (False, ["No indexes found", "No savings file found"]),
    ],
)
def test_run_clear_all(
    populate: bool, expected_fragments: list[str], tmp_path: Path, capsys: pytest.CaptureFixture[str]
) -> None:
    """`_run_clear("all")` covers both the index folders and the savings log."""
    index_root = tmp_path / ("d" * 64)
    savings_log = tmp_path / "savings.jsonl"

    if populate:
        _make_valid_index_dir(tmp_path, "d" * 64)
        savings_log.write_text('{"tokens": 50}\n')

    with patch("semble.cli.resolve_cache_folder", return_value=tmp_path):
        _run_clear("all")

    captured = capsys.readouterr().out
    assert all(fragment in captured for fragment in expected_fragments)

    if populate:
        assert not index_root.exists()
        assert not savings_log.exists()


@pytest.mark.parametrize(
    ("subcommand", "setup_index", "setup_savings", "expected_fragments"),
    [
        ("index", True, False, ["Cleared index", "e" * 64]),
        ("savings", False, True, ["Cleared savings"]),
        ("all", True, True, ["Cleared index", "Cleared savings"]),
    ],
)
def test_cli_clear_command(
    subcommand: str,
    setup_index: bool,
    setup_savings: bool,
    expected_fragments: list[str],
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Running `semble clear <subcommand>` through `_cli_main` clears the requested data."""
    digest = "e" * 64
    savings_log = tmp_path / "savings.jsonl"

    # Arrange only the artefacts this parametrisation asks for.
    if setup_index:
        _make_valid_index_dir(tmp_path, digest)
    if setup_savings:
        savings_log.write_text('{"tokens": 200}\n')

    # Drive the real argument parser by faking the command line.
    monkeypatch.setattr(sys, "argv", ["semble", "clear", subcommand])
    with patch("semble.cli.resolve_cache_folder", return_value=tmp_path):
        _cli_main()

    captured = capsys.readouterr().out
    assert all(fragment in captured for fragment in expected_fragments)

    if setup_index:
        assert not (tmp_path / digest).exists()
    if setup_savings:
        assert not savings_log.exists()
assert not savings_file.exists()
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading