
Commit d855ad7

feat(cli): finished CLI mode (mostly).

1 parent 0277ad4 commit d855ad7

7 files changed: +97, -53 lines changed

src/vectorcode/chunking.py
Lines changed: 5 additions & 4 deletions

@@ -235,10 +235,11 @@ def chunk(self, data: str) -> Generator[Chunk, None, None]:
         """
         assert os.path.isfile(data)
         with open(data) as fin:
-            content = fin.read()
-            if self.config.chunk_size < 0:
-                yield content
-                return
+            lines = fin.readlines()
+            content = "".join(lines)
+            if self.config.chunk_size < 0 and content:
+                yield Chunk(content, Point(1, 0), Point(len(lines), len(lines[-1]) - 1))
+                return
         parser = None
         language = None
         lexer = self.__guess_type(data, content)
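For orientation (not part of the commit): with this change, a negative chunk_size now yields a single Chunk whose start and end Points span the whole file instead of a bare string. A minimal sketch of the resulting object, assuming Chunk and Point are importable from vectorcode.chunking and using a made-up file name:

# A hedged illustration of the whole-file Chunk; import locations are assumed.
from vectorcode.chunking import Chunk, Point

with open("example.py") as fin:  # any small, non-empty text file
    lines = fin.readlines()

# Mirrors the new branch: start at row 1, column 0; end at the last row and
# the last column of the final line.
whole_file = Chunk(
    "".join(lines),
    Point(1, 0),
    Point(len(lines), len(lines[-1]) - 1),
)
print(whole_file.start.row, whole_file.end.row)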

src/vectorcode/lsp_main.py
Lines changed: 4 additions & 15 deletions

@@ -24,7 +24,7 @@
 )
 from vectorcode.common import get_client, get_collection, try_server
 from vectorcode.subcommands.ls import get_collection_list
-from vectorcode.subcommands.query import get_query_result_files
+from vectorcode.subcommands.query import build_query_results

 cached_project_configs: dict[str, Config] = {}
 DEFAULT_PROJECT_ROOT: str | None = None

@@ -108,20 +108,9 @@ async def execute_command(ls: LanguageServer, args: list[str]):
         )
         final_results = []
         try:
-            for path in await get_query_result_files(
-                collection=collection,
-                configs=final_configs,
-            ):
-                if os.path.isfile(path):
-                    with open(path) as fin:
-                        output_path = path
-                        if not final_configs.use_absolute_path:
-                            output_path = os.path.relpath(
-                                path, final_configs.project_root
-                            )
-                        final_results.append(
-                            {"path": output_path, "document": fin.read()}
-                        )
+            final_results.extend(
+                await build_query_results(collection, final_configs)
+            )
         finally:
             ls.progress.end(
                 progress_token,

src/vectorcode/subcommands/chunks.py
Lines changed: 1 addition & 1 deletion

@@ -9,5 +9,5 @@ async def chunks(configs: Config) -> int:
     result = []
     for file_path in configs.files:
         result.append(list(chunker.chunk(str(file_path))))
-    print(json.dumps(result))
+    print(json.dumps(str(result)))
     return 0

src/vectorcode/subcommands/query/__init__.py
Lines changed: 67 additions & 27 deletions

@@ -2,6 +2,7 @@
 import os
 import sys

+from chromadb import GetResult
 from chromadb.api.models.AsyncCollection import AsyncCollection
 from chromadb.api.types import IncludeEnum
 from chromadb.errors import InvalidCollectionException, InvalidDimensionException

@@ -33,18 +34,20 @@ async def get_query_result_files(
         print("Empty collection!", file=sys.stderr)
         return []
     try:
+        if len(configs.query_exclude):
+            filtered_files: dict[str, dict] = {"path": {"$nin": configs.query_exclude}}
+        else:
+            filtered_files = {}
         num_query = configs.n_result
-        if QueryInclude.chunk not in configs.include:
+        if QueryInclude.chunk in configs.include:
+            filtered_files["start"] = {"$gte": 0}
+        else:
             num_query = await collection.count()
             if configs.query_multiplier > 0:
                 num_query = min(
                     int(configs.n_result * configs.query_multiplier),
                     await collection.count(),
                 )
-        if len(configs.query_exclude):
-            filtered_files = {"path": {"$nin": configs.query_exclude}}
-        else:
-            filtered_files = None
         results = await collection.query(
             query_texts=query_chunks,
             n_results=num_query,

@@ -72,6 +75,64 @@ async def get_query_result_files(
     return aggregated_results


+async def build_query_results(
+    collection: AsyncCollection, configs: Config
+) -> list[dict[str, str | int]]:
+    structured_result = []
+    for identifier in await get_query_result_files(collection, configs):
+        if os.path.isfile(identifier):
+            if configs.use_absolute_path:
+                output_path = os.path.abspath(identifier)
+            else:
+                output_path = os.path.relpath(identifier, configs.project_root)
+            full_result = {"path": output_path}
+            with open(identifier) as fin:
+                document = fin.read()
+                full_result["document"] = document
+
+            structured_result.append(
+                {str(key): full_result[str(key)] for key in configs.include}
+            )
+        elif QueryInclude.chunk in configs.include:
+            chunk: GetResult = await collection.get(
+                identifier, include=[IncludeEnum.metadatas, IncludeEnum.documents]
+            )
+            meta = chunk.get(
+                "metadatas",
+            )
+            if meta is not None and len(meta) != 0:
+                full_result: dict[str, str | int] = {
+                    "chunk": str(chunk.get("documents", [""])[0])
+                }
+                if meta[0].get("start") is not None and meta[0].get("end") is not None:
+                    path = str(meta[0].get("path"))
+                    with open(path) as fin:
+                        start: int = meta[0]["start"]
+                        end: int = meta[0]["end"]
+                        full_result["chunk"] = "".join(fin.readlines()[start : end + 1])
+                        full_result["start_line"] = start
+                        full_result["end_line"] = end
+                full_result["path"] = str(
+                    meta[0]["path"]
+                    if configs.use_absolute_path
+                    else os.path.relpath(meta[0]["path"], str(configs.project_root))
+                )
+
+                structured_result.append(full_result)
+            else:
+                print(
+                    "This collection doesn't support chunk-mode output because it lacks the necessary metadata. Please re-vectorise it.",
+                    file=sys.stderr,
+                )
+
+        else:
+            print(
+                f"{identifier} is no longer a valid file! Please re-run vectorcode vectorise to refresh the database.",
+                file=sys.stderr,
+            )
+    return structured_result
+
+
 async def query(configs: Config) -> int:
     if (
         QueryInclude.chunk in configs.include

@@ -108,28 +169,7 @@ async def query(configs: Config) -> int:
     if not configs.pipe:
         print("Starting querying...")

-    structured_result = []
-
-    for path in await get_query_result_files(collection, configs):
-        if os.path.isfile(path):
-            if configs.use_absolute_path:
-                output_path = os.path.abspath(path)
-            else:
-                output_path = os.path.relpath(path, configs.project_root)
-            full_result = {"path": output_path}
-            if QueryInclude.document in configs.include:
-                with open(path) as fin:
-                    document = fin.read()
-                    full_result["document"] = document
-
-            structured_result.append(
-                {str(key): full_result[str(key)] for key in configs.include}
-            )
-        else:
-            print(
-                f"{path} is no longer a valid file! Please re-run vectorcode vectorise to refresh the database.",
-                file=sys.stderr,
-            )
+    structured_result = await build_query_results(collection, configs)

     if configs.pipe:
         print(json.dumps(structured_result))
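For context (not part of the commit): a minimal usage sketch of the new build_query_results helper, based only on the call shape shown above and in lsp_main.py. The Config fields used here (project_root, include) and the way the collection is obtained are assumptions, and how the query text itself is supplied on the Config is omitted.

# A hedged sketch of calling build_query_results with an existing collection.
from chromadb.api.models.AsyncCollection import AsyncCollection

from vectorcode.cli_utils import CliAction, Config, QueryInclude
from vectorcode.subcommands.query import build_query_results


async def show_matches(collection: AsyncCollection) -> None:  # hypothetical helper
    configs = Config(
        action=CliAction.query,
        project_root="/path/to/project",  # hypothetical project root
        include=[QueryInclude.path, QueryInclude.document],
    )
    # Each result dict only carries the keys requested via configs.include:
    # {"path": ..., "document": ...} here, or {"path": ..., "chunk": ...,
    # "start_line": ..., "end_line": ...} in chunk mode.
    for result in await build_query_results(collection, configs):
        print(result["path"])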

src/vectorcode/subcommands/vectorise.py
Lines changed: 3 additions & 2 deletions

@@ -65,8 +65,9 @@ async def chunked_add(
     for chunk in chunks:
         meta: dict[str, str | dict[str, int]] = {"path": full_path_str}
         if isinstance(chunk, Chunk):
-            meta["start"] = {"row": chunk.start.row, "col": chunk.start.column}
-            meta["end"] = {"row": chunk.end.row, "col": chunk.end.column}
+            meta["start"] = chunk.start.row
+            meta["end"] = chunk.end.row
+
         metas.append(meta)
     async with collection_lock:
         for idx in range(0, len(chunks), max_batch_size):
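In other words, the per-chunk metadata written at vectorise time is now flat: "start" and "end" hold the chunk's start and end row numbers, which is what build_query_results reads back as start_line and end_line. A sketch of the stored record, with made-up values:

# Illustrative shape of the metadata stored per chunk after this change
# (values are made up; not part of the commit).
meta: dict[str, str | int] = {
    "path": "/path/to/project/src/example.py",  # full_path_str
    "start": 12,  # chunk.start.row
    "end": 27,    # chunk.end.row
}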

tests/subcommands/query/test_query.py
Lines changed: 10 additions & 2 deletions

@@ -5,7 +5,7 @@
 from chromadb.api.types import IncludeEnum
 from chromadb.errors import InvalidCollectionException, InvalidDimensionException

-from vectorcode.cli_utils import Config, QueryInclude
+from vectorcode.cli_utils import CliAction, Config, QueryInclude
 from vectorcode.subcommands.query import get_query_result_files, query


@@ -71,7 +71,7 @@ async def test_get_query_result_files(mock_collection, mock_config):
     assert IncludeEnum.metadatas in kwargs["include"]
     assert IncludeEnum.distances in kwargs["include"]
     assert IncludeEnum.documents in kwargs["include"]
-    assert kwargs["where"] is None  # Since query_exclude is empty
+    assert not kwargs["where"]  # Since query_exclude is empty

     # Check reranker was used correctly
     MockReranker.assert_called_once_with(mock_config)

@@ -444,3 +444,11 @@ async def test_query_invalid_ef(mock_config):

     # Verify the function returns error code
     assert result == 1
+
+
+@pytest.mark.asyncio
+async def test_query_invalid_include():
+    faulty_config = Config(
+        action=CliAction.query, include=[QueryInclude.chunk, QueryInclude.document]
+    )
+    assert await query(faulty_config) != 0

tests/test_lsp.py
Lines changed: 7 additions & 2 deletions

@@ -4,7 +4,7 @@
 from pygls.server import LanguageServer

 from vectorcode import __version__
-from vectorcode.cli_utils import CliAction, Config
+from vectorcode.cli_utils import CliAction, Config, QueryInclude
 from vectorcode.lsp_main import (
     execute_command,
     lsp_start,

@@ -23,13 +23,18 @@ def mock_language_server():

 @pytest.fixture
 def mock_config():
-    config = MagicMock(spec=Config)
+    # config = MagicMock(spec=Config)
+    config = Config()
     config.host = "localhost"
     config.port = 8000
     config.action = CliAction.query
     config.project_root = "/test/project"
     config.use_absolute_path = True
     config.pipe = False
+    config.overlap_ratio = 0.2
+    config.query_exclude = []
+    config.include = [QueryInclude.path]
+    config.query_multipler = 10
     return config
