-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path test_newchunk.py
More file actions
38 lines (30 loc) · 1.33 KB
/
test_newchunk.py
File metadata and controls
38 lines (30 loc) · 1.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import json
from pathlib import Path
from src.chunk.chunkers import PythonChunker
from rtfs.chunk_resolution.chunk_graph import ChunkGraph
from src.config import CHUNKS_ROOT
from src.index import Indexer
# Script: build (or load) a persisted chunk graph for one repo, run the
# indexer over it, and dump every cluster's content to chunks.txt for review.

repo = "codesearch-backend"

# Base layout of the local codesearch-data checkout.
# NOTE(review): hard-coded to one Windows machine — parameterize before sharing.
data_root = Path(r"C:\Users\jpeng\Documents\projects\codesearch-data")
repo_path = (data_root / "repo" / repo).resolve()
graph_path = (data_root / "graphs" / repo).resolve()

chunker = PythonChunker(repo_path)
chunks = chunker.chunk(persist_path=CHUNKS_ROOT / repo)

# Read the persisted graph at most once (the original called read_text()
# twice: once in the truthiness check, once for json.loads).
graph_text = graph_path.read_text() if graph_path.exists() else ""
if graph_text:
    print("Loading graph from json: ", graph_path)
    cg = ChunkGraph.from_json(repo_path, json.loads(graph_text))
else:
    print("Creating new graph from chunks: ", graph_path)
    cg = ChunkGraph.from_chunks(repo_path, chunks)
    cg.cluster()
    # Explicit utf-8: the original open(..., "w") fell back to the Windows
    # locale encoding, which can corrupt non-ASCII content in the JSON.
    graph_path.write_text(json.dumps(cg.to_json(), indent=2), encoding="utf-8")

indexer = Indexer(repo_path, chunks, cg, run_code=True, run_cluster=True, overwrite=True)
indexer.run()

# str.join instead of += in a loop: avoids quadratic string rebuilding.
cluster_str = "".join(
    cluster.to_str(return_content=True, return_imports=False)
    for cluster in cg.get_clusters(return_content=True)
)
with open("chunks.txt", "w", encoding="utf-8") as f:
    f.write(cluster_str)