Skip to content

Commit cccc4ec

Browse files
authored
code_embedding: include all Python files with chunk size tuning. (#134)
1 parent a6e55b2 commit cccc4ec

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

examples/code_embedding/code_embedding.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,14 +15,15 @@ def code_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
15 15       """
16 16       Define an example flow that embeds files into a vector database.
17 17       """
18    -     data_scope["files"] = flow_builder.add_source(cocoindex.sources.LocalFile(path="."))
   18 +     data_scope["files"] = flow_builder.add_source(
   19 +         cocoindex.sources.LocalFile(path="../../python", included_patterns=["**/*.py"]))
19 20
20 21       code_embeddings = data_scope.add_collector()
21 22
22 23       with data_scope["files"].row() as file:
23 24           file["chunks"] = file["content"].transform(
24 25               cocoindex.functions.SplitRecursively(),
25    -             language="python", chunk_size=2000, chunk_overlap=500)
   26 +             language="python", chunk_size=1000, chunk_overlap=300)
26 27           with file["chunks"].row() as chunk:
27 28               chunk["embedding"] = chunk["text"].call(code_to_embedding)
28 29               code_embeddings.collect(filename=file["filename"], location=chunk["location"],

0 commit comments

Comments
 (0)