Skip to content

Commit ef3d143

Browse files
committed
Adjust chunk sizes for examples.
1 parent cf5a632 commit ef3d143

File tree

5 files changed

+5 -5 lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
63 | 63 |   # Split the document into chunks, put into `chunks` field
64 | 64 |   doc["chunks"] = doc["content"].transform(
65 | 65 |   cocoindex.functions.SplitRecursively(),
66 |    | - language="markdown", chunk_size=300, chunk_overlap=100)
   | 66 | + language="markdown", chunk_size=2000, chunk_overlap=500)
67 | 67 |
68 | 68 |   # Transform data of each chunk
69 | 69 |   with doc["chunks"].row() as chunk:

docs/docs/getting_started/quickstart.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,7 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
79 | 79 |   # Split the document into chunks, put into `chunks` field
80 | 80 |   doc["chunks"] = doc["content"].transform(
81 | 81 |   cocoindex.functions.SplitRecursively(),
82 |    | - language="markdown", chunk_size=300, chunk_overlap=100)
   | 82 | + language="markdown", chunk_size=2000, chunk_overlap=500)
83 | 83 |
84 | 84 |   # Transform data of each chunk
85 | 85 |   with doc["chunks"].row() as chunk:

examples/code_embedding/code_embedding.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ def code_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
22 | 22 |   with data_scope["files"].row() as file:
23 | 23 |   file["chunks"] = file["content"].transform(
24 | 24 |   cocoindex.functions.SplitRecursively(),
25 |    | - language="javascript", chunk_size=300, chunk_overlap=100)
   | 25 | + language="python", chunk_size=2000, chunk_overlap=500)
26 | 26 |   with file["chunks"].row() as chunk:
27 | 27 |   chunk["embedding"] = chunk["text"].call(code_to_embedding)
28 | 28 |   code_embeddings.collect(filename=file["filename"], location=chunk["location"],

examples/pdf_embedding/pdf_embedding.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ def pdf_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoinde
51 | 51 |   doc["markdown"] = doc["content"].transform(PdfToMarkdown())
52 | 52 |   doc["chunks"] = doc["markdown"].transform(
53 | 53 |   cocoindex.functions.SplitRecursively(),
54 |    | - language="markdown", chunk_size=300, chunk_overlap=100)
   | 54 | + language="markdown", chunk_size=2000, chunk_overlap=500)
55 | 55 |
56 | 56 |   with doc["chunks"].row() as chunk:
57 | 57 |   chunk["embedding"] = chunk["text"].call(text_to_embedding)

examples/text_embedding/text_embedding.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
24 | 24 |   with data_scope["documents"].row() as doc:
25 | 25 |   doc["chunks"] = doc["content"].transform(
26 | 26 |   cocoindex.functions.SplitRecursively(),
27 |    | - language="markdown", chunk_size=300, chunk_overlap=100)
   | 27 | + language="markdown", chunk_size=2000, chunk_overlap=500)
28 | 28 |
29 | 29 |   with doc["chunks"].row() as chunk:
30 | 30 |   chunk["embedding"] = text_to_embedding(chunk["text"])

0 commit comments

Comments
 (0)