33import cocoindex
44import os
55
6- def text_to_embedding (text : cocoindex .DataSlice ) -> cocoindex .DataSlice :
7- """
8- Embed the text using a SentenceTransformer model.
9- This is a shared logic between indexing and querying, so extract it as a function.
10- """
11- return text .transform (
12- cocoindex .functions .SentenceTransformerEmbed (
13- model = "sentence-transformers/all-MiniLM-L6-v2" ))
14-
156@cocoindex .flow_def (name = "GoogleDriveTextEmbedding" )
167def gdrive_text_embedding_flow (flow_builder : cocoindex .FlowBuilder , data_scope : cocoindex .DataScope ):
178 """
@@ -33,7 +24,9 @@ def gdrive_text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope:
3324 language = "markdown" , chunk_size = 2000 , chunk_overlap = 500 )
3425
3526 with doc ["chunks" ].row () as chunk :
36- chunk ["embedding" ] = text_to_embedding (chunk ["text" ])
27+ chunk ["embedding" ] = chunk ["text" ].transform (
28+ cocoindex .functions .SentenceTransformerEmbed (
29+ model = "sentence-transformers/all-MiniLM-L6-v2" ))
3730 doc_embeddings .collect (filename = doc ["filename" ], location = chunk ["location" ],
3831 text = chunk ["text" ], embedding = chunk ["embedding" ])
3932
@@ -47,7 +40,9 @@ def gdrive_text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope:
4740 name = "SemanticsSearch" ,
4841 flow = gdrive_text_embedding_flow ,
4942 target_name = "doc_embeddings" ,
50- query_transform_flow = text_to_embedding ,
43+ query_transform_flow = lambda text : text .transform (
44+ cocoindex .functions .SentenceTransformerEmbed (
45+ model = "sentence-transformers/all-MiniLM-L6-v2" )),
5146 default_similarity_metric = cocoindex .VectorSimilarityMetric .COSINE_SIMILARITY )
5247
5348@cocoindex .main_fn ()
0 commit comments