File tree (expand / collapse): 1 file changed, +9 -4 lines changed
Original file line number | Diff line number | Diff line change
11from dotenv import load_dotenv
22from psycopg_pool import ConnectionPool
33from pgvector .psycopg import register_vector
4+ from typing import Any
45import functools
56import cocoindex
67import os
78from numpy .typing import NDArray
89import numpy as np
910
1011
12+ @cocoindex .op .function ()
13+ def extract_extension (filename : str ) -> str :
14+ """Extract the extension of a filename."""
15+ return os .path .splitext (filename )[1 ]
16+
17+
1118@cocoindex .transform_flow ()
1219def code_to_embedding (
1320 text : cocoindex .DataSlice [str ],
@@ -46,12 +53,10 @@ def code_embedding_flow(
4653 code_embeddings = data_scope .add_collector ()
4754
4855 with data_scope ["files" ].row () as file :
49- file ["language" ] = file ["filename" ].transform (
50- cocoindex .functions .DetectProgrammingLanguage ()
51- )
56+ file ["extension" ] = file ["filename" ].transform (extract_extension )
5257 file ["chunks" ] = file ["content" ].transform (
5358 cocoindex .functions .SplitRecursively (),
54- language = file ["language" ],
59+ language = file ["extension" ],
5560 chunk_size = 1000 ,
5661 min_chunk_size = 300 ,
5762 chunk_overlap = 300 ,
You can’t perform that action at this time.
0 commit comments