1 file changed: +4 -9 lines changed
Original file line number | Diff line number | Diff line change
11from dotenv import load_dotenv
22from psycopg_pool import ConnectionPool
33from pgvector .psycopg import register_vector
4- from typing import Any
54import functools
65import cocoindex
76import os
87from numpy .typing import NDArray
98import numpy as np
109
1110
12- @cocoindex .op .function ()
13- def extract_extension (filename : str ) -> str :
14- """Extract the extension of a filename."""
15- return os .path .splitext (filename )[1 ]
16-
17-
1811@cocoindex .transform_flow ()
1912def code_to_embedding (
2013 text : cocoindex .DataSlice [str ],
@@ -53,10 +46,12 @@ def code_embedding_flow(
5346 code_embeddings = data_scope .add_collector ()
5447
5548 with data_scope ["files" ].row () as file :
56- file ["extension" ] = file ["filename" ].transform (extract_extension )
49+ file ["language" ] = file ["filename" ].transform (
50+ cocoindex .functions .DetectProgrammingLanguage ()
51+ )
5752 file ["chunks" ] = file ["content" ].transform (
5853 cocoindex .functions .SplitRecursively (),
59- language = file ["extension" ],
54+ language = file ["language" ],
6055 chunk_size = 1000 ,
6156 min_chunk_size = 300 ,
6257 chunk_overlap = 300 ,
You can’t perform that action at this time.
0 commit comments