Skip to content

Commit ed4610d

Browse files
authored
fix: revert code_embedding/main.py change - needs to wait for release (#1167)
1 parent b8c7172 commit ed4610d

File tree

1 file changed

+9
-4
lines changed

1 file changed

+9
-4
lines changed

examples/code_embedding/main.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,20 @@
11
from dotenv import load_dotenv
22
from psycopg_pool import ConnectionPool
33
from pgvector.psycopg import register_vector
4+
from typing import Any
45
import functools
56
import cocoindex
67
import os
78
from numpy.typing import NDArray
89
import numpy as np
910

1011

12+
@cocoindex.op.function()
13+
def extract_extension(filename: str) -> str:
14+
"""Return the extension of a filename, leading dot included (empty string if it has none)."""
15+
return os.path.splitext(filename)[1]
16+
17+
1118
@cocoindex.transform_flow()
1219
def code_to_embedding(
1320
text: cocoindex.DataSlice[str],
@@ -46,12 +53,10 @@ def code_embedding_flow(
4653
code_embeddings = data_scope.add_collector()
4754

4855
with data_scope["files"].row() as file:
49-
file["language"] = file["filename"].transform(
50-
cocoindex.functions.DetectProgrammingLanguage()
51-
)
56+
file["extension"] = file["filename"].transform(extract_extension)
5257
file["chunks"] = file["content"].transform(
5358
cocoindex.functions.SplitRecursively(),
54-
language=file["language"],
59+
language=file["extension"],
5560
chunk_size=1000,
5661
min_chunk_size=300,
5762
chunk_overlap=300,

0 commit comments

Comments
 (0)