Skip to content

Commit bd1fde3

Browse files
authored
example: update code_embedding/main.py to use new function (#1168)
This reverts commit 4f12ad9.
1 parent d796c73 commit bd1fde3

File tree

1 file changed: +4 additions, -9 deletions

examples/code_embedding/main.py

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,13 @@
 from dotenv import load_dotenv
 from psycopg_pool import ConnectionPool
 from pgvector.psycopg import register_vector
-from typing import Any
 import functools
 import cocoindex
 import os
 from numpy.typing import NDArray
 import numpy as np
 
 
-@cocoindex.op.function()
-def extract_extension(filename: str) -> str:
-    """Extract the extension of a filename."""
-    return os.path.splitext(filename)[1]
-
-
 @cocoindex.transform_flow()
 def code_to_embedding(
     text: cocoindex.DataSlice[str],
@@ -53,10 +46,12 @@ def code_embedding_flow(
     code_embeddings = data_scope.add_collector()
 
     with data_scope["files"].row() as file:
-        file["extension"] = file["filename"].transform(extract_extension)
+        file["language"] = file["filename"].transform(
+            cocoindex.functions.DetectProgrammingLanguage()
+        )
         file["chunks"] = file["content"].transform(
             cocoindex.functions.SplitRecursively(),
-            language=file["extension"],
+            language=file["language"],
             chunk_size=1000,
             min_chunk_size=300,
             chunk_overlap=300,

0 commit comments

Comments
 (0)