77from dataclasses import dataclass
88from pypdf import PdfReader
99from transformers import CLIPModel , CLIPProcessor
10- from typing import Literal , TypeAlias , Final , cast
10+ from typing import cast
1111
1212
# gRPC endpoint of the local Qdrant vector database instance.
QDRANT_GRPC_URL = "http://localhost:6334"
# Qdrant collection holding CLIP image embeddings of PDF elements.
QDRANT_COLLECTION_IMAGE = "PdfElementsEmbeddingImage"
# Qdrant collection holding CLIP text embeddings of PDF elements.
QDRANT_COLLECTION_TEXT = "PdfElementsEmbeddingText"

# Hugging Face model id of the CLIP encoder used for both image and text
# embeddings below.
CLIP_MODEL_NAME = "openai/clip-vit-large-patch14"

# Max (width, height) when thumbnailing extracted images — presumably applied
# before embedding; the resize call is outside this view. TODO confirm.
IMG_THUMBNAIL_SIZE = (512, 512)
@@ -29,7 +27,7 @@ def get_clip_model() -> tuple[CLIPModel, CLIPProcessor]:
2927
3028
3129@cocoindex .op .function (cache = True , behavior_version = 1 , gpu = True )
32- def clip_embed_image (img_bytes : bytes ) -> ClipVectorType :
30+ def clip_embed_image (img_bytes : bytes ) -> list [ float ] :
3331 """
3432 Convert image to embedding using CLIP model.
3533 """
@@ -38,18 +36,18 @@ def clip_embed_image(img_bytes: bytes) -> ClipVectorType:
3836 inputs = processor (images = image , return_tensors = "pt" )
3937 with torch .no_grad ():
4038 features = model .get_image_features (** inputs )
41- return cast (ClipVectorType , features [0 ].tolist ())
39+ return cast (list [ float ] , features [0 ].tolist ())
4240
4341
def clip_embed_query(text: str) -> list[float]:
    """Embed a text query with the CLIP text encoder.

    Returns the raw feature vector of the first (only) batch entry as a
    plain list of floats, suitable for a Qdrant similarity search.
    """
    clip_model, clip_processor = get_clip_model()
    batch = clip_processor(text=[text], return_tensors="pt", padding=True)
    # Inference only — no gradients needed.
    with torch.no_grad():
        text_features = clip_model.get_text_features(**batch)
    embedding = text_features[0].tolist()
    return cast(list[float], embedding)
5351
5452
5553@cocoindex .transform_flow ()
0 commit comments