
Commit d783678

examples: review fixes

- image_search/main: use CLIP_MODEL_DIMENSION in Literal via type alias for mypy; preserve schema inference
- image_search/colpali_main: decorator with targeted untyped-decorator ignore
- pdf_embedding/main: targeted type ignore on return instead of cast
1 parent 3705fa1 commit d783678


3 files changed: +20 -14 lines changed


examples/image_search/colpali_main.py

Lines changed: 1 addition & 2 deletions
@@ -100,6 +100,7 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]:
 
 
 # --- Search API ---
+@app.get("/search")  # type: ignore
 def search(
     q: str = Query(..., description="Search query"),
     limit: int = Query(5, description="Number of results"),
@@ -133,5 +134,3 @@ def search(
     }
 
 
-# Attach route without using decorator to avoid untyped-decorator when FastAPI types are unavailable
-app.get("/search")(search)
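Reading the diff: the route is attached with ordinary decorator syntax again, and the mypy suppression is scoped to the one line that raises the untyped-decorator complaint, instead of routing around the decorator entirely. A minimal sketch of the two styles (self-contained FastAPI app; handler trimmed down):

from fastapi import FastAPI, Query

app = FastAPI()

# Decorator form: the ignore silences only this line, so unrelated
# type errors elsewhere in the module still surface.
@app.get("/search")  # type: ignore
def search(q: str = Query(..., description="Search query")) -> dict:
    return {"query": q}

# The replaced form attached the route manually to dodge the
# untyped-decorator error, at the cost of hiding the routing:
# app.get("/search")(search)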

examples/image_search/main.py

Lines changed: 17 additions & 9 deletions
@@ -3,7 +3,7 @@
 import io
 import os
 from contextlib import asynccontextmanager
-from typing import Any, cast, AsyncIterator
+from typing import Any, cast, AsyncIterator, Literal, Final, TYPE_CHECKING
 
 import cocoindex
 import torch
@@ -19,7 +19,11 @@
 QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6334/")
 QDRANT_COLLECTION = "ImageSearch"
 CLIP_MODEL_NAME = "openai/clip-vit-large-patch14"
-# Using simple list[float] for embeddings for readability in example code.
+CLIP_MODEL_DIMENSION: Final[int] = 768
+if TYPE_CHECKING:
+    ClipDim = Literal[768]
+else:
+    ClipDim = int
 
 
 @functools.cache
@@ -29,21 +33,24 @@ def get_clip_model() -> tuple[CLIPModel, CLIPProcessor]:
     return model, processor
 
 
-def embed_query(text: str) -> list[float]:
+def embed_query(text: str) -> cocoindex.Vector[cocoindex.Float32, ClipDim]:
     """
     Embed the caption using CLIP model.
     """
     model, processor = get_clip_model()
     inputs = processor(text=[text], return_tensors="pt", padding=True)
     with torch.no_grad():
         features = model.get_text_features(**inputs)
-    return cast(list[float], features[0].tolist())
+    return cast(
+        cocoindex.Vector[cocoindex.Float32, ClipDim],
+        features[0].tolist(),
+    )
 
 
 @cocoindex.op.function(cache=True, behavior_version=1, gpu=True)
 def embed_image(
     img_bytes: bytes,
-) -> list[float]:
+) -> cocoindex.Vector[cocoindex.Float32, ClipDim]:
     """
     Convert image to embedding using CLIP model.
     """
@@ -52,7 +59,10 @@ def embed_image(
     inputs = processor(images=image, return_tensors="pt")
     with torch.no_grad():
         features = model.get_image_features(**inputs)
-    return cast(list[float], features[0].tolist())
+    return cast(
+        cocoindex.Vector[cocoindex.Float32, ClipDim],
+        features[0].tolist(),
+    )
 
 
 # CocoIndex flow: Ingest images, extract captions, embed, export to Qdrant
@@ -141,7 +151,7 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]:
 
 
 # --- Search API ---
-@app.get("/search")  # type: ignore
+@app.get("/search")  # type: ignore[untyped-decorator]
 def search(
     q: str = Query(..., description="Search query"),
     limit: int = Query(5, description="Number of results"),
@@ -170,5 +180,3 @@ def search(
         ]
     }
 
-
-# Route attached via decorator above for readability
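The TYPE_CHECKING split exists because Literal[...] only accepts explicit literal values, not named constants, so the dimension is written twice: CLIP_MODEL_DIMENSION as the runtime constant and Literal[768] for mypy, with ClipDim degrading to plain int at runtime so annotation introspection (the schema inference the commit message mentions) keeps working. A standalone sketch of the pattern, independent of cocoindex (EMBEDDING_DIM, EmbeddingDim, and zero_vector are hypothetical names):

from typing import TYPE_CHECKING, Final, Literal

EMBEDDING_DIM: Final[int] = 768  # runtime constant, usable in arithmetic

if TYPE_CHECKING:
    # mypy sees a precise literal dimension, so vector types
    # parameterized with EmbeddingDim carry their size statically.
    EmbeddingDim = Literal[768]
else:
    # At runtime the alias is a plain type, which keeps annotations
    # digestible for tools that inspect them at import time.
    EmbeddingDim = int

def zero_vector() -> list[float]:
    # The Final constant keeps the duplicated 768 in one place for
    # actual computation.
    return [0.0] * EMBEDDING_DIM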

examples/pdf_embedding/main.py

Lines changed: 2 additions & 3 deletions
@@ -9,7 +9,7 @@
 from marker.output import text_from_rendered
 from psycopg_pool import ConnectionPool
 from jinja2 import Template
-from typing import Any, cast
+from typing import Any
 
 
 class PdfToMarkdown(cocoindex.op.FunctionSpec):
@@ -34,8 +34,7 @@ def __call__(self, content: bytes) -> str:
             temp_file.write(content)
             temp_file.flush()
             text_any, _, _ = text_from_rendered(self._converter(temp_file.name))
-            text: str = cast(str, text_any)
-            return text
+            return text_any  # type: ignore
 
 
 @cocoindex.transform_flow()
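Swapping the cast for a targeted ignore drops a runtime no-op and an import: typing.cast returns its argument unchanged and exists only to convince the checker, so when the value comes straight from an untyped third-party call, suppressing the single return line is the narrower fix. A sketch of the two spellings (render_pdf is a hypothetical stand-in for the untyped text_from_rendered result; assumes a strict mypy config such as --warn-return-any):

from typing import Any, cast

def render_pdf() -> Any:
    # Stand-in for a third-party call whose result mypy sees as Any.
    return "# Markdown output"

def to_markdown_with_cast() -> str:
    # Old style: cast() is a no-op at runtime and needs an extra import.
    text: str = cast(str, render_pdf())
    return text

def to_markdown_with_ignore() -> str:
    # New style: suppress only this return; the declared -> str still
    # type-checks every caller.
    return render_pdf()  # type: ignore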
