@@ -15,6 +15,7 @@
 from qdrant_client import QdrantClient
 from transformers import CLIPModel, CLIPProcessor
 
+OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434/")
 QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6334/")
 QDRANT_COLLECTION = "ImageSearch"
 CLIP_MODEL_NAME = "openai/clip-vit-large-patch14"
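The new `OLLAMA_URL` default points at Ollama's standard local port (11434). Before enabling captioning, it can help to confirm the server is reachable and the requested model has been pulled. A minimal preflight sketch, assuming Ollama's standard `/api/tags` listing endpoint; the `requests` dependency and the check itself are illustrative, not part of the app:

```python
# Preflight sketch: verify the Ollama server is up and the requested model
# is available, using Ollama's standard /api/tags endpoint.
import os

import requests

OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434/")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL")

if OLLAMA_MODEL is not None:
    resp = requests.get(OLLAMA_URL.rstrip("/") + "/api/tags", timeout=5)
    resp.raise_for_status()
    names = [m["name"] for m in resp.json().get("models", [])]
    if not any(n == OLLAMA_MODEL or n.startswith(OLLAMA_MODEL + ":") for n in names):
        raise SystemExit(
            f"Model '{OLLAMA_MODEL}' not found; run: ollama pull {OLLAMA_MODEL}"
        )
```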
@@ -69,12 +70,39 @@ def image_object_embedding_flow(
     )
     img_embeddings = data_scope.add_collector()
     with data_scope["images"].row() as img:
+        ollama_model_name = os.getenv("OLLAMA_MODEL")
+        if ollama_model_name is not None:
+            # If an Ollama model is specified, generate an image caption.
+            img["caption"] = flow_builder.transform(
+                cocoindex.functions.ExtractByLlm(
+                    llm_spec=cocoindex.llm.LlmSpec(
+                        api_type=cocoindex.LlmApiType.OLLAMA, model=ollama_model_name
+                    ),
+                    instruction=(
+                        "Describe the image in one detailed sentence. "
+                        "Name all visible animal species, objects, and the main scene. "
+                        "Be specific about type, color, and notable features. "
+                        "Mention what each animal is doing."
+                    ),
+                    output_type=str,
+                ),
+                image=img["content"],
+            )
         img["embedding"] = img["content"].transform(embed_image)
-        img_embeddings.collect(
-            id=cocoindex.GeneratedField.UUID,
-            filename=img["filename"],
-            embedding=img["embedding"],
-        )
+
+        collect_fields = {
+            "id": cocoindex.GeneratedField.UUID,
+            "filename": img["filename"],
+            "embedding": img["embedding"],
+        }
+
+        if ollama_model_name is not None:
+            print(f"Using Ollama model '{ollama_model_name}' for captioning.")
+            collect_fields["caption"] = img["caption"]
+        else:
+            print("OLLAMA_MODEL not set; skipping captioning.")
+
+        img_embeddings.collect(**collect_fields)
 
     img_embeddings.export(
         "img_embeddings",
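The instruction string above can be sanity-checked against the model directly before running the full flow. A hedged sketch, assuming Ollama's standard `/api/generate` endpoint and a multimodal model; the `llava` fallback and the sample image path are assumptions:

```python
# Sketch: caption one image with the same instruction the flow uses,
# via Ollama's /api/generate endpoint (model name and image path assumed).
import base64
import os

import requests

OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434/")
INSTRUCTION = (
    "Describe the image in one detailed sentence. "
    "Name all visible animal species, objects, and the main scene. "
    "Be specific about type, color, and notable features. "
    "Mention what each animal is doing."
)

with open("img/example.jpg", "rb") as f:  # hypothetical sample image
    image_b64 = base64.b64encode(f.read()).decode()

resp = requests.post(
    OLLAMA_URL.rstrip("/") + "/api/generate",
    json={
        "model": os.getenv("OLLAMA_MODEL", "llava"),
        "prompt": INSTRUCTION,
        "images": [image_b64],
        "stream": False,
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["response"])  # the generated caption
```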
@@ -126,11 +154,18 @@ def search(
         collection_name=QDRANT_COLLECTION,
         query_vector=("embedding", query_embedding),
         limit=limit,
+        with_payload=True,
     )
 
     return {
         "results": [
-            {"filename": result.payload["filename"], "score": result.score}
+            {
+                "filename": result.payload["filename"],
+                "score": result.score,
+                "caption": result.payload.get(
+                    "caption"
+                ),  # include caption if available
+            }
             for result in search_results
         ]
     }
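With `with_payload=True`, Qdrant returns each point's stored fields alongside the score, so `caption` can be read straight off the payload; it is `None` for rows indexed without captioning enabled. A usage sketch, assuming the handler above is served at a local `/search` route; host, port, and query parameter names are assumptions:

```python
# Sketch: query the search endpoint and print results. Route, host, port,
# and parameter names are assumptions, not confirmed by this diff.
import requests

resp = requests.get(
    "http://localhost:8000/search",
    params={"q": "a brown bird on a branch", "limit": 3},
    timeout=30,
)
resp.raise_for_status()
for hit in resp.json()["results"]:
    # "caption" is None for rows indexed without captioning enabled.
    print(f'{hit["score"]:.3f}  {hit["filename"]}  {hit.get("caption")}')
```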