Skip to content

Commit e2cfbc4

Browse files
committed
feat: Adding support for multimodal embedders.
1 parent 4e15544 commit e2cfbc4

File tree

1 file changed

+297
-0
lines changed

1 file changed

+297
-0
lines changed

tests/client/test_multimodal.py

Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
import base64
2+
import json
3+
import os
4+
from pathlib import Path
5+
6+
import pytest
7+
import requests
8+
from meilisearch import Client
9+
from tests import common
10+
11+
# ---------------- ENV ----------------
# Voyage API key; it is sent as the embedder's "apiKey" setting, and the
# test class below is skipped when it is not set.
VOYAGE_API_KEY = os.getenv("VOYAGE_API_KEY")

INDEX_UID = "multi-modal-search-test"
EMBEDDER_NAME = "multimodal"

# ---------------- Paths ----------------
# datasets folder (movies.json)
DATASETS_DIR = Path(__file__).resolve().parent.parent.parent / "datasets"
# Decode explicitly as UTF-8: without an encoding argument read_text() falls
# back to the platform's locale encoding, which makes the load
# machine-dependent for any non-ASCII text in the dataset.
MOVIES = json.loads((DATASETS_DIR / "movies.json").read_text(encoding="utf-8"))

# fixtures folder (images)
FIXTURES_DIR = Path(__file__).resolve().parent.parent / "fixtures"
24+
25+
26+
# ---------------- Helper ----------------
27+
def load_image_base64(file_name: str) -> str:
    """Return a fixture image's contents as base64 text.

    ``file_name`` is resolved relative to ``FIXTURES_DIR``. The file's raw
    bytes are base64-encoded and the encoded bytes are decoded to ``str``.
    """
    raw_bytes = (FIXTURES_DIR / file_name).read_bytes()
    return base64.b64encode(raw_bytes).decode("utf-8")
34+
35+
36+
# ---------------- Embedder Config ----------------
37+
# Match JS test exactly - fragments have complex nested objects
38+
# Templates shared by the document-side ("indexingFragments") entries.
_DOC_TEXT_TEMPLATE = (
    "A movie titled {{doc.title}} whose description starts with "
    "{{doc.overview|truncatewords:20}}."
)
_DOC_POSTER_TEMPLATE = "{{doc.poster}}"


def _fragment(*parts):
    """Return a fragment dict whose ``value.content`` is the list of *parts*."""
    return {"value": {"content": list(parts)}}


def _text_part(template):
    """Return a ``text`` content part carrying *template*."""
    return {"type": "text", "text": template}


def _image_url_part(template):
    """Return an ``image_url`` content part carrying *template*."""
    return {"type": "image_url", "image_url": template}


# Settings for the REST embedder registered under EMBEDDER_NAME.  Every helper
# call builds fresh dicts, so no nested object is shared between fragments.
EMBEDDER_CONFIG = {
    "source": "rest",
    "url": "https://api.voyageai.com/v1/multimodalembeddings",
    "apiKey": VOYAGE_API_KEY,
    "dimensions": 1024,
    # Fragments built from each document's fields.
    "indexingFragments": {
        "textAndPoster": _fragment(
            _text_part(_DOC_TEXT_TEMPLATE),
            _image_url_part(_DOC_POSTER_TEMPLATE),
        ),
        "text": _fragment(_text_part(_DOC_TEXT_TEMPLATE)),
        "poster": _fragment(_image_url_part(_DOC_POSTER_TEMPLATE)),
    },
    # Fragments built from the "media" object of a search request.
    "searchFragments": {
        "textAndPoster": _fragment(
            _text_part("{{media.textAndPoster.text}}"),
            {
                "type": "image_base64",
                "image_base64": "data:{{media.textAndPoster.image.mime}};base64,{{media.textAndPoster.image.data}}",
            },
        ),
        "text": _fragment(_text_part("{{media.text.text}}")),
        "poster": _fragment(_image_url_part("{{media.poster.poster}}")),
    },
    "request": {
        "inputs": ["{{fragment}}", "{{..}}"],
        "model": "voyage-multimodal-3",
    },
    "response": {
        "data": [{"embedding": "{{embedding}}"}, "{{..}}"],
    },
}
128+
129+
130+
# ---------------- Tests ----------------
131+
@pytest.mark.skipif(not VOYAGE_API_KEY, reason="Voyage API key not set")
class TestMultimodalSearch:
    """Multi-modal search tests.

    A class-scoped fixture creates the ``INDEX_UID`` index once, configures
    the ``EMBEDDER_NAME`` REST embedder on it and loads ``MOVIES``; the tests
    then search it with text, image-URL and text+image media fragments.
    The whole class is skipped when ``VOYAGE_API_KEY`` is not set.
    """

    @staticmethod
    def _ensure_succeeded(task, action):
        """Raise ``RuntimeError`` unless *task* has status ``"succeeded"``.

        *action* names the operation in the error message; the task's error
        payload is appended when present.
        """
        if task.status == "succeeded":
            return
        error_msg = f"{action} failed: status={task.status}"
        if task.error:
            error_msg += f", error={task.error}"
        raise RuntimeError(error_msg)

    @staticmethod
    def _delete_index_best_effort(client, uid):
        """Delete index *uid* and wait for the task, ignoring client errors.

        Only errors raised by the Meilisearch client are ignored, so that
        programming errors (e.g. ``AttributeError``) still surface.
        """
        # Function-scope import: the module only imports ``Client`` at top level.
        from meilisearch.errors import MeilisearchError

        try:
            task = client.index(uid).delete()
            client.wait_for_task(task.task_uid)
        except MeilisearchError:
            # Deliberate best-effort cleanup; a failed delete must not fail the test.
            pass

    @pytest.fixture(autouse=True)
    def clear_indexes(self, client):
        """
        Override the global clear_indexes fixture to exclude the multimodal test index.
        This prevents the index from being deleted between tests in this class.
        """
        yield
        # Delete all indexes except the multimodal test index
        indexes = client.get_indexes()
        for index in indexes["results"]:
            if index.uid == INDEX_UID:
                continue
            self._delete_index_best_effort(client, index.uid)

    @pytest.fixture(scope="class", autouse=True)
    def setup_index(self, request):
        """Setup index with embedder configuration.

        Raises:
            RuntimeError: if the settings response carries no task uid, or if
                the settings or document-indexing task does not succeed.
            requests.HTTPError: if the settings request returns an error status.
        """
        client = Client(common.BASE_URL, common.MASTER_KEY)

        # Enable multimodal experimental feature
        client.update_experimental_features({"multimodal": True})

        # Start from a clean slate; the index may be left over from a previous run.
        self._delete_index_best_effort(client, INDEX_UID)

        # Create index
        task = client.create_index(INDEX_UID)
        client.wait_for_task(task.task_uid)

        # Update settings with embedder config
        # Use raw HTTP request because fragments with complex objects
        # may not pass Pydantic validation
        settings_payload = {
            "searchableAttributes": ["title", "overview"],
            "embedders": {
                EMBEDDER_NAME: EMBEDDER_CONFIG,
            },
        }

        response = requests.patch(
            f"{common.BASE_URL}/indexes/{INDEX_UID}/settings",
            headers={
                "Authorization": f"Bearer {common.MASTER_KEY}",
                "Content-Type": "application/json",
            },
            json=settings_payload,
            # requests never times out by default; bound the call so an
            # unresponsive server cannot hang the whole test session.
            timeout=30,
        )
        response.raise_for_status()

        # Wait for settings update task (embedder config can take longer).
        # Compare against None rather than truthiness: 0 is a valid task uid,
        # and a missing uid must fail loudly instead of skipping the wait.
        task_uid = response.json().get("taskUid")
        if task_uid is None:
            raise RuntimeError("Embedder setup failed: response has no taskUid")
        task = client.wait_for_task(task_uid, timeout_in_ms=60_000)  # 1 minute for embedder setup
        self._ensure_succeeded(task, "Embedder setup")

        index = client.get_index(INDEX_UID)

        # Add documents
        task = index.add_documents(MOVIES)
        # Use longer timeout for document indexing with embeddings
        # Each document needs embeddings generated via Voyage API, which can be slow
        task = client.wait_for_task(
            task.task_uid,
            timeout_in_ms=300_000,  # 5 minutes timeout for embedding generation
            interval_in_ms=1000,  # Poll every 1 second instead of 50ms to reduce log noise
        )
        self._ensure_succeeded(task, "Document indexing")

        # Verify index is ready by checking stats
        stats = index.get_stats()
        assert stats.number_of_documents == len(
            MOVIES
        ), f"Expected {len(MOVIES)} documents, got {stats.number_of_documents}"

        # Store for tests on the class
        request.cls.client = client
        request.cls.index = index
        request.cls.search_client = Client(common.BASE_URL, common.MASTER_KEY)  # Search client

    def test_text_query(self):
        """Test text query search"""
        query = "The story follows Carol Danvers"
        response = self.search_client.index(INDEX_UID).search(
            query,
            {
                "media": {
                    "text": {
                        "text": query,
                    },
                },
                "hybrid": {
                    "embedder": EMBEDDER_NAME,
                    "semanticRatio": 1,
                },
            },
        )
        assert response["hits"][0]["title"] == "Captain Marvel"

    def test_image_query(self):
        """Test image query search"""
        # Find Dumbo in the movies list
        dumbo_movie = next(m for m in MOVIES if m["title"] == "Dumbo")
        dumbo_poster = dumbo_movie["poster"]

        response = self.search_client.index(INDEX_UID).search(
            None,
            {
                "media": {
                    "poster": {
                        "poster": dumbo_poster,
                    },
                },
                "hybrid": {
                    "embedder": EMBEDDER_NAME,
                    "semanticRatio": 1,
                },
            },
        )
        assert response["hits"][0]["title"] == "Dumbo"

    def test_text_and_image_query(self):
        """Test text and image query"""
        query = "a futuristic movie"
        master_yoda_base64 = load_image_base64("master-yoda.jpeg")

        # Pass the query positionally (as test_text_query does) instead of
        # passing None and then overriding "q" through the options dict.
        response = self.search_client.index(INDEX_UID).search(
            query,
            {
                "media": {
                    "textAndPoster": {
                        "text": query,
                        "image": {
                            "mime": "image/jpeg",
                            "data": master_yoda_base64,
                        },
                    },
                },
                "hybrid": {
                    "embedder": EMBEDDER_NAME,
                    "semanticRatio": 1,
                },
            },
        )
        assert response["hits"][0]["title"] == "Captain Marvel"

0 commit comments

Comments
 (0)