Skip to content

Commit bfb74e6

Browse files
committed
Fix tests, add parameter
1 parent 121521a commit bfb74e6

File tree

10 files changed

+64
-11
lines changed

10 files changed

+64
-11
lines changed

app/backend/app.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,7 @@ async def setup_clients():
458458
AZURE_SEARCH_QUERY_REWRITING = os.getenv("AZURE_SEARCH_QUERY_REWRITING", "false").lower()
459459
# This defaults to the previous field name "embedding", for backwards compatibility
460460
AZURE_SEARCH_FIELD_NAME_EMBEDDING = os.getenv("AZURE_SEARCH_FIELD_NAME_EMBEDDING", "embedding")
461+
AZURE_SEARCH_FIELD_NAME_IMAGE_EMBEDDING = os.getenv("AZURE_SEARCH_FIELD_NAME_IMAGE_EMBEDDING", "imageEmbedding")
461462

462463
AZURE_SPEECH_SERVICE_ID = os.getenv("AZURE_SPEECH_SERVICE_ID")
463464
AZURE_SPEECH_SERVICE_LOCATION = os.getenv("AZURE_SPEECH_SERVICE_LOCATION")
@@ -574,7 +575,11 @@ async def setup_clients():
574575
disable_vectors=os.getenv("USE_VECTORS", "").lower() == "false",
575576
)
576577
ingester = UploadUserFileStrategy(
577-
search_info=search_info, embeddings=text_embeddings_service, file_processors=file_processors
578+
search_info=search_info,
579+
embeddings=text_embeddings_service,
580+
file_processors=file_processors,
581+
search_field_name_embedding=AZURE_SEARCH_FIELD_NAME_EMBEDDING,
582+
search_field_name_image_embedding=AZURE_SEARCH_FIELD_NAME_IMAGE_EMBEDDING,
578583
)
579584
current_app.config[CONFIG_INGESTER] = ingester
580585

app/backend/prepdocs.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -432,8 +432,9 @@ async def main(strategy: Strategy, setup_index: bool = True):
432432
embeddings=openai_embeddings_service,
433433
image_embeddings=image_embeddings_service,
434434
search_analyzer_name=os.getenv("AZURE_SEARCH_ANALYZER_NAME"),
435-
search_field_name_embedding=os.getenv("AZURE_SEARCH_FIELD_NAME_EMBEDDING"),
436-
search_field_name_image_embedding=os.getenv("AZURE_SEARCH_FIELD_NAME_IMAGE_EMBEDDING"),
435+
# Default to the previous field names for backward compatibility
436+
search_field_name_embedding=os.getenv("AZURE_SEARCH_FIELD_NAME_EMBEDDING", "embedding"),
437+
search_field_name_image_embedding=os.getenv("AZURE_SEARCH_FIELD_NAME_IMAGE_EMBEDDING", "imageEmbedding"),
437438
use_acls=use_acls,
438439
category=args.category,
439440
use_content_understanding=use_content_understanding,

app/backend/prepdocslib/filestrategy.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,12 +136,25 @@ def __init__(
136136
file_processors: dict[str, FileProcessor],
137137
embeddings: Optional[OpenAIEmbeddings] = None,
138138
image_embeddings: Optional[ImageEmbeddings] = None,
139+
search_field_name_embedding: Optional[str] = None,
140+
search_field_name_image_embedding: Optional[str] = None,
139141
):
140142
self.file_processors = file_processors
141143
self.embeddings = embeddings
142144
self.image_embeddings = image_embeddings
143145
self.search_info = search_info
144-
self.search_manager = SearchManager(self.search_info, None, True, False, self.embeddings)
146+
self.search_manager = SearchManager(
147+
search_info=self.search_info,
148+
search_analyzer_name=None,
149+
use_acls=True,
150+
use_int_vectorization=False,
151+
embeddings=self.embeddings,
152+
field_name_embedding=search_field_name_embedding,
153+
field_name_image_embedding=search_field_name_image_embedding,
154+
search_images=False,
155+
)
156+
self.search_field_name_embedding = search_field_name_embedding
157+
self.search_field_name_image_embedding = search_field_name_image_embedding
145158

146159
async def add_file(self, file: File):
147160
if self.image_embeddings:

infra/main.bicep

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ param searchQueryLanguage string // Set in main.parameters.json
2828
param searchQuerySpeller string // Set in main.parameters.json
2929
param searchServiceSemanticRankerLevel string // Set in main.parameters.json
3030
param searchFieldNameEmbedding string // Set in main.parameters.json
31+
param searchFieldNameImageEmbedding string // Set in main.parameters.json
3132
var actualSearchServiceSemanticRankerLevel = (searchServiceSkuName == 'free')
3233
? 'disabled'
3334
: searchServiceSemanticRankerLevel
@@ -392,6 +393,7 @@ var appEnvVariables = {
392393
AZURE_SEARCH_QUERY_LANGUAGE: searchQueryLanguage
393394
AZURE_SEARCH_QUERY_SPELLER: searchQuerySpeller
394395
AZURE_SEARCH_FIELD_NAME_EMBEDDING: searchFieldNameEmbedding
396+
AZURE_SEARCH_FIELD_NAME_IMAGE_EMBEDDING: searchFieldNameImageEmbedding
395397
APPLICATIONINSIGHTS_CONNECTION_STRING: useApplicationInsights
396398
? monitoring.outputs.applicationInsightsConnectionString
397399
: ''
@@ -1288,6 +1290,7 @@ output AZURE_SEARCH_SERVICE_RESOURCE_GROUP string = searchServiceResourceGroup.n
12881290
output AZURE_SEARCH_SEMANTIC_RANKER string = actualSearchServiceSemanticRankerLevel
12891291
output AZURE_SEARCH_SERVICE_ASSIGNED_USERID string = searchService.outputs.principalId
12901292
output AZURE_SEARCH_FIELD_NAME_EMBEDDING string = searchFieldNameEmbedding
1293+
output AZURE_SEARCH_FIELD_NAME_IMAGE_EMBEDDING string = searchFieldNameImageEmbedding
12911294

12921295
output AZURE_COSMOSDB_ACCOUNT string = (useAuthentication && useChatHistoryCosmos) ? cosmosDb.outputs.name : ''
12931296
output AZURE_CHAT_HISTORY_DATABASE string = chatHistoryDatabaseName

infra/main.parameters.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@
8686
"searchFieldNameEmbedding": {
8787
"value": "${AZURE_SEARCH_FIELD_NAME_EMBEDDING=embedding3}"
8888
},
89+
"searchFieldNameImageEmbedding": {
90+
"value": "${AZURE_SEARCH_FIELD_NAME_IMAGE_EMBEDDING=imageEmbedding}"
91+
},
8992
"storageAccountName": {
9093
"value": "${AZURE_STORAGE_ACCOUNT}"
9194
},

tests/conftest.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,12 +246,16 @@ def mock_blob_container_client(monkeypatch):
246246
"OPENAI_HOST": "openai",
247247
"OPENAI_API_KEY": "secretkey",
248248
"OPENAI_ORGANIZATION": "organization",
249+
"AZURE_OPENAI_EMB_MODEL_NAME": "text-embedding-3-large",
250+
"AZURE_OPENAI_EMB_DIMENSIONS": "3072",
249251
},
250252
{
251253
"OPENAI_HOST": "azure",
252254
"AZURE_OPENAI_SERVICE": "test-openai-service",
253255
"AZURE_OPENAI_CHATGPT_DEPLOYMENT": "test-chatgpt",
254256
"AZURE_OPENAI_EMB_DEPLOYMENT": "test-ada",
257+
"AZURE_OPENAI_EMB_MODEL_NAME": "text-embedding-3-large",
258+
"AZURE_OPENAI_EMB_DIMENSIONS": "3072",
255259
"USE_GPT4V": "true",
256260
"AZURE_OPENAI_GPT4V_MODEL": "gpt-4",
257261
"VISION_ENDPOINT": "https://testvision.cognitiveservices.azure.com/",
@@ -264,6 +268,8 @@ def mock_blob_container_client(monkeypatch):
264268
"AZURE_OPENAI_SERVICE": "test-openai-service",
265269
"AZURE_OPENAI_CHATGPT_DEPLOYMENT": "test-chatgpt",
266270
"AZURE_OPENAI_EMB_DEPLOYMENT": "test-ada",
271+
"AZURE_OPENAI_EMB_MODEL_NAME": "text-embedding-3-large",
272+
"AZURE_OPENAI_EMB_DIMENSIONS": "3072",
267273
"AZURE_USE_AUTHENTICATION": "true",
268274
"AZURE_USER_STORAGE_ACCOUNT": "test-user-storage-account",
269275
"AZURE_USER_STORAGE_CONTAINER": "test-user-storage-container",
@@ -280,6 +286,8 @@ def mock_blob_container_client(monkeypatch):
280286
"AZURE_OPENAI_SERVICE": "test-openai-service",
281287
"AZURE_OPENAI_CHATGPT_DEPLOYMENT": "test-chatgpt",
282288
"AZURE_OPENAI_EMB_DEPLOYMENT": "test-ada",
289+
"AZURE_OPENAI_EMB_MODEL_NAME": "text-embedding-3-large",
290+
"AZURE_OPENAI_EMB_DIMENSIONS": "3072",
283291
"AZURE_USE_AUTHENTICATION": "true",
284292
"AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS": "true",
285293
"AZURE_ENABLE_UNAUTHENTICATED_ACCESS": "true",

tests/test_app_config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ def minimal_env(monkeypatch):
1616
monkeypatch.setenv("AZURE_SEARCH_SERVICE", "test-search-service")
1717
monkeypatch.setenv("AZURE_OPENAI_SERVICE", "test-openai-service")
1818
monkeypatch.setenv("AZURE_OPENAI_CHATGPT_MODEL", "gpt-4o-mini")
19+
monkeypatch.setenv("AZURE_OPENAI_EMB_MODEL_NAME", "text-embedding-3-large")
20+
monkeypatch.setenv("AZURE_OPENAI_EMB_DIMENSIONS", "3072")
1921
yield
2022

2123

tests/test_chatapproach.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ def chat_approach():
3030
embedding_deployment="embeddings",
3131
embedding_model=MOCK_EMBEDDING_MODEL_NAME,
3232
embedding_dimensions=MOCK_EMBEDDING_DIMENSIONS,
33+
embedding_field="embedding3",
3334
sourcepage_field="",
3435
content_field="",
3536
query_language="en-us",
@@ -176,6 +177,7 @@ async def test_search_results_filtering_by_scores(
176177
embedding_deployment="embeddings",
177178
embedding_model=MOCK_EMBEDDING_MODEL_NAME,
178179
embedding_dimensions=MOCK_EMBEDDING_DIMENSIONS,
180+
embedding_field="embedding3",
179181
sourcepage_field="",
180182
content_field="",
181183
query_language="en-us",
@@ -214,6 +216,7 @@ async def test_search_results_query_rewriting(monkeypatch):
214216
embedding_deployment="embeddings",
215217
embedding_model=MOCK_EMBEDDING_MODEL_NAME,
216218
embedding_dimensions=MOCK_EMBEDDING_DIMENSIONS,
219+
embedding_field="embedding3",
217220
sourcepage_field="",
218221
content_field="",
219222
query_language="en-us",

tests/test_chatvisionapproach.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ def chat_approach(openai_client, mock_confidential_client_success):
6060
embedding_deployment="embeddings",
6161
embedding_model=MOCK_EMBEDDING_MODEL_NAME,
6262
embedding_dimensions=MOCK_EMBEDDING_DIMENSIONS,
63+
embedding_field="embedding3",
6364
sourcepage_field="",
6465
content_field="",
6566
query_language="en-us",
@@ -149,4 +150,4 @@ async def test_compute_text_embedding(chat_approach, openai_client, mock_openai_
149150
assert isinstance(result, VectorizedQuery)
150151
assert result.vector == [0.0023064255, -0.009327292, -0.0028842222]
151152
assert result.k_nearest_neighbors == 50
152-
assert result.fields == "embedding"
153+
assert result.fields == "embedding3"

tests/test_searchmanager.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,16 @@ async def mock_list_index_names(self):
5050
monkeypatch.setattr(SearchIndexClient, "create_index", mock_create_index)
5151
monkeypatch.setattr(SearchIndexClient, "list_index_names", mock_list_index_names)
5252

53-
manager = SearchManager(search_info)
53+
manager = SearchManager(
54+
search_info,
55+
use_int_vectorization=False,
56+
field_name_embedding="embedding",
57+
field_name_image_embedding="imageEmbedding",
58+
)
5459
await manager.create_index()
5560
assert len(indexes) == 1, "It should have created one index"
5661
assert indexes[0].name == "test"
57-
assert len(indexes[0].fields) == 7
62+
assert len(indexes[0].fields) == 6
5863

5964

6065
@pytest.mark.asyncio
@@ -71,11 +76,16 @@ async def mock_list_index_names(self):
7176
monkeypatch.setattr(SearchIndexClient, "create_index", mock_create_index)
7277
monkeypatch.setattr(SearchIndexClient, "list_index_names", mock_list_index_names)
7378

74-
manager = SearchManager(search_info, use_int_vectorization=True)
79+
manager = SearchManager(
80+
search_info,
81+
use_int_vectorization=True,
82+
field_name_embedding="embedding",
83+
field_name_image_embedding="image_embedding",
84+
)
7585
await manager.create_index()
7686
assert len(indexes) == 1, "It should have created one index"
7787
assert indexes[0].name == "test"
78-
assert len(indexes[0].fields) == 8
88+
assert len(indexes[0].fields) == 7
7989

8090

8191
@pytest.mark.asyncio
@@ -165,11 +175,13 @@ async def mock_list_index_names(self):
165175
manager = SearchManager(
166176
search_info,
167177
use_acls=True,
178+
field_name_embedding="embedding",
179+
field_name_image_embedding="image_embedding",
168180
)
169181
await manager.create_index()
170182
assert len(indexes) == 1, "It should have created one index"
171183
assert indexes[0].name == "test"
172-
assert len(indexes[0].fields) == 9
184+
assert len(indexes[0].fields) == 8
173185

174186

175187
@pytest.mark.asyncio
@@ -283,6 +295,8 @@ async def mock_upload_documents(self, documents):
283295
manager = SearchManager(
284296
search_info,
285297
embeddings=embeddings,
298+
field_name_embedding="embedding3",
299+
field_name_image_embedding="image_embedding",
286300
)
287301

288302
test_io = io.BytesIO(b"test content")
@@ -303,7 +317,7 @@ async def mock_upload_documents(self, documents):
303317
)
304318

305319
assert len(documents_uploaded) == 1, "It should have uploaded one document"
306-
assert documents_uploaded[0]["embedding"] == [
320+
assert documents_uploaded[0]["embedding3"] == [
307321
0.0023064255,
308322
-0.009327292,
309323
-0.0028842222,

0 commit comments

Comments
 (0)