Skip to content

Commit d4180ad

Browse files
committed
Increase test coverage
1 parent 617ba74 commit d4180ad

File tree

5 files changed

+120
-24
lines changed

5 files changed

+120
-24
lines changed

app/backend/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ async def setup_clients():
560560
# Wait until token is needed to fetch for the first time
561561
current_app.config[CONFIG_SPEECH_SERVICE_TOKEN] = None
562562

563-
openai_client = setup_openai_client(
563+
openai_client, azure_openai_endpoint = setup_openai_client(
564564
openai_host=OPENAI_HOST,
565565
azure_credential=azure_credential,
566566
azure_openai_service=AZURE_OPENAI_SERVICE,

app/backend/prepdocs.py

Lines changed: 21 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -169,23 +169,19 @@ def setup_embeddings_service(
169169
logger.info("Not setting up embeddings service")
170170
return None
171171

172-
azure_endpoint = None
173-
azure_deployment = None
174172
if openai_host in [OpenAIHost.AZURE, OpenAIHost.AZURE_CUSTOM]:
175173
if azure_openai_endpoint is None:
176174
raise ValueError("Azure OpenAI endpoint must be provided when using Azure OpenAI embeddings")
177175
if azure_openai_deployment is None:
178176
raise ValueError("Azure OpenAI deployment must be provided when using Azure OpenAI embeddings")
179-
azure_endpoint = azure_openai_endpoint
180-
azure_deployment = azure_openai_deployment
181177

182178
return OpenAIEmbeddings(
183179
open_ai_client=open_ai_client,
184180
open_ai_model_name=emb_model_name,
185181
open_ai_dimensions=emb_model_dimensions,
186182
disable_batch=disable_batch_vectors,
187-
azure_deployment_name=azure_deployment,
188-
azure_endpoint=azure_endpoint,
183+
azure_deployment_name=azure_openai_deployment,
184+
azure_endpoint=azure_openai_endpoint,
189185
)
190186

191187

@@ -197,33 +193,39 @@ def setup_openai_client(
197193
azure_openai_custom_url: Optional[str] = None,
198194
openai_api_key: Optional[str] = None,
199195
openai_organization: Optional[str] = None,
200-
):
196+
) -> tuple[AsyncOpenAI, Optional[str]]:
201197
if openai_host not in OpenAIHost:
202198
raise ValueError(f"Invalid OPENAI_HOST value: {openai_host}. Must be one of {[h.value for h in OpenAIHost]}.")
203199

204200
openai_client: AsyncOpenAI
201+
azure_openai_endpoint: Optional[str] = None
205202

206203
if openai_host in [OpenAIHost.AZURE, OpenAIHost.AZURE_CUSTOM]:
204+
base_url: Optional[str] = None
205+
api_key_or_token: Optional[str | AsyncTokenCredential] = None
207206
if openai_host == OpenAIHost.AZURE_CUSTOM:
208207
logger.info("OPENAI_HOST is azure_custom, setting up Azure OpenAI custom client")
209208
if not azure_openai_custom_url:
210209
raise ValueError("AZURE_OPENAI_CUSTOM_URL must be set when OPENAI_HOST is azure_custom")
211-
endpoint = azure_openai_custom_url
210+
base_url = azure_openai_custom_url
212211
else:
213212
logger.info("OPENAI_HOST is azure, setting up Azure OpenAI client")
214213
if not azure_openai_service:
215214
raise ValueError("AZURE_OPENAI_SERVICE must be set when OPENAI_HOST is azure")
216-
endpoint = f"https://{azure_openai_service}.openai.azure.com/openai/v1"
215+
azure_openai_endpoint = f"https://{azure_openai_service}.openai.azure.com"
216+
base_url = f"{azure_openai_endpoint}/openai/v1"
217217
if azure_openai_api_key:
218218
logger.info("AZURE_OPENAI_API_KEY_OVERRIDE found, using as api_key for Azure OpenAI client")
219-
openai_client = AsyncOpenAI(base_url=endpoint, api_key=azure_openai_api_key)
219+
api_key_or_token = azure_openai_api_key
220220
else:
221221
logger.info("Using Azure credential (passwordless authentication) for Azure OpenAI client")
222-
token_provider = get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default")
223-
openai_client = AsyncOpenAI(
224-
base_url=endpoint,
225-
api_key=token_provider,
222+
api_key_or_token = get_bearer_token_provider(
223+
azure_credential, "https://cognitiveservices.azure.com/.default"
226224
)
225+
openai_client = AsyncOpenAI(
226+
base_url=base_url,
227+
api_key=api_key_or_token,
228+
)
227229
elif openai_host == OpenAIHost.LOCAL:
228230
logger.info("OPENAI_HOST is local, setting up local OpenAI client for OPENAI_BASE_URL with no key")
229231
openai_client = AsyncOpenAI(
@@ -240,7 +242,7 @@ def setup_openai_client(
240242
api_key=openai_api_key,
241243
organization=openai_organization,
242244
)
243-
return openai_client
245+
return openai_client, azure_openai_endpoint
244246

245247

246248
def setup_file_processors(
@@ -349,7 +351,7 @@ async def main(strategy: Strategy, setup_index: bool = True):
349351
await strategy.run()
350352

351353

352-
if __name__ == "__main__":
354+
if __name__ == "__main__": # pragma: no cover
353355
parser = argparse.ArgumentParser(
354356
description="Prepare documents by extracting content from PDFs, splitting content into sections, uploading to blob storage, and indexing in a search index."
355357
)
@@ -500,7 +502,8 @@ async def main(strategy: Strategy, setup_index: bool = True):
500502
emb_model_dimensions = 1536
501503
if os.getenv("AZURE_OPENAI_EMB_DIMENSIONS"):
502504
emb_model_dimensions = int(os.environ["AZURE_OPENAI_EMB_DIMENSIONS"])
503-
openai_client = setup_openai_client(
505+
506+
openai_client, azure_openai_endpoint = setup_openai_client(
504507
openai_host=OPENAI_HOST,
505508
azure_credential=azd_credential,
506509
azure_openai_service=os.getenv("AZURE_OPENAI_SERVICE"),
@@ -509,17 +512,13 @@ async def main(strategy: Strategy, setup_index: bool = True):
509512
openai_api_key=clean_key_if_exists(os.getenv("OPENAI_API_KEY")),
510513
openai_organization=os.getenv("OPENAI_ORGANIZATION"),
511514
)
512-
azure_embedding_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") or os.getenv("AZURE_OPENAI_CUSTOM_URL")
513-
if not azure_embedding_endpoint and OPENAI_HOST == OpenAIHost.AZURE:
514-
if service := os.getenv("AZURE_OPENAI_SERVICE"):
515-
azure_embedding_endpoint = f"https://{service}.openai.azure.com"
516515
openai_embeddings_service = setup_embeddings_service(
517516
open_ai_client=openai_client,
518517
openai_host=OPENAI_HOST,
519518
emb_model_name=os.environ["AZURE_OPENAI_EMB_MODEL_NAME"],
520519
emb_model_dimensions=emb_model_dimensions,
521520
azure_openai_deployment=os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT"),
522-
azure_openai_endpoint=azure_embedding_endpoint,
521+
azure_openai_endpoint=azure_openai_endpoint,
523522
disable_vectors=dont_use_vectors,
524523
disable_batch_vectors=args.disablebatchvectors,
525524
)

app/backend/prepdocslib/integratedvectorizerstrategy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
logger = logging.getLogger("scripts")
3030

3131

32-
class IntegratedVectorizerStrategy(Strategy):
32+
class IntegratedVectorizerStrategy(Strategy): # pragma: no cover
3333
"""
3434
Strategy for ingesting and vectorizing documents into a search service from files stored in a storage account
3535
"""

tests/test_prepdocs.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,35 @@ def test_setup_embeddings_service_requires_endpoint_for_azure() -> None:
313313
)
314314

315315

316+
def test_setup_embeddings_service_requires_deployment_for_azure() -> None:
317+
with pytest.raises(ValueError):
318+
prepdocs.setup_embeddings_service(
319+
open_ai_client=MockClient(
320+
MockEmbeddingsClient(
321+
openai.types.CreateEmbeddingResponse(
322+
object="list",
323+
data=[],
324+
model="text-embedding-3-large",
325+
usage=Usage(prompt_tokens=0, total_tokens=0),
326+
)
327+
)
328+
),
329+
openai_host=prepdocs.OpenAIHost.AZURE,
330+
emb_model_name=MOCK_EMBEDDING_MODEL_NAME,
331+
emb_model_dimensions=MOCK_EMBEDDING_DIMENSIONS,
332+
azure_openai_deployment=None,
333+
azure_openai_endpoint="https://service.openai.azure.com",
334+
)
335+
336+
337+
def test_setup_openai_client_requires_valid_host() -> None:
338+
with pytest.raises(ValueError, match="Invalid OPENAI_HOST value"):
339+
prepdocs.setup_openai_client(
340+
openai_host="invalid_host", # type: ignore
341+
azure_credential=MockAzureCredential(),
342+
)
343+
344+
316345
@pytest.mark.asyncio
317346
async def test_openai_embeddings_use_deployment_for_azure_model():
318347
class RecordingEmbeddingsClient:

tests/test_searchmanager.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
SearchIndex,
1313
SearchIndexPermissionFilterOption,
1414
SimpleField,
15+
VectorSearch,
1516
)
1617
from openai.types.create_embedding_response import Usage
1718

@@ -155,6 +156,73 @@ async def mock_create_or_update_index(self, index, *args, **kwargs):
155156
assert updated_indexes[0].fields[0].name == "storageUrl"
156157

157158

159+
@pytest.mark.asyncio
160+
async def test_create_index_adds_vectorizer_to_existing_index(monkeypatch, search_info):
161+
"""Test that a vectorizer is added to an existing index when embeddings are configured."""
162+
created_indexes = []
163+
updated_indexes = []
164+
165+
async def mock_create_index(self, index):
166+
created_indexes.append(index) # pragma: no cover
167+
168+
async def mock_list_index_names(self):
169+
yield "test"
170+
171+
async def mock_get_index(self, *args, **kwargs):
172+
# Return an existing index with vector_search but no vectorizers
173+
# Include embedding field to avoid triggering the embedding field addition code path
174+
return SearchIndex(
175+
name="test",
176+
fields=[
177+
SimpleField(
178+
name="storageUrl",
179+
type=SearchFieldDataType.String,
180+
filterable=True,
181+
),
182+
SimpleField(
183+
name="embedding",
184+
type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
185+
searchable=True,
186+
vector_search_dimensions=MOCK_EMBEDDING_DIMENSIONS,
187+
),
188+
],
189+
vector_search=VectorSearch(vectorizers=[]),
190+
)
191+
192+
async def mock_create_or_update_index(self, index, *args, **kwargs):
193+
updated_indexes.append(index)
194+
195+
monkeypatch.setattr(SearchIndexClient, "create_index", mock_create_index)
196+
monkeypatch.setattr(SearchIndexClient, "list_index_names", mock_list_index_names)
197+
monkeypatch.setattr(SearchIndexClient, "get_index", mock_get_index)
198+
monkeypatch.setattr(SearchIndexClient, "create_or_update_index", mock_create_or_update_index)
199+
200+
# Create a simple mock embeddings object with just the properties we need for index creation
201+
class MockEmbeddings:
202+
def __init__(self):
203+
self.azure_endpoint = "https://test.openai.azure.com"
204+
self.azure_deployment_name = "test-deployment"
205+
self.open_ai_model_name = MOCK_EMBEDDING_MODEL_NAME
206+
self.open_ai_dimensions = MOCK_EMBEDDING_DIMENSIONS
207+
208+
embeddings = MockEmbeddings()
209+
210+
manager = SearchManager(search_info, embeddings=embeddings, field_name_embedding="embedding")
211+
await manager.create_index()
212+
213+
assert len(created_indexes) == 0, "It should not have created a new index"
214+
assert len(updated_indexes) == 1, "It should have updated the existing index"
215+
assert updated_indexes[0].vector_search.vectorizers is not None
216+
assert len(updated_indexes[0].vector_search.vectorizers) == 1, "Should have added one vectorizer"
217+
# The vectorizer name for updating existing indexes uses index_name
218+
assert updated_indexes[0].vector_search.vectorizers[0].vectorizer_name == "test-vectorizer"
219+
# Verify the vectorizer parameters
220+
vectorizer = updated_indexes[0].vector_search.vectorizers[0]
221+
assert vectorizer.parameters.resource_url == "https://test.openai.azure.com"
222+
assert vectorizer.parameters.deployment_name == "test-deployment"
223+
assert vectorizer.parameters.model_name == MOCK_EMBEDDING_MODEL_NAME
224+
225+
158226
@pytest.mark.asyncio
159227
async def test_create_index_acls(monkeypatch, search_info):
160228
indexes = []

0 commit comments

Comments
 (0)