2727AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG = "default"
2828AZURE_SEARCH_CONVERSATIONS_LOG_INDEX = "mock-log-index"
2929USE_ADVANCED_IMAGE_PROCESSING = False
30+ AZURE_SEARCH_DOC_UPLOAD_BATCH_SIZE = 100
3031
3132
3233@pytest .fixture (autouse = True )
@@ -49,7 +50,9 @@ def llm_helper_mock():
4950
5051@pytest .fixture (autouse = True )
5152def env_helper_mock ():
52- with patch ("backend.batch.utilities.helpers.embedders.push_embedder.EnvHelper" ) as mock :
53+ with patch (
54+ "backend.batch.utilities.helpers.embedders.push_embedder.EnvHelper"
55+ ) as mock :
5356 env_helper = mock .return_value
5457 env_helper .AZURE_AUTH_TYPE = AZURE_AUTH_TYPE
5558 env_helper .AZURE_SEARCH_KEY = AZURE_SEARCH_KEY
@@ -58,7 +61,9 @@ def env_helper_mock():
5861 env_helper .AZURE_SEARCH_USE_SEMANTIC_SEARCH = AZURE_SEARCH_USE_SEMANTIC_SEARCH
5962 env_helper .AZURE_SEARCH_FIELDS_ID = AZURE_SEARCH_FIELDS_ID
6063 env_helper .AZURE_SEARCH_CONTENT_COLUMN = AZURE_SEARCH_CONTENT_COLUMN
61- env_helper .AZURE_SEARCH_CONTENT_VECTOR_COLUMN = AZURE_SEARCH_CONTENT_VECTOR_COLUMN
64+ env_helper .AZURE_SEARCH_CONTENT_VECTOR_COLUMN = (
65+ AZURE_SEARCH_CONTENT_VECTOR_COLUMN
66+ )
6267 env_helper .AZURE_SEARCH_TITLE_COLUMN = AZURE_SEARCH_TITLE_COLUMN
6368 env_helper .AZURE_SEARCH_FIELDS_METADATA = AZURE_SEARCH_FIELDS_METADATA
6469 env_helper .AZURE_SEARCH_SOURCE_COLUMN = AZURE_SEARCH_SOURCE_COLUMN
@@ -73,6 +78,9 @@ def env_helper_mock():
7378
7479 env_helper .USE_ADVANCED_IMAGE_PROCESSING = USE_ADVANCED_IMAGE_PROCESSING
7580 env_helper .is_auth_type_keys .return_value = True
81+ env_helper .AZURE_SEARCH_DOC_UPLOAD_BATCH_SIZE = (
82+ AZURE_SEARCH_DOC_UPLOAD_BATCH_SIZE
83+ )
7684 yield env_helper
7785
7886
@@ -291,7 +299,10 @@ def test_embed_file_advanced_image_processing_raises_exception_on_failure(
291299
292300
293301def test_embed_file_use_advanced_image_processing_does_not_vectorize_image_if_unsupported (
294- azure_computer_vision_mock , mock_config_helper , azure_search_helper_mock , env_helper_mock
302+ azure_computer_vision_mock ,
303+ mock_config_helper ,
304+ azure_search_helper_mock ,
305+ env_helper_mock ,
295306):
296307 # given
297308 mock_config_helper .document_processors = [
@@ -331,7 +342,9 @@ def test_embed_file_loads_documents(document_loading_mock, env_helper_mock):
331342 )
332343
333344
334- def test_embed_file_chunks_documents (document_loading_mock , document_chunking_mock , env_helper_mock ):
345+ def test_embed_file_chunks_documents (
346+ document_loading_mock , document_chunking_mock , env_helper_mock
347+ ):
335348 # given
336349 push_embedder = PushEmbedder (MagicMock (), env_helper_mock )
337350
@@ -347,7 +360,9 @@ def test_embed_file_chunks_documents(document_loading_mock, document_chunking_mo
347360 )
348361
349362
350- def test_embed_file_chunks_documents_upper_case (document_loading_mock , document_chunking_mock , env_helper_mock ):
363+ def test_embed_file_chunks_documents_upper_case (
364+ document_loading_mock , document_chunking_mock , env_helper_mock
365+ ):
351366 # given
352367 push_embedder = PushEmbedder (MagicMock (), env_helper_mock )
353368
@@ -363,7 +378,9 @@ def test_embed_file_chunks_documents_upper_case(document_loading_mock, document_
363378 )
364379
365380
366- def test_embed_file_generates_embeddings_for_documents (llm_helper_mock , env_helper_mock ):
381+ def test_embed_file_generates_embeddings_for_documents (
382+ llm_helper_mock , env_helper_mock
383+ ):
367384 # given
368385 push_embedder = PushEmbedder (MagicMock (), env_helper_mock )
369386
@@ -382,7 +399,8 @@ def test_embed_file_generates_embeddings_for_documents(llm_helper_mock, env_help
382399def test_embed_file_stores_documents_in_search_index (
383400 document_chunking_mock ,
384401 llm_helper_mock ,
385- azure_search_helper_mock : MagicMock , env_helper_mock
402+ azure_search_helper_mock : MagicMock ,
403+ env_helper_mock ,
386404):
387405 # given
388406 push_embedder = PushEmbedder (MagicMock (), env_helper_mock )
@@ -404,10 +422,14 @@ def test_embed_file_stores_documents_in_search_index(
404422 AZURE_SEARCH_FIELDS_METADATA : json .dumps (
405423 {
406424 AZURE_SEARCH_FIELDS_ID : expected_chunked_documents [0 ].id ,
407- AZURE_SEARCH_SOURCE_COLUMN : expected_chunked_documents [0 ].source ,
425+ AZURE_SEARCH_SOURCE_COLUMN : expected_chunked_documents [
426+ 0
427+ ].source ,
408428 AZURE_SEARCH_TITLE_COLUMN : expected_chunked_documents [0 ].title ,
409429 AZURE_SEARCH_CHUNK_COLUMN : expected_chunked_documents [0 ].chunk ,
410- AZURE_SEARCH_OFFSET_COLUMN : expected_chunked_documents [0 ].offset ,
430+ AZURE_SEARCH_OFFSET_COLUMN : expected_chunked_documents [
431+ 0
432+ ].offset ,
411433 "page_number" : expected_chunked_documents [0 ].page_number ,
412434 "chunk_id" : expected_chunked_documents [0 ].chunk_id ,
413435 }
@@ -424,10 +446,14 @@ def test_embed_file_stores_documents_in_search_index(
424446 AZURE_SEARCH_FIELDS_METADATA : json .dumps (
425447 {
426448 AZURE_SEARCH_FIELDS_ID : expected_chunked_documents [1 ].id ,
427- AZURE_SEARCH_SOURCE_COLUMN : expected_chunked_documents [1 ].source ,
449+ AZURE_SEARCH_SOURCE_COLUMN : expected_chunked_documents [
450+ 1
451+ ].source ,
428452 AZURE_SEARCH_TITLE_COLUMN : expected_chunked_documents [1 ].title ,
429453 AZURE_SEARCH_CHUNK_COLUMN : expected_chunked_documents [1 ].chunk ,
430- AZURE_SEARCH_OFFSET_COLUMN : expected_chunked_documents [1 ].offset ,
454+ AZURE_SEARCH_OFFSET_COLUMN : expected_chunked_documents [
455+ 1
456+ ].offset ,
431457 "page_number" : expected_chunked_documents [1 ].page_number ,
432458 "chunk_id" : expected_chunked_documents [1 ].chunk_id ,
433459 }
@@ -441,6 +467,30 @@ def test_embed_file_stores_documents_in_search_index(
441467 )
442468
443469
def test_embed_file_stores_documents_in_search_index_in_batches(
    document_chunking_mock,
    llm_helper_mock,
    azure_search_helper_mock: MagicMock,
    env_helper_mock,
):
    """Check that embed_file uploads to the search index once per batch.

    The upload batch size is forced down to 1 so that every document goes
    out in its own ``upload_documents`` call.  Two calls are expected;
    presumably ``document_chunking_mock`` yields two chunked documents
    (verify against the fixture definition).

    ``document_chunking_mock`` and ``llm_helper_mock`` are not referenced in
    the body; they are requested only so pytest activates those fixtures.
    """
    # given
    env_helper_mock.AZURE_SEARCH_DOC_UPLOAD_BATCH_SIZE = 1
    embedder = PushEmbedder(MagicMock(), env_helper_mock)

    # when
    embedder.embed_file("some-url", "some-file-name.pdf")

    # then
    # MagicMock hands back the same child mock on every attribute access, so
    # binding it once here is equivalent to spelling out the chain each time.
    search_client = azure_search_helper_mock.return_value.get_search_client.return_value
    upload_documents = search_client.upload_documents
    upload_documents.assert_called()
    assert upload_documents.call_count == 2
493+
444494def test_embed_file_raises_exception_on_failure (
445495 azure_search_helper_mock ,
446496):
0 commit comments