Skip to content

Commit f09e05f

Browse files
Prasanjeet-MicrosoftRoopan-MicrosoftAjitPadhi-MicrosoftPavan-Microsoftross-p-smith
authored
fix: Enhance Azure AI Search Integration and Refactor E2E Automation (#1843)
Signed-off-by: dependabot[bot] <[email protected]> Co-authored-by: Roopan-Microsoft <[email protected]> Co-authored-by: Ajit Padhi <[email protected]> Co-authored-by: Roopan P M <[email protected]> Co-authored-by: Pavan-Microsoft <[email protected]> Co-authored-by: Ross Smith <[email protected]> Co-authored-by: gpickett <[email protected]> Co-authored-by: Francia Riesco <[email protected]> Co-authored-by: Francia Riesco <[email protected]> Co-authored-by: Prajwal D C <[email protected]> Co-authored-by: Harmanpreet-Microsoft <[email protected]> Co-authored-by: UtkarshMishra-Microsoft <[email protected]> Co-authored-by: Priyanka-Microsoft <[email protected]> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Kiran-Siluveru-Microsoft <[email protected]> Co-authored-by: Prashant-Microsoft <[email protected]> Co-authored-by: Rohini-Microsoft <[email protected]> Co-authored-by: Avijit-Microsoft <[email protected]> Co-authored-by: RaviKiran-Microsoft <[email protected]> Co-authored-by: Somesh Joshi <[email protected]> Co-authored-by: Himanshi Agrawal <[email protected]> Co-authored-by: pradeepjha-microsoft <[email protected]> Co-authored-by: Harmanpreet Kaur <[email protected]> Co-authored-by: Bangarraju-Microsoft <[email protected]> Co-authored-by: Harsh-Microsoft <[email protected]> Co-authored-by: Kanchan-Microsoft <[email protected]> Co-authored-by: Cristopher Coronado <[email protected]> Co-authored-by: Cristopher Coronado Moreira <[email protected]> Co-authored-by: Vamshi-Microsoft <[email protected]> Co-authored-by: Thanusree-Microsoft <[email protected]> Co-authored-by: Niraj Chaudhari (Persistent Systems Inc) <[email protected]> Co-authored-by: Rohini-Microsoft <[email protected]>
1 parent 1e6b271 commit f09e05f

File tree

22 files changed

+522
-266
lines changed

22 files changed

+522
-266
lines changed

code/backend/batch/utilities/helpers/azure_search_helper.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,25 @@ def create_index(self):
132132
filterable=True,
133133
),
134134
]
135+
if self.env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION:
136+
logger.info("Adding 'text' field for integrated vectorization.")
137+
fields.append(
138+
SearchableField(
139+
name=self.env_helper.AZURE_SEARCH_TEXT_COLUMN,
140+
type=SearchFieldDataType.String,
141+
filterable=False,
142+
sortable=False,
143+
)
144+
)
145+
logger.info("Adding 'layoutText' field for integrated vectorization.")
146+
fields.append(
147+
SearchableField(
148+
name=self.env_helper.AZURE_SEARCH_LAYOUT_TEXT_COLUMN,
149+
type=SearchFieldDataType.String,
150+
filterable=False,
151+
sortable=False,
152+
)
153+
)
135154

136155
if self.env_helper.USE_ADVANCED_IMAGE_PROCESSING:
137156
logger.info("Adding image_vector field to index")
@@ -274,7 +293,7 @@ def get_conversation_logger(self):
274293
embedding_function=self.llm_helper.get_embedding_model().embed_query,
275294
fields=fields,
276295
user_agent="langchain chatwithyourdata-sa",
277-
credential=credential # Add token credential or send none so it is auto handled by AzureSearch library
296+
credential=credential, # Add token credential or send none so it is auto handled by AzureSearch library
278297
)
279298
else:
280299
return AzureSearch(

code/backend/batch/utilities/helpers/env_helper.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ def __load_config(self, **kwargs) -> None:
7676
self.AZURE_SEARCH_SOURCE_COLUMN = os.getenv(
7777
"AZURE_SEARCH_SOURCE_COLUMN", "source"
7878
)
79+
self.AZURE_SEARCH_TEXT_COLUMN = os.getenv("AZURE_SEARCH_TEXT_COLUMN", "text")
80+
self.AZURE_SEARCH_LAYOUT_TEXT_COLUMN = os.getenv(
81+
"AZURE_SEARCH_LAYOUT_TEXT_COLUMN", "layoutText"
82+
)
7983
self.AZURE_SEARCH_CHUNK_COLUMN = os.getenv("AZURE_SEARCH_CHUNK_COLUMN", "chunk")
8084
self.AZURE_SEARCH_OFFSET_COLUMN = os.getenv(
8185
"AZURE_SEARCH_OFFSET_COLUMN", "offset"
@@ -173,9 +177,7 @@ def __load_config(self, **kwargs) -> None:
173177
self.AZURE_OPENAI_MODEL_NAME = azure_openai_model_info.get("modelName", "")
174178
else:
175179
# Otherwise, fallback to individual environment variables
176-
self.AZURE_OPENAI_MODEL = os.getenv(
177-
"AZURE_OPENAI_MODEL", "gpt-4.1"
178-
)
180+
self.AZURE_OPENAI_MODEL = os.getenv("AZURE_OPENAI_MODEL", "gpt-4.1")
179181
self.AZURE_OPENAI_MODEL_NAME = os.getenv(
180182
"AZURE_OPENAI_MODEL_NAME", "gpt-4.1"
181183
)

code/backend/batch/utilities/integrated_vectorization/azure_search_index.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,26 @@ def create_or_update_index(self):
9999
),
100100
]
101101

102+
if self.env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION:
103+
logger.info("Adding `text` field for integrated vectorization.")
104+
fields.append(
105+
SearchableField(
106+
name="text",
107+
type=SearchFieldDataType.String,
108+
filterable=False,
109+
sortable=False,
110+
)
111+
)
112+
logger.info("Adding `layoutText` field for integrated vectorization.")
113+
fields.append(
114+
SearchableField(
115+
name="layoutText",
116+
type=SearchFieldDataType.String,
117+
filterable=False,
118+
sortable=False,
119+
)
120+
)
121+
102122
vector_search = self.get_vector_search_config()
103123

104124
semantic_search = self.get_semantic_search_config()

code/create_app.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,11 @@ def conversation_with_data(conversation: Request, env_helper: EnvHelper):
209209
env_helper.AZURE_SEARCH_CONTENT_VECTOR_COLUMN
210210
],
211211
"title_field": env_helper.AZURE_SEARCH_TITLE_COLUMN or None,
212+
"source_field": env_helper.AZURE_SEARCH_SOURCE_COLUMN
213+
or None,
214+
"text_field": env_helper.AZURE_SEARCH_TEXT_COLUMN or None,
215+
"layoutText_field": env_helper.AZURE_SEARCH_LAYOUT_TEXT_COLUMN
216+
or None,
212217
"url_field": env_helper.AZURE_SEARCH_FIELDS_METADATA
213218
or None,
214219
"filepath_field": (

code/tests/functional/tests/backend_api/with_byod/test_conversation_flow.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,9 @@ def test_post_makes_correct_call_to_azure_openai(
147147
"AZURE_SEARCH_FIELDS_METADATA"
148148
),
149149
"filepath_field": "filepath",
150+
"source_field": "source",
151+
"text_field": "text",
152+
"layoutText_field": "layoutText",
150153
},
151154
"filter": app_config.get("AZURE_SEARCH_FILTER"),
152155
"in_scope": True,

code/tests/functional/tests/functions/integrated_vectorization/test_integrated_vectorization_resource_creation.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,9 @@ def test_integrated_vectorization_datasouce_created(
9999
"credentials": {
100100
"connectionString": f"DefaultEndpointsProtocol=https;AccountName={app_config.get_from_json('AZURE_BLOB_STORAGE_INFO','accountName')};AccountKey={app_config.get_from_json('AZURE_BLOB_STORAGE_INFO','accountKey')};EndpointSuffix=core.windows.net"
101101
},
102-
"container": {"name": f"{app_config.get_from_json('AZURE_BLOB_STORAGE_INFO','containerName')}"},
102+
"container": {
103+
"name": f"{app_config.get_from_json('AZURE_BLOB_STORAGE_INFO','containerName')}"
104+
},
103105
"dataDeletionDetectionPolicy": {
104106
"@odata.type": "#Microsoft.Azure.Search.NativeBlobSoftDeleteDeletionDetectionPolicy"
105107
},
@@ -210,6 +212,26 @@ def test_integrated_vectorization_index_created(
210212
"facetable": True,
211213
"analyzer": "keyword",
212214
},
215+
{
216+
"name": "text",
217+
"type": "Edm.String",
218+
"key": False,
219+
"retrievable": True,
220+
"searchable": True,
221+
"filterable": False,
222+
"sortable": False,
223+
"facetable": False,
224+
},
225+
{
226+
"name": "layoutText",
227+
"type": "Edm.String",
228+
"key": False,
229+
"retrievable": True,
230+
"searchable": True,
231+
"filterable": False,
232+
"sortable": False,
233+
"facetable": False,
234+
},
213235
],
214236
"semantic": {
215237
"configurations": [

code/tests/test_app.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@
2525
AZURE_SEARCH_CONTENT_COLUMN = "field1|field2"
2626
AZURE_SEARCH_CONTENT_VECTOR_COLUMN = "vector-column"
2727
AZURE_SEARCH_TITLE_COLUMN = "title"
28+
AZURE_SEARCH_SOURCE_COLUMN = "source"
29+
AZURE_SEARCH_TEXT_COLUMN = "text"
30+
AZURE_SEARCH_LAYOUT_TEXT_COLUMN = "layoutText"
2831
AZURE_SEARCH_FILENAME_COLUMN = "filename"
2932
AZURE_SEARCH_URL_COLUMN = "metadata"
3033
AZURE_SEARCH_FILTER = "filter"
@@ -73,6 +76,9 @@ def env_helper_mock():
7376
AZURE_SEARCH_CONTENT_VECTOR_COLUMN
7477
)
7578
env_helper.AZURE_SEARCH_TITLE_COLUMN = AZURE_SEARCH_TITLE_COLUMN
79+
env_helper.AZURE_SEARCH_SOURCE_COLUMN = AZURE_SEARCH_SOURCE_COLUMN
80+
env_helper.AZURE_SEARCH_TEXT_COLUMN = AZURE_SEARCH_TEXT_COLUMN
81+
env_helper.AZURE_SEARCH_LAYOUT_TEXT_COLUMN = AZURE_SEARCH_LAYOUT_TEXT_COLUMN
7682
env_helper.AZURE_SEARCH_FILENAME_COLUMN = AZURE_SEARCH_FILENAME_COLUMN
7783
env_helper.AZURE_SEARCH_URL_COLUMN = AZURE_SEARCH_URL_COLUMN
7884
env_helper.AZURE_SEARCH_FILTER = AZURE_SEARCH_FILTER
@@ -684,6 +690,9 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_with_da
684690
"title_field": AZURE_SEARCH_TITLE_COLUMN,
685691
"url_field": env_helper_mock.AZURE_SEARCH_FIELDS_METADATA,
686692
"filepath_field": AZURE_SEARCH_FILENAME_COLUMN,
693+
"source_field": AZURE_SEARCH_SOURCE_COLUMN,
694+
"text_field": AZURE_SEARCH_TEXT_COLUMN,
695+
"layoutText_field": AZURE_SEARCH_LAYOUT_TEXT_COLUMN,
687696
},
688697
"filter": AZURE_SEARCH_FILTER,
689698
"in_scope": AZURE_SEARCH_ENABLE_IN_DOMAIN,

code/tests/utilities/helpers/test_azure_search_helper.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
AZURE_SEARCH_TITLE_COLUMN = "mock-title"
3333
AZURE_SEARCH_FIELDS_METADATA = "mock-metadata"
3434
AZURE_SEARCH_SOURCE_COLUMN = "mock-source"
35+
AZURE_SEARCH_TEXT_COLUMN = "mock-text"
36+
AZURE_SEARCH_LAYOUT_TEXT_COLUMN = "mock-layout-text"
3537
AZURE_SEARCH_CHUNK_COLUMN = "mock-chunk"
3638
AZURE_SEARCH_OFFSET_COLUMN = "mock-offset"
3739
AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG = "default"
@@ -78,6 +80,8 @@ def env_helper_mock():
7880
env_helper.AZURE_SEARCH_TITLE_COLUMN = AZURE_SEARCH_TITLE_COLUMN
7981
env_helper.AZURE_SEARCH_FIELDS_METADATA = AZURE_SEARCH_FIELDS_METADATA
8082
env_helper.AZURE_SEARCH_SOURCE_COLUMN = AZURE_SEARCH_SOURCE_COLUMN
83+
env_helper.AZURE_SEARCH_TEXT_COLUMN = AZURE_SEARCH_TEXT_COLUMN
84+
env_helper.AZURE_SEARCH_LAYOUT_TEXT_COLUMN = AZURE_SEARCH_LAYOUT_TEXT_COLUMN
8185
env_helper.AZURE_SEARCH_CHUNK_COLUMN = AZURE_SEARCH_CHUNK_COLUMN
8286
env_helper.AZURE_SEARCH_OFFSET_COLUMN = AZURE_SEARCH_OFFSET_COLUMN
8387
env_helper.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG = (
@@ -232,6 +236,16 @@ def test_creates_search_index_if_not_exists(
232236
type=SearchFieldDataType.Int32,
233237
filterable=True,
234238
),
239+
SearchableField(
240+
name=AZURE_SEARCH_TEXT_COLUMN,
241+
type=SearchFieldDataType.String,
242+
filterable=False,
243+
),
244+
SearchableField(
245+
name=AZURE_SEARCH_LAYOUT_TEXT_COLUMN,
246+
type=SearchFieldDataType.String,
247+
filterable=False,
248+
),
235249
]
236250

237251
expected_index = SearchIndex(

docs/LOCAL_DEPLOYMENT.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,9 @@ Execute the above [shell command](#L81) to run the function locally. You may nee
186186
|AZURE_SEARCH_FIELDS_ID|id|`AZURE_SEARCH_FIELDS_ID`: Field from your Azure AI Search index that gives a unique identifier of the document chunk. `id` if you don't have a specific requirement.|
187187
|AZURE_SEARCH_FILENAME_COLUMN||`AZURE_SEARCH_FILENAME_COLUMN`: Field from your Azure AI Search index that gives a unique identifier of the source of your data to display in the UI.|
188188
|AZURE_SEARCH_TITLE_COLUMN||Field from your Azure AI Search index that gives a relevant title or header for your data content to display in the UI.|
189+
|AZURE_SEARCH_SOURCE_COLUMN|source|Field from your Azure AI Search index that identifies the source of your data. `source` if you don't have a specific requirement.|
190+
|AZURE_SEARCH_TEXT_COLUMN|text|Field from your Azure AI Search index that contains the main text content of your documents. `text` if you don't have a specific requirement.|
191+
|AZURE_SEARCH_LAYOUT_TEXT_COLUMN|layoutText|Field from your Azure AI Search index that contains the layout-aware text content of your documents. `layoutText` if you don't have a specific requirement.|
189192
|AZURE_SEARCH_URL_COLUMN||Field from your Azure AI Search index that contains a URL for the document, e.g. an Azure Blob Storage URI. This value is not currently used.|
190193
|AZURE_SEARCH_FIELDS_TAG|tag|Field from your Azure AI Search index that contains tags for the document. `tag` if you don't have a specific requirement.|
191194
|AZURE_SEARCH_FIELDS_METADATA|metadata|Field from your Azure AI Search index that contains metadata for the document. `metadata` if you don't have a specific requirement.|

docs/TEAMS_LOCAL_DEPLOYMENT.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ Or use the [Azure Functions VS Code extension](https://marketplace.visualstudio.
5959
|AZURE_SEARCH_FIELDS_ID|id|`AZURE_SEARCH_FIELDS_ID`: Field from your Azure AI Search index that gives a unique identifier of the document chunk. `id` if you don't have a specific requirement.|
6060
|AZURE_SEARCH_FILENAME_COLUMN||`AZURE_SEARCH_FILENAME_COLUMN`: Field from your Azure AI Search index that gives a unique identifier of the source of your data to display in the UI.|
6161
|AZURE_SEARCH_TITLE_COLUMN||Field from your Azure AI Search index that gives a relevant title or header for your data content to display in the UI.|
62+
|AZURE_SEARCH_SOURCE_COLUMN|source|Field from your Azure AI Search index that identifies the source of your data. `source` if you don't have a specific requirement.|
63+
|AZURE_SEARCH_TEXT_COLUMN|text|Field from your Azure AI Search index that contains the main text content of your documents. `text` if you don't have a specific requirement.|
64+
|AZURE_SEARCH_LAYOUT_TEXT_COLUMN|layoutText|Field from your Azure AI Search index that contains the layout-aware text content of your documents. `layoutText` if you don't have a specific requirement.|
6265
|AZURE_SEARCH_URL_COLUMN||Field from your Azure AI Search index that contains a URL for the document, e.g. an Azure Blob Storage URI. This value is not currently used.|
6366
|AZURE_SEARCH_FIELDS_TAG|tag|Field from your Azure AI Search index that contains tags for the document. `tag` if you don't have a specific requirement.|
6467
|AZURE_SEARCH_FIELDS_METADATA|metadata|Field from your Azure AI Search index that contains metadata for the document. `metadata` if you don't have a specific requirement.|

0 commit comments

Comments
 (0)