Skip to content

Commit 19ecff1

Browse files
fix: Configured main index search field names in environment variables (#1198)
1 parent a2dc2a1 commit 19ecff1

File tree

17 files changed, with 336 additions and 134 deletions.

.env.sample

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,8 @@ AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG=default
77
AZURE_SEARCH_TOP_K=5
88
AZURE_SEARCH_ENABLE_IN_DOMAIN=False
99
AZURE_SEARCH_FIELDS_ID=id
10-
AZURE_SEARCH_CONTENT_COLUMNS=content
11-
AZURE_SEARCH_CONTENT_VECTOR_COLUMNS=content_vector
10+
AZURE_SEARCH_CONTENT_COLUMN=content
11+
AZURE_SEARCH_CONTENT_VECTOR_COLUMN=content_vector
1212
AZURE_SEARCH_DIMENSIONS=1536
1313
AZURE_SEARCH_FIELDS_TAG=tag
1414
AZURE_SEARCH_FIELDS_METADATA=metadata

code/backend/batch/utilities/helpers/azure_search_helper.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -90,44 +90,44 @@ def image_search_dimensions(self) -> int:
9090
def create_index(self):
9191
fields = [
9292
SimpleField(
93-
name="id",
93+
name=self.env_helper.AZURE_SEARCH_FIELDS_ID,
9494
type=SearchFieldDataType.String,
9595
key=True,
9696
filterable=True,
9797
),
9898
SearchableField(
99-
name="content",
99+
name=self.env_helper.AZURE_SEARCH_CONTENT_COLUMN,
100100
type=SearchFieldDataType.String,
101101
),
102102
SearchField(
103-
name="content_vector",
103+
name=self.env_helper.AZURE_SEARCH_CONTENT_VECTOR_COLUMN,
104104
type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
105105
searchable=True,
106106
vector_search_dimensions=self.search_dimensions,
107107
vector_search_profile_name="myHnswProfile",
108108
),
109109
SearchableField(
110-
name="metadata",
110+
name=self.env_helper.AZURE_SEARCH_FIELDS_METADATA,
111111
type=SearchFieldDataType.String,
112112
),
113113
SearchableField(
114-
name="title",
114+
name=self.env_helper.AZURE_SEARCH_TITLE_COLUMN,
115115
type=SearchFieldDataType.String,
116116
facetable=True,
117117
filterable=True,
118118
),
119119
SearchableField(
120-
name="source",
120+
name=self.env_helper.AZURE_SEARCH_SOURCE_COLUMN,
121121
type=SearchFieldDataType.String,
122122
filterable=True,
123123
),
124124
SimpleField(
125-
name="chunk",
125+
name=self.env_helper.AZURE_SEARCH_CHUNK_COLUMN,
126126
type=SearchFieldDataType.Int32,
127127
filterable=True,
128128
),
129129
SimpleField(
130-
name="offset",
130+
name=self.env_helper.AZURE_SEARCH_OFFSET_COLUMN,
131131
type=SearchFieldDataType.Int32,
132132
filterable=True,
133133
),
@@ -155,7 +155,7 @@ def create_index(self):
155155
name=self.env_helper.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG,
156156
prioritized_fields=SemanticPrioritizedFields(
157157
title_field=None,
158-
content_fields=[SemanticField(field_name="content")],
158+
content_fields=[SemanticField(field_name=self.env_helper.AZURE_SEARCH_CONTENT_COLUMN)],
159159
),
160160
)
161161
]

code/backend/batch/utilities/helpers/embedders/push_embedder.py

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -115,23 +115,23 @@ def __generate_image_caption(self, source_url):
115115
def __convert_to_search_document(self, document: SourceDocument):
116116
embedded_content = self.llm_helper.generate_embeddings(document.content)
117117
metadata = {
118-
"id": document.id,
119-
"source": document.source,
120-
"title": document.title,
121-
"chunk": document.chunk,
122-
"offset": document.offset,
118+
self.env_helper.AZURE_SEARCH_FIELDS_ID: document.id,
119+
self.env_helper.AZURE_SEARCH_SOURCE_COLUMN: document.source,
120+
self.env_helper.AZURE_SEARCH_TITLE_COLUMN: document.title,
121+
self.env_helper.AZURE_SEARCH_CHUNK_COLUMN: document.chunk,
122+
self.env_helper.AZURE_SEARCH_OFFSET_COLUMN: document.offset,
123123
"page_number": document.page_number,
124124
"chunk_id": document.chunk_id,
125125
}
126126
return {
127-
"id": document.id,
128-
"content": document.content,
129-
"content_vector": embedded_content,
130-
"metadata": json.dumps(metadata),
131-
"title": document.title,
132-
"source": document.source,
133-
"chunk": document.chunk,
134-
"offset": document.offset,
127+
self.env_helper.AZURE_SEARCH_FIELDS_ID: document.id,
128+
self.env_helper.AZURE_SEARCH_CONTENT_COLUMN: document.content,
129+
self.env_helper.AZURE_SEARCH_CONTENT_VECTOR_COLUMN: embedded_content,
130+
self.env_helper.AZURE_SEARCH_FIELDS_METADATA: json.dumps(metadata),
131+
self.env_helper.AZURE_SEARCH_TITLE_COLUMN: document.title,
132+
self.env_helper.AZURE_SEARCH_SOURCE_COLUMN: document.source,
133+
self.env_helper.AZURE_SEARCH_CHUNK_COLUMN: document.chunk,
134+
self.env_helper.AZURE_SEARCH_OFFSET_COLUMN: document.offset,
135135
}
136136

137137
def __generate_document_id(self, source_url: str) -> str:

code/backend/batch/utilities/helpers/env_helper.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -53,11 +53,11 @@ def __load_config(self, **kwargs) -> None:
5353
os.getenv("AZURE_SEARCH_ENABLE_IN_DOMAIN", "true").lower() == "true"
5454
)
5555
self.AZURE_SEARCH_FIELDS_ID = os.getenv("AZURE_SEARCH_FIELDS_ID", "id")
56-
self.AZURE_SEARCH_CONTENT_COLUMNS = os.getenv(
57-
"AZURE_SEARCH_CONTENT_COLUMNS", "content"
56+
self.AZURE_SEARCH_CONTENT_COLUMN = os.getenv(
57+
"AZURE_SEARCH_CONTENT_COLUMN", "content"
5858
)
59-
self.AZURE_SEARCH_CONTENT_VECTOR_COLUMNS = os.getenv(
60-
"AZURE_SEARCH_CONTENT_VECTOR_COLUMNS", "content_vector"
59+
self.AZURE_SEARCH_CONTENT_VECTOR_COLUMN = os.getenv(
60+
"AZURE_SEARCH_CONTENT_VECTOR_COLUMN", "content_vector"
6161
)
6262
self.AZURE_SEARCH_DIMENSIONS = os.getenv("AZURE_SEARCH_DIMENSIONS", "1536")
6363
self.AZURE_SEARCH_FILENAME_COLUMN = os.getenv(
@@ -69,6 +69,9 @@ def __load_config(self, **kwargs) -> None:
6969
self.AZURE_SEARCH_FIELDS_METADATA = os.getenv(
7070
"AZURE_SEARCH_FIELDS_METADATA", "metadata"
7171
)
72+
self.AZURE_SEARCH_SOURCE_COLUMN = os.getenv("AZURE_SEARCH_SOURCE_COLUMN", "source")
73+
self.AZURE_SEARCH_CHUNK_COLUMN = os.getenv("AZURE_SEARCH_CHUNK_COLUMN", "chunk")
74+
self.AZURE_SEARCH_OFFSET_COLUMN = os.getenv("AZURE_SEARCH_OFFSET_COLUMN", "offset")
7275
self.AZURE_SEARCH_CONVERSATIONS_LOG_INDEX = os.getenv(
7376
"AZURE_SEARCH_CONVERSATIONS_LOG_INDEX", "conversations"
7477
)

code/create_app.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -127,12 +127,12 @@ def conversation_with_data(conversation: Request, env_helper: EnvHelper):
127127
"index_name": env_helper.AZURE_SEARCH_INDEX,
128128
"fields_mapping": {
129129
"content_fields": (
130-
env_helper.AZURE_SEARCH_CONTENT_COLUMNS.split("|")
131-
if env_helper.AZURE_SEARCH_CONTENT_COLUMNS
130+
env_helper.AZURE_SEARCH_CONTENT_COLUMN.split("|")
131+
if env_helper.AZURE_SEARCH_CONTENT_COLUMN
132132
else []
133133
),
134134
"vector_fields": [
135-
env_helper.AZURE_SEARCH_CONTENT_VECTOR_COLUMNS
135+
env_helper.AZURE_SEARCH_CONTENT_VECTOR_COLUMN
136136
],
137137
"title_field": env_helper.AZURE_SEARCH_TITLE_COLUMN or None,
138138
"url_field": env_helper.AZURE_SEARCH_URL_COLUMN or None,

code/tests/functional/app_config.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,8 +38,8 @@ class AppConfig:
3838
"AZURE_OPENAI_TOP_P": "1.0",
3939
"AZURE_RESOURCE_GROUP": "some-resource-group",
4040
"AZURE_SEARCH_CONVERSATIONS_LOG_INDEX": "some-log-index",
41-
"AZURE_SEARCH_CONTENT_COLUMNS": "content",
42-
"AZURE_SEARCH_CONTENT_VECTOR_COLUMNS": "some-search-content-vector-columns",
41+
"AZURE_SEARCH_CONTENT_COLUMN": "content",
42+
"AZURE_SEARCH_CONTENT_VECTOR_COLUMN": "some-search-content-vector-columns",
4343
"AZURE_SEARCH_DIMENSIONS": "some-search-dimensions",
4444
"AZURE_SEARCH_ENABLE_IN_DOMAIN": "True",
4545
"AZURE_SEARCH_FIELDS_ID": "some-search-fields-id",
@@ -53,6 +53,9 @@ class AppConfig:
5353
"AZURE_SEARCH_SERVICE": "some-azure-search-service",
5454
"AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG": "some-search-semantic-search-config",
5555
"AZURE_SEARCH_TITLE_COLUMN": "title",
56+
"AZURE_SEARCH_CHUNK_COLUMN": "chunk",
57+
"AZURE_SEARCH_SOURCE_COLUMN": "source",
58+
"AZURE_SEARCH_OFFSET_COLUMN": "offset",
5659
"AZURE_SEARCH_TOP_K": "5",
5760
"AZURE_SEARCH_URL_COLUMN": "url",
5861
"AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION": "False",

code/tests/functional/tests/backend_api/default/test_conversation.py

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
)
1111
from tests.functional.app_config import AppConfig
1212

13+
1314
pytestmark = pytest.mark.functional
1415

1516
path = "/api/conversation"
@@ -365,7 +366,7 @@ def test_post_makes_correct_call_to_create_documents_search_index(
365366
"name": app_config.get("AZURE_SEARCH_INDEX"),
366367
"fields": [
367368
{
368-
"name": "id",
369+
"name": app_config.get("AZURE_SEARCH_FIELDS_ID"),
369370
"type": "Edm.String",
370371
"key": True,
371372
"retrievable": True,
@@ -375,7 +376,7 @@ def test_post_makes_correct_call_to_create_documents_search_index(
375376
"facetable": False,
376377
},
377378
{
378-
"name": "content",
379+
"name": app_config.get("AZURE_SEARCH_CONTENT_COLUMN"),
379380
"type": "Edm.String",
380381
"key": False,
381382
"retrievable": True,
@@ -385,14 +386,14 @@ def test_post_makes_correct_call_to_create_documents_search_index(
385386
"facetable": False,
386387
},
387388
{
388-
"name": "content_vector",
389+
"name": app_config.get("AZURE_SEARCH_CONTENT_VECTOR_COLUMN"),
389390
"type": "Collection(Edm.Single)",
390391
"searchable": True,
391392
"dimensions": 2,
392393
"vectorSearchProfile": "myHnswProfile",
393394
},
394395
{
395-
"name": "metadata",
396+
"name": app_config.get("AZURE_SEARCH_FIELDS_METADATA"),
396397
"type": "Edm.String",
397398
"key": False,
398399
"retrievable": True,
@@ -402,7 +403,7 @@ def test_post_makes_correct_call_to_create_documents_search_index(
402403
"facetable": False,
403404
},
404405
{
405-
"name": "title",
406+
"name": app_config.get("AZURE_SEARCH_TITLE_COLUMN"),
406407
"type": "Edm.String",
407408
"key": False,
408409
"retrievable": True,
@@ -412,7 +413,7 @@ def test_post_makes_correct_call_to_create_documents_search_index(
412413
"facetable": True,
413414
},
414415
{
415-
"name": "source",
416+
"name": app_config.get("AZURE_SEARCH_SOURCE_COLUMN"),
416417
"type": "Edm.String",
417418
"key": False,
418419
"retrievable": True,
@@ -422,7 +423,7 @@ def test_post_makes_correct_call_to_create_documents_search_index(
422423
"facetable": False,
423424
},
424425
{
425-
"name": "chunk",
426+
"name": app_config.get("AZURE_SEARCH_CHUNK_COLUMN"),
426427
"type": "Edm.Int32",
427428
"key": False,
428429
"retrievable": True,
@@ -432,7 +433,7 @@ def test_post_makes_correct_call_to_create_documents_search_index(
432433
"facetable": False,
433434
},
434435
{
435-
"name": "offset",
436+
"name": app_config.get("AZURE_SEARCH_OFFSET_COLUMN"),
436437
"type": "Edm.Int32",
437438
"key": False,
438439
"retrievable": True,
@@ -456,7 +457,8 @@ def test_post_makes_correct_call_to_create_documents_search_index(
456457
"AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG"
457458
),
458459
"prioritizedFields": {
459-
"prioritizedContentFields": [{"fieldName": "content"}]
460+
"prioritizedContentFields":
461+
[{"fieldName": app_config.get("AZURE_SEARCH_CONTENT_COLUMN")}]
460462
},
461463
}
462464
]

code/tests/functional/tests/backend_api/with_byod/test_conversation_flow.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,7 @@ def test_post_makes_correct_call_to_azure_openai(
114114
"content_fields": ["content"],
115115
"vector_fields": [
116116
app_config.get(
117-
"AZURE_SEARCH_CONTENT_VECTOR_COLUMNS"
117+
"AZURE_SEARCH_CONTENT_VECTOR_COLUMN"
118118
)
119119
],
120120
"title_field": "title",

code/tests/functional/tests/functions/advanced_image_processing/test_advanced_image_processing.py

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -296,7 +296,7 @@ def test_makes_correct_call_to_create_documents_search_index(
296296
"name": app_config.get("AZURE_SEARCH_INDEX"),
297297
"fields": [
298298
{
299-
"name": "id",
299+
"name": app_config.get("AZURE_SEARCH_FIELDS_ID"),
300300
"type": "Edm.String",
301301
"key": True,
302302
"retrievable": True,
@@ -306,7 +306,7 @@ def test_makes_correct_call_to_create_documents_search_index(
306306
"facetable": False,
307307
},
308308
{
309-
"name": "content",
309+
"name": app_config.get("AZURE_SEARCH_CONTENT_COLUMN"),
310310
"type": "Edm.String",
311311
"key": False,
312312
"retrievable": True,
@@ -316,14 +316,14 @@ def test_makes_correct_call_to_create_documents_search_index(
316316
"facetable": False,
317317
},
318318
{
319-
"name": "content_vector",
319+
"name": app_config.get("AZURE_SEARCH_CONTENT_VECTOR_COLUMN"),
320320
"type": "Collection(Edm.Single)",
321321
"searchable": True,
322322
"dimensions": 2,
323323
"vectorSearchProfile": "myHnswProfile",
324324
},
325325
{
326-
"name": "metadata",
326+
"name": app_config.get("AZURE_SEARCH_FIELDS_METADATA"),
327327
"type": "Edm.String",
328328
"key": False,
329329
"retrievable": True,
@@ -333,7 +333,7 @@ def test_makes_correct_call_to_create_documents_search_index(
333333
"facetable": False,
334334
},
335335
{
336-
"name": "title",
336+
"name": app_config.get("AZURE_SEARCH_TITLE_COLUMN"),
337337
"type": "Edm.String",
338338
"key": False,
339339
"retrievable": True,
@@ -343,7 +343,7 @@ def test_makes_correct_call_to_create_documents_search_index(
343343
"facetable": True,
344344
},
345345
{
346-
"name": "source",
346+
"name": app_config.get("AZURE_SEARCH_SOURCE_COLUMN"),
347347
"type": "Edm.String",
348348
"key": False,
349349
"retrievable": True,
@@ -353,7 +353,7 @@ def test_makes_correct_call_to_create_documents_search_index(
353353
"facetable": False,
354354
},
355355
{
356-
"name": "chunk",
356+
"name": app_config.get("AZURE_SEARCH_CHUNK_COLUMN"),
357357
"type": "Edm.Int32",
358358
"key": False,
359359
"retrievable": True,
@@ -363,7 +363,7 @@ def test_makes_correct_call_to_create_documents_search_index(
363363
"facetable": False,
364364
},
365365
{
366-
"name": "offset",
366+
"name": app_config.get("AZURE_SEARCH_OFFSET_COLUMN"),
367367
"type": "Edm.Int32",
368368
"key": False,
369369
"retrievable": True,
@@ -387,7 +387,8 @@ def test_makes_correct_call_to_create_documents_search_index(
387387
"AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG"
388388
),
389389
"prioritizedFields": {
390-
"prioritizedContentFields": [{"fieldName": "content"}]
390+
"prioritizedContentFields":
391+
[{"fieldName": app_config.get("AZURE_SEARCH_CONTENT_COLUMN")}]
391392
},
392393
}
393394
]

code/tests/test_app.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,8 @@
2222
AZURE_SEARCH_KEY = "mock-search-key"
2323
AZURE_SEARCH_INDEX = "mock-search-index"
2424
AZURE_SEARCH_SERVICE = "mock-search-service"
25-
AZURE_SEARCH_CONTENT_COLUMNS = "field1|field2"
26-
AZURE_SEARCH_CONTENT_VECTOR_COLUMNS = "vector-column"
25+
AZURE_SEARCH_CONTENT_COLUMN = "field1|field2"
26+
AZURE_SEARCH_CONTENT_VECTOR_COLUMN = "vector-column"
2727
AZURE_SEARCH_TITLE_COLUMN = "title"
2828
AZURE_SEARCH_FILENAME_COLUMN = "filename"
2929
AZURE_SEARCH_URL_COLUMN = "url"
@@ -68,9 +68,9 @@ def env_helper_mock():
6868
env_helper.AZURE_OPENAI_STOP_SEQUENCE = AZURE_OPENAI_STOP_SEQUENCE
6969
env_helper.AZURE_SEARCH_INDEX = AZURE_SEARCH_INDEX
7070
env_helper.AZURE_SEARCH_SERVICE = AZURE_SEARCH_SERVICE
71-
env_helper.AZURE_SEARCH_CONTENT_COLUMNS = AZURE_SEARCH_CONTENT_COLUMNS
72-
env_helper.AZURE_SEARCH_CONTENT_VECTOR_COLUMNS = (
73-
AZURE_SEARCH_CONTENT_VECTOR_COLUMNS
71+
env_helper.AZURE_SEARCH_CONTENT_COLUMN = AZURE_SEARCH_CONTENT_COLUMN
72+
env_helper.AZURE_SEARCH_CONTENT_VECTOR_COLUMN = (
73+
AZURE_SEARCH_CONTENT_VECTOR_COLUMN
7474
)
7575
env_helper.AZURE_SEARCH_TITLE_COLUMN = AZURE_SEARCH_TITLE_COLUMN
7676
env_helper.AZURE_SEARCH_FILENAME_COLUMN = AZURE_SEARCH_FILENAME_COLUMN
@@ -617,7 +617,7 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_with_da
617617
"index_name": AZURE_SEARCH_INDEX,
618618
"fields_mapping": {
619619
"content_fields": ["field1", "field2"],
620-
"vector_fields": [AZURE_SEARCH_CONTENT_VECTOR_COLUMNS],
620+
"vector_fields": [AZURE_SEARCH_CONTENT_VECTOR_COLUMN],
621621
"title_field": AZURE_SEARCH_TITLE_COLUMN,
622622
"url_field": AZURE_SEARCH_URL_COLUMN,
623623
"filepath_field": AZURE_SEARCH_FILENAME_COLUMN,

0 commit comments

Comments
 (0)