6
6
from azure .search .documents .indexes .models import (
7
7
AzureOpenAIVectorizer ,
8
8
AzureOpenAIVectorizerParameters ,
9
+ BinaryQuantizationCompression ,
9
10
HnswAlgorithmConfiguration ,
10
11
HnswParameters ,
12
+ RescoringOptions ,
11
13
SearchableField ,
12
14
SearchField ,
13
15
SearchFieldDataType ,
18
20
SemanticSearch ,
19
21
SimpleField ,
20
22
VectorSearch ,
23
+ VectorSearchCompressionRescoreStorageMethod ,
21
24
VectorSearchProfile ,
22
- VectorSearchVectorizer ,
23
25
)
24
26
25
27
from .blobmanager import BlobManager
@@ -69,11 +71,44 @@ def __init__(
69
71
self .embedding_field = embedding_field
70
72
self .search_images = search_images
71
73
72
- async def create_index (self , vectorizers : Optional [ List [ VectorSearchVectorizer ]] = None ):
74
+ async def create_index (self ):
73
75
logger .info ("Checking whether search index %s exists..." , self .search_info .index_name )
74
76
75
77
async with self .search_info .create_search_index_client () as search_index_client :
76
78
79
+ vectorizer = None
80
+ embedding_field = None
81
+ if self .embeddings and isinstance (self .embeddings , AzureOpenAIEmbeddingService ):
82
+ vectorizer = AzureOpenAIVectorizer (
83
+ vectorizer_name = f"{ self .search_info .index_name } -vectorizer" ,
84
+ parameters = AzureOpenAIVectorizerParameters (
85
+ resource_url = self .embeddings .open_ai_endpoint ,
86
+ deployment_name = self .embeddings .open_ai_deployment ,
87
+ model_name = self .embeddings .open_ai_model_name ,
88
+ ),
89
+ )
90
+ if self .embeddings :
91
+ if self .embedding_dimensions is None :
92
+ raise ValueError (
93
+ "Embedding dimensions must be set in order to add an embedding field to the search index"
94
+ )
95
+ if self .embedding_field is None :
96
+ raise ValueError (
97
+ "Embedding field must be set in order to add an embedding field to the search index"
98
+ )
99
+ embedding_field = SearchField (
100
+ name = self .embedding_field ,
101
+ type = SearchFieldDataType .Collection (SearchFieldDataType .Single ),
102
+ hidden = True ,
103
+ searchable = True ,
104
+ filterable = False ,
105
+ sortable = False ,
106
+ facetable = False ,
107
+ vector_search_dimensions = self .embedding_dimensions ,
108
+ vector_search_profile_name = "embedding_config" ,
109
+ stored = False ,
110
+ )
111
+
77
112
if self .search_info .index_name not in [name async for name in search_index_client .list_index_names ()]:
78
113
logger .info ("Creating new search index %s" , self .search_info .index_name )
79
114
fields = [
@@ -95,17 +130,6 @@ async def create_index(self, vectorizers: Optional[List[VectorSearchVectorizer]]
95
130
type = "Edm.String" ,
96
131
analyzer_name = self .search_analyzer_name ,
97
132
),
98
- SearchField (
99
- name = self .embedding_field ,
100
- type = SearchFieldDataType .Collection (SearchFieldDataType .Single ),
101
- hidden = False ,
102
- searchable = True ,
103
- filterable = False ,
104
- sortable = False ,
105
- facetable = False ,
106
- vector_search_dimensions = self .embedding_dimensions ,
107
- vector_search_profile_name = "embedding_config" ,
108
- ),
109
133
SimpleField (name = "category" , type = "Edm.String" , filterable = True , facetable = True ),
110
134
SimpleField (
111
135
name = "sourcepage" ,
@@ -160,27 +184,50 @@ async def create_index(self, vectorizers: Optional[List[VectorSearchVectorizer]]
160
184
),
161
185
)
162
186
163
- vectorizers = []
164
- if self .embeddings and isinstance (self .embeddings , AzureOpenAIEmbeddingService ):
165
- logger .info (
166
- "Including vectorizer for search index %s, using Azure OpenAI service %s" ,
167
- self .search_info .index_name ,
168
- self .embeddings .open_ai_service ,
169
- )
170
- vectorizers .append (
171
- AzureOpenAIVectorizer (
172
- vectorizer_name = f"{ self .search_info .index_name } -vectorizer" ,
173
- parameters = AzureOpenAIVectorizerParameters (
174
- resource_url = self .embeddings .open_ai_endpoint ,
175
- deployment_name = self .embeddings .open_ai_deployment ,
176
- model_name = self .embeddings .open_ai_model_name ,
177
- ),
187
+ vector_search = None
188
+ if self .embeddings :
189
+ logger .info ("Including embedding field in new index %s" , self .search_info .index_name )
190
+ fields .append (embedding_field )
191
+
192
+ vectorizers = []
193
+ if vectorizer is not None :
194
+ logger .info ("Including vectorizer in new index %s" , self .search_info .index_name )
195
+ vectorizers .append (vectorizer )
196
+ else :
197
+ logger .info (
198
+ "New index %s will not have vectorizer, since no Azure OpenAI service is set" ,
199
+ self .search_info .index_name ,
178
200
)
179
- )
180
- else :
181
- logger .info (
182
- "Not including vectorizer for search index %s, no Azure OpenAI service found" ,
183
- self .search_info .index_name ,
201
+
202
+ vector_search = VectorSearch (
203
+ profiles = [
204
+ VectorSearchProfile (
205
+ name = "embedding_config" ,
206
+ algorithm_configuration_name = "hnsw_config" ,
207
+ compression_name = "binary-quantization" ,
208
+ ** ({"vectorizer_name" : vectorizer .vectorizer_name if vectorizer else None }),
209
+ ),
210
+ ],
211
+ algorithms = [
212
+ HnswAlgorithmConfiguration (
213
+ name = "hnsw_config" ,
214
+ parameters = HnswParameters (metric = "cosine" ),
215
+ )
216
+ ],
217
+ vectorizers = vectorizers ,
218
+ compressions = [
219
+ BinaryQuantizationCompression (
220
+ compression_name = "binary-quantization" ,
221
+ rescoring_options = RescoringOptions (
222
+ enable_rescoring = True ,
223
+ default_oversampling = 10 ,
224
+ rescore_storage_method = VectorSearchCompressionRescoreStorageMethod .PRESERVE_ORIGINALS ,
225
+ ),
226
+ # Explicitly set deprecated parameters to None
227
+ rerank_with_original_vectors = None ,
228
+ default_oversampling = None ,
229
+ )
230
+ ],
184
231
)
185
232
186
233
index = SearchIndex (
@@ -196,22 +243,7 @@ async def create_index(self, vectorizers: Optional[List[VectorSearchVectorizer]]
196
243
)
197
244
]
198
245
),
199
- vector_search = VectorSearch (
200
- algorithms = [
201
- HnswAlgorithmConfiguration (
202
- name = "hnsw_config" ,
203
- parameters = HnswParameters (metric = "cosine" ),
204
- )
205
- ],
206
- profiles = [
207
- VectorSearchProfile (
208
- name = "embedding_config" ,
209
- algorithm_configuration_name = "hnsw_config" ,
210
- vectorizer_name = (f"{ self .search_info .index_name } -vectorizer" ),
211
- ),
212
- ],
213
- vectorizers = vectorizers ,
214
- ),
246
+ vector_search = vector_search ,
215
247
)
216
248
217
249
await search_index_client .create_index (index )
@@ -229,45 +261,23 @@ async def create_index(self, vectorizers: Optional[List[VectorSearchVectorizer]]
229
261
),
230
262
)
231
263
await search_index_client .create_or_update_index (existing_index )
232
- # check if embedding field exists
233
- if not any (field .name == self .embedding_field for field in existing_index .fields ):
264
+ # check if embedding field exists - TODO: will this really work if we haven't redefined vector search?
265
+ if self . embeddings and not any (field .name == self .embedding_field for field in existing_index .fields ):
234
266
logger .info ("Adding embedding field to index %s" , self .search_info .index_name )
235
- existing_index .fields .append (
236
- SearchField (
237
- name = self .embedding_field ,
238
- type = SearchFieldDataType .Collection (SearchFieldDataType .Single ),
239
- hidden = False ,
240
- searchable = True ,
241
- filterable = False ,
242
- sortable = False ,
243
- facetable = False ,
244
- # TODO: use optimizations here
245
- vector_search_dimensions = self .embedding_dimensions ,
246
- vector_search_profile_name = "embedding_config" ,
247
- ),
248
- )
267
+ existing_index .fields .append (embedding_field )
249
268
await search_index_client .create_or_update_index (existing_index )
250
269
if existing_index .vector_search is not None and (
251
270
existing_index .vector_search .vectorizers is None
252
271
or len (existing_index .vector_search .vectorizers ) == 0
253
272
):
254
273
if self .embeddings is not None and isinstance (self .embeddings , AzureOpenAIEmbeddingService ):
255
274
logger .info ("Adding vectorizer to search index %s" , self .search_info .index_name )
256
- existing_index .vector_search .vectorizers = [
257
- AzureOpenAIVectorizer (
258
- vectorizer_name = f"{ self .search_info .index_name } -vectorizer" ,
259
- parameters = AzureOpenAIVectorizerParameters (
260
- resource_url = self .embeddings .open_ai_endpoint ,
261
- deployment_name = self .embeddings .open_ai_deployment ,
262
- model_name = self .embeddings .open_ai_model_name ,
263
- ),
264
- )
265
- ]
275
+ existing_index .vector_search .vectorizers = [vectorizer ]
266
276
await search_index_client .create_or_update_index (existing_index )
267
277
else :
268
278
logger .info (
269
- "Can't add vectorizer to search index %s since no Azure OpenAI embeddings service is defined " ,
270
- self .search_info ,
279
+ "Search index %s will not have vectorizer, since no Azure OpenAI service is set " ,
280
+ self .search_info . index_name ,
271
281
)
272
282
273
283
async def update_content (
0 commit comments