Skip to content

Commit 13e4cd8

Browse files
authored
[Backend/Prepdocs] Update to latest version of search SDK (#1010)
* Update to latest version of search
* Prefilter is default for new index
* Prefilter is default for new index
* Fix typing issue
1 parent f6e8729 commit 13e4cd8

File tree

7 files changed

+30
-33
lines changed

7 files changed

+30
-33
lines changed

app/backend/approaches/chatreadretrieveread.py

Lines changed: 5 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
import aiohttp
77
import openai
88
from azure.search.documents.aio import SearchClient
9-
from azure.search.documents.models import QueryType
9+
from azure.search.documents.models import QueryType, RawVectorQuery, VectorQuery
1010

1111
from approaches.approach import Approach
1212
from core.messagebuilder import MessageBuilder
@@ -143,12 +143,12 @@ async def run_until_final_call(
143143
# STEP 2: Retrieve relevant documents from the search index with the GPT optimized query
144144

145145
# If retrieval mode includes vectors, compute an embedding for the query
146+
vectors: list[VectorQuery] = []
146147
if has_vector:
147148
embedding_args = {"deployment_id": self.embedding_deployment} if self.openai_host == "azure" else {}
148149
embedding = await openai.Embedding.acreate(**embedding_args, model=self.embedding_model, input=query_text)
149150
query_vector = embedding["data"][0]["embedding"]
150-
else:
151-
query_vector = None
151+
vectors.append(RawVectorQuery(vector=query_vector, k=50, fields="embedding"))
152152

153153
# Only keep the text query if the retrieval mode uses text, otherwise drop it
154154
if not has_text:
@@ -165,19 +165,10 @@ async def run_until_final_call(
165165
semantic_configuration_name="default",
166166
top=top,
167167
query_caption="extractive|highlight-false" if use_semantic_captions else None,
168-
vector=query_vector,
169-
top_k=50 if query_vector else None,
170-
vector_fields="embedding" if query_vector else None,
168+
vector_queries=vectors,
171169
)
172170
else:
173-
r = await self.search_client.search(
174-
query_text,
175-
filter=filter,
176-
top=top,
177-
vector=query_vector,
178-
top_k=50 if query_vector else None,
179-
vector_fields="embedding" if query_vector else None,
180-
)
171+
r = await self.search_client.search(query_text, filter=filter, top=top, vector_queries=vectors)
181172
if use_semantic_captions:
182173
results = [
183174
doc[self.sourcepage_field] + ": " + nonewlines(" . ".join([c.text for c in doc["@search.captions"]]))

app/backend/approaches/retrievethenread.py

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
import openai
44
from azure.search.documents.aio import SearchClient
5-
from azure.search.documents.models import QueryType
5+
from azure.search.documents.models import QueryType, RawVectorQuery, VectorQuery
66

77
from approaches.approach import Approach
88
from core.messagebuilder import MessageBuilder
@@ -78,12 +78,12 @@ async def run(
7878
filter = self.build_filter(overrides, auth_claims)
7979

8080
# If retrieval mode includes vectors, compute an embedding for the query
81+
vectors: list[VectorQuery] = []
8182
if has_vector:
8283
embedding_args = {"deployment_id": self.embedding_deployment} if self.openai_host == "azure" else {}
8384
embedding = await openai.Embedding.acreate(**embedding_args, model=self.embedding_model, input=q)
8485
query_vector = embedding["data"][0]["embedding"]
85-
else:
86-
query_vector = None
86+
vectors.append(RawVectorQuery(vector=query_vector, k=50, fields="embedding"))
8787

8888
# Only keep the text query if the retrieval mode uses text, otherwise drop it
8989
query_text = q if has_text else ""
@@ -99,18 +99,14 @@ async def run(
9999
semantic_configuration_name="default",
100100
top=top,
101101
query_caption="extractive|highlight-false" if use_semantic_captions else None,
102-
vector=query_vector,
103-
top_k=50 if query_vector else None,
104-
vector_fields="embedding" if query_vector else None,
102+
vector_queries=vectors,
105103
)
106104
else:
107105
r = await self.search_client.search(
108106
query_text,
109107
filter=filter,
110108
top=top,
111-
vector=query_vector,
112-
top_k=50 if query_vector else None,
113-
vector_fields="embedding" if query_vector else None,
109+
vector_queries=vectors,
114110
)
115111
if use_semantic_captions:
116112
results = [

app/backend/requirements.in

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ quart
33
quart-cors
44
openai[datalib]==0.28.1
55
tiktoken
6-
azure-search-documents==11.4.0b6
6+
azure-search-documents==11.4.0b11
77
azure-storage-blob
88
uvicorn
99
aiohttp

app/backend/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ azure-monitor-opentelemetry==1.1.0
3737
# via -r requirements.in
3838
azure-monitor-opentelemetry-exporter==1.0.0b18
3939
# via azure-monitor-opentelemetry
40-
azure-search-documents==11.4.0b6
40+
azure-search-documents==11.4.0b11
4141
# via -r requirements.in
4242
azure-storage-blob==12.19.0
4343
# via -r requirements.in

scripts/prepdocslib/searchmanager.py

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
from azure.search.documents.indexes.models import (
66
HnswParameters,
7+
HnswVectorSearchAlgorithmConfiguration,
78
PrioritizedFields,
89
SearchableField,
910
SearchField,
@@ -14,7 +15,8 @@
1415
SemanticSettings,
1516
SimpleField,
1617
VectorSearch,
17-
VectorSearchAlgorithmConfiguration,
18+
VectorSearchAlgorithmKind,
19+
VectorSearchProfile,
1820
)
1921

2022
from .blobmanager import BlobManager
@@ -70,7 +72,7 @@ async def create_index(self):
7072
sortable=False,
7173
facetable=False,
7274
vector_search_dimensions=1536,
73-
vector_search_configuration="default",
75+
vector_search_profile="embedding_config",
7476
),
7577
SimpleField(name="category", type="Edm.String", filterable=True, facetable=True),
7678
SimpleField(name="sourcepage", type="Edm.String", filterable=True, facetable=True),
@@ -102,11 +104,19 @@ async def create_index(self):
102104
]
103105
),
104106
vector_search=VectorSearch(
105-
algorithm_configurations=[
106-
VectorSearchAlgorithmConfiguration(
107-
name="default", kind="hnsw", hnsw_parameters=HnswParameters(metric="cosine")
107+
algorithms=[
108+
HnswVectorSearchAlgorithmConfiguration(
109+
name="hnsw_config",
110+
kind=VectorSearchAlgorithmKind.HNSW,
111+
parameters=HnswParameters(metric="cosine"),
108112
)
109-
]
113+
],
114+
profiles=[
115+
VectorSearchProfile(
116+
name="embedding_config",
117+
algorithm="hnsw_config",
118+
),
119+
],
110120
),
111121
)
112122
if self.search_info.index_name not in [name async for name in search_index_client.list_index_names()]:

scripts/requirements.in

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
pypdf
22
azure-identity
3-
azure-search-documents==11.4.0b6
3+
azure-search-documents==11.4.0b11
44
azure-ai-formrecognizer
55
azure-storage-blob
66
azure-storage-file-datalake

scripts/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ azure-core==1.29.5
2828
# msrest
2929
azure-identity==1.15.0
3030
# via -r requirements.in
31-
azure-search-documents==11.4.0b6
31+
azure-search-documents==11.4.0b11
3232
# via -r requirements.in
3333
azure-storage-blob==12.19.0
3434
# via

0 commit comments

Comments
 (0)