Skip to content

Commit d4e40b8

Browse files
committed
Update integrated vectorization
1 parent e99f6e2 commit d4e40b8

File tree

4 files changed

+27
-24
lines changed

4 files changed

+27
-24
lines changed

app/backend/prepdocslib/integratedvectorizerstrategy.py

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,15 @@
66
)
77
from azure.search.documents.indexes.models import (
88
AzureOpenAIEmbeddingSkill,
9-
AzureOpenAIParameters,
10-
AzureOpenAIVectorizer,
119
FieldMapping,
1210
IndexProjectionMode,
1311
InputFieldMappingEntry,
1412
OutputFieldMappingEntry,
1513
SearchIndexer,
1614
SearchIndexerDataContainer,
1715
SearchIndexerDataSourceConnection,
18-
SearchIndexerIndexProjections,
16+
SearchIndexerDataSourceType,
17+
SearchIndexerIndexProjection,
1918
SearchIndexerIndexProjectionSelector,
2019
SearchIndexerIndexProjectionsParameters,
2120
SearchIndexerSkillset,
@@ -67,6 +66,7 @@ async def create_embedding_skill(self, index_name: str):
6766
skillset_name = f"{index_name}-skillset"
6867

6968
split_skill = SplitSkill(
69+
name=f"{index_name}-split-skill",
7070
description="Split skill to chunk documents",
7171
text_split_mode="pages",
7272
context="/document",
@@ -82,17 +82,20 @@ async def create_embedding_skill(self, index_name: str):
8282
raise ValueError("Expecting Azure Open AI instance")
8383

8484
embedding_skill = AzureOpenAIEmbeddingSkill(
85+
name=f"{index_name}-embedding-skill",
8586
description="Skill to generate embeddings via Azure OpenAI",
8687
context="/document/pages/*",
87-
resource_uri=f"https://{self.embeddings.open_ai_service}.openai.azure.com",
88-
deployment_id=self.embeddings.open_ai_deployment,
88+
resource_url=f"https://{self.embeddings.open_ai_service}.openai.azure.com",
89+
deployment_name=self.embeddings.open_ai_deployment,
90+
model_name=self.embeddings.open_ai_model_name,
91+
dimensions=self.embeddings.open_ai_dimensions,
8992
inputs=[
9093
InputFieldMappingEntry(name="text", source="/document/pages/*"),
9194
],
9295
outputs=[OutputFieldMappingEntry(name="embedding", target_name="vector")],
9396
)
9497

95-
index_projections = SearchIndexerIndexProjections(
98+
index_projection = SearchIndexerIndexProjection(
9699
selectors=[
97100
SearchIndexerIndexProjectionSelector(
98101
target_index_name=index_name,
@@ -114,7 +117,7 @@ async def create_embedding_skill(self, index_name: str):
114117
name=skillset_name,
115118
description="Skillset to chunk documents and generate embeddings",
116119
skills=[split_skill, embedding_skill],
117-
index_projections=index_projections,
120+
index_projection=index_projection,
118121
)
119122

120123
return skillset
@@ -132,25 +135,14 @@ async def setup(self):
132135
if self.embeddings is None:
133136
raise ValueError("Expecting Azure Open AI instance")
134137

135-
await search_manager.create_index(
136-
vectorizers=[
137-
AzureOpenAIVectorizer(
138-
name=f"{self.search_info.index_name}-vectorizer",
139-
kind="azureOpenAI",
140-
azure_open_ai_parameters=AzureOpenAIParameters(
141-
resource_uri=f"https://{self.embeddings.open_ai_service}.openai.azure.com",
142-
deployment_id=self.embeddings.open_ai_deployment,
143-
),
144-
),
145-
]
146-
)
138+
await search_manager.create_index()
147139

148140
# create indexer client
149141
ds_client = self.search_info.create_search_indexer_client()
150142
ds_container = SearchIndexerDataContainer(name=self.blob_manager.container)
151143
data_source_connection = SearchIndexerDataSourceConnection(
152144
name=f"{self.search_info.index_name}-blob",
153-
type="azureblob",
145+
type=SearchIndexerDataSourceType.AZURE_BLOB,
154146
connection_string=self.blob_manager.get_managedidentity_connectionstring(),
155147
container=ds_container,
156148
data_deletion_detection_policy=NativeBlobSoftDeleteDeletionDetectionPolicy(),

app/backend/prepdocslib/searchmanager.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
from typing import List, Optional
55

66
from azure.search.documents.indexes.models import (
7+
AzureOpenAIVectorizer,
8+
AzureOpenAIVectorizerParameters,
79
HnswAlgorithmConfiguration,
810
HnswParameters,
911
SearchableField,
@@ -174,12 +176,21 @@ async def create_index(self, vectorizers: Optional[List[VectorSearchVectorizer]]
174176
VectorSearchProfile(
175177
name="embedding_config",
176178
algorithm_configuration_name="hnsw_config",
177-
vectorizer=(
179+
vectorizer_name=(
178180
f"{self.search_info.index_name}-vectorizer" if self.use_int_vectorization else None
179181
),
180182
),
181183
],
182-
vectorizers=vectorizers,
184+
vectorizers=[
185+
AzureOpenAIVectorizer(
186+
vectorizer_name=f"{self.search_info.index_name}-vectorizer",
187+
parameters=AzureOpenAIVectorizerParameters(
188+
resource_url=f"https://{self.embeddings.open_ai_service}.openai.azure.com",
189+
deployment_name=self.embeddings.open_ai_deployment,
190+
model_name=self.embeddings.open_ai_model_name,
191+
),
192+
),
193+
],
183194
),
184195
)
185196
if self.search_info.index_name not in [name async for name in search_index_client.list_index_names()]:

app/backend/requirements.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ tiktoken
77
tenacity
88
azure-ai-documentintelligence
99
azure-cognitiveservices-speech
10-
azure-search-documents==11.6.0b5
10+
azure-search-documents==11.6.0b6
1111
azure-storage-blob
1212
azure-storage-file-datalake
1313
uvicorn

app/backend/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ azure-monitor-opentelemetry==1.6.1
5252
# via -r requirements.in
5353
azure-monitor-opentelemetry-exporter==1.0.0b28
5454
# via azure-monitor-opentelemetry
55-
azure-search-documents==11.6.0b1
55+
azure-search-documents==11.6.0b6
5656
# via -r requirements.in
5757
azure-storage-blob==12.22.0
5858
# via

0 commit comments

Comments
 (0)