66)
77from azure .search .documents .indexes .models import (
88 AzureOpenAIEmbeddingSkill ,
9- AzureOpenAIParameters ,
10- AzureOpenAIVectorizer ,
119 FieldMapping ,
1210 IndexProjectionMode ,
1311 InputFieldMappingEntry ,
1412 OutputFieldMappingEntry ,
1513 SearchIndexer ,
1614 SearchIndexerDataContainer ,
1715 SearchIndexerDataSourceConnection ,
18- SearchIndexerIndexProjections ,
16+ SearchIndexerDataSourceType ,
17+ SearchIndexerIndexProjection ,
1918 SearchIndexerIndexProjectionSelector ,
2019 SearchIndexerIndexProjectionsParameters ,
2120 SearchIndexerSkillset ,
@@ -41,16 +40,14 @@ def __init__(
4140 list_file_strategy : ListFileStrategy ,
4241 blob_manager : BlobManager ,
4342 search_info : SearchInfo ,
44- embeddings : Optional [ AzureOpenAIEmbeddingService ] ,
43+ embeddings : AzureOpenAIEmbeddingService ,
4544 subscription_id : str ,
4645 search_service_user_assigned_id : str ,
4746 document_action : DocumentAction = DocumentAction .Add ,
4847 search_analyzer_name : Optional [str ] = None ,
4948 use_acls : bool = False ,
5049 category : Optional [str ] = None ,
5150 ):
52- if not embeddings or not isinstance (embeddings , AzureOpenAIEmbeddingService ):
53- raise Exception ("Expecting AzureOpenAI embedding service" )
5451
5552 self .list_file_strategy = list_file_strategy
5653 self .blob_manager = blob_manager
@@ -67,6 +64,7 @@ async def create_embedding_skill(self, index_name: str):
6764 skillset_name = f"{ index_name } -skillset"
6865
6966 split_skill = SplitSkill (
67+ name = f"{ index_name } -split-skill" ,
7068 description = "Split skill to chunk documents" ,
7169 text_split_mode = "pages" ,
7270 context = "/document" ,
@@ -78,21 +76,21 @@ async def create_embedding_skill(self, index_name: str):
7876 outputs = [OutputFieldMappingEntry (name = "textItems" , target_name = "pages" )],
7977 )
8078
81- if self .embeddings is None :
82- raise ValueError ("Expecting Azure Open AI instance" )
83-
8479 embedding_skill = AzureOpenAIEmbeddingSkill (
80+ name = f"{ index_name } -embedding-skill" ,
8581 description = "Skill to generate embeddings via Azure OpenAI" ,
8682 context = "/document/pages/*" ,
87- resource_uri = f"https://{ self .embeddings .open_ai_service } .openai.azure.com" ,
88- deployment_id = self .embeddings .open_ai_deployment ,
83+ resource_url = f"https://{ self .embeddings .open_ai_service } .openai.azure.com" ,
84+ deployment_name = self .embeddings .open_ai_deployment ,
85+ model_name = self .embeddings .open_ai_model_name ,
86+ dimensions = self .embeddings .open_ai_dimensions ,
8987 inputs = [
9088 InputFieldMappingEntry (name = "text" , source = "/document/pages/*" ),
9189 ],
9290 outputs = [OutputFieldMappingEntry (name = "embedding" , target_name = "vector" )],
9391 )
9492
95- index_projections = SearchIndexerIndexProjections (
93+ index_projection = SearchIndexerIndexProjection (
9694 selectors = [
9795 SearchIndexerIndexProjectionSelector (
9896 target_index_name = index_name ,
@@ -114,12 +112,13 @@ async def create_embedding_skill(self, index_name: str):
114112 name = skillset_name ,
115113 description = "Skillset to chunk documents and generate embeddings" ,
116114 skills = [split_skill , embedding_skill ],
117- index_projections = index_projections ,
115+ index_projection = index_projection ,
118116 )
119117
120118 return skillset
121119
122120 async def setup (self ):
121+ logger .info ("Setting up search index using integrated vectorization..." )
123122 search_manager = SearchManager (
124123 search_info = self .search_info ,
125124 search_analyzer_name = self .search_analyzer_name ,
@@ -129,35 +128,19 @@ async def setup(self):
129128 search_images = False ,
130129 )
131130
132- if self .embeddings is None :
133- raise ValueError ("Expecting Azure Open AI instance" )
134-
135- await search_manager .create_index (
136- vectorizers = [
137- AzureOpenAIVectorizer (
138- name = f"{ self .search_info .index_name } -vectorizer" ,
139- kind = "azureOpenAI" ,
140- azure_open_ai_parameters = AzureOpenAIParameters (
141- resource_uri = f"https://{ self .embeddings .open_ai_service } .openai.azure.com" ,
142- deployment_id = self .embeddings .open_ai_deployment ,
143- ),
144- ),
145- ]
146- )
131+ await search_manager .create_index ()
147132
148- # create indexer client
149133 ds_client = self .search_info .create_search_indexer_client ()
150134 ds_container = SearchIndexerDataContainer (name = self .blob_manager .container )
151135 data_source_connection = SearchIndexerDataSourceConnection (
152136 name = f"{ self .search_info .index_name } -blob" ,
153- type = "azureblob" ,
137+ type = SearchIndexerDataSourceType . AZURE_BLOB ,
154138 connection_string = self .blob_manager .get_managedidentity_connectionstring (),
155139 container = ds_container ,
156140 data_deletion_detection_policy = NativeBlobSoftDeleteDeletionDetectionPolicy (),
157141 )
158142
159143 await ds_client .create_or_update_data_source_connection (data_source_connection )
160- logger .info ("Search indexer data source connection updated." )
161144
162145 embedding_skillset = await self .create_embedding_skill (self .search_info .index_name )
163146 await ds_client .create_or_update_skillset (embedding_skillset )
0 commit comments