@@ -6,16 +6,15 @@
 )
 from azure.search.documents.indexes.models import (
     AzureOpenAIEmbeddingSkill,
-    AzureOpenAIParameters,
-    AzureOpenAIVectorizer,
     FieldMapping,
     IndexProjectionMode,
     InputFieldMappingEntry,
     OutputFieldMappingEntry,
     SearchIndexer,
     SearchIndexerDataContainer,
     SearchIndexerDataSourceConnection,
-    SearchIndexerIndexProjections,
+    SearchIndexerDataSourceType,
+    SearchIndexerIndexProjection,
     SearchIndexerIndexProjectionSelector,
     SearchIndexerIndexProjectionsParameters,
     SearchIndexerSkillset,
@@ -41,16 +40,14 @@ def __init__(
         list_file_strategy: ListFileStrategy,
         blob_manager: BlobManager,
         search_info: SearchInfo,
-        embeddings: Optional[AzureOpenAIEmbeddingService],
+        embeddings: AzureOpenAIEmbeddingService,
         subscription_id: str,
         search_service_user_assigned_id: str,
         document_action: DocumentAction = DocumentAction.Add,
         search_analyzer_name: Optional[str] = None,
         use_acls: bool = False,
         category: Optional[str] = None,
     ):
-        if not embeddings or not isinstance(embeddings, AzureOpenAIEmbeddingService):
-            raise Exception("Expecting AzureOpenAI embedding service")
 
         self.list_file_strategy = list_file_strategy
         self.blob_manager = blob_manager
@@ -67,6 +64,7 @@ async def create_embedding_skill(self, index_name: str):
         skillset_name = f"{index_name}-skillset"
 
         split_skill = SplitSkill(
+            name=f"{index_name}-split-skill",
             description="Split skill to chunk documents",
             text_split_mode="pages",
             context="/document",
@@ -78,21 +76,21 @@ async def create_embedding_skill(self, index_name: str):
             outputs=[OutputFieldMappingEntry(name="textItems", target_name="pages")],
         )
 
-        if self.embeddings is None:
-            raise ValueError("Expecting Azure Open AI instance")
-
         embedding_skill = AzureOpenAIEmbeddingSkill(
+            name=f"{index_name}-embedding-skill",
             description="Skill to generate embeddings via Azure OpenAI",
             context="/document/pages/*",
-            resource_uri=f"https://{self.embeddings.open_ai_service}.openai.azure.com",
-            deployment_id=self.embeddings.open_ai_deployment,
+            resource_url=f"https://{self.embeddings.open_ai_service}.openai.azure.com",
+            deployment_name=self.embeddings.open_ai_deployment,
+            model_name=self.embeddings.open_ai_model_name,
+            dimensions=self.embeddings.open_ai_dimensions,
             inputs=[
                 InputFieldMappingEntry(name="text", source="/document/pages/*"),
             ],
             outputs=[OutputFieldMappingEntry(name="embedding", target_name="vector")],
         )
 
-        index_projections = SearchIndexerIndexProjections(
+        index_projection = SearchIndexerIndexProjection(
             selectors=[
                 SearchIndexerIndexProjectionSelector(
                     target_index_name=index_name,
@@ -114,12 +112,13 @@ async def create_embedding_skill(self, index_name: str):
             name=skillset_name,
             description="Skillset to chunk documents and generate embeddings",
             skills=[split_skill, embedding_skill],
-            index_projections=index_projections,
+            index_projection=index_projection,
         )
 
         return skillset
 
     async def setup(self):
+        logger.info("Setting up search index using integrated vectorization...")
         search_manager = SearchManager(
             search_info=self.search_info,
             search_analyzer_name=self.search_analyzer_name,
@@ -129,35 +128,19 @@ async def setup(self):
             search_images=False,
         )
 
-        if self.embeddings is None:
-            raise ValueError("Expecting Azure Open AI instance")
-
-        await search_manager.create_index(
-            vectorizers=[
-                AzureOpenAIVectorizer(
-                    name=f"{self.search_info.index_name}-vectorizer",
-                    kind="azureOpenAI",
-                    azure_open_ai_parameters=AzureOpenAIParameters(
-                        resource_uri=f"https://{self.embeddings.open_ai_service}.openai.azure.com",
-                        deployment_id=self.embeddings.open_ai_deployment,
-                    ),
-                ),
-            ]
-        )
+        await search_manager.create_index()
 
-        # create indexer client
         ds_client = self.search_info.create_search_indexer_client()
         ds_container = SearchIndexerDataContainer(name=self.blob_manager.container)
        data_source_connection = SearchIndexerDataSourceConnection(
             name=f"{self.search_info.index_name}-blob",
-            type="azureblob",
+            type=SearchIndexerDataSourceType.AZURE_BLOB,
             connection_string=self.blob_manager.get_managedidentity_connectionstring(),
             container=ds_container,
             data_deletion_detection_policy=NativeBlobSoftDeleteDeletionDetectionPolicy(),
         )
 
         await ds_client.create_or_update_data_source_connection(data_source_connection)
-        logger.info("Search indexer data source connection updated.")
 
         embedding_skillset = await self.create_embedding_skill(self.search_info.index_name)
         await ds_client.create_or_update_skillset(embedding_skillset)
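
For context, the vectorizer configuration that this change removes from setup() is presumably now supplied inside SearchManager.create_index() itself, using the renamed models from newer azure-search-documents releases (AzureOpenAIParameters becomes AzureOpenAIVectorizerParameters, with resource_url and deployment_name fields). The sketch below is a hypothetical illustration of that shape, not code from this PR; the service, deployment, and vectorizer names are placeholder assumptions.

# Hypothetical sketch: declaring the index vectorizer with the renamed SDK models.
from azure.search.documents.indexes.models import (
    AzureOpenAIVectorizer,
    AzureOpenAIVectorizerParameters,
)

# Placeholder values standing in for self.embeddings.open_ai_service / open_ai_deployment.
open_ai_service = "my-openai-service"
open_ai_deployment = "text-embedding-3-large"

vectorizer = AzureOpenAIVectorizer(
    # Assumed naming convention, mirroring the removed f"{index_name}-vectorizer".
    vectorizer_name="gptkbindex-vectorizer",
    parameters=AzureOpenAIVectorizerParameters(
        resource_url=f"https://{open_ai_service}.openai.azure.com",
        deployment_name=open_ai_deployment,
        model_name="text-embedding-3-large",
    ),
)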