Skip to content

Commit be744e5

Browse files
committed
Update rag examples
1 parent 2521e76 commit be744e5

File tree

4 files changed

+60
-16
lines changed

4 files changed

+60
-16
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
2+
FunctionApp__Endpoint=<FunctionAppEndpoint>
3+
FunctionApp__Key=<FunctionAppKey>
4+
FunctionApp__LayoutAnalysis__FunctionName=layout_analysis
5+
FunctionApp__FigureAnalysis__FunctionName=figure_analysis
6+
FunctionApp__LayoutAndFigureMerger__FunctionName=layout_and_figure_merger
7+
FunctionApp__MarkUpCleaner__FunctionName=mark_up_cleaner
8+
FunctionApp__SemanticTextChunker__FunctionName=semantic_text_chunker
9+
FunctionApp__AppRegistrationResourceId=<App registration in form api://appRegistrationclientId if using identity based connections>
10+
IdentityType=key # system_assigned or user_assigned or key
11+
AIService__AzureSearchOptions__Endpoint=<AzureSearchEndpoint>
12+
AIService__AzureSearchOptions__Identity__ClientId=<clientId if using user assigned identity>
13+
AIService__AzureSearchOptions__Key=<AzureSearchKey>
14+
AIService__AzureSearchOptions__UsePrivateEndpoint=false
15+
AIService__AzureSearchOptions__Identity__FQName=<fully qualified name of the identity if using user assigned identity>
16+
StorageAccount__FQEndpoint=<Fully qualified endpoint in form ResourceId=resourceId if using identity based connections>
17+
StorageAccount__ConnectionString=<StorageAccountConnectionString>
18+
OpenAI__ApiKey=<OpenAIKey>
19+
OpenAI__Endpoint=<OpenAIEndpoint>
20+
OpenAI__EmbeddingModel="text-embedding-ada-002"
21+
OpenAI__EmbeddingDeployment="text-embedding-ada-002"
22+
OpenAI__EmbeddingDimensions=1536
23+
Text2Sql__DatabaseEngine=<DatabaseEngine>
24+
StorageAccount__RagDocuments__Container=<RagDocumentsContainer>
25+
StorageAccount__Text2SqlQueryCache__Container=<Text2SqlQueryCacheContainer>
26+
StorageAccount__Text2SqlSchemaStore__Container=<Text2SqlSchemaStoreContainer>
27+
StorageAccount__Text2SqlColumnValueStore__Container=<Text2SqlColumnValueStoreContainer>

deploy_ai_search/src/deploy_ai_search/ai_search.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ def get_mark_up_cleaner_skill(self, context, source) -> WebApiSkill:
253253

254254
return mark_up_cleaner_skill
255255

256-
def get_text_split_skill(
256+
def get_semantic_chunker_skill(
257257
self,
258258
context,
259259
source,
@@ -292,7 +292,7 @@ def get_text_split_skill(
292292
]
293293

294294
semantic_text_chunker_skill = WebApiSkill(
295-
name="Mark Up Cleaner Skill",
295+
name="Semantic Chunker Skill",
296296
description="Skill to semantically chunk the data before sending to embedding",
297297
context=context,
298298
uri=self.environment.get_custom_skill_function_url("semantic_text_chunker"),

deploy_ai_search/src/deploy_ai_search/environment.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ def function_app_mark_up_cleaner_route(self) -> str:
210210
"""
211211
This function returns function app data cleanup function name
212212
"""
213-
return os.environ.get("FunctionApp__PreEmbeddingCleaner__FunctionName")
213+
return os.environ.get("FunctionApp__MarkUpCleaner__FunctionName")
214214

215215
@property
216216
def function_app_semantic_text_chunker_route(self) -> str:
@@ -220,18 +220,25 @@ def function_app_semantic_text_chunker_route(self) -> str:
220220
return os.environ.get("FunctionApp__SemanticTextChunker__FunctionName")
221221

222222
@property
223-
def function_app_adi_route(self) -> str:
223+
def function_app_layout_analysis_route(self) -> str:
224224
"""
225225
This function returns function app layout analysis name
226226
"""
227-
return os.environ.get("FunctionApp__ADI__FunctionName")
227+
return os.environ.get("FunctionApp__LayoutAnalysis__FunctionName")
228228

229229
@property
230-
def function_app_key_phrase_extractor_route(self) -> str:
230+
def function_app_figure_analysis_route(self) -> str:
231231
"""
232-
This function returns function app keyphrase extractor name
232+
This function returns function app figure analysis name
233233
"""
234-
return os.environ.get("FunctionApp__KeyPhraseExtractor__FunctionName")
234+
return os.environ.get("FunctionApp__FigureAnalysis__FunctionName")
235+
236+
@property
237+
def function_app_layout_and_figure_merger_route(self) -> str:
238+
"""
239+
This function returns function app layout and figure merger name
240+
"""
241+
return os.environ.get("FunctionApp__LayoutAndFigureMerger__FunctionName")
235242

236243
@property
237244
def open_ai_embedding_dimensions(self) -> str:
@@ -260,10 +267,12 @@ def get_custom_skill_function_url(self, skill_type: str):
260267
"""
261268
if skill_type == "mark_up_cleaner":
262269
route = self.function_app_mark_up_cleaner_route
263-
elif skill_type == "adi":
264-
route = self.function_app_adi_route
265-
elif skill_type == "key_phrase_extraction":
266-
route = self.function_app_key_phrase_extractor_route
270+
elif skill_type == "layout_analysis":
271+
route = self.function_app_layout_analysis_route
272+
elif skill_type == "figure_analysis":
273+
route = self.function_app_figure_analysis_route
274+
elif skill_type == "layout_and_figure_merger":
275+
route = self.function_app_layout_and_figure_merger_route
267276
elif skill_type == "semantic_text_chunker":
268277
route = self.function_app_semantic_text_chunker_route
269278
else:

deploy_ai_search/src/deploy_ai_search/rag_documents.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -179,9 +179,13 @@ def get_skills(self) -> list:
179179
Returns:
180180
list: The skillsets used in the indexer"""
181181

182-
adi_skill = self.get_adi_skill(self.enable_page_by_chunking)
182+
layout_skill = self.get_layout_analysis_skill(self.enable_page_by_chunking)
183183

184-
text_split_skill = self.get_text_split_skill(
184+
figure_skill = self.get_figure_analysis_skill()
185+
186+
merger_skill = self.get_layout_and_figure_merger_skill()
187+
188+
text_split_skill = self.get_semantic_chunker_skill(
185189
"/document", "/document/extracted_content/content"
186190
)
187191

@@ -195,13 +199,17 @@ def get_skills(self) -> list:
195199

196200
if self.enable_page_by_chunking:
197201
skills = [
198-
adi_skill,
202+
layout_skill,
203+
figure_skill,
204+
merger_skill,
199205
mark_up_cleaner_skill,
200206
embedding_skill,
201207
]
202208
else:
203209
skills = [
204-
adi_skill,
210+
layout_skill,
211+
figure_skill,
212+
merger_skill,
205213
text_split_skill,
206214
mark_up_cleaner_skill,
207215
embedding_skill,

0 commit comments

Comments
 (0)