|
25 | 25 | SynonymMap, |
26 | 26 | SplitSkill, |
27 | 27 | SearchIndexerIndexProjections, |
| 28 | + BlobIndexerParsingMode, |
28 | 29 | ) |
29 | 30 | from azure.core.exceptions import HttpResponseError |
30 | 31 | from azure.search.documents.indexes import SearchIndexerClient, SearchIndexClient |
@@ -66,12 +67,16 @@ def __init__( |
66 | 67 | self.environment = AISearchEnvironment(indexer_type=self.indexer_type) |
67 | 68 |
|
68 | 69 | self._search_indexer_client = SearchIndexerClient( |
69 | | - self.environment.ai_search_endpoint, self.environment.ai_search_credential |
| 70 | + endpoint=self.environment.ai_search_endpoint, |
| 71 | + credential=self.environment.ai_search_credential, |
70 | 72 | ) |
71 | 73 | self._search_index_client = SearchIndexClient( |
72 | | - self.environment.ai_search_endpoint, self.environment.ai_search_credential |
| 74 | + endpoint=self.environment.ai_search_endpoint, |
| 75 | + credential=self.environment.ai_search_credential, |
73 | 76 | ) |
74 | 77 |
|
| 78 | + self.parsing_mode = BlobIndexerParsingMode.DEFAULT |
| 79 | + |
75 | 80 | @property |
76 | 81 | def indexer_name(self): |
77 | 82 | """Get the indexer name for the indexer.""" |
@@ -156,7 +161,16 @@ def get_data_source(self) -> SearchIndexerDataSourceConnection: |
156 | 161 | if self.get_indexer() is None: |
157 | 162 | return None |
158 | 163 |
|
159 | | - data_deletion_detection_policy = NativeBlobSoftDeleteDeletionDetectionPolicy() |
| 164 | + if self.parsing_mode in [ |
| 165 | + BlobIndexerParsingMode.DEFAULT, |
| 166 | + BlobIndexerParsingMode.TEXT, |
| 167 | + BlobIndexerParsingMode.JSON, |
| 168 | + ]: |
| 169 | + data_deletion_detection_policy = ( |
| 170 | + NativeBlobSoftDeleteDeletionDetectionPolicy() |
| 171 | + ) |
| 172 | + else: |
| 173 | + data_deletion_detection_policy = None |
160 | 174 |
|
161 | 175 | data_change_detection_policy = HighWaterMarkChangeDetectionPolicy( |
162 | 176 | high_water_mark_column_name="metadata_storage_last_modified" |
@@ -268,6 +282,10 @@ def get_text_split_skill(self, context, source) -> SplitSkill: |
268 | 282 | def get_adi_skill(self, chunk_by_page=False) -> WebApiSkill: |
269 | 283 | """Get the custom skill for adi. |
270 | 284 |
|
| 285 | + Args: |
| 286 | + ----- |
| 287 | + chunk_by_page (bool, optional): Whether to chunk by page. Defaults to False. |
| 288 | +
|
271 | 289 | Returns: |
272 | 290 | -------- |
273 | 291 | WebApiSkill: The custom skill for adi""" |
@@ -528,6 +546,11 @@ def run_indexer(self): |
528 | 546 |
|
529 | 547 | def reset_indexer(self): |
530 | 548 | """This function resets the indexer.""" |
| 549 | + |
| 550 | + if self.get_indexer() is None: |
| 551 | + logging.warning("Indexer not defined. Skipping reset operation.") |
| 552 | + |
| 553 | + return |
531 | 554 | self._search_indexer_client.reset_indexer(self.indexer_name) |
532 | 555 |
|
533 | 556 | logging.info("%s reset.", self.indexer_name) |
|
0 commit comments