diff --git a/integrations/azure_ai_search/src/haystack_integrations/document_stores/azure_ai_search/document_store.py b/integrations/azure_ai_search/src/haystack_integrations/document_stores/azure_ai_search/document_store.py
index bf78739ce8..d9e4b32bf5 100644
--- a/integrations/azure_ai_search/src/haystack_integrations/document_stores/azure_ai_search/document_store.py
+++ b/integrations/azure_ai_search/src/haystack_integrations/document_stores/azure_ai_search/document_store.py
@@ -383,6 +383,57 @@ def delete_documents(self, document_ids: List[str]) -> None:
         if documents:
             self.client.delete_documents(documents)
 
+    def delete_all_documents(self, recreate_index: bool = False) -> None:  # noqa: FBT002, FBT001
+        """
+        Deletes all documents in the document store.
+
+        :param recreate_index: If True, the index will be deleted and recreated with the original schema.
+            If False, all documents will be deleted while preserving the index. In that mode the documents
+            are listed with a single search query capped at 100,000 results, so prefer
+            `recreate_index=True` for larger indexes.
+        :raises HttpResponseError: If the index client is missing or any request to the service fails.
+        """
+        # Azure AI Search accepts at most 1000 actions in a single indexing request.
+        max_batch_size = 1000
+
+        try:
+            if recreate_index:
+                if self._index_client is None:
+                    msg = "Index client is not initialized"
+                    raise ValueError(msg)
+                # The schema must be read before the index is deleted so it can be recreated from it.
+                current_index = self._index_client.get_index(self._index_name)
+
+                self._index_client.delete_index(self._index_name)
+                self._index_client.create_index(current_index)
+                logger.info("Index '{idx_name}' recreated with original schema.", idx_name=self._index_name)
+                return
+
+            if self.count_documents() == 0:
+                return
+
+            # Collect every key before deleting anything, so that deletions cannot interfere with the
+            # paging of the search results. Only the key is kept; the `@search.*` metadata attached to
+            # search results is not part of the document.
+            doc_keys = [
+                {"id": result["id"]}
+                for result in self.client.search(search_text="*", select=["id"], top=100000)
+            ]
+
+            for start in range(0, len(doc_keys), max_batch_size):
+                self.client.delete_documents(doc_keys[start : start + max_batch_size])
+
+            if doc_keys:
+                logger.info(
+                    "Deleted {n_docs} documents from index '{idx_name}'.",
+                    n_docs=len(doc_keys),
+                    idx_name=self._index_name,
+                )
+        except Exception as e:
+            # Any failure, including the ValueError above, is re-raised as HttpResponseError.
+            msg = f"Failed to delete all documents from Azure AI Search: {e!s}"
+            raise HttpResponseError(msg) from e
+
     def get_documents_by_id(self, document_ids: List[str]) -> List[Document]:
         return self._convert_search_result_to_documents(self._get_raw_documents_by_id(document_ids))
 
diff --git a/integrations/azure_ai_search/tests/test_document_store.py b/integrations/azure_ai_search/tests/test_document_store.py
index e215776314..7665af154a 100644
--- a/integrations/azure_ai_search/tests/test_document_store.py
+++ b/integrations/azure_ai_search/tests/test_document_store.py
@@ -291,6 +291,19 @@ def test_write_documents_duplicate_fail(self, document_store: AzureAISearchDocum
     @pytest.mark.skip(reason="Azure AI search index overwrites duplicate documents by default")
     def test_write_documents_duplicate_skip(self, document_store: AzureAISearchDocumentStore): ...
 
+    def test_delete_all_documents(self, document_store: AzureAISearchDocumentStore):
+        docs = [Document(content="first doc"), Document(content="second doc")]
+        document_store.write_documents(docs)
+        assert document_store.count_documents() == 2
+
+        document_store.delete_all_documents()
+        assert document_store.count_documents() == 0
+
+    def test_delete_all_documents_empty_index(self, document_store: AzureAISearchDocumentStore):
+        assert document_store.count_documents() == 0
+        document_store.delete_all_documents()
+        assert document_store.count_documents() == 0
+
 
 def _random_embeddings(n):
     return [round(random.random(), 7) for _ in range(n)]  # nosec: S311