@@ -201,19 +201,19 @@ def delete(self, collection_name: str, **kwargs: Any):
201201 raise ValueError (f"Collection { collection_name } does not exist." )
202202 self ._opensearch_client .indices .delete (index = collection_name )
203203
def is_empty(self, collection_name: str) -> bool:
    """Return True when the index ``collection_name`` contains no documents.

    The document total is read from the ``"count"`` field of the client's
    ``count`` response for that index.
    """
    doc_total = self._opensearch_client.count(index=collection_name)["count"]
    return doc_total == 0
207207
def collection_exists(self, collection_name: str) -> bool:
    """Return whether an index named ``collection_name`` exists.

    The answer is whatever the client's ``indices.exists`` call reports.
    """
    index_api = self._opensearch_client.indices
    return index_api.exists(index=collection_name)
210210
def list_all_collection(self) -> list:
    """Return the names of all indices known to OpenSearch.

    The names are the top-level keys of the ``indices.get_alias`` response.
    """
    alias_map = self._opensearch_client.indices.get_alias()
    return [*alias_map.keys()]
215215
216- def get_all_docs (self , collection_name : str , size : int = 10000 ):
216+ def get_all_docs (self , collection_name : str , size : int = 10000 ) -> list [ dict ] :
217217 """Match all docs in one index of OpenSearch"""
218218 if not self .collection_exists (collection_name ):
219219 logger .warning (
@@ -223,7 +223,13 @@ def get_all_docs(self, collection_name: str, size: int = 10000):
223223
224224 query = {"size" : size , "query" : {"match_all" : {}}}
225225 response = self ._opensearch_client .search (index = collection_name , body = query )
226- return [hit ["_source" ]["page_content" ] for hit in response ["hits" ]["hits" ]]
226+ return [
227+ {
228+ "id" : hit ["_id" ],
229+ "page_content" : hit ["_source" ]["page_content" ],
230+ }
231+ for hit in response ["hits" ]["hits" ]
232+ ]
227233
228234 def delete_by_query (self , collection_name : str , query : str ):
229235 """Delete docs by query in one index of OpenSearch"""
@@ -236,3 +242,12 @@ def delete_by_query(self, collection_name: str, query: str):
236242 )
237243 self ._opensearch_client .indices .refresh (index = collection_name )
238244 return response
245+
def delete_by_id(self, collection_name: str, id: str):
    """Delete the document with the given id from one OpenSearch index.

    Raises:
        ValueError: if ``collection_name`` does not exist.

    Returns the client's ``delete`` response. The index is refreshed after
    the delete and before the response is returned.
    """
    if self.collection_exists(collection_name):
        client = self._opensearch_client
        outcome = client.delete(index=collection_name, id=id)
        client.indices.refresh(index=collection_name)
        return outcome

    raise ValueError(f"Collection {collection_name} does not exist.")
0 commit comments