@@ -6,16 +6,14 @@ use heed::types::{ByteSlice, DecodeIgnore, Str};
66use heed:: Database ;
77use roaring:: RoaringBitmap ;
88use serde:: { Deserialize , Serialize } ;
9- use serde_json:: Value ;
109use time:: OffsetDateTime ;
1110
1211use super :: facet:: delete:: FacetsDelete ;
1312use super :: ClearDocuments ;
14- use crate :: error:: { InternalError , UserError } ;
13+ use crate :: error:: InternalError ;
1514use crate :: facet:: FacetType ;
1615use crate :: heed_codec:: facet:: FieldDocIdFacetCodec ;
1716use crate :: heed_codec:: CboRoaringBitmapCodec ;
18- use crate :: index:: { db_name, main_key} ;
1917use crate :: {
2018 ExternalDocumentsIds , FieldId , FieldIdMapMissingEntry , Index , Result , RoaringBitmapCodec ,
2119 SmallString32 , BEU32 ,
@@ -186,6 +184,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
186184
187185 soft_deleted_docids |= & self . to_delete_docids ;
188186
187+ // We always soft-delete the documents, even if they will be permanently
188+ // deleted immediately after.
189+ self . index . put_soft_deleted_documents_ids ( self . wtxn , & soft_deleted_docids) ?;
190+
189191 // decide for a hard or soft deletion depending on the strategy
190192 let soft_deletion = match self . strategy {
191193 DeletionStrategy :: Dynamic => {
@@ -214,31 +216,14 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
214216
215217 if soft_deletion {
216218 // Keep the soft-deleted in the DB
217- self . index . put_soft_deleted_documents_ids ( self . wtxn , & soft_deleted_docids) ?;
218219 return Ok ( DetailedDocumentDeletionResult {
219220 deleted_documents : self . to_delete_docids . len ( ) ,
220221 remaining_documents : documents_ids. len ( ) ,
221222 soft_deletion_used : true ,
222223 } ) ;
223224 }
224225
225- // Erase soft-deleted from DB
226226 self . to_delete_docids = soft_deleted_docids;
227- // and we can reset the soft deleted bitmap
228- self . index . put_soft_deleted_documents_ids ( self . wtxn , & RoaringBitmap :: new ( ) ) ?;
229-
230- let primary_key =
231- self . index . primary_key ( self . wtxn ) ?. ok_or ( InternalError :: DatabaseMissingEntry {
232- db_name : db_name:: MAIN ,
233- key : Some ( main_key:: PRIMARY_KEY_KEY ) ,
234- } ) ?;
235-
236- // Since we already checked if the DB was empty, if we can't find the primary key, then
237- // something is wrong, and we must return an error.
238- let id_field = match fields_ids_map. id ( primary_key) {
239- Some ( field) => field,
240- None => return Err ( UserError :: MissingPrimaryKey . into ( ) ) ,
241- } ;
242227
243228 let Index {
244229 env : _env,
@@ -262,33 +247,14 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
262247 documents,
263248 } = self . index ;
264249
265- // Retrieve the words and the external documents ids contained in the documents.
250+ // Retrieve the words contained in the documents.
266251 let mut words = Vec :: new ( ) ;
267- let mut external_ids = Vec :: new ( ) ;
268252 for docid in & self . to_delete_docids {
269- // We create an iterator to be able to get the content and delete the document
270- // content itself. It's faster to acquire a cursor to get and delete,
271- // as we avoid traversing the LMDB B-Tree two times but only once.
272- let key = BEU32 :: new ( docid) ;
273- let mut iter = documents. range_mut ( self . wtxn , & ( key..=key) ) ?;
274- if let Some ( ( _key, obkv) ) = iter. next ( ) . transpose ( ) ? {
275- if let Some ( content) = obkv. get ( id_field) {
276- let external_id = match serde_json:: from_slice ( content) . unwrap ( ) {
277- Value :: String ( string) => SmallString32 :: from ( string. as_str ( ) ) ,
278- Value :: Number ( number) => SmallString32 :: from ( number. to_string ( ) ) ,
279- document_id => {
280- return Err ( UserError :: InvalidDocumentId { document_id } . into ( ) )
281- }
282- } ;
283- external_ids. push ( external_id) ;
284- }
285- // safety: we don't keep references from inside the LMDB database.
286- unsafe { iter. del_current ( ) ? } ;
287- }
288- drop ( iter) ;
253+ documents. delete ( self . wtxn , & BEU32 :: new ( docid) ) ?;
289254
290- // We iterate through the words positions of the document id,
291- // retrieve the word and delete the positions.
255+ // We iterate through the word positions of the document id, retrieve each word and delete the positions.
256+ // We create an iterator to be able to both read the content and delete the key-value pair itself.
257+ // It's faster to acquire a cursor to get and delete, as we traverse the LMDB B-Tree only once instead of twice.
292258 let mut iter = docid_word_positions. prefix_iter_mut ( self . wtxn , & ( docid, "" ) ) ?;
293259 while let Some ( result) = iter. next ( ) {
294260 let ( ( _docid, word) , _positions) = result?;
@@ -298,17 +264,12 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
298264 unsafe { iter. del_current ( ) ? } ;
299265 }
300266 }
301-
302- // We create the FST map of the external ids that we must delete.
303- external_ids. sort_unstable ( ) ;
304- let external_ids_to_delete = fst:: Set :: from_iter ( external_ids) ?;
305-
306267 // We acquire the current external documents ids map...
268+ // Note that its soft-deleted document ids field will be equal to the `to_delete_docids`
307269 let mut new_external_documents_ids = self . index . external_documents_ids ( self . wtxn ) ?;
308- // ...and remove the to-delete external ids.
309- new_external_documents_ids. delete_ids ( external_ids_to_delete) ?;
310-
311- // We write the new external ids into the main database.
270+ // We then remove the soft-deleted docids from it
271+ new_external_documents_ids. delete_soft_deleted_documents_ids_from_fsts ( ) ?;
272+ // and write it back to the main database.
312273 let new_external_documents_ids = new_external_documents_ids. into_static ( ) ;
313274 self . index . put_external_documents_ids ( self . wtxn , & new_external_documents_ids) ?;
314275
@@ -545,6 +506,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
545506 & self . to_delete_docids ,
546507 ) ?;
547508
509+ self . index . put_soft_deleted_documents_ids ( self . wtxn , & RoaringBitmap :: new ( ) ) ?;
510+
548511 Ok ( DetailedDocumentDeletionResult {
549512 deleted_documents : self . to_delete_docids . len ( ) ,
550513 remaining_documents : documents_ids. len ( ) ,
@@ -1125,14 +1088,16 @@ mod tests {
11251088 id
11261089 ) ;
11271090 }
1091+ wtxn. commit ( ) . unwrap ( ) ;
1092+
1093+ let rtxn = index. read_txn ( ) . unwrap ( ) ;
11281094
11291095 // get internal docids from deleted external document ids
1130- let results = index. external_documents_ids ( & wtxn ) . unwrap ( ) ;
1096+ let results = index. external_documents_ids ( & rtxn ) . unwrap ( ) ;
11311097 for id in deleted_external_ids {
11321098 assert ! ( results. get( id) . is_none( ) , "The document {} was supposed to be deleted" , id) ;
11331099 }
1134-
1135- wtxn. commit ( ) . unwrap ( ) ;
1100+ drop ( rtxn) ;
11361101
11371102 db_snap ! ( index, soft_deleted_documents_ids, deletion_strategy) ;
11381103 }
0 commit comments