@@ -189,29 +189,24 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
189189 // decide for a hard or soft deletion depending on the strategy
190190 let soft_deletion = match self . strategy {
191191 DeletionStrategy :: Dynamic => {
192- // if we have less documents to delete than the threshold we simply save them in
193- // the `soft_deleted_documents_ids` bitmap and early exit.
192+ // Keep the soft-deleted documents in the DB for now if they meet both of these criteria:
193+ // 1. There are fewer soft-deleted documents than actual documents (i.e. soft-deleted make up less than 50% of the total), *and*
194+ // 2. The estimated size on disk of the soft-deleted documents (average document size times their count) is below a fixed threshold
195+
194196 let size_used = self . index . used_size ( ) ?;
195- let map_size = self . index . env . map_size ( ) ? as u64 ;
196197 let nb_documents = self . index . number_of_documents ( self . wtxn ) ?;
197198 let nb_soft_deleted = soft_deleted_docids. len ( ) ;
198199
199- let percentage_available = 100 - ( size_used * 100 / map_size) ;
200- let estimated_document_size = size_used / ( nb_documents + nb_soft_deleted) ;
201- let estimated_size_used_by_soft_deleted = estimated_document_size * nb_soft_deleted;
202- let percentage_used_by_soft_deleted_documents =
203- estimated_size_used_by_soft_deleted * 100 / map_size;
204-
205- // if we have more than 10% of disk space available and the soft deleted
206- // documents uses less than 10% of the total space available,
207- // we skip the deletion. Eg.
208- // - With 100Go of disk and 20Go used including 5Go of soft-deleted documents
209- // We don’t delete anything.
210- // - With 100Go of disk and 95Go used including 1mo of soft-deleted documents
211- // We run the deletion.
212- // - With 100Go of disk and 50Go used including 15Go of soft-deleted documents
213- // We run the deletion.
214- percentage_available > 10 && percentage_used_by_soft_deleted_documents < 10
200+ ( nb_soft_deleted < nb_documents) && {
201+ const SOFT_DELETED_SIZE_BYTE_THRESHOLD : u64 = 1_073_741_824 ; // 1GiB
202+
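203+ // `nb_documents + nb_soft_deleted` cannot be 0 here, so the division below is safe: if `nb_documents` is 0 we short-circuit earlier
204+ // (the `nb_soft_deleted < nb_documents` guard above also implies `nb_documents >= 1`), and the documents to delete have already been moved from the documents_docids to the soft_deleted_docids.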
205+ let estimated_document_size = size_used / ( nb_documents + nb_soft_deleted) ;
206+ let estimated_size_used_by_soft_deleted =
207+ estimated_document_size * nb_soft_deleted;
208+ estimated_size_used_by_soft_deleted < SOFT_DELETED_SIZE_BYTE_THRESHOLD
209+ }
215210 }
216211 DeletionStrategy :: AlwaysSoft => true ,
217212 DeletionStrategy :: AlwaysHard => false ,
@@ -227,7 +222,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
227222 } ) ;
228223 }
229224
230- // There is more than documents to delete than the threshold we needs to delete them all
225+ // Erase soft-deleted from DB
231226 self . to_delete_docids = soft_deleted_docids;
232227 // and we can reset the soft deleted bitmap
233228 self . index . put_soft_deleted_documents_ids ( self . wtxn , & RoaringBitmap :: new ( ) ) ?;
0 commit comments