Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit 171c942

Browse files
committed
Soft-deletion computation no longer takes into account the mapsize
Implemented solution 2.3 from meilisearch/meilisearch#3231 (comment)
1 parent e2ae3b2 commit 171c942

File tree

1 file changed

+15
-20
lines changed

1 file changed

+15
-20
lines changed

milli/src/update/delete_documents.rs

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -189,29 +189,24 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
189189
// decide for a hard or soft deletion depending on the strategy
190190
let soft_deletion = match self.strategy {
191191
DeletionStrategy::Dynamic => {
192-
// if we have less documents to delete than the threshold we simply save them in
193-
// the `soft_deleted_documents_ids` bitmap and early exit.
192+
// keep the soft-deleted documents in the DB for now if they meet both criteria:
193+
// 1. There are fewer soft-deleted documents than remaining documents (i.e. soft-deleted make up less than 50% of all stored documents), *and*
194+
// 2. The estimated disk size of the soft-deleted documents (average document size × their count) is below a fixed threshold
195+
194196
let size_used = self.index.used_size()?;
195-
let map_size = self.index.env.map_size()? as u64;
196197
let nb_documents = self.index.number_of_documents(self.wtxn)?;
197198
let nb_soft_deleted = soft_deleted_docids.len();
198199

199-
let percentage_available = 100 - (size_used * 100 / map_size);
200-
let estimated_document_size = size_used / (nb_documents + nb_soft_deleted);
201-
let estimated_size_used_by_soft_deleted = estimated_document_size * nb_soft_deleted;
202-
let percentage_used_by_soft_deleted_documents =
203-
estimated_size_used_by_soft_deleted * 100 / map_size;
204-
205-
// if we have more than 10% of disk space available and the soft deleted
206-
// documents uses less than 10% of the total space available,
207-
// we skip the deletion. Eg.
208-
// - With 100Go of disk and 20Go used including 5Go of soft-deleted documents
209-
// We don’t delete anything.
210-
// - With 100Go of disk and 95Go used including 1mo of soft-deleted documents
211-
// We run the deletion.
212-
// - With 100Go of disk and 50Go used including 15Go of soft-deleted documents
213-
// We run the deletion.
214-
percentage_available > 10 && percentage_used_by_soft_deleted_documents < 10
200+
(nb_soft_deleted < nb_documents) && {
201+
const SOFT_DELETED_SIZE_BYTE_THRESHOLD: u64 = 1_073_741_824; // 1GiB
202+
203+
// nb_documents + nb_soft_deleted != 0 because if nb_documents is 0 we short-circuit earlier, and we have already moved the documents to delete
204+
// from the documents_docids to the soft_deleted_docids.
205+
let estimated_document_size = size_used / (nb_documents + nb_soft_deleted);
206+
let estimated_size_used_by_soft_deleted =
207+
estimated_document_size * nb_soft_deleted;
208+
estimated_size_used_by_soft_deleted < SOFT_DELETED_SIZE_BYTE_THRESHOLD
209+
}
215210
}
216211
DeletionStrategy::AlwaysSoft => true,
217212
DeletionStrategy::AlwaysHard => false,
@@ -227,7 +222,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
227222
});
228223
}
229224

230-
// There is more than documents to delete than the threshold we needs to delete them all
225+
// Erase soft-deleted from DB
231226
self.to_delete_docids = soft_deleted_docids;
232227
// and we can reset the soft deleted bitmap
233228
self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?;

0 commit comments

Comments
 (0)