Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit 97fb64e

Browse files
bors[bot]dureuillirevoire
authored
Merge #747
747: Soft-deletion computation no longer depends on the mapsize r=irevoire a=dureuill # Pull Request ## Related issue Related to meilisearch/meilisearch#3231: After removing `--max-index-size`, the `mapsize` will always be unrelated to the actual max size the user wants for their DB, so it doesn't make sense to use these values any longer. This implements solution 2.3 from meilisearch/meilisearch#3231 (comment) ## What does this PR do? ### User-visible - Soft-deleted documents are no longer deleted when there is less than 10% of the mapsize available or when they take more than 10% of the mapsize - Instead, they are deleted when there are more soft-deleted documents than regular documents, or when they take more than 1GiB disk space (estimated). ### Implementation standpoint 1. Adds a `DeletionStrategy` enum to replace the boolean `disable_soft_deletion` that we had up until now. This enum allows us to specify that we want "always hard", "always soft", or to use the dynamic soft-deletion strategy (default). 2. Uses the current strategy when deleting documents, with the new heuristics being used in the `DeletionStrategy::Dynamic` variant. 3. Updates the tests to use the appropriate DeletionStrategy whenever needed (one of `AlwaysHard` or `AlwaysSoft` depending on the test) Note to reviewers: this PR is optimized for a commit-by-commit review. ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: Louis Dureuil <[email protected]> Co-authored-by: Tamo <[email protected]>
2 parents 8957251 + 69edbf9 commit 97fb64e

File tree

49 files changed

+177
-129
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+177
-129
lines changed

milli/src/index.rs

Lines changed: 28 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1192,8 +1192,8 @@ pub(crate) mod tests {
11921192
use crate::error::{Error, InternalError};
11931193
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
11941194
use crate::update::{
1195-
self, DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,
1196-
IndexerConfig, Settings,
1195+
self, DeleteDocuments, DeletionStrategy, IndexDocuments, IndexDocumentsConfig,
1196+
IndexDocumentsMethod, IndexerConfig, Settings,
11971197
};
11981198
use crate::{db_snap, obkv_to_json, Index};
11991199

@@ -1282,6 +1282,17 @@ pub(crate) mod tests {
12821282
builder.execute(drop, || false)?;
12831283
Ok(())
12841284
}
1285+
1286+
/// Test helper: deletes the document identified by `external_document_id` and commits.
///
/// The deletion is configured with the strategy currently stored in
/// `self.index_documents_config.deletion_strategy`, so a test can pick the
/// strategy by setting that field before calling this method.
///
/// # Panics
///
/// Panics if opening the write transaction, creating the `DeleteDocuments`
/// builder, executing the deletion, or committing the transaction fails
/// (every fallible step is unwrapped).
pub fn delete_document(&self, external_document_id: &str) {
1287+
let mut wtxn = self.write_txn().unwrap();
1288+
1289+
let mut delete = DeleteDocuments::new(&mut wtxn, &self).unwrap();
1290+
// Apply the strategy configured on this index rather than the builder's own default.
delete.strategy(self.index_documents_config.deletion_strategy);
1291+
1292+
delete.delete_external_id(external_document_id);
1293+
delete.execute().unwrap();
1294+
// Commit so the deletion is visible to transactions opened afterwards.
wtxn.commit().unwrap();
1295+
}
12851296
}
12861297

12871298
#[test]
@@ -1487,7 +1498,9 @@ pub(crate) mod tests {
14871498
use big_s::S;
14881499
use maplit::hashset;
14891500

1490-
let index = TempIndex::new();
1501+
let mut index = TempIndex::new();
1502+
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
1503+
let index = index;
14911504

14921505
index
14931506
.update_settings(|settings| {
@@ -1657,7 +1670,8 @@ pub(crate) mod tests {
16571670
}
16581671
// Second Batch: replace the documents with soft-deletion
16591672
{
1660-
index.index_documents_config.disable_soft_deletion = false;
1673+
index.index_documents_config.deletion_strategy =
1674+
crate::update::DeletionStrategy::AlwaysSoft;
16611675
let mut docs1 = vec![];
16621676
for i in 0..3 {
16631677
docs1.push(serde_json::json!(
@@ -1726,7 +1740,7 @@ pub(crate) mod tests {
17261740
drop(rtxn);
17271741
// Third Batch: replace the documents with soft-deletion again
17281742
{
1729-
index.index_documents_config.disable_soft_deletion = false;
1743+
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
17301744
let mut docs1 = vec![];
17311745
for i in 0..3 {
17321746
docs1.push(serde_json::json!(
@@ -1795,7 +1809,7 @@ pub(crate) mod tests {
17951809

17961810
// Fourth Batch: replace the documents without soft-deletion
17971811
{
1798-
index.index_documents_config.disable_soft_deletion = true;
1812+
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
17991813
let mut docs1 = vec![];
18001814
for i in 0..3 {
18011815
docs1.push(serde_json::json!(
@@ -1867,6 +1881,7 @@ pub(crate) mod tests {
18671881
fn bug_3021_first() {
18681882
// https://github.com/meilisearch/meilisearch/issues/3021
18691883
let mut index = TempIndex::new();
1884+
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
18701885
index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments;
18711886

18721887
index
@@ -1891,11 +1906,7 @@ pub(crate) mod tests {
18911906
"###);
18921907
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
18931908

1894-
let mut wtxn = index.write_txn().unwrap();
1895-
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
1896-
delete.delete_external_id("34");
1897-
delete.execute().unwrap();
1898-
wtxn.commit().unwrap();
1909+
index.delete_document("34");
18991910

19001911
db_snap!(index, documents_ids, @"[0, ]");
19011912
db_snap!(index, external_documents_ids, 2, @r###"
@@ -1936,11 +1947,7 @@ pub(crate) mod tests {
19361947
db_snap!(index, soft_deleted_documents_ids, 4, @"[]");
19371948

19381949
// We do the test again, but deleting the document with id 0 instead of id 1 now
1939-
let mut wtxn = index.write_txn().unwrap();
1940-
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
1941-
delete.delete_external_id("38");
1942-
delete.execute().unwrap();
1943-
wtxn.commit().unwrap();
1950+
index.delete_document("38");
19441951

19451952
db_snap!(index, documents_ids, @"[1, ]");
19461953
db_snap!(index, external_documents_ids, 5, @r###"
@@ -1987,6 +1994,7 @@ pub(crate) mod tests {
19871994
fn bug_3021_second() {
19881995
// https://github.com/meilisearch/meilisearch/issues/3021
19891996
let mut index = TempIndex::new();
1997+
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
19901998
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
19911999

19922000
index
@@ -2011,11 +2019,7 @@ pub(crate) mod tests {
20112019
"###);
20122020
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
20132021

2014-
let mut wtxn = index.write_txn().unwrap();
2015-
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
2016-
delete.delete_external_id("34");
2017-
delete.execute().unwrap();
2018-
wtxn.commit().unwrap();
2022+
index.delete_document("34");
20192023

20202024
db_snap!(index, documents_ids, @"[0, ]");
20212025
db_snap!(index, external_documents_ids, 2, @r###"
@@ -2116,6 +2120,7 @@ pub(crate) mod tests {
21162120
fn bug_3021_third() {
21172121
// https://github.com/meilisearch/meilisearch/issues/3021
21182122
let mut index = TempIndex::new();
2123+
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
21192124
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
21202125

21212126
index
@@ -2142,11 +2147,7 @@ pub(crate) mod tests {
21422147
"###);
21432148
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
21442149

2145-
let mut wtxn = index.write_txn().unwrap();
2146-
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
2147-
delete.delete_external_id("3");
2148-
delete.execute().unwrap();
2149-
wtxn.commit().unwrap();
2150+
index.delete_document("3");
21502151

21512152
db_snap!(index, documents_ids, @"[1, 2, ]");
21522153
db_snap!(index, external_documents_ids, 2, @r###"
@@ -2158,7 +2159,7 @@ pub(crate) mod tests {
21582159
"###);
21592160
db_snap!(index, soft_deleted_documents_ids, 2, @"[0, ]");
21602161

2161-
index.index_documents_config.disable_soft_deletion = true;
2162+
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
21622163

21632164
index.add_documents(documents!([{ "primary_key": "4", "a": 2 }])).unwrap();
21642165

0 commit comments

Comments
 (0)