Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit 098c410

Browse files
bors[bot]loiclec
andauthored
Merge #727
727: Fix bug in filter search r=Kerollmops a=loiclec # Pull Request ## Related issue Fixes (partially, until merged into meilisearch) meilisearch/meilisearch#3178 ## What does this PR do? The most important change is this one: ```rust // in milli/src/search/facet/facet_range_search.rs, line 239 let should_stop = { match self.right { Bound::Included(right) => right < previous_key.left_bound, Bound::Excluded(right) => right <= previous_key.left_bound, Bound::Unbounded => false, } }; ``` where the operations `<` and `<=` between the two branches were switched. This caused (very few) documents to be missing from filter results. The second change is a simplification of the algorithm for filters such as `field = value`, where we now perform a direct query into the "Level 0" of the facet db to retrieve the docids instead of invoking the full facet search algorithm. This change is done in `milli/src/search/facet/filter.rs`. I have added yet more insta-snapshot tests, rechecked the content of the snapshots, and added some integration tests as well. This is purely a fix in the search algorithms. Based on this PR alone, a dump will not be necessary to switch from v0.30.1 (where this bug is present) to v0.30.2 (where this PR is merged). Co-authored-by: Loïc Lecrenier <[email protected]>
2 parents ee10cb8 + d38cc73 commit 098c410

File tree

51 files changed

+899
-94
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+899
-94
lines changed

milli/src/search/facet/facet_range_search.rs

Lines changed: 172 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
171171
}
172172

173173
// should we stop?
174+
// We should if the the search range doesn't include any
175+
// element from the previous key or its successors
174176
let should_stop = {
175177
match self.right {
176178
Bound::Included(right) => right < previous_key.left_bound,
@@ -233,10 +235,12 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
233235
}
234236

235237
// should we stop?
238+
// We should if the the search range doesn't include any
239+
// element from the previous key or its successors
236240
let should_stop = {
237241
match self.right {
238-
Bound::Included(right) => right <= previous_key.left_bound,
239-
Bound::Excluded(right) => right < previous_key.left_bound,
242+
Bound::Included(right) => right < previous_key.left_bound,
243+
Bound::Excluded(right) => right <= previous_key.left_bound,
240244
Bound::Unbounded => false,
241245
}
242246
};
@@ -321,8 +325,27 @@ mod tests {
321325
#[test]
322326
fn random_looking_index_snap() {
323327
let index = get_random_looking_index();
324-
milli_snap!(format!("{index}"));
328+
milli_snap!(format!("{index}"), @"3256c76a7c1b768a013e78d5fa6e9ff9");
325329
}
330+
331+
#[test]
332+
fn random_looking_index_with_multiple_field_ids_snap() {
333+
let index = get_random_looking_index_with_multiple_field_ids();
334+
milli_snap!(format!("{index}"), @"c3e5fe06a8f1c404ed4935b32c90a89b");
335+
}
336+
337+
#[test]
338+
fn simple_index_snap() {
339+
let index = get_simple_index();
340+
milli_snap!(format!("{index}"), @"5dbfa134cc44abeb3ab6242fc182e48e");
341+
}
342+
343+
#[test]
344+
fn simple_index_with_multiple_field_ids_snap() {
345+
let index = get_simple_index_with_multiple_field_ids();
346+
milli_snap!(format!("{index}"), @"a4893298218f682bc76357f46777448c");
347+
}
348+
326349
#[test]
327350
fn filter_range_increasing() {
328351
let indexes = [
@@ -349,7 +372,7 @@ mod tests {
349372
)
350373
.unwrap();
351374
#[allow(clippy::format_push_string)]
352-
results.push_str(&format!("{}\n", display_bitmap(&docids)));
375+
results.push_str(&format!("0 <= . <= {i} : {}\n", display_bitmap(&docids)));
353376
}
354377
milli_snap!(results, format!("included_{i}"));
355378
let mut results = String::new();
@@ -368,7 +391,7 @@ mod tests {
368391
)
369392
.unwrap();
370393
#[allow(clippy::format_push_string)]
371-
results.push_str(&format!("{}\n", display_bitmap(&docids)));
394+
results.push_str(&format!("0 < . < {i} : {}\n", display_bitmap(&docids)));
372395
}
373396
milli_snap!(results, format!("excluded_{i}"));
374397
txn.commit().unwrap();
@@ -401,7 +424,7 @@ mod tests {
401424
&mut docids,
402425
)
403426
.unwrap();
404-
results.push_str(&format!("{}\n", display_bitmap(&docids)));
427+
results.push_str(&format!("{i} <= . <= 255 : {}\n", display_bitmap(&docids)));
405428
}
406429

407430
milli_snap!(results, format!("included_{i}"));
@@ -422,7 +445,7 @@ mod tests {
422445
&mut docids,
423446
)
424447
.unwrap();
425-
results.push_str(&format!("{}\n", display_bitmap(&docids)));
448+
results.push_str(&format!("{i} < . < 255 : {}\n", display_bitmap(&docids)));
426449
}
427450

428451
milli_snap!(results, format!("excluded_{i}"));
@@ -457,7 +480,11 @@ mod tests {
457480
&mut docids,
458481
)
459482
.unwrap();
460-
results.push_str(&format!("{}\n", display_bitmap(&docids)));
483+
results.push_str(&format!(
484+
"{i} <= . <= {r} : {docids}\n",
485+
r = 255. - i,
486+
docids = display_bitmap(&docids)
487+
));
461488
}
462489

463490
milli_snap!(results, format!("included_{i}"));
@@ -478,12 +505,148 @@ mod tests {
478505
&mut docids,
479506
)
480507
.unwrap();
481-
results.push_str(&format!("{}\n", display_bitmap(&docids)));
508+
results.push_str(&format!(
509+
"{i} < . < {r} {docids}\n",
510+
r = 255. - i,
511+
docids = display_bitmap(&docids)
512+
));
482513
}
483514

484515
milli_snap!(results, format!("excluded_{i}"));
485516

486517
txn.commit().unwrap();
487518
}
488519
}
520+
521+
#[test]
522+
fn filter_range_unbounded() {
523+
let indexes = [
524+
get_simple_index(),
525+
get_random_looking_index(),
526+
get_simple_index_with_multiple_field_ids(),
527+
get_random_looking_index_with_multiple_field_ids(),
528+
];
529+
for (i, index) in indexes.iter().enumerate() {
530+
let txn = index.env.read_txn().unwrap();
531+
let mut results = String::new();
532+
for i in 0..=255 {
533+
let i = i as f64;
534+
let start = Bound::Included(i);
535+
let end = Bound::Unbounded;
536+
let mut docids = RoaringBitmap::new();
537+
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
538+
&txn,
539+
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
540+
0,
541+
&start,
542+
&end,
543+
&mut docids,
544+
)
545+
.unwrap();
546+
#[allow(clippy::format_push_string)]
547+
results.push_str(&format!(">= {i}: {}\n", display_bitmap(&docids)));
548+
}
549+
milli_snap!(results, format!("start_from_included_{i}"));
550+
let mut results = String::new();
551+
for i in 0..=255 {
552+
let i = i as f64;
553+
let start = Bound::Unbounded;
554+
let end = Bound::Included(i);
555+
let mut docids = RoaringBitmap::new();
556+
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
557+
&txn,
558+
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
559+
0,
560+
&start,
561+
&end,
562+
&mut docids,
563+
)
564+
.unwrap();
565+
#[allow(clippy::format_push_string)]
566+
results.push_str(&format!("<= {i}: {}\n", display_bitmap(&docids)));
567+
}
568+
milli_snap!(results, format!("end_at_included_{i}"));
569+
570+
let mut docids = RoaringBitmap::new();
571+
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
572+
&txn,
573+
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
574+
0,
575+
&Bound::Unbounded,
576+
&Bound::Unbounded,
577+
&mut docids,
578+
)
579+
.unwrap();
580+
milli_snap!(
581+
&format!("all field_id 0: {}\n", display_bitmap(&docids)),
582+
format!("unbounded_field_id_0_{i}")
583+
);
584+
585+
let mut docids = RoaringBitmap::new();
586+
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
587+
&txn,
588+
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
589+
1,
590+
&Bound::Unbounded,
591+
&Bound::Unbounded,
592+
&mut docids,
593+
)
594+
.unwrap();
595+
milli_snap!(
596+
&format!("all field_id 1: {}\n", display_bitmap(&docids)),
597+
format!("unbounded_field_id_1_{i}")
598+
);
599+
600+
drop(txn);
601+
}
602+
}
603+
604+
#[test]
605+
fn filter_range_exact() {
606+
let indexes = [
607+
get_simple_index(),
608+
get_random_looking_index(),
609+
get_simple_index_with_multiple_field_ids(),
610+
get_random_looking_index_with_multiple_field_ids(),
611+
];
612+
for (i, index) in indexes.iter().enumerate() {
613+
let txn = index.env.read_txn().unwrap();
614+
let mut results_0 = String::new();
615+
let mut results_1 = String::new();
616+
for i in 0..=255 {
617+
let i = i as f64;
618+
let start = Bound::Included(i);
619+
let end = Bound::Included(i);
620+
let mut docids = RoaringBitmap::new();
621+
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
622+
&txn,
623+
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
624+
0,
625+
&start,
626+
&end,
627+
&mut docids,
628+
)
629+
.unwrap();
630+
#[allow(clippy::format_push_string)]
631+
results_0.push_str(&format!("{i}: {}\n", display_bitmap(&docids)));
632+
633+
let mut docids = RoaringBitmap::new();
634+
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
635+
&txn,
636+
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
637+
1,
638+
&start,
639+
&end,
640+
&mut docids,
641+
)
642+
.unwrap();
643+
#[allow(clippy::format_push_string)]
644+
results_1.push_str(&format!("{i}: {}\n", display_bitmap(&docids)));
645+
}
646+
milli_snap!(results_0, format!("field_id_0_exact_{i}"));
647+
milli_snap!(results_1, format!("field_id_1_exact_{i}"));
648+
649+
drop(txn);
650+
}
651+
}
489652
}

0 commit comments

Comments
 (0)