Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit 1f1beae

Browse files
Merge #729
729: Fix distincted exhaustive hits r=Kerollmops a=ManyTheFish This PR changes the name and behavior of `bucket_candidates`: - `bucket_candidates` become `initial_candidates` that is less confusing - `initial_candidates` is no more a simple `RoaringBitmap` but an enum allowing us to precise if the candidates are exhaustive or not - this enum ensures that any modification is allowed only if the candidates are not already exhaustive. The bug occurred because `initial_candidates` are modified during the bucket sort allowing the estimation to be more and more precise along the search, and this was an issue when the `initial_candidates` were already exhaustive, now, if candidates are exhaustive, then no modifications are made. Co-authored-by: ManyTheFish <[email protected]>
2 parents 098c410 + 55724f2 commit 1f1beae

File tree

12 files changed

+285
-113
lines changed

12 files changed

+285
-113
lines changed

milli/src/search/criteria/asc_desc.rs

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use super::{Criterion, CriterionParameters, CriterionResult};
99
use crate::facet::FacetType;
1010
use crate::heed_codec::facet::FacetGroupKeyCodec;
1111
use crate::heed_codec::ByteSliceRefCodec;
12-
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
12+
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
1313
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
1414
use crate::search::query_tree::Operation;
1515
use crate::{FieldId, Index, Result};
@@ -27,7 +27,7 @@ pub struct AscDesc<'t> {
2727
query_tree: Option<Operation>,
2828
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
2929
allowed_candidates: RoaringBitmap,
30-
bucket_candidates: RoaringBitmap,
30+
initial_candidates: InitialCandidates,
3131
faceted_candidates: RoaringBitmap,
3232
parent: Box<dyn Criterion + 't>,
3333
}
@@ -81,7 +81,7 @@ impl<'t> AscDesc<'t> {
8181
candidates: Box::new(std::iter::empty()),
8282
allowed_candidates: RoaringBitmap::new(),
8383
faceted_candidates,
84-
bucket_candidates: RoaringBitmap::new(),
84+
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
8585
parent,
8686
})
8787
}
@@ -106,15 +106,15 @@ impl<'t> Criterion for AscDesc<'t> {
106106
query_tree: self.query_tree.clone(),
107107
candidates: Some(take(&mut self.allowed_candidates)),
108108
filtered_candidates: None,
109-
bucket_candidates: Some(take(&mut self.bucket_candidates)),
109+
initial_candidates: Some(self.initial_candidates.take()),
110110
}));
111111
}
112112
None => match self.parent.next(params)? {
113113
Some(CriterionResult {
114114
query_tree,
115115
candidates,
116116
filtered_candidates,
117-
bucket_candidates,
117+
initial_candidates,
118118
}) => {
119119
self.query_tree = query_tree;
120120
let mut candidates = match (&self.query_tree, candidates) {
@@ -130,9 +130,11 @@ impl<'t> Criterion for AscDesc<'t> {
130130
candidates &= filtered_candidates;
131131
}
132132

133-
match bucket_candidates {
134-
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
135-
None => self.bucket_candidates |= &candidates,
133+
match initial_candidates {
134+
Some(initial_candidates) => {
135+
self.initial_candidates |= initial_candidates
136+
}
137+
None => self.initial_candidates.map_inplace(|c| c | &candidates),
136138
}
137139

138140
if candidates.is_empty() {
@@ -160,7 +162,7 @@ impl<'t> Criterion for AscDesc<'t> {
160162
query_tree: self.query_tree.clone(),
161163
candidates: Some(candidates),
162164
filtered_candidates: None,
163-
bucket_candidates: Some(take(&mut self.bucket_candidates)),
165+
initial_candidates: Some(self.initial_candidates.take()),
164166
}));
165167
}
166168
}

milli/src/search/criteria/attribute.rs

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use std::mem::take;
77
use roaring::RoaringBitmap;
88

99
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
10-
use crate::search::criteria::Query;
10+
use crate::search::criteria::{InitialCandidates, Query};
1111
use crate::search::query_tree::{Operation, QueryKind};
1212
use crate::search::{build_dfa, word_derivations, WordDerivationsCache};
1313
use crate::Result;
@@ -26,7 +26,7 @@ type FlattenedQueryTree = Vec<Vec<Vec<Query>>>;
2626
pub struct Attribute<'t> {
2727
ctx: &'t dyn Context<'t>,
2828
state: Option<(Operation, FlattenedQueryTree, RoaringBitmap)>,
29-
bucket_candidates: RoaringBitmap,
29+
initial_candidates: InitialCandidates,
3030
parent: Box<dyn Criterion + 't>,
3131
linear_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
3232
set_buckets: Option<BinaryHeap<Branch<'t>>>,
@@ -37,7 +37,7 @@ impl<'t> Attribute<'t> {
3737
Attribute {
3838
ctx,
3939
state: None,
40-
bucket_candidates: RoaringBitmap::new(),
40+
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
4141
parent,
4242
linear_buckets: None,
4343
set_buckets: None,
@@ -60,7 +60,7 @@ impl<'t> Criterion for Attribute<'t> {
6060
query_tree: Some(query_tree),
6161
candidates: Some(RoaringBitmap::new()),
6262
filtered_candidates: None,
63-
bucket_candidates: Some(take(&mut self.bucket_candidates)),
63+
initial_candidates: Some(self.initial_candidates.take()),
6464
}));
6565
}
6666
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
@@ -84,7 +84,7 @@ impl<'t> Criterion for Attribute<'t> {
8484
query_tree: Some(query_tree),
8585
candidates: Some(RoaringBitmap::new()),
8686
filtered_candidates: None,
87-
bucket_candidates: Some(take(&mut self.bucket_candidates)),
87+
initial_candidates: Some(self.initial_candidates.take()),
8888
}));
8989
}
9090
}
@@ -109,7 +109,7 @@ impl<'t> Criterion for Attribute<'t> {
109109
query_tree: Some(query_tree),
110110
candidates: Some(RoaringBitmap::new()),
111111
filtered_candidates: None,
112-
bucket_candidates: Some(take(&mut self.bucket_candidates)),
112+
initial_candidates: Some(self.initial_candidates.take()),
113113
}));
114114
}
115115
}
@@ -124,15 +124,15 @@ impl<'t> Criterion for Attribute<'t> {
124124
query_tree: Some(query_tree),
125125
candidates: Some(found_candidates),
126126
filtered_candidates: None,
127-
bucket_candidates: Some(take(&mut self.bucket_candidates)),
127+
initial_candidates: Some(self.initial_candidates.take()),
128128
}));
129129
}
130130
None => match self.parent.next(params)? {
131131
Some(CriterionResult {
132132
query_tree: Some(query_tree),
133133
candidates,
134134
filtered_candidates,
135-
bucket_candidates,
135+
initial_candidates,
136136
}) => {
137137
let mut candidates = match candidates {
138138
Some(candidates) => candidates,
@@ -148,9 +148,11 @@ impl<'t> Criterion for Attribute<'t> {
148148

149149
let flattened_query_tree = flatten_query_tree(&query_tree);
150150

151-
match bucket_candidates {
152-
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
153-
None => self.bucket_candidates |= &candidates,
151+
match initial_candidates {
152+
Some(initial_candidates) => {
153+
self.initial_candidates |= initial_candidates
154+
}
155+
None => self.initial_candidates.map_inplace(|c| c | &candidates),
154156
}
155157

156158
self.state = Some((query_tree, flattened_query_tree, candidates));
@@ -160,13 +162,13 @@ impl<'t> Criterion for Attribute<'t> {
160162
query_tree: None,
161163
candidates,
162164
filtered_candidates,
163-
bucket_candidates,
165+
initial_candidates,
164166
}) => {
165167
return Ok(Some(CriterionResult {
166168
query_tree: None,
167169
candidates,
168170
filtered_candidates,
169-
bucket_candidates,
171+
initial_candidates,
170172
}));
171173
}
172174
None => return Ok(None),

milli/src/search/criteria/exactness.rs

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use roaring::RoaringBitmap;
88

99
use crate::search::criteria::{
1010
resolve_phrase, resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult,
11+
InitialCandidates,
1112
};
1213
use crate::search::query_tree::{Operation, PrimitiveQueryPart};
1314
use crate::{absolute_from_relative_position, FieldId, Result};
@@ -16,7 +17,7 @@ pub struct Exactness<'t> {
1617
ctx: &'t dyn Context<'t>,
1718
query_tree: Option<Operation>,
1819
state: Option<State>,
19-
bucket_candidates: RoaringBitmap,
20+
initial_candidates: InitialCandidates,
2021
parent: Box<dyn Criterion + 't>,
2122
query: Vec<ExactQueryPart>,
2223
}
@@ -36,7 +37,7 @@ impl<'t> Exactness<'t> {
3637
ctx,
3738
query_tree: None,
3839
state: None,
39-
bucket_candidates: RoaringBitmap::new(),
40+
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
4041
parent,
4142
query,
4243
})
@@ -68,15 +69,15 @@ impl<'t> Criterion for Exactness<'t> {
6869
query_tree: self.query_tree.clone(),
6970
candidates: Some(candidates),
7071
filtered_candidates: None,
71-
bucket_candidates: Some(take(&mut self.bucket_candidates)),
72+
initial_candidates: Some(self.initial_candidates.take()),
7273
}));
7374
}
7475
None => match self.parent.next(params)? {
7576
Some(CriterionResult {
7677
query_tree: Some(query_tree),
7778
candidates,
7879
filtered_candidates,
79-
bucket_candidates,
80+
initial_candidates,
8081
}) => {
8182
let mut candidates = match candidates {
8283
Some(candidates) => candidates,
@@ -90,9 +91,11 @@ impl<'t> Criterion for Exactness<'t> {
9091
candidates &= filtered_candidates;
9192
}
9293

93-
match bucket_candidates {
94-
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
95-
None => self.bucket_candidates |= &candidates,
94+
match initial_candidates {
95+
Some(initial_candidates) => {
96+
self.initial_candidates |= initial_candidates
97+
}
98+
None => self.initial_candidates.map_inplace(|c| c | &candidates),
9699
}
97100

98101
self.state = Some(State::new(candidates));
@@ -102,13 +105,13 @@ impl<'t> Criterion for Exactness<'t> {
102105
query_tree: None,
103106
candidates,
104107
filtered_candidates,
105-
bucket_candidates,
108+
initial_candidates,
106109
}) => {
107110
return Ok(Some(CriterionResult {
108111
query_tree: None,
109112
candidates,
110113
filtered_candidates,
111-
bucket_candidates,
114+
initial_candidates,
112115
}));
113116
}
114117
None => return Ok(None),

milli/src/search/criteria/final.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use log::debug;
22
use roaring::RoaringBitmap;
33

44
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
5+
use crate::search::criteria::InitialCandidates;
56
use crate::search::query_tree::Operation;
67
use crate::search::WordDerivationsCache;
78
use crate::Result;
@@ -14,7 +15,7 @@ pub struct FinalResult {
1415
/// The candidates of the current bucket of the last criterion.
1516
pub candidates: RoaringBitmap,
1617
/// Candidates that comes from the current bucket of the initial criterion.
17-
pub bucket_candidates: RoaringBitmap,
18+
pub initial_candidates: InitialCandidates,
1819
}
1920

2021
pub struct Final<'t> {
@@ -49,7 +50,7 @@ impl<'t> Final<'t> {
4950
query_tree,
5051
candidates,
5152
filtered_candidates,
52-
bucket_candidates,
53+
initial_candidates,
5354
}) => {
5455
let mut candidates = match (candidates, query_tree.as_ref()) {
5556
(Some(candidates), _) => candidates,
@@ -63,11 +64,12 @@ impl<'t> Final<'t> {
6364
candidates &= filtered_candidates;
6465
}
6566

66-
let bucket_candidates = bucket_candidates.unwrap_or_else(|| candidates.clone());
67+
let initial_candidates = initial_candidates
68+
.unwrap_or_else(|| InitialCandidates::Estimated(candidates.clone()));
6769

6870
self.returned_candidates |= &candidates;
6971

70-
Ok(Some(FinalResult { query_tree, candidates, bucket_candidates }))
72+
Ok(Some(FinalResult { query_tree, candidates, initial_candidates }))
7173
}
7274
None => Ok(None),
7375
}

milli/src/search/criteria/geo.rs

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use roaring::RoaringBitmap;
44
use rstar::RTree;
55

66
use super::{Criterion, CriterionParameters, CriterionResult};
7-
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
7+
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
88
use crate::{lat_lng_to_xyz, GeoPoint, Index, Result};
99

1010
pub struct Geo<'t> {
@@ -14,7 +14,7 @@ pub struct Geo<'t> {
1414
parent: Box<dyn Criterion + 't>,
1515
candidates: Box<dyn Iterator<Item = RoaringBitmap>>,
1616
allowed_candidates: RoaringBitmap,
17-
bucket_candidates: RoaringBitmap,
17+
initial_candidates: InitialCandidates,
1818
rtree: Option<RTree<GeoPoint>>,
1919
point: [f64; 2],
2020
}
@@ -47,7 +47,7 @@ impl<'t> Geo<'t> {
4747
) -> Result<Self> {
4848
let candidates = Box::new(iter::empty());
4949
let allowed_candidates = index.geo_faceted_documents_ids(rtxn)?;
50-
let bucket_candidates = RoaringBitmap::new();
50+
let initial_candidates = InitialCandidates::Estimated(RoaringBitmap::new());
5151
let rtree = index.geo_rtree(rtxn)?;
5252

5353
Ok(Self {
@@ -57,7 +57,7 @@ impl<'t> Geo<'t> {
5757
parent,
5858
candidates,
5959
allowed_candidates,
60-
bucket_candidates,
60+
initial_candidates,
6161
rtree,
6262
point,
6363
})
@@ -77,15 +77,15 @@ impl Criterion for Geo<'_> {
7777
query_tree: None,
7878
candidates: Some(candidates),
7979
filtered_candidates: None,
80-
bucket_candidates: Some(self.bucket_candidates.clone()),
80+
initial_candidates: Some(self.initial_candidates.clone()),
8181
}));
8282
}
8383
None => match self.parent.next(params)? {
8484
Some(CriterionResult {
8585
query_tree,
8686
candidates,
8787
filtered_candidates,
88-
bucket_candidates,
88+
initial_candidates,
8989
}) => {
9090
let mut candidates = match (&query_tree, candidates) {
9191
(_, Some(candidates)) => candidates,
@@ -100,9 +100,11 @@ impl Criterion for Geo<'_> {
100100
candidates &= filtered_candidates;
101101
}
102102

103-
match bucket_candidates {
104-
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
105-
None => self.bucket_candidates |= &candidates,
103+
match initial_candidates {
104+
Some(initial_candidates) => {
105+
self.initial_candidates |= initial_candidates
106+
}
107+
None => self.initial_candidates.map_inplace(|c| c | &candidates),
106108
}
107109

108110
if candidates.is_empty() {

0 commit comments

Comments
 (0)