Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit 229405a

Browse files
committed
Choose implementation strategy of criterion at runtime
1 parent 97fb64e commit 229405a

File tree

7 files changed: 155 additions and 49 deletions.

cli/src/main.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use milli::update::UpdateIndexingStep::{
1313
ComputeIdsAndMergeDocuments, IndexDocuments, MergeDataIntoFinalDatabase, RemapDocumentAddition,
1414
};
1515
use milli::update::{self, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig};
16-
use milli::{heed, Index, Object};
16+
use milli::{heed, CriterionImplementationStrategy, Index, Object};
1717
use structopt::StructOpt;
1818

1919
#[global_allocator]
@@ -441,7 +441,7 @@ impl Search {
441441
if let Some(limit) = limit {
442442
search.limit(*limit);
443443
}
444-
444+
search.criterion_implementation_strategy(CriterionImplementationStrategy::OnlyIterative);
445445
let result = search.execute()?;
446446

447447
let fields_ids_map = index.fields_ids_map(&txn)?;

milli/src/lib.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@ pub use self::heed_codec::{
4242
};
4343
pub use self::index::Index;
4444
pub use self::search::{
45-
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWord,
46-
MatchingWords, Search, SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
45+
CriterionImplementationStrategy, FacetDistribution, Filter, FormatOptions, MatchBounds,
46+
MatcherBuilder, MatchingWord, MatchingWords, Search, SearchResult, TermsMatchingStrategy,
47+
DEFAULT_VALUES_PER_FACET,
4748
};
4849

4950
pub type Result<T> = std::result::Result<T, error::Error>;

milli/src/search/criteria/asc_desc.rs

Lines changed: 69 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use crate::heed_codec::ByteSliceRefCodec;
1212
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
1313
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
1414
use crate::search::query_tree::Operation;
15+
use crate::search::CriterionImplementationStrategy;
1516
use crate::{FieldId, Index, Result};
1617

1718
/// Threshold on the number of candidates that will make
@@ -29,6 +30,7 @@ pub struct AscDesc<'t> {
2930
allowed_candidates: RoaringBitmap,
3031
initial_candidates: InitialCandidates,
3132
faceted_candidates: RoaringBitmap,
33+
implementation_strategy: CriterionImplementationStrategy,
3234
parent: Box<dyn Criterion + 't>,
3335
}
3436

@@ -38,17 +40,19 @@ impl<'t> AscDesc<'t> {
3840
rtxn: &'t heed::RoTxn,
3941
parent: Box<dyn Criterion + 't>,
4042
field_name: String,
43+
implementation_strategy: CriterionImplementationStrategy,
4144
) -> Result<Self> {
42-
Self::new(index, rtxn, parent, field_name, true)
45+
Self::new(index, rtxn, parent, field_name, true, implementation_strategy)
4346
}
4447

4548
pub fn desc(
4649
index: &'t Index,
4750
rtxn: &'t heed::RoTxn,
4851
parent: Box<dyn Criterion + 't>,
4952
field_name: String,
53+
implementation_strategy: CriterionImplementationStrategy,
5054
) -> Result<Self> {
51-
Self::new(index, rtxn, parent, field_name, false)
55+
Self::new(index, rtxn, parent, field_name, false, implementation_strategy)
5256
}
5357

5458
fn new(
@@ -57,6 +61,7 @@ impl<'t> AscDesc<'t> {
5761
parent: Box<dyn Criterion + 't>,
5862
field_name: String,
5963
is_ascending: bool,
64+
implementation_strategy: CriterionImplementationStrategy,
6065
) -> Result<Self> {
6166
let fields_ids_map = index.fields_ids_map(rtxn)?;
6267
let field_id = fields_ids_map.id(&field_name);
@@ -82,6 +87,7 @@ impl<'t> AscDesc<'t> {
8287
allowed_candidates: RoaringBitmap::new(),
8388
faceted_candidates,
8489
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
90+
implementation_strategy,
8591
parent,
8692
})
8793
}
@@ -149,6 +155,7 @@ impl<'t> Criterion for AscDesc<'t> {
149155
field_id,
150156
self.is_ascending,
151157
candidates & &self.faceted_candidates,
158+
self.implementation_strategy,
152159
)?,
153160
None => Box::new(std::iter::empty()),
154161
};
@@ -170,6 +177,51 @@ impl<'t> Criterion for AscDesc<'t> {
170177
}
171178
}
172179

180+
fn facet_ordered_iterative<'t>(
181+
index: &'t Index,
182+
rtxn: &'t heed::RoTxn,
183+
field_id: FieldId,
184+
is_ascending: bool,
185+
candidates: RoaringBitmap,
186+
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
187+
let number_iter = iterative_facet_number_ordered_iter(
188+
index,
189+
rtxn,
190+
field_id,
191+
is_ascending,
192+
candidates.clone(),
193+
)?;
194+
let string_iter =
195+
iterative_facet_string_ordered_iter(index, rtxn, field_id, is_ascending, candidates)?;
196+
Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
197+
}
198+
199+
fn facet_ordered_set_based<'t>(
200+
index: &'t Index,
201+
rtxn: &'t heed::RoTxn,
202+
field_id: FieldId,
203+
is_ascending: bool,
204+
candidates: RoaringBitmap,
205+
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
206+
let make_iter = if is_ascending { ascending_facet_sort } else { descending_facet_sort };
207+
208+
let number_iter = make_iter(
209+
rtxn,
210+
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
211+
field_id,
212+
candidates.clone(),
213+
)?;
214+
215+
let string_iter = make_iter(
216+
rtxn,
217+
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
218+
field_id,
219+
candidates,
220+
)?;
221+
222+
Ok(Box::new(number_iter.chain(string_iter)))
223+
}
224+
173225
/// Returns an iterator over groups of the given candidates in ascending or descending order.
174226
///
175227
/// It will either use an iterative or a recursive method on the whole facet database depending
@@ -180,36 +232,22 @@ fn facet_ordered<'t>(
180232
field_id: FieldId,
181233
is_ascending: bool,
182234
candidates: RoaringBitmap,
235+
implementation_strategy: CriterionImplementationStrategy,
183236
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
184-
if candidates.len() <= CANDIDATES_THRESHOLD {
185-
let number_iter = iterative_facet_number_ordered_iter(
186-
index,
187-
rtxn,
188-
field_id,
189-
is_ascending,
190-
candidates.clone(),
191-
)?;
192-
let string_iter =
193-
iterative_facet_string_ordered_iter(index, rtxn, field_id, is_ascending, candidates)?;
194-
Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
195-
} else {
196-
let make_iter = if is_ascending { ascending_facet_sort } else { descending_facet_sort };
197-
198-
let number_iter = make_iter(
199-
rtxn,
200-
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
201-
field_id,
202-
candidates.clone(),
203-
)?;
204-
205-
let string_iter = make_iter(
206-
rtxn,
207-
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
208-
field_id,
209-
candidates,
210-
)?;
211-
212-
Ok(Box::new(number_iter.chain(string_iter)))
237+
match implementation_strategy {
238+
CriterionImplementationStrategy::OnlyIterative => {
239+
facet_ordered_iterative(index, rtxn, field_id, is_ascending, candidates)
240+
}
241+
CriterionImplementationStrategy::OnlySetBased => {
242+
facet_ordered_set_based(index, rtxn, field_id, is_ascending, candidates)
243+
}
244+
CriterionImplementationStrategy::Dynamic => {
245+
if candidates.len() <= CANDIDATES_THRESHOLD {
246+
facet_ordered_iterative(index, rtxn, field_id, is_ascending, candidates)
247+
} else {
248+
facet_ordered_set_based(index, rtxn, field_id, is_ascending, candidates)
249+
}
250+
}
213251
}
214252
}
215253

milli/src/search/criteria/attribute.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ use roaring::RoaringBitmap;
99
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
1010
use crate::search::criteria::{InitialCandidates, Query};
1111
use crate::search::query_tree::{Operation, QueryKind};
12-
use crate::search::{build_dfa, word_derivations, WordDerivationsCache};
12+
use crate::search::{
13+
build_dfa, word_derivations, CriterionImplementationStrategy, WordDerivationsCache,
14+
};
1315
use crate::Result;
1416

1517
/// To be able to divide integers by the number of words in the query
@@ -30,17 +32,23 @@ pub struct Attribute<'t> {
3032
parent: Box<dyn Criterion + 't>,
3133
linear_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
3234
set_buckets: Option<BinaryHeap<Branch<'t>>>,
35+
implementation_strategy: CriterionImplementationStrategy,
3336
}
3437

3538
impl<'t> Attribute<'t> {
36-
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
39+
pub fn new(
40+
ctx: &'t dyn Context<'t>,
41+
parent: Box<dyn Criterion + 't>,
42+
implementation_strategy: CriterionImplementationStrategy,
43+
) -> Self {
3744
Attribute {
3845
ctx,
3946
state: None,
4047
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
4148
parent,
4249
linear_buckets: None,
4350
set_buckets: None,
51+
implementation_strategy,
4452
}
4553
}
4654
}
@@ -64,7 +72,15 @@ impl<'t> Criterion for Attribute<'t> {
6472
}));
6573
}
6674
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
67-
let found_candidates = if allowed_candidates.len() < CANDIDATES_THRESHOLD {
75+
let found_candidates = if matches!(
76+
self.implementation_strategy,
77+
CriterionImplementationStrategy::OnlyIterative
78+
) || (matches!(
79+
self.implementation_strategy,
80+
CriterionImplementationStrategy::Dynamic
81+
) && allowed_candidates.len()
82+
< CANDIDATES_THRESHOLD)
83+
{
6884
let linear_buckets = match self.linear_buckets.as_mut() {
6985
Some(linear_buckets) => linear_buckets,
7086
None => {

milli/src/search/criteria/mod.rs

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use self::r#final::Final;
1414
use self::typo::Typo;
1515
use self::words::Words;
1616
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
17+
use super::CriterionImplementationStrategy;
1718
use crate::search::criteria::geo::Geo;
1819
use crate::search::{word_derivations, Distinct, WordDerivationsCache};
1920
use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result};
@@ -377,6 +378,7 @@ impl<'t> CriteriaBuilder<'t> {
377378
sort_criteria: Option<Vec<AscDescName>>,
378379
exhaustive_number_hits: bool,
379380
distinct: Option<D>,
381+
implementation_strategy: CriterionImplementationStrategy,
380382
) -> Result<Final<'t>> {
381383
use crate::criterion::Criterion as Name;
382384

@@ -402,12 +404,14 @@ impl<'t> CriteriaBuilder<'t> {
402404
self.rtxn,
403405
criterion,
404406
field.to_string(),
407+
implementation_strategy,
405408
)?),
406409
AscDescName::Desc(Member::Field(field)) => Box::new(AscDesc::desc(
407410
self.index,
408411
self.rtxn,
409412
criterion,
410413
field.to_string(),
414+
implementation_strategy,
411415
)?),
412416
AscDescName::Asc(Member::Geo(point)) => {
413417
Box::new(Geo::asc(self.index, self.rtxn, criterion, *point)?)
@@ -421,15 +425,27 @@ impl<'t> CriteriaBuilder<'t> {
421425
}
422426
None => criterion,
423427
},
424-
Name::Proximity => Box::new(Proximity::new(self, criterion)),
425-
Name::Attribute => Box::new(Attribute::new(self, criterion)),
426-
Name::Exactness => Box::new(Exactness::new(self, criterion, &primitive_query)?),
427-
Name::Asc(field) => {
428-
Box::new(AscDesc::asc(self.index, self.rtxn, criterion, field)?)
428+
Name::Proximity => {
429+
Box::new(Proximity::new(self, criterion, implementation_strategy))
429430
}
430-
Name::Desc(field) => {
431-
Box::new(AscDesc::desc(self.index, self.rtxn, criterion, field)?)
431+
Name::Attribute => {
432+
Box::new(Attribute::new(self, criterion, implementation_strategy))
432433
}
434+
Name::Exactness => Box::new(Exactness::new(self, criterion, &primitive_query)?),
435+
Name::Asc(field) => Box::new(AscDesc::asc(
436+
self.index,
437+
self.rtxn,
438+
criterion,
439+
field,
440+
implementation_strategy,
441+
)?),
442+
Name::Desc(field) => Box::new(AscDesc::desc(
443+
self.index,
444+
self.rtxn,
445+
criterion,
446+
field,
447+
implementation_strategy,
448+
)?),
433449
};
434450
}
435451

milli/src/search/criteria/proximity.rs

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use super::{
1111
};
1212
use crate::search::criteria::InitialCandidates;
1313
use crate::search::query_tree::{maximum_proximity, Operation, Query, QueryKind};
14-
use crate::search::{build_dfa, WordDerivationsCache};
14+
use crate::search::{build_dfa, CriterionImplementationStrategy, WordDerivationsCache};
1515
use crate::{Position, Result};
1616

1717
type Cache = HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>;
@@ -33,10 +33,15 @@ pub struct Proximity<'t> {
3333
parent: Box<dyn Criterion + 't>,
3434
candidates_cache: Cache,
3535
plane_sweep_cache: Option<btree_map::IntoIter<u8, RoaringBitmap>>,
36+
implementation_strategy: CriterionImplementationStrategy,
3637
}
3738

3839
impl<'t> Proximity<'t> {
39-
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
40+
pub fn new(
41+
ctx: &'t dyn Context<'t>,
42+
parent: Box<dyn Criterion + 't>,
43+
implementation_strategy: CriterionImplementationStrategy,
44+
) -> Self {
4045
Proximity {
4146
ctx,
4247
state: None,
@@ -45,6 +50,7 @@ impl<'t> Proximity<'t> {
4550
parent,
4651
candidates_cache: Cache::new(),
4752
plane_sweep_cache: None,
53+
implementation_strategy,
4854
}
4955
}
5056
}
@@ -72,8 +78,15 @@ impl<'t> Criterion for Proximity<'t> {
7278
self.state = None; // reset state
7379
}
7480
Some((_, query_tree, allowed_candidates)) => {
75-
let mut new_candidates = if allowed_candidates.len() <= CANDIDATES_THRESHOLD
76-
&& self.proximity > PROXIMITY_THRESHOLD
81+
let mut new_candidates = if matches!(
82+
self.implementation_strategy,
83+
CriterionImplementationStrategy::OnlyIterative
84+
) || (matches!(
85+
self.implementation_strategy,
86+
CriterionImplementationStrategy::Dynamic
87+
) && allowed_candidates.len()
88+
<= CANDIDATES_THRESHOLD
89+
&& self.proximity > PROXIMITY_THRESHOLD)
7790
{
7891
if let Some(cache) = self.plane_sweep_cache.as_mut() {
7992
match cache.next() {

0 commit comments

Comments
 (0)