Skip to content

Commit a2c5f1d

Browse files
authored
4829 minimum should match (#5488)
* Added support for minimum_should_match in quickwit es API. Fixing rest compatibility tests. Closes #4828 * CR comments
1 parent 4dc60f0 commit a2c5f1d

File tree

14 files changed

+517
-75
lines changed

14 files changed

+517
-75
lines changed

docs/reference/es_compatible_api.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,7 @@ The following query types are supported.
434434
| `should` | `JsonObject[]` (Optional) | Sub-queries that should match the documents. | [] |
435435
| `filter` | `JsonObject[]` | Like must queries, but the match does not influence the `_score`. | [] |
436436
| `boost` | `Number` | Multiplier boost for score computation. | 1.0 |
437+
| `minimum_should_match` | `Number` or `Str` | If present, Quickwit only matches documents for which at least `minimum_should_match` of the `should` clauses match. `2`, `-1`, `"10%"` and `"-10%"` are supported. | |
437438

438439
### `range`
439440

quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs

Lines changed: 189 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,79 @@ pub struct BoolQuery {
4646
filter: Vec<ElasticQueryDslInner>,
4747
#[serde(default)]
4848
pub boost: Option<NotNaNf32>,
49+
#[serde(default)]
50+
pub minimum_should_match: Option<MinimumShouldMatch>,
51+
}
52+
53+
#[derive(Deserialize, Debug, Eq, PartialEq, Clone)]
54+
#[serde(untagged)]
55+
pub enum MinimumShouldMatch {
56+
Str(String),
57+
Int(isize),
58+
}
59+
60+
impl MinimumShouldMatch {
61+
fn resolve(&self, num_should_clauses: usize) -> anyhow::Result<MinimumShouldMatchResolved> {
62+
match self {
63+
MinimumShouldMatch::Str(minimum_should_match_dsl) => {
64+
let Some(percentage) = parse_percentage(minimum_should_match_dsl) else {
65+
anyhow::bail!(
66+
"Unsupported minimum should match dsl {}. quickwit currently only \
67+
supports the format '35%' and `-35%`",
68+
minimum_should_match_dsl
69+
);
70+
};
71+
let min_should_match = percentage * num_should_clauses as isize / 100;
72+
MinimumShouldMatch::Int(min_should_match).resolve(num_should_clauses)
73+
}
74+
MinimumShouldMatch::Int(neg_num_missing_should_clauses)
75+
if *neg_num_missing_should_clauses < 0 =>
76+
{
77+
let num_missing_should_clauses = -neg_num_missing_should_clauses as usize;
78+
if num_missing_should_clauses >= num_should_clauses {
79+
Ok(MinimumShouldMatchResolved::Unspecified)
80+
} else {
81+
Ok(MinimumShouldMatchResolved::Min(
82+
num_should_clauses - num_missing_should_clauses,
83+
))
84+
}
85+
}
86+
MinimumShouldMatch::Int(num_required_should_clauses) => {
87+
let num_required_should_clauses: usize = *num_required_should_clauses as usize;
88+
if num_required_should_clauses > num_should_clauses {
89+
Ok(MinimumShouldMatchResolved::NoMatch)
90+
} else {
91+
Ok(MinimumShouldMatchResolved::Min(num_required_should_clauses))
92+
}
93+
}
94+
}
95+
}
96+
}
97+
98+
#[derive(Deserialize, Debug, Copy, Clone, Eq, PartialEq)]
99+
enum MinimumShouldMatchResolved {
100+
Unspecified,
101+
Min(usize),
102+
NoMatch,
103+
}
104+
105+
/// Parses a percentage such as `"35%"` or `"-35%"` and returns its signed integer value.
///
/// Returns `None` if `s` does not end with `%`, if the part before the `%` is not a valid
/// integer, or if the value lies outside `-100..=100`.
fn parse_percentage(s: &str) -> Option<isize> {
    let percentage: isize = s.strip_suffix('%')?.parse().ok()?;
    // A range check is used instead of `abs()`: `isize::MIN.abs()` overflows, which panics
    // in debug builds and lets the value through in release builds.
    (-100..=100).contains(&percentage).then_some(percentage)
}
113+
114+
impl BoolQuery {
115+
fn resolve_minimum_should_match(&self) -> anyhow::Result<MinimumShouldMatchResolved> {
116+
let num_should_clauses = self.should.len();
117+
let Some(minimum_should_match) = &self.minimum_should_match else {
118+
return Ok(MinimumShouldMatchResolved::Unspecified);
119+
};
120+
minimum_should_match.resolve(num_should_clauses)
121+
}
49122
}
50123

51124
impl BoolQuery {
@@ -57,6 +130,7 @@ impl BoolQuery {
57130
should: children,
58131
filter: Vec::new(),
59132
boost: None,
133+
minimum_should_match: None,
60134
}
61135
}
62136
}
@@ -70,11 +144,25 @@ fn convert_vec(query_dsls: Vec<ElasticQueryDslInner>) -> anyhow::Result<Vec<Quer
70144

71145
impl ConvertibleToQueryAst for BoolQuery {
72146
fn convert_to_query_ast(self) -> anyhow::Result<QueryAst> {
147+
let minimum_should_match_resolved = self.resolve_minimum_should_match()?;
148+
let must = convert_vec(self.must)?;
149+
let must_not = convert_vec(self.must_not)?;
150+
let should = convert_vec(self.should)?;
151+
let filter = convert_vec(self.filter)?;
152+
153+
let minimum_should_match_opt = match minimum_should_match_resolved {
154+
MinimumShouldMatchResolved::Unspecified => None,
155+
MinimumShouldMatchResolved::Min(minimum_should_match) => Some(minimum_should_match),
156+
MinimumShouldMatchResolved::NoMatch => {
157+
return Ok(QueryAst::MatchNone);
158+
}
159+
};
73160
let bool_query_ast = query_ast::BoolQuery {
74-
must: convert_vec(self.must)?,
75-
must_not: convert_vec(self.must_not)?,
76-
should: convert_vec(self.should)?,
77-
filter: convert_vec(self.filter)?,
161+
must,
162+
must_not,
163+
should,
164+
filter,
165+
minimum_should_match: minimum_should_match_opt,
78166
};
79167
Ok(bool_query_ast.into())
80168
}
@@ -88,8 +176,13 @@ impl From<BoolQuery> for ElasticQueryDslInner {
88176

89177
#[cfg(test)]
90178
mod tests {
91-
use crate::elastic_query_dsl::bool_query::BoolQuery;
179+
use super::parse_percentage;
180+
use crate::elastic_query_dsl::bool_query::{
181+
BoolQuery, MinimumShouldMatch, MinimumShouldMatchResolved,
182+
};
92183
use crate::elastic_query_dsl::term_query::term_query_from_field_value;
184+
use crate::elastic_query_dsl::ConvertibleToQueryAst;
185+
use crate::query_ast::QueryAst;
93186

94187
#[test]
95188
fn test_dsl_bool_query_deserialize_simple() {
@@ -111,6 +204,7 @@ mod tests {
111204
should: Vec::new(),
112205
filter: Vec::new(),
113206
boost: None,
207+
minimum_should_match: None
114208
}
115209
);
116210
}
@@ -130,6 +224,7 @@ mod tests {
130224
should: Vec::new(),
131225
filter: vec![term_query_from_field_value("product_id", "2").into(),],
132226
boost: None,
227+
minimum_should_match: None,
133228
}
134229
);
135230
}
@@ -152,7 +247,96 @@ mod tests {
152247
should: Vec::new(),
153248
filter: Vec::new(),
154249
boost: None,
250+
minimum_should_match: None,
155251
}
156252
);
157253
}
254+
255+
#[test]
256+
fn test_dsl_bool_query_deserialize_minimum_should_match() {
257+
let bool_query: super::BoolQuery = serde_json::from_str(
258+
r#"{
259+
"must": [
260+
{ "term": {"product_id": {"value": "1" }} },
261+
{ "term": {"product_id": {"value": "2" }} }
262+
],
263+
"minimum_should_match": -2
264+
}"#,
265+
)
266+
.unwrap();
267+
assert_eq!(
268+
bool_query.minimum_should_match.as_ref().unwrap(),
269+
&MinimumShouldMatch::Int(-2)
270+
);
271+
}
272+
273+
#[test]
274+
fn test_dsl_query_with_minimum_should_match() {
275+
let bool_query_json = r#"{
276+
"should": [
277+
{ "term": {"product_id": {"value": "1" }} },
278+
{ "term": {"product_id": {"value": "2" }} },
279+
{ "term": {"product_id": {"value": "3" }} }
280+
],
281+
"minimum_should_match": 2
282+
}"#;
283+
let bool_query: BoolQuery = serde_json::from_str(bool_query_json).unwrap();
284+
assert_eq!(bool_query.should.len(), 3);
285+
assert_eq!(
286+
bool_query.minimum_should_match.as_ref().unwrap(),
287+
&super::MinimumShouldMatch::Int(2)
288+
);
289+
let QueryAst::Bool(bool_query_ast) = bool_query.convert_to_query_ast().unwrap() else {
290+
panic!();
291+
};
292+
assert_eq!(bool_query_ast.should.len(), 3);
293+
assert_eq!(bool_query_ast.minimum_should_match, Some(2));
294+
}
295+
296+
#[test]
297+
fn test_parse_percentage() {
298+
assert_eq!(parse_percentage("10%"), Some(10));
299+
assert_eq!(parse_percentage("101%"), None);
300+
assert_eq!(parse_percentage("0%"), Some(0));
301+
assert_eq!(parse_percentage("100%"), Some(100));
302+
assert_eq!(parse_percentage("-20%"), Some(-20));
303+
assert_eq!(parse_percentage("20"), None);
304+
assert_eq!(parse_percentage("20a%"), None);
305+
}
306+
307+
#[test]
308+
fn test_resolve_minimum_should_match() {
309+
assert_eq!(
310+
MinimumShouldMatch::Str("30%".to_string())
311+
.resolve(10)
312+
.unwrap(),
313+
MinimumShouldMatchResolved::Min(3)
314+
);
315+
// negative percentage: that share of the should clauses is allowed to be missing
316+
assert_eq!(
317+
MinimumShouldMatch::Str("-30%".to_string())
318+
.resolve(10)
319+
.unwrap(),
320+
MinimumShouldMatchResolved::Min(7)
321+
);
322+
assert!(MinimumShouldMatch::Str("-30!".to_string())
323+
.resolve(10)
324+
.is_err());
325+
assert_eq!(
326+
MinimumShouldMatch::Int(10).resolve(11).unwrap(),
327+
MinimumShouldMatchResolved::Min(10)
328+
);
329+
assert_eq!(
330+
MinimumShouldMatch::Int(-10).resolve(11).unwrap(),
331+
MinimumShouldMatchResolved::Min(1)
332+
);
333+
assert_eq!(
334+
MinimumShouldMatch::Int(-12).resolve(11).unwrap(),
335+
MinimumShouldMatchResolved::Unspecified
336+
);
337+
assert_eq!(
338+
MinimumShouldMatch::Int(12).resolve(11).unwrap(),
339+
MinimumShouldMatchResolved::NoMatch
340+
);
341+
}
158342
}

quickwit/quickwit-query/src/query_ast/bool_query.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ pub struct BoolQuery {
4848
pub should: Vec<QueryAst>,
4949
#[serde(default, skip_serializing_if = "Vec::is_empty")]
5050
pub filter: Vec<QueryAst>,
51+
#[serde(default, skip_serializing_if = "Option::is_none")]
52+
pub minimum_should_match: Option<usize>,
5153
}
5254

5355
impl From<BoolQuery> for QueryAst {
@@ -64,7 +66,10 @@ impl BuildTantivyAst for BoolQuery {
6466
search_fields: &[String],
6567
with_validation: bool,
6668
) -> Result<TantivyQueryAst, InvalidQuery> {
67-
let mut boolean_query = super::tantivy_query_ast::TantivyBoolQuery::default();
69+
let mut boolean_query = super::tantivy_query_ast::TantivyBoolQuery {
70+
minimum_should_match: self.minimum_should_match,
71+
..Default::default()
72+
};
6873
for must in &self.must {
6974
let must_leaf = must.build_tantivy_ast_call(
7075
schema,

quickwit/quickwit-query/src/query_ast/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ impl QueryAst {
8282
must_not,
8383
should,
8484
filter,
85+
minimum_should_match,
8586
}) => {
8687
let must = parse_user_query_in_asts(must, default_search_fields)?;
8788
let must_not = parse_user_query_in_asts(must_not, default_search_fields)?;
@@ -92,6 +93,7 @@ impl QueryAst {
9293
must_not,
9394
should,
9495
filter,
96+
minimum_should_match,
9597
}
9698
.into())
9799
}

quickwit/quickwit-query/src/query_ast/range_query.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -420,8 +420,8 @@ mod tests {
420420
type=I64, 1980)), upper_bound: Included(Term(field=6, type=Json, path=hello, \
421421
type=I64, 1989)) } }), Leaf(FastFieldRangeQuery { bounds: BoundsRange { lower_bound: \
422422
Included(Term(field=6, type=Json, path=hello, type=Str, \"1980\")), upper_bound: \
423-
Included(Term(field=6, type=Json, path=hello, type=Str, \"1989\")) } })], filter: [] \
424-
})"
423+
Included(Term(field=6, type=Json, path=hello, type=Str, \"1989\")) } })], filter: \
424+
[], minimum_should_match: None })"
425425
);
426426
}
427427

0 commit comments

Comments
 (0)