Skip to content

Commit 1eeacc9

Browse files
authored
Make regex lenient to start and end anchors (quickwit-oss#6089)
1 parent 98ced0f commit 1eeacc9

File tree

2 files changed

+51
-2
lines changed

2 files changed

+51
-2
lines changed

quickwit/quickwit-query/src/query_ast/regex_query.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,12 @@ impl RegexQuery {
6060
};
6161
let field_type = field_entry.field_type();
6262

63+
// tantivy_fst matches `(re)` as if it was `^(re)$` and errors if the
64+
// anchors are present. We strip them to make the queries more lenient
65+
// (matches Elasticsearch behavior).
66+
let stripped_regex = self.regex.strip_prefix('^').unwrap_or(&self.regex);
67+
let stripped_regex = stripped_regex.strip_suffix('$').unwrap_or(stripped_regex);
68+
6369
match field_type {
6470
FieldType::Str(text_options) => {
6571
text_options.get_indexing_options().ok_or_else(|| {
@@ -69,7 +75,7 @@ impl RegexQuery {
6975
))
7076
})?;
7177

72-
Ok((field, None, self.regex.clone()))
78+
Ok((field, None, stripped_regex.to_string()))
7379
}
7480
FieldType::JsonObject(json_options) => {
7581
json_options.get_text_indexing_options().ok_or_else(|| {
@@ -90,7 +96,7 @@ impl RegexQuery {
9096
// We skip the 1st byte which is a marker to tell this is json. This isn't present
9197
// in the dictionary
9298
let byte_path_prefix = value.as_serialized()[1..].to_owned();
93-
Ok((field, Some(byte_path_prefix), self.regex.clone()))
99+
Ok((field, Some(byte_path_prefix), stripped_regex.to_string()))
94100
}
95101
_ => Err(InvalidQuery::SchemaError(
96102
"trying to run a regex query on a non-text field".to_string(),

quickwit/rest-api-tests/scenarii/es_compatibility/0031-regex.yaml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,21 @@ expected:
1313
value: 100
1414
relation: "eq"
1515
---
16+
# Regex always matches from start to end (`(re)` is equivalent to `^(re)$`)
17+
params:
18+
size: 3
19+
json:
20+
track_total_hits: true
21+
query:
22+
regexp:
23+
type:
24+
value: "event"
25+
expected:
26+
hits:
27+
total:
28+
value: 0
29+
relation: "eq"
30+
---
1631
# Regex with case_insensitive flag
1732
params:
1833
size: 3
@@ -45,3 +60,31 @@ expected:
4560
total:
4661
value: 0
4762
relation: "eq"
63+
---
64+
# leading ^ and trailing $ are ignored
65+
params:
66+
size: 3
67+
json:
68+
track_total_hits: true
69+
query:
70+
regexp:
71+
type:
72+
value: "pushevent"
73+
expected:
74+
hits:
75+
total:
76+
value: 60
77+
relation: "eq"
78+
---
79+
# regex in query_string
80+
params:
81+
size: 10
82+
json:
83+
query:
84+
query_string:
85+
query: "type:/pushevent/"
86+
expected:
87+
hits:
88+
total:
89+
value: 60
90+
relation: "eq"

0 commit comments

Comments
 (0)