Skip to content

Commit e2d9623

Browse files
authored
Revert lenient start and end anchors quickwit-oss#6089 (quickwit-oss#6098)
* Revert lenient start and end anchors * Improve api test stability
1 parent 79757e9 commit e2d9623

File tree

10 files changed

+117
-21
lines changed

10 files changed

+117
-21
lines changed

quickwit/quickwit-query/src/query_ast/regex_query.rs

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,6 @@ impl RegexQuery {
6060
};
6161
let field_type = field_entry.field_type();
6262

63-
// tantivy_fst matches `(re)` as if it was `^(re)$` and errors if the
64-
// anchors are present. We strip them to make the queries more lenient
65-
// (matches Elasticsearch behavior).
66-
let stripped_regex = self.regex.strip_prefix('^').unwrap_or(&self.regex);
67-
let stripped_regex = stripped_regex.strip_suffix('$').unwrap_or(stripped_regex);
68-
6963
match field_type {
7064
FieldType::Str(text_options) => {
7165
text_options.get_indexing_options().ok_or_else(|| {
@@ -75,7 +69,7 @@ impl RegexQuery {
7569
))
7670
})?;
7771

78-
Ok((field, None, stripped_regex.to_string()))
72+
Ok((field, None, self.regex.to_string()))
7973
}
8074
FieldType::JsonObject(json_options) => {
8175
json_options.get_text_indexing_options().ok_or_else(|| {
@@ -96,7 +90,7 @@ impl RegexQuery {
9690
// We skip the 1st byte which is a marker to tell this is json. This isn't present
9791
// in the dictionary
9892
let byte_path_prefix = value.as_serialized()[1..].to_owned();
99-
Ok((field, Some(byte_path_prefix), stripped_regex.to_string()))
93+
Ok((field, Some(byte_path_prefix), self.regex.to_string()))
10094
}
10195
_ => Err(InvalidQuery::SchemaError(
10296
"trying to run a regex query on a non-text field".to_string(),

quickwit/rest-api-tests/scenarii/es_compatibility/0020-stats.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,12 @@ expected:
6464
_all:
6565
primaries:
6666
docs:
67-
count: 102
67+
count: 104
6868
total:
6969
segments:
70-
count: 2
70+
count: 3
7171
docs:
72-
count: 102
72+
count: 104
7373
indices:
7474
gharchive:
7575
primaries:

quickwit/rest-api-tests/scenarii/es_compatibility/0021-cat-indices.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ expected:
2626
docs.count: '0'
2727
- index: otel-traces-v0_9
2828
docs.count: '0'
29+
- index: simple_es_compat
30+
docs.count: '2'
2931
---
3032
method: [GET]
3133
engines:

quickwit/rest-api-tests/scenarii/es_compatibility/0031-regex.yaml

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,19 +61,42 @@ expected:
6161
value: 0
6262
relation: "eq"
6363
---
64-
# leading ^ and trailing $ are ignored
64+
# In Elasticsearch, ^ and $ are treated as literal characters, not as anchors, so
65+
# ^pushevent$ only matches if the original term is "^pushevent$". In Quickwit
66+
# this fails (for now) because tantivy-fst returns an error on all zero-width
67+
# assertions.
68+
engines:
69+
- elasticsearch
70+
endpoint: "simple_es_compat/_search"
6571
params:
6672
size: 3
6773
json:
6874
track_total_hits: true
6975
query:
7076
regexp:
71-
type:
72-
value: "pushevent"
77+
keyword_text:
78+
value: "red$"
7379
expected:
7480
hits:
7581
total:
76-
value: 60
82+
value: 0
83+
relation: "eq"
84+
---
85+
engines:
86+
- elasticsearch
87+
endpoint: "simple_es_compat/_search"
88+
params:
89+
size: 3
90+
json:
91+
track_total_hits: true
92+
query:
93+
regexp:
94+
keyword_text:
95+
value: "gold$"
96+
expected:
97+
hits:
98+
total:
99+
value: 1
77100
relation: "eq"
78101
---
79102
# regex in query_string
@@ -87,4 +110,4 @@ expected:
87110
hits:
88111
total:
89112
value: 60
90-
relation: "eq"
113+
relation: "eq"

quickwit/rest-api-tests/scenarii/es_compatibility/_setup.elasticsearch.yaml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ method: DELETE
77
endpoint: empty_index
88
status_code: null
99
---
10+
method: DELETE
11+
endpoint: simple_es_compat
12+
status_code: null
13+
---
1014
# empty index
1115
method: PUT
1216
endpoint: empty_index
@@ -118,10 +122,38 @@ method: PUT
118122
endpoint: gharchive/_settings
119123
json: { "number_of_replicas": 0 }
120124
---
125+
# Create index
126+
method: PUT
127+
endpoint: simple_es_compat
128+
json: {
129+
"mappings": {
130+
"properties": {
131+
"keyword_text": {
132+
"type": "keyword",
133+
}
134+
}
135+
}
136+
}
137+
---
138+
method: PUT
139+
endpoint: simple_es_compat/_settings
140+
json: { "number_of_replicas": 0 }
141+
---
121142
# Ingest documents
122143
method: POST
123144
endpoint: _bulk
124145
params:
125146
refresh: "true"
126147
headers: {"Content-Type": "application/json", "content-encoding": "gzip"}
127148
body_from_file: gharchive-bulk.json.gz
149+
---
150+
method: POST
151+
endpoint: _bulk
152+
params:
153+
refresh: "true"
154+
headers: {"Content-Type": "application/json"}
155+
ndjson:
156+
- {"index":{"_index":"simple_es_compat"}}
157+
- {"keyword_text": "red"}
158+
- {"index":{"_index":"simple_es_compat"}}
159+
- {"keyword_text": "gold$"}

quickwit/rest-api-tests/scenarii/es_compatibility/_setup.quickwit.yaml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@ api_root: http://localhost:7280/api/v1/
1010
endpoint: indexes/empty_index
1111
status_code: null
1212
---
13+
# Delete possibly remaining index
14+
method: DELETE
15+
api_root: http://localhost:7280/api/v1/
16+
endpoint: indexes/simple_es_compat
17+
status_code: null
18+
---
1319
# Create index
1420
method: POST
1521
api_root: http://localhost:7280/api/v1/
@@ -104,3 +110,30 @@ params:
104110
ndjson:
105111
- {"fast_text": "abc-123", "obj": {"nested_text": "abc-123"}}
106112
- {"fast_text": "def-456", "obj": {"nested_text": "ghi-789"}}
113+
114+
---
115+
# Create simple_es_compat index
116+
method: POST
117+
api_root: http://localhost:7280/api/v1/
118+
endpoint: indexes/
119+
json:
120+
version: "0.7"
121+
index_id: simple_es_compat
122+
doc_mapping:
123+
field_mappings:
124+
- name: keyword_text
125+
type: text
126+
fast: true
127+
indexed: true
128+
tokenizer: raw
129+
sleep_after: 1
130+
---
131+
# Ingest documents into simple_es_compat
132+
method: POST
133+
api_root: http://localhost:7280/api/v1/
134+
endpoint: simple_es_compat/ingest
135+
params:
136+
commit: force
137+
ndjson:
138+
- {"keyword_text": "red"}
139+
- {"keyword_text": "gold$"}

quickwit/rest-api-tests/scenarii/es_compatibility/bulk/0007-illegal-index-name.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# allowed characters are different between ES and Quickwit
2+
engines:
3+
- quickwit
14
ndjson:
25
- index: { "_index": "test-index" }
36
- message: Hola, Mundo!

quickwit/rest-api-tests/scenarii/es_compatibility/bulk/_setup.elasticsearch.yaml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
11
# Delete possibly remaining index
22
method: DELETE
3-
endpoint: test-index
4-
status_code: null
5-
---
6-
method: DELETE
7-
endpoint: test-index-pattern-777
3+
endpoint: test-index*
84
status_code: null
95
---
106
method: PUT

quickwit/rest-api-tests/scenarii/es_compatibility/bulk/_setup.quickwit.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@ status_code: null
66
---
77
method: DELETE
88
api_root: http://localhost:7280/api/v1/
9+
endpoint: indexes/test-index-pattern-11
10+
status_code: null
11+
---
12+
method: DELETE
13+
api_root: http://localhost:7280/api/v1/
914
endpoint: indexes/test-index-pattern-777
1015
status_code: null
1116
---

quickwit/rest-api-tests/scenarii/es_compatibility/bulk/_teardown.quickwit.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,12 @@ endpoint: indexes/test-index
44
---
55
method: DELETE
66
api_root: http://localhost:7280/api/v1/
7+
endpoint: indexes/test-index-pattern-11
8+
---
9+
method: DELETE
10+
api_root: http://localhost:7280/api/v1/
11+
endpoint: indexes/test-index-pattern-777
12+
---
13+
method: DELETE
14+
api_root: http://localhost:7280/api/v1/
715
endpoint: templates/test-index-template

0 commit comments

Comments
 (0)