Skip to content

Commit 55e144b

Browse files
committed
ESQL: Fix case-insensitive comparisons to ""
This fixes the compute engine side of case-insensitive `==`. You can trigger it by writing: ``` FROM foo | WHERE TO_LOWER(field) == "" ``` But *only* when we can't push the comparison to Lucene - for example, if `field` is not indexed or is a `text` field. Closes #127431
1 parent 23b7a31 commit 55e144b

File tree

5 files changed

+95
-4
lines changed

5 files changed

+95
-4
lines changed

muted-tests.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -435,9 +435,6 @@ tests:
435435
- class: org.elasticsearch.action.admin.cluster.state.TransportClusterStateActionDisruptionIT
436436
method: testNonLocalRequestAlwaysFindsMasterAndWaitsForMetadata
437437
issue: https://github.com/elastic/elasticsearch/issues/127422
438-
- class: org.elasticsearch.xpack.esql.qa.single_node.PushQueriesIT
439-
method: testPushCaseInsensitiveEqualityOnDefaults
440-
issue: https://github.com/elastic/elasticsearch/issues/127431
441438
- class: org.elasticsearch.action.admin.cluster.state.TransportClusterStateActionDisruptionIT
442439
method: testLocalRequestAlwaysSucceeds
443440
issue: https://github.com/elastic/elasticsearch/issues/127423

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1049,7 +1049,12 @@ public enum Cap {
10491049
/**
10501050
* The {@code _query} API now gives a cast recommendation if multiple types are found in certain instances.
10511051
*/
1052-
SUGGESTED_CAST;
1052+
SUGGESTED_CAST,
1053+
1054+
/**
1055+
* Guards a bug fix matching {@code TO_LOWER(f) == ""}.
1056+
*/
1057+
TO_LOWER_EMPTY_STRING;
10531058

10541059
private final boolean enabled;
10551060

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/InsensitiveEquals.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@
77
package org.elasticsearch.xpack.esql.expression.predicate.operator.comparison;
88

99
import org.apache.lucene.util.BytesRef;
10+
import org.apache.lucene.util.automaton.Automata;
1011
import org.apache.lucene.util.automaton.Automaton;
1112
import org.apache.lucene.util.automaton.ByteRunAutomaton;
13+
import org.apache.lucene.util.automaton.Operations;
1214
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1315
import org.elasticsearch.common.io.stream.StreamInput;
1416
import org.elasticsearch.common.lucene.BytesRefs;
@@ -83,6 +85,10 @@ protected TypeResolution resolveType() {
8385
}
8486

8587
public static Automaton automaton(BytesRef val) {
88+
if (val.length == 0) {
89+
// toCaseInsensitiveString doesn't match empty strings properly so let's do it ourselves
90+
return Automata.makeEmptyString();
91+
}
8692
return AutomatonQueries.toCaseInsensitiveString(val.utf8ToString());
8793
}
8894

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/InsensitiveEqualsTests.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ public void testFold() {
2626
assertTrue(insensitiveEquals(l("foo*"), l("FOO*")).fold(FoldContext.small()));
2727
assertTrue(insensitiveEquals(l("foo?bar"), l("foo?bar")).fold(FoldContext.small()));
2828
assertTrue(insensitiveEquals(l("foo?bar"), l("FOO?BAR")).fold(FoldContext.small()));
29+
assertTrue(insensitiveEquals(l(""), l("")).fold(FoldContext.small()));
30+
2931
assertFalse(insensitiveEquals(l("Foo"), l("fo*")).fold(FoldContext.small()));
3032
assertFalse(insensitiveEquals(l("Fox"), l("fo?")).fold(FoldContext.small()));
3133
assertFalse(insensitiveEquals(l("Foo"), l("*OO")).fold(FoldContext.small()));
@@ -60,6 +62,8 @@ public void testProcess() {
6062
assertTrue(InsensitiveEquals.process(BytesRefs.toBytesRef("foo*"), BytesRefs.toBytesRef("FOO*")));
6163
assertTrue(InsensitiveEquals.process(BytesRefs.toBytesRef("foo?bar"), BytesRefs.toBytesRef("foo?bar")));
6264
assertTrue(InsensitiveEquals.process(BytesRefs.toBytesRef("foo?bar"), BytesRefs.toBytesRef("FOO?BAR")));
65+
assertTrue(InsensitiveEquals.process(BytesRefs.toBytesRef(""), BytesRefs.toBytesRef("")));
66+
6367
assertFalse(InsensitiveEquals.process(BytesRefs.toBytesRef("Foo"), BytesRefs.toBytesRef("fo*")));
6468
assertFalse(InsensitiveEquals.process(BytesRefs.toBytesRef("Fox"), BytesRefs.toBytesRef("fo?")));
6569
assertFalse(InsensitiveEquals.process(BytesRefs.toBytesRef("Foo"), BytesRefs.toBytesRef("*OO")));
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
---
2+
setup:
3+
- requires:
4+
test_runner_features: [ capabilities ]
5+
capabilities:
6+
- method: POST
7+
path: /_query
8+
parameters: [ ]
9+
capabilities: [ query_monitoring ]
10+
reason: "uses query monitoring"
11+
12+
- do:
13+
bulk:
14+
index: "test"
15+
refresh: true
16+
body:
17+
- { "index": { } }
18+
- { "@timestamp": "2023-10-23T13:55:01.543Z", "message": "" }
19+
- { "index": { } }
20+
- { "@timestamp": "2023-10-23T13:55:01.544Z" }
21+
- { "index": { } }
22+
- { "@timestamp": "2023-10-23T13:55:01.545Z", "message": "a" }
23+
24+
---
25+
keyword equals empty string:
26+
- do:
27+
esql.query:
28+
body:
29+
query: 'FROM test | WHERE message.keyword == "" | SORT @timestamp ASC | KEEP @timestamp | LIMIT 10'
30+
31+
- match: { columns.0.name: "@timestamp" }
32+
- match: { columns.0.type: "date" }
33+
- length: { values: 1 }
34+
- match: { values.0.0: 2023-10-23T13:55:01.543Z }
35+
36+
---
37+
keyword to_lower equals empty string:
38+
- do:
39+
esql.query:
40+
body:
41+
query: 'FROM test | WHERE TO_LOWER(message.keyword) == "" | SORT @timestamp ASC | KEEP @timestamp | LIMIT 10'
42+
43+
- match: { columns.0.name: "@timestamp" }
44+
- match: { columns.0.type: "date" }
45+
- length: { values: 1 }
46+
- match: { values.0.0: 2023-10-23T13:55:01.543Z }
47+
48+
---
49+
text equals empty string:
50+
- do:
51+
esql.query:
52+
body:
53+
query: 'FROM test | WHERE message == "" | SORT @timestamp ASC | KEEP @timestamp | LIMIT 10'
54+
55+
- match: { columns.0.name: "@timestamp" }
56+
- match: { columns.0.type: "date" }
57+
- length: { values: 1 }
58+
- match: { values.0.0: 2023-10-23T13:55:01.543Z }
59+
60+
---
61+
text to_lower equals empty string:
62+
- requires:
63+
test_runner_features: [ capabilities ]
64+
capabilities:
65+
- method: POST
66+
path: /_query
67+
parameters: [ ]
68+
capabilities: [ to_lower_empty_string ]
69+
reason: "bug"
70+
71+
- do:
72+
esql.query:
73+
body:
74+
query: 'FROM test | WHERE TO_LOWER(message) == "" | SORT @timestamp ASC | KEEP @timestamp | LIMIT 10'
75+
76+
- match: { columns.0.name: "@timestamp" }
77+
- match: { columns.0.type: "date" }
78+
- length: { values: 1 }
79+
- match: { values.0.0: 2023-10-23T13:55:01.543Z }

0 commit comments

Comments
 (0)