Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/127532.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 127532
summary: Fix case insensitive comparisons to ""
area: ES|QL
type: bug
issues:
- 127431
3 changes: 0 additions & 3 deletions muted-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -435,9 +435,6 @@ tests:
- class: org.elasticsearch.action.admin.cluster.state.TransportClusterStateActionDisruptionIT
method: testNonLocalRequestAlwaysFindsMasterAndWaitsForMetadata
issue: https://github.com/elastic/elasticsearch/issues/127422
- class: org.elasticsearch.xpack.esql.qa.single_node.PushQueriesIT
method: testPushCaseInsensitiveEqualityOnDefaults
issue: https://github.com/elastic/elasticsearch/issues/127431
- class: org.elasticsearch.action.admin.cluster.state.TransportClusterStateActionDisruptionIT
method: testLocalRequestAlwaysSucceeds
issue: https://github.com/elastic/elasticsearch/issues/127423
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1049,7 +1049,12 @@ public enum Cap {
/**
* The {@code _query} API now gives a cast recommendation if multiple types are found in certain instances.
*/
SUGGESTED_CAST;
SUGGESTED_CAST,

/**
* Guards a bug fix matching {@code TO_LOWER(f) == ""}.
*/
TO_LOWER_EMPTY_STRING;

private final boolean enabled;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
package org.elasticsearch.xpack.esql.expression.predicate.operator.comparison;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
Expand Down Expand Up @@ -83,6 +84,10 @@ protected TypeResolution resolveType() {
}

public static Automaton automaton(BytesRef val) {
if (val.length == 0) {
// toCaseInsensitiveString doesn't match empty strings properly so let's do it ourselves
return Automata.makeEmptyString();
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_search works around this as well with:

        // check if valueForSearch is the same as an empty string
        // if we have a length of zero, just do a regular term query
        if (valueForSearch.length == 0) {
            return termQuery(value, context);
        }

Which makes sense. And it's slightly faster. Here, instead of doing a term query, I just make an automaton for the empty string. We are already pushing a proper term query using the _search infrastructure when possible.

}
return AutomatonQueries.toCaseInsensitiveString(val.utf8ToString());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ public void testFold() {
assertTrue(insensitiveEquals(l("foo*"), l("FOO*")).fold(FoldContext.small()));
assertTrue(insensitiveEquals(l("foo?bar"), l("foo?bar")).fold(FoldContext.small()));
assertTrue(insensitiveEquals(l("foo?bar"), l("FOO?BAR")).fold(FoldContext.small()));
assertTrue(insensitiveEquals(l(""), l("")).fold(FoldContext.small()));

assertFalse(insensitiveEquals(l("Foo"), l("fo*")).fold(FoldContext.small()));
assertFalse(insensitiveEquals(l("Fox"), l("fo?")).fold(FoldContext.small()));
assertFalse(insensitiveEquals(l("Foo"), l("*OO")).fold(FoldContext.small()));
Expand Down Expand Up @@ -60,6 +62,8 @@ public void testProcess() {
assertTrue(InsensitiveEquals.process(BytesRefs.toBytesRef("foo*"), BytesRefs.toBytesRef("FOO*")));
assertTrue(InsensitiveEquals.process(BytesRefs.toBytesRef("foo?bar"), BytesRefs.toBytesRef("foo?bar")));
assertTrue(InsensitiveEquals.process(BytesRefs.toBytesRef("foo?bar"), BytesRefs.toBytesRef("FOO?BAR")));
assertTrue(InsensitiveEquals.process(BytesRefs.toBytesRef(""), BytesRefs.toBytesRef("")));

assertFalse(InsensitiveEquals.process(BytesRefs.toBytesRef("Foo"), BytesRefs.toBytesRef("fo*")));
assertFalse(InsensitiveEquals.process(BytesRefs.toBytesRef("Fox"), BytesRefs.toBytesRef("fo?")));
assertFalse(InsensitiveEquals.process(BytesRefs.toBytesRef("Foo"), BytesRefs.toBytesRef("*OO")));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
---
# Shared fixture for every test in this suite: gates on a `/_query` capability, then
# bulk-indexes three documents into "test" (refreshed so they are immediately searchable):
#   - 13:55:01.543Z with an empty-string message,
#   - 13:55:01.544Z with no message field at all,
#   - 13:55:01.545Z with message "a".
setup:
- requires:
test_runner_features: [ capabilities ]
capabilities:
- method: POST
path: /_query
parameters: [ ]
# NOTE(review): none of the tests visible in this suite appear to exercise query
# monitoring; this gate and its reason look carried over from another suite — confirm.
capabilities: [ query_monitoring ]
reason: "uses query monitoring"

- do:
bulk:
index: "test"
refresh: true
body:
- { "index": { } }
- { "@timestamp": "2023-10-23T13:55:01.543Z", "message": "" }
- { "index": { } }
- { "@timestamp": "2023-10-23T13:55:01.544Z" }
- { "index": { } }
- { "@timestamp": "2023-10-23T13:55:01.545Z", "message": "a" }

---
# Plain `== ""` against `message.keyword`: expects a single row, the 13:55:01.543Z
# document (the one indexed with an empty-string message).
# NOTE(review): setup defines no explicit mapping, so `message.keyword` presumably comes
# from dynamic mapping — confirm.
keyword equals empty string:
- do:
esql.query:
body:
query: 'FROM test | WHERE message.keyword == "" | SORT @timestamp ASC | KEEP @timestamp | LIMIT 10'

- match: { columns.0.name: "@timestamp" }
- match: { columns.0.type: "date" }
- length: { values: 1 }
- match: { values.0.0: 2023-10-23T13:55:01.543Z }

---
# Same filter as the plain keyword comparison, but wrapped in TO_LOWER: expects a single
# row, the 13:55:01.543Z document. Unlike the text variant of this test, it carries no
# extra capability requirement of its own.
keyword to_lower equals empty string:
- do:
esql.query:
body:
query: 'FROM test | WHERE TO_LOWER(message.keyword) == "" | SORT @timestamp ASC | KEEP @timestamp | LIMIT 10'

- match: { columns.0.name: "@timestamp" }
- match: { columns.0.type: "date" }
- length: { values: 1 }
- match: { values.0.0: 2023-10-23T13:55:01.543Z }

---
# Plain `== ""` against the `message` field itself (rather than its `.keyword` subfield):
# expects a single row, the 13:55:01.543Z document.
text equals empty string:
- do:
esql.query:
body:
query: 'FROM test | WHERE message == "" | SORT @timestamp ASC | KEEP @timestamp | LIMIT 10'

- match: { columns.0.name: "@timestamp" }
- match: { columns.0.type: "date" }
- length: { values: 1 }
- match: { values.0.0: 2023-10-23T13:55:01.543Z }

---
# TO_LOWER on the `message` field compared with "": expects a single row, the
# 13:55:01.543Z document. This is the only test in the suite with its own `requires`
# clause: it runs only where `/_query` advertises `to_lower_empty_string`, the capability
# that guards the `TO_LOWER(f) == ""` bug fix.
text to_lower equals empty string:
- requires:
test_runner_features: [ capabilities ]
capabilities:
- method: POST
path: /_query
parameters: [ ]
capabilities: [ to_lower_empty_string ]
reason: "bug"

- do:
esql.query:
body:
query: 'FROM test | WHERE TO_LOWER(message) == "" | SORT @timestamp ASC | KEEP @timestamp | LIMIT 10'

- match: { columns.0.name: "@timestamp" }
- match: { columns.0.type: "date" }
- length: { values: 1 }
- match: { values.0.0: 2023-10-23T13:55:01.543Z }
Loading