Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/127532.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 127532
summary: Fix case insensitive comparisons to ""
area: ES|QL
type: bug
issues:
- 127431
Original file line number Diff line number Diff line change
Expand Up @@ -912,7 +912,12 @@ public enum Cap {
* Avoid GROK and DISSECT attributes being removed when resolving fields.
* see <a href="https://github.com/elastic/elasticsearch/issues/127468"> ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER] #127468 </a>
*/
KEEP_REGEX_EXTRACT_ATTRIBUTES;
KEEP_REGEX_EXTRACT_ATTRIBUTES,

/**
* Guards a bug fix matching {@code TO_LOWER(f) == ""}.
*/
TO_LOWER_EMPTY_STRING;

private final boolean enabled;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
package org.elasticsearch.xpack.esql.expression.predicate.operator.comparison;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
Expand Down Expand Up @@ -83,6 +84,10 @@ protected TypeResolution resolveType() {
}

/**
 * Builds the automaton used to match {@code val} case-insensitively.
 *
 * @param val the pattern bytes; decoded with {@code utf8ToString()} when non-empty
 * @return an automaton for the empty string when {@code val} has zero length,
 *         otherwise the case-insensitive automaton for the decoded text
 */
public static Automaton automaton(BytesRef val) {
    // toCaseInsensitiveString doesn't match empty strings properly, so the
    // zero-length pattern is given its own dedicated automaton.
    final boolean emptyPattern = val.length == 0;
    return emptyPattern
        ? Automata.makeEmptyString()
        : AutomatonQueries.toCaseInsensitiveString(val.utf8ToString());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ public void testFold() {
assertTrue(insensitiveEquals(l("foo*"), l("FOO*")).fold(FoldContext.small()));
assertTrue(insensitiveEquals(l("foo?bar"), l("foo?bar")).fold(FoldContext.small()));
assertTrue(insensitiveEquals(l("foo?bar"), l("FOO?BAR")).fold(FoldContext.small()));
assertTrue(insensitiveEquals(l(""), l("")).fold(FoldContext.small()));

assertFalse(insensitiveEquals(l("Foo"), l("fo*")).fold(FoldContext.small()));
assertFalse(insensitiveEquals(l("Fox"), l("fo?")).fold(FoldContext.small()));
assertFalse(insensitiveEquals(l("Foo"), l("*OO")).fold(FoldContext.small()));
Expand Down Expand Up @@ -60,6 +62,8 @@ public void testProcess() {
assertTrue(InsensitiveEquals.process(BytesRefs.toBytesRef("foo*"), BytesRefs.toBytesRef("FOO*")));
assertTrue(InsensitiveEquals.process(BytesRefs.toBytesRef("foo?bar"), BytesRefs.toBytesRef("foo?bar")));
assertTrue(InsensitiveEquals.process(BytesRefs.toBytesRef("foo?bar"), BytesRefs.toBytesRef("FOO?BAR")));
assertTrue(InsensitiveEquals.process(BytesRefs.toBytesRef(""), BytesRefs.toBytesRef("")));

assertFalse(InsensitiveEquals.process(BytesRefs.toBytesRef("Foo"), BytesRefs.toBytesRef("fo*")));
assertFalse(InsensitiveEquals.process(BytesRefs.toBytesRef("Fox"), BytesRefs.toBytesRef("fo?")));
assertFalse(InsensitiveEquals.process(BytesRefs.toBytesRef("Foo"), BytesRefs.toBytesRef("*OO")));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
---
setup:
- requires:
test_runner_features: [ capabilities ]
capabilities:
- method: POST
path: /_query
parameters: [ ]
capabilities: [ query_monitoring ]
reason: "uses query monitoring"

- do:
bulk:
index: "test"
refresh: true
body:
- { "index": { } }
- { "@timestamp": "2023-10-23T13:55:01.543Z", "message": "" }
- { "index": { } }
- { "@timestamp": "2023-10-23T13:55:01.544Z" }
- { "index": { } }
- { "@timestamp": "2023-10-23T13:55:01.545Z", "message": "a" }

---
keyword equals empty string:
- do:
esql.query:
body:
query: 'FROM test | WHERE message.keyword == "" | SORT @timestamp ASC | KEEP @timestamp | LIMIT 10'

- match: { columns.0.name: "@timestamp" }
- match: { columns.0.type: "date" }
- length: { values: 1 }
- match: { values.0.0: 2023-10-23T13:55:01.543Z }

---
keyword to_lower equals empty string:
- do:
esql.query:
body:
query: 'FROM test | WHERE TO_LOWER(message.keyword) == "" | SORT @timestamp ASC | KEEP @timestamp | LIMIT 10'

- match: { columns.0.name: "@timestamp" }
- match: { columns.0.type: "date" }
- length: { values: 1 }
- match: { values.0.0: 2023-10-23T13:55:01.543Z }

---
text equals empty string:
- do:
esql.query:
body:
query: 'FROM test | WHERE message == "" | SORT @timestamp ASC | KEEP @timestamp | LIMIT 10'

- match: { columns.0.name: "@timestamp" }
- match: { columns.0.type: "date" }
- length: { values: 1 }
- match: { values.0.0: 2023-10-23T13:55:01.543Z }

---
text to_lower equals empty string:
- requires:
test_runner_features: [ capabilities ]
capabilities:
- method: POST
path: /_query
parameters: [ ]
capabilities: [ to_lower_empty_string ]
reason: "bug"

- do:
esql.query:
body:
query: 'FROM test | WHERE TO_LOWER(message) == "" | SORT @timestamp ASC | KEEP @timestamp | LIMIT 10'

- match: { columns.0.name: "@timestamp" }
- match: { columns.0.type: "date" }
- length: { values: 1 }
- match: { values.0.0: 2023-10-23T13:55:01.543Z }