diff --git a/docs/changelog/130387.yaml b/docs/changelog/130387.yaml new file mode 100644 index 0000000000000..ea2c95364e932 --- /dev/null +++ b/docs/changelog/130387.yaml @@ -0,0 +1,5 @@ +pr: 130387 +summary: Push `==` to `text` fields to lucene +area: ES|QL +type: enhancement +issues: [] diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java index 8ee639ffc8431..b96d544dc610c 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java @@ -492,7 +492,7 @@ private AnnotatedTextFieldType( KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate, Map meta ) { - super(name, true, store, tsi, isSyntheticSource, syntheticSourceDelegate, meta, false, false); + super(name, true, store, tsi, isSyntheticSource, syntheticSourceDelegate, meta, false, false, false); } public AnnotatedTextFieldType(String name, Map meta) { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 3d2b89f5a1d48..4b3ed767c1bca 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -67,6 +67,7 @@ import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData; import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData; import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData; +import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.SearchExecutionContext; import 
org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.script.field.DelegateDocValuesField; @@ -413,7 +414,8 @@ private TextFieldType buildFieldType( SyntheticSourceHelper.syntheticSourceDelegate(fieldType, multiFields), meta.getValue(), eagerGlobalOrdinals.getValue(), - indexPhrases.getValue() + indexPhrases.getValue(), + matchQueryYieldsCandidateMatchesForEquality() ); if (fieldData.getValue()) { ft.setFielddata(true, freqFilter.getValue()); @@ -422,6 +424,25 @@ private TextFieldType buildFieldType( return ft; } + /** + * Does a `match` query generate all valid candidates for `==`? Meaning, + * if I do a match query for any string, say `foo bar baz`, then that + * query will find all documents that indexed the same string. + *

+ * This should be true for most sanely configured text fields. That's + * just how we use them for search. But it's quite possible to make + * the index analyzer not agree with the search analyzer, for example. + *

+ *

+ * So this implementation is ultra-paranoid. + *

+ */ + private boolean matchQueryYieldsCandidateMatchesForEquality() { + return index.getValue() == Boolean.TRUE + && analyzers.indexAnalyzer.isConfigured() == false + && analyzers.searchAnalyzer.isConfigured() == false; + } + private SubFieldInfo buildPrefixInfo(MapperBuilderContext context, FieldType fieldType, TextFieldType tft) { if (indexPrefixes.get() == null) { return null; @@ -694,6 +715,12 @@ public static class TextFieldType extends StringFieldType { */ private final KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate; + /** + * Does a {@link MatchQueryBuilder} produce all documents + * that might have equal text to the query's value. + */ + private final boolean matchQueryYieldsCandidateMatchesForEquality; + public TextFieldType( String name, boolean indexed, @@ -703,7 +730,8 @@ public TextFieldType( KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate, Map meta, boolean eagerGlobalOrdinals, - boolean indexPhrases + boolean indexPhrases, + boolean matchQueryYieldsCandidateMatchesForEquality ) { super(name, indexed, stored, false, tsi, meta); fielddata = false; @@ -712,6 +740,7 @@ public TextFieldType( this.syntheticSourceDelegate = syntheticSourceDelegate; this.eagerGlobalOrdinals = eagerGlobalOrdinals; this.indexPhrases = indexPhrases; + this.matchQueryYieldsCandidateMatchesForEquality = matchQueryYieldsCandidateMatchesForEquality; } public TextFieldType(String name, boolean indexed, boolean stored, Map meta) { @@ -728,6 +757,7 @@ public TextFieldType(String name, boolean indexed, boolean stored, Map meta) { - super(name, indexed, stored, tsi, false, null, meta, false, false); + super(name, indexed, stored, tsi, false, null, meta, false, false, /* unused */ false); } public ConstantScoreTextFieldType(String name) { diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java 
b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java index 0c227b5411e25..0f27823bc212b 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java @@ -1538,6 +1538,7 @@ private TextFieldMapper.TextFieldType storedTextField(String name) { null, Map.of(), false, + false, false ); } @@ -1552,6 +1553,7 @@ private TextFieldMapper.TextFieldType textFieldWithDelegate(String name, Keyword delegate, Map.of(), false, + false, false ); } diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java index 70452976ca14a..fd60d51f60c22 100644 --- a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java +++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java @@ -10,6 +10,7 @@ import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.elasticsearch.client.Request; import org.elasticsearch.client.Response; import org.elasticsearch.client.ResponseException; @@ -62,8 +63,13 @@ public static List args() { public enum Type { AUTO(false), + CONSTANT_KEYWORD(false), KEYWORD(false), + MATCH_ONLY_TEXT(false), + SEMANTIC_TEXT(true), + TEXT(false), + MATCH_ONLY_TEXT_WITH_KEYWORD(false), SEMANTIC_TEXT_WITH_KEYWORD(true), TEXT_WITH_KEYWORD(false); @@ -87,35 +93,63 @@ public void testEquality() throws IOException { FROM test | WHERE test == 
"%value" """; - String luceneQuery = switch (type) { - case AUTO, TEXT_WITH_KEYWORD -> "#test.keyword:%value -_ignored:test.keyword"; - case KEYWORD -> "test:%value"; - case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD -> "*:*"; - case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; + List luceneQuery = switch (type) { + case AUTO, TEXT_WITH_KEYWORD -> List.of("#test.keyword:%value -_ignored:test.keyword"); + case KEYWORD -> List.of("test:%value"); + case TEXT -> emulateLargeTextTokens(value); + case CONSTANT_KEYWORD, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD -> List.of("*:*"); + case SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> List.of("FieldExistsQuery [field=_primary_term]"); }; ComputeSignature dataNodeSignature = switch (type) { case AUTO, CONSTANT_KEYWORD, KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_QUERY; - case MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; + case MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, TEXT, SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> + ComputeSignature.FILTER_IN_COMPUTE; }; - testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true); + testPushQuery(value, esqlQuery, luceneQuery, dataNodeSignature, true); } public void testEqualityTooBigToPush() throws IOException { - String value = "a".repeat(between(257, 1000)); + String value = "v".repeat(between(257, 1000)); String esqlQuery = """ FROM test | WHERE test == "%value" """; - String luceneQuery = switch (type) { - case AUTO, CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> "*:*"; - case KEYWORD -> "#test:%value #single_value_match(test)"; - case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; + List luceneQuery = switch (type) { + case AUTO, TEXT, TEXT_WITH_KEYWORD -> emulateLargeTextTokens(value); + case CONSTANT_KEYWORD, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD -> List.of("*:*"); + case KEYWORD -> List.of("#test:%value 
#single_value_match(test)"); + case SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> List.of("FieldExistsQuery [field=_primary_term]"); }; ComputeSignature dataNodeSignature = switch (type) { case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY; - case AUTO, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; + case AUTO, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD, TEXT, TEXT_WITH_KEYWORD -> + ComputeSignature.FILTER_IN_COMPUTE; }; - testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, type != Type.KEYWORD); + testPushQuery(value, esqlQuery, luceneQuery, dataNodeSignature, type != Type.KEYWORD); + } + + /** + * {@code NOT !=} should function just like {@code ==}. + */ + public void testNotInequality() throws IOException { + String value = "v".repeat(between(0, 256)); + String esqlQuery = """ + FROM test + | WHERE NOT test != "%value" + """; + List luceneQuery = switch (type) { + case AUTO, TEXT_WITH_KEYWORD -> List.of("#test.keyword:%value -_ignored:test.keyword"); + case KEYWORD -> List.of("test:%value"); + case TEXT -> emulateLargeTextTokens(value); + case CONSTANT_KEYWORD, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD -> List.of("*:*"); + case SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> List.of("FieldExistsQuery [field=_primary_term]"); + }; + ComputeSignature dataNodeSignature = switch (type) { + case AUTO, CONSTANT_KEYWORD, KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_QUERY; + case MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, TEXT, SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> + ComputeSignature.FILTER_IN_COMPUTE; + }; + testPushQuery(value, esqlQuery, luceneQuery, dataNodeSignature, true); } /** @@ -129,38 +163,51 @@ public void testEqualityOrTooBig() throws IOException { | WHERE test == "%value" OR test == "%tooBig" """.replace("%tooBig", tooBig); List luceneQuery = switch (type) { - case AUTO, 
CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> List.of("*:*"); + case AUTO, CONSTANT_KEYWORD, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, TEXT, TEXT_WITH_KEYWORD -> List.of("*:*"); case KEYWORD -> List.of("test:(%tooBig %value)".replace("%tooBig", tooBig), "test:(%value %tooBig)".replace("%tooBig", tooBig)); - case SEMANTIC_TEXT_WITH_KEYWORD -> List.of("FieldExistsQuery [field=_primary_term]"); + case SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> List.of("FieldExistsQuery [field=_primary_term]"); }; ComputeSignature dataNodeSignature = switch (type) { case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY; - case AUTO, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; + case AUTO, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD, TEXT, TEXT_WITH_KEYWORD -> + ComputeSignature.FILTER_IN_COMPUTE; }; testPushQuery(value, esqlQuery, luceneQuery, dataNodeSignature, true); } public void testEqualityOrOther() throws IOException { - String value = "v".repeat(between(0, 256)); + String value = "v".repeat(256); String esqlQuery = """ FROM test | WHERE test == "%value" OR foo == 2 """; - String luceneQuery = switch (type) { - case AUTO, TEXT_WITH_KEYWORD -> "(#test.keyword:%value -_ignored:test.keyword) foo:[2 TO 2]"; - case KEYWORD -> "test:%value foo:[2 TO 2]"; - case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD -> "*:*"; - case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; + List luceneQuery = switch (type) { + case AUTO, TEXT_WITH_KEYWORD -> List.of("(#test.keyword:%value -_ignored:test.keyword) foo:[2 TO 2]"); + case TEXT -> { + List expected = new ArrayList<>(); + for (String q : emulateLargeTextTokens(value)) { + if (q.contains(" ")) { + q = "(" + q + ")"; + } + expected.add(String.format(Locale.ROOT, "%s foo:[2 TO 2]", q)); + expected.add(String.format(Locale.ROOT, "foo:[2 TO 2] %s", q)); + } + yield 
expected; + } + case KEYWORD -> List.of("test:%value foo:[2 TO 2]"); + case CONSTANT_KEYWORD, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD -> List.of("*:*"); + case SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> List.of("FieldExistsQuery [field=_primary_term]"); }; ComputeSignature dataNodeSignature = switch (type) { case AUTO, CONSTANT_KEYWORD, KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_QUERY; - case MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; + case MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD, TEXT -> + ComputeSignature.FILTER_IN_COMPUTE; }; - testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true); + testPushQuery(value, esqlQuery, luceneQuery, dataNodeSignature, true); } public void testEqualityAndOther() throws IOException { - String value = "v".repeat(between(0, 256)); + String value = "v".repeat(256); String esqlQuery = """ FROM test | WHERE test == "%value" AND foo == 1 @@ -168,8 +215,19 @@ public void testEqualityAndOther() throws IOException { List luceneQueryOptions = switch (type) { case AUTO, TEXT_WITH_KEYWORD -> List.of("#test.keyword:%value -_ignored:test.keyword #foo:[1 TO 1]"); case KEYWORD -> List.of("#test:%value #foo:[1 TO 1]"); - case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD -> List.of("foo:[1 TO 1]"); - case SEMANTIC_TEXT_WITH_KEYWORD -> + case TEXT -> { + List expected = new ArrayList<>(); + for (String q : emulateLargeTextTokens(value)) { + if (q.startsWith("#") == false) { + q = "#" + q; + } + expected.add(String.format(Locale.ROOT, "%s #foo:[1 TO 1]", q)); + expected.add(String.format(Locale.ROOT, "#foo:[1 TO 1] %s", q)); + } + yield expected; + } + case CONSTANT_KEYWORD, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD -> List.of("foo:[1 TO 1]"); + case SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> /* * single_value_match is here because there are extra documents hiding in the index * that don't have the 
`foo` field. @@ -178,7 +236,8 @@ public void testEqualityAndOther() throws IOException { }; ComputeSignature dataNodeSignature = switch (type) { case AUTO, CONSTANT_KEYWORD, KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_QUERY; - case MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; + case MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD, TEXT -> + ComputeSignature.FILTER_IN_COMPUTE; }; testPushQuery(value, esqlQuery, luceneQueryOptions, dataNodeSignature, true); } @@ -191,30 +250,32 @@ public void testInequality() throws IOException { """; String luceneQuery = switch (type) { case AUTO, TEXT_WITH_KEYWORD -> "(-test.keyword:%different_value #*:*) _ignored:test.keyword"; - case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD -> "*:*"; + case CONSTANT_KEYWORD, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, TEXT -> "*:*"; case KEYWORD -> "-test:%different_value #*:*"; - case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; + case SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; }; ComputeSignature dataNodeSignature = switch (type) { case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY; - case AUTO, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; + case AUTO, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD, TEXT, TEXT_WITH_KEYWORD -> + ComputeSignature.FILTER_IN_COMPUTE; }; testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true); } public void testInequalityTooBigToPush() throws IOException { - String value = "a".repeat(between(257, 1000)); + String value = "v".repeat(between(257, 1000)); String esqlQuery = """ FROM test | WHERE test != "%value" """; String luceneQuery = switch (type) { - case AUTO, CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> 
"*:*"; + case AUTO, CONSTANT_KEYWORD, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, TEXT, TEXT_WITH_KEYWORD -> "*:*"; case KEYWORD -> "-test:%value #single_value_match(test)"; - case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; + case SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; }; ComputeSignature dataNodeSignature = switch (type) { - case AUTO, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; + case AUTO, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD, TEXT, TEXT_WITH_KEYWORD -> + ComputeSignature.FILTER_IN_COMPUTE; case CONSTANT_KEYWORD -> ComputeSignature.FIND_NONE; case KEYWORD -> ComputeSignature.FILTER_IN_QUERY; }; @@ -222,19 +283,20 @@ public void testInequalityTooBigToPush() throws IOException { } public void testCaseInsensitiveEquality() throws IOException { - String value = "a".repeat(between(0, 256)); + String value = "v".repeat(between(0, 256)); String esqlQuery = """ FROM test | WHERE TO_LOWER(test) == "%value" """; String luceneQuery = switch (type) { - case AUTO, CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> "*:*"; - case KEYWORD -> "".equals(value) ? "test:" : "CaseInsensitiveTermQuery{test:%value}"; - case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; + case AUTO, CONSTANT_KEYWORD, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, TEXT, TEXT_WITH_KEYWORD -> "*:*"; + case KEYWORD -> value.isEmpty() ? 
"test:" : "CaseInsensitiveTermQuery{test:%value}"; + case SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; }; ComputeSignature dataNodeSignature = switch (type) { case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY; - case AUTO, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; + case AUTO, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD, TEXT, TEXT_WITH_KEYWORD -> + ComputeSignature.FILTER_IN_COMPUTE; }; testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true); } @@ -247,12 +309,13 @@ public void testLike() throws IOException { """; String luceneQuery = switch (type) { case KEYWORD -> "test:%value*"; - case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*"; - case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; + case CONSTANT_KEYWORD, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT, TEXT_WITH_KEYWORD -> "*:*"; + case SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; }; ComputeSignature dataNodeSignature = switch (type) { case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY; - case AUTO, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; + case AUTO, TEXT, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> + ComputeSignature.FILTER_IN_COMPUTE; }; testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true); } @@ -264,13 +327,14 @@ public void testLikeList() throws IOException { | WHERE test like ("%value*", "abc*") """; String luceneQuery = switch (type) { - case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*"; - case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; + case 
CONSTANT_KEYWORD, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT, TEXT_WITH_KEYWORD -> "*:*"; + case SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; case KEYWORD -> "test:LIKE(\"%value*\", \"abc*\"), caseInsensitive=false"; }; ComputeSignature dataNodeSignature = switch (type) { case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY; - case AUTO, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; + case AUTO, TEXT, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> + ComputeSignature.FILTER_IN_COMPUTE; }; testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true); } @@ -283,12 +347,13 @@ public void testRLike() throws IOException { """; String luceneQuery = switch (type) { case KEYWORD -> "test:/%value.*/"; - case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*"; - case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; + case CONSTANT_KEYWORD, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT, TEXT_WITH_KEYWORD -> "*:*"; + case SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; }; ComputeSignature dataNodeSignature = switch (type) { case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY; - case AUTO, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; + case AUTO, TEXT, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> + ComputeSignature.FILTER_IN_COMPUTE; }; testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true); } @@ -300,13 +365,14 @@ public void testRLikeList() throws IOException { | WHERE test rlike ("%value.*", "abc.*") """; String luceneQuery = switch (type) { - case CONSTANT_KEYWORD, 
MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*"; - case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; + case CONSTANT_KEYWORD, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT, TEXT_WITH_KEYWORD -> "*:*"; + case SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; case KEYWORD -> "test:RLIKE(\"%value.*\", \"abc.*\"), caseInsensitive=false"; }; ComputeSignature dataNodeSignature = switch (type) { case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY; - case AUTO, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; + case AUTO, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT, SEMANTIC_TEXT_WITH_KEYWORD, TEXT -> + ComputeSignature.FILTER_IN_COMPUTE; }; testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true); } @@ -348,6 +414,7 @@ private void testPushQuery( String replacedQuery = esqlQuery.replaceAll("%value", value).replaceAll("%different_value", differentValue); RestEsqlTestCase.RequestObjectBuilder builder = requestObjectBuilder().query(replacedQuery + "\n| KEEP test"); builder.profile(true); + builder.allowPartialResults(false); Map result = runEsql(builder, new AssertWarnings.NoWarnings(), RestEsqlTestCase.Mode.SYNC); assertResultMap( result, @@ -422,8 +489,9 @@ private void indexValue(String value) throws IOException { }"""; json += switch (type) { case AUTO -> ""; - case CONSTANT_KEYWORD -> justType(); + case CONSTANT_KEYWORD, MATCH_ONLY_TEXT, TEXT -> justType(); case KEYWORD -> keyword(); + case SEMANTIC_TEXT -> justSemanticText(); case SEMANTIC_TEXT_WITH_KEYWORD -> semanticTextWithKeyword(); case TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD -> typeWithKeyword(); }; @@ -488,6 +556,19 @@ private String typeWithKeyword() { }""".replace("%type", type.name().replace("_WITH_KEYWORD", "").toLowerCase(Locale.ROOT)); } + private String justSemanticText() 
{ + return """ + , + "mappings": { + "properties": { + "test": { + "type": "semantic_text", + "inference_id": "test" + } + } + }"""; + } + private String semanticTextWithKeyword() { return """ , @@ -527,7 +608,7 @@ protected String getTestRestCluster() { @Override protected boolean preserveClusterUponCompletion() { - // Preserve the cluser to speed up the semantic_text tests + // Preserve the cluster to speed up the semantic_text tests return true; } @@ -554,4 +635,15 @@ public void setUpTextEmbeddingInferenceEndpoint() throws IOException { """); adminClient().performRequest(request); } + + private List emulateLargeTextTokens(String value) { + // The default tokenizer splits at 255 characters + if (value.length() <= StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH) { + return List.of("test:" + "v".repeat(value.length())); + } + String first = "#test:" + "v".repeat(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); + String rest = "#test:" + "v".repeat(value.length() % StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); + // We don't know what order they'll show up, so either is fine. 
+ return List.of(first + " " + rest, rest + " " + first); + } } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index ed15caa17ad3d..dec5a9d3b4070 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -308,6 +308,11 @@ public boolean isSingleValue(FieldName field) { public boolean canUseEqualityOnSyntheticSourceDelegate(FieldName name, String value) { return false; } + + @Override + public boolean matchQueryYieldsCandidateMatchesForEquality(String name) { + return false; + } } /** diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/TranslationAware.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/TranslationAware.java index 730cccb4dce45..b95d994be482b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/TranslationAware.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/TranslationAware.java @@ -14,6 +14,7 @@ import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.querydsl.query.Query; +import org.elasticsearch.xpack.esql.expression.predicate.logical.Not; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; import org.elasticsearch.xpack.esql.planner.TranslatorHandler; @@ -137,11 +138,19 @@ public FinishedTranslatable finish() { return finish; } + /** + * Essentially the {@link TranslationAware#translatable} + * implementation for the {@link Not} expression. 
When you wrap an expression + * in {@link Not} the result is mostly pushable in the same + * way as the original expression. But there are some expressions that + * need rechecks or can't be pushed at all. This handles that. + */ public Translatable negate() { - if (this == YES_BUT_RECHECK_NEGATED) { - return RECHECK; - } - return this; + return switch (this) { + case YES_BUT_RECHECK_NEGATED -> Translatable.RECHECK; + case RECHECK -> Translatable.NO; + default -> this; + }; } /** diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/Equals.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/Equals.java index 01f57342da1f0..b0b3679b84faa 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/Equals.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/Equals.java @@ -10,6 +10,7 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.time.DateUtils; import org.elasticsearch.compute.ann.Evaluator; +import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; @@ -24,6 +25,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; import org.elasticsearch.xpack.esql.planner.TranslatorHandler; import org.elasticsearch.xpack.esql.querydsl.query.EqualsSyntheticSourceDelegate; +import org.elasticsearch.xpack.esql.querydsl.query.MatchQuery; import org.elasticsearch.xpack.esql.querydsl.query.SingleValueQuery; import java.time.ZoneId; @@ -129,26 +131,37 @@ public Equals(Source source, Expression left, Expression right, ZoneId zoneId) { @Override public Translatable
translatable(LucenePushdownPredicates pushdownPredicates) { - if (right() instanceof Literal lit) { - if (left().dataType() == DataType.TEXT && left() instanceof FieldAttribute fa) { - if (pushdownPredicates.canUseEqualityOnSyntheticSourceDelegate(fa, ((BytesRef) lit.value()).utf8ToString())) { - return Translatable.YES_BUT_RECHECK_NEGATED; - } - } + if (right() instanceof Literal rhs && left().dataType() == DataType.TEXT && left() instanceof FieldAttribute lhs) { + return translatableText(pushdownPredicates, lhs, ((BytesRef) rhs.value()).utf8ToString()); + } + return super.translatable(pushdownPredicates); + } + + private Translatable translatableText(LucenePushdownPredicates pushdownPredicates, FieldAttribute lhs, String rhs) { + if (pushdownPredicates.canUseEqualityOnSyntheticSourceDelegate(lhs, rhs)) { + return Translatable.YES_BUT_RECHECK_NEGATED; + } + if (pushdownPredicates.matchQueryYieldsCandidateMatchesForEquality(lhs)) { + return Translatable.RECHECK; } return super.translatable(pushdownPredicates); } @Override public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) { - if (right() instanceof Literal lit) { - if (left().dataType() == DataType.TEXT && left() instanceof FieldAttribute fa) { - String value = ((BytesRef) lit.value()).utf8ToString(); - if (pushdownPredicates.canUseEqualityOnSyntheticSourceDelegate(fa, value)) { - String name = handler.nameOf(fa); - return new SingleValueQuery(new EqualsSyntheticSourceDelegate(source(), name, value), name, true); - } - } + if (right() instanceof Literal rhs && left().dataType() == DataType.TEXT && left() instanceof FieldAttribute lhs) { + return asQueryText(pushdownPredicates, handler, lhs, ((BytesRef) rhs.value()).utf8ToString()); + } + return handler.forceToSingleValueQuery(left(), super.asQuery(pushdownPredicates, handler)); + } + + private Query asQueryText(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler, FieldAttribute lhs, String rhs) { + String 
name = handler.nameOf(lhs); + if (pushdownPredicates.canUseEqualityOnSyntheticSourceDelegate(lhs, rhs)) { + return new SingleValueQuery(new EqualsSyntheticSourceDelegate(source(), name, rhs), name, true); + } + if (pushdownPredicates.matchQueryYieldsCandidateMatchesForEquality(lhs)) { + return new MatchQuery(source(), name, rhs, Map.of(MatchQueryBuilder.OPERATOR_FIELD.getPreferredName(), "AND")); } return super.asQuery(pushdownPredicates, handler); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java index 69ef99ba04d15..f2562a66ef125 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java @@ -66,10 +66,7 @@ import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.ipToString; import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.versionToString; -public abstract class EsqlBinaryComparison extends BinaryComparison - implements - EvaluatorMapper, - TranslationAware.SingleValueTranslationAware { +public abstract class EsqlBinaryComparison extends BinaryComparison implements EvaluatorMapper, TranslationAware { private static final Logger logger = LogManager.getLogger(EsqlBinaryComparison.class); @@ -367,12 +364,7 @@ public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHand ); Query translated = translateOutOfRangeComparisons(); - return translated != null ? translated : translate(handler); - } - - @Override - public Expression singleValueField() { - return left(); + return translated != null ? 
translated : handler.forceToSingleValueQuery(left(), translate(handler)); } private Query translate(TranslatorHandler handler) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/GreaterThan.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/GreaterThan.java index 7205f66749b0b..33d1d7346608c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/GreaterThan.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/GreaterThan.java @@ -10,6 +10,7 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.time.DateUtils; import org.elasticsearch.compute.ann.Evaluator; +import org.elasticsearch.xpack.esql.capabilities.TranslationAware; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.predicate.Negatable; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -22,7 +23,10 @@ import java.time.ZoneId; import java.util.Map; -public class GreaterThan extends EsqlBinaryComparison implements Negatable { +public class GreaterThan extends EsqlBinaryComparison + implements + Negatable, + TranslationAware.SingleValueTranslationAware { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, "GreaterThan", @@ -117,6 +121,11 @@ public EsqlBinaryComparison reverse() { return new LessThan(source(), left(), right(), zoneId()); } + @Override + public Expression singleValueField() { + return left(); + } + @Evaluator(extraName = "Ints") static boolean processInts(int lhs, int rhs) { return lhs > rhs; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/GreaterThanOrEqual.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/GreaterThanOrEqual.java index 7fadea9a3cf7a..c28253ce1e374 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/GreaterThanOrEqual.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/GreaterThanOrEqual.java @@ -10,6 +10,7 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.time.DateUtils; import org.elasticsearch.compute.ann.Evaluator; +import org.elasticsearch.xpack.esql.capabilities.TranslationAware; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.predicate.Negatable; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -22,7 +23,10 @@ import java.time.ZoneId; import java.util.Map; -public class GreaterThanOrEqual extends EsqlBinaryComparison implements Negatable { +public class GreaterThanOrEqual extends EsqlBinaryComparison + implements + Negatable, + TranslationAware.SingleValueTranslationAware { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, "GreaterThanOrEqual", @@ -112,6 +116,11 @@ public LessThan negate() { return new LessThan(source(), left(), right(), zoneId()); } + @Override + public Expression singleValueField() { + return left(); + } + @Override public EsqlBinaryComparison reverse() { return new LessThanOrEqual(source(), left(), right(), zoneId()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/LessThan.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/LessThan.java index 728a01b81ead3..149d52e190a06 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/LessThan.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/LessThan.java @@ -10,6 +10,7 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.time.DateUtils; import org.elasticsearch.compute.ann.Evaluator; +import org.elasticsearch.xpack.esql.capabilities.TranslationAware; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.predicate.Negatable; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -22,7 +23,10 @@ import java.time.ZoneId; import java.util.Map; -public class LessThan extends EsqlBinaryComparison implements Negatable { +public class LessThan extends EsqlBinaryComparison + implements + Negatable, + TranslationAware.SingleValueTranslationAware { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, "LessThan", @@ -109,6 +113,11 @@ public EsqlBinaryComparison reverse() { return new GreaterThan(source(), left(), right(), zoneId()); } + @Override + public Expression singleValueField() { + return left(); + } + @Evaluator(extraName = "Ints") static boolean processInts(int lhs, int rhs) { return lhs < rhs; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/LessThanOrEqual.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/LessThanOrEqual.java index 64cd835f62722..9cc2efee56ed1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/LessThanOrEqual.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/LessThanOrEqual.java @@ -10,6 +10,7 @@ import 
org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.time.DateUtils; import org.elasticsearch.compute.ann.Evaluator; +import org.elasticsearch.xpack.esql.capabilities.TranslationAware; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.predicate.Negatable; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -22,7 +23,10 @@ import java.time.ZoneId; import java.util.Map; -public class LessThanOrEqual extends EsqlBinaryComparison implements Negatable { +public class LessThanOrEqual extends EsqlBinaryComparison + implements + Negatable, + TranslationAware.SingleValueTranslationAware { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, "LessThanOrEqual", @@ -109,6 +113,11 @@ public EsqlBinaryComparison reverse() { return new GreaterThanOrEqual(source(), left(), right(), zoneId()); } + @Override + public Expression singleValueField() { + return left(); + } + @Evaluator(extraName = "Ints") static boolean processInts(int lhs, int rhs) { return lhs <= rhs; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/NotEquals.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/NotEquals.java index d020f983ec180..a534967a9e5cc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/NotEquals.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/NotEquals.java @@ -10,6 +10,7 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.time.DateUtils; import org.elasticsearch.compute.ann.Evaluator; +import org.elasticsearch.xpack.esql.capabilities.TranslationAware; import org.elasticsearch.xpack.esql.core.expression.Expression; import 
org.elasticsearch.xpack.esql.core.expression.predicate.Negatable; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -22,7 +23,10 @@ import java.time.ZoneId; import java.util.Map; -public class NotEquals extends EsqlBinaryComparison implements Negatable { +public class NotEquals extends EsqlBinaryComparison + implements + Negatable, + TranslationAware.SingleValueTranslationAware { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, "NotEquals", @@ -189,4 +193,9 @@ public NotEquals swapLeftAndRight() { public EsqlBinaryComparison negate() { return new Equals(source(), left(), right(), zoneId()); } + + @Override + public Expression singleValueField() { + return left(); + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushdownPredicates.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushdownPredicates.java index aa9ea3b0e004b..1798e2c8c33b3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushdownPredicates.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushdownPredicates.java @@ -9,6 +9,7 @@ import org.elasticsearch.TransportVersion; import org.elasticsearch.core.Nullable; +import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; @@ -71,8 +72,17 @@ public interface LucenePushdownPredicates { */ boolean isIndexed(FieldAttribute attr); + /** + * Can the synthetic _source delegate perform {@code ==} on the provided string? 
+ */ boolean canUseEqualityOnSyntheticSourceDelegate(FieldAttribute attr, String value); + /** + * Does a {@link MatchQueryBuilder} produce a complete list of all possible documents + * that might be {@code ==} to the value passed to the query. + */ + boolean matchQueryYieldsCandidateMatchesForEquality(FieldAttribute attr); + /** * We see fields as pushable if either they are aggregatable or they are indexed. * This covers non-indexed cases like AbstractScriptFieldType which hard-coded isAggregatable to true, @@ -163,6 +173,11 @@ public boolean isIndexed(FieldAttribute attr) { public boolean canUseEqualityOnSyntheticSourceDelegate(FieldAttribute attr, String value) { return false; } + + @Override + public boolean matchQueryYieldsCandidateMatchesForEquality(FieldAttribute attr) { + return false; + } }; } @@ -208,6 +223,11 @@ public boolean isIndexed(FieldAttribute attr) { public boolean canUseEqualityOnSyntheticSourceDelegate(FieldAttribute attr, String value) { return stats.canUseEqualityOnSyntheticSourceDelegate(attr.fieldName(), value); } + + @Override + public boolean matchQueryYieldsCandidateMatchesForEquality(FieldAttribute attr) { + return stats.matchQueryYieldsCandidateMatchesForEquality(attr.field().getName()); + } }; } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/TranslatorHandler.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/TranslatorHandler.java index 4b7af5bf49de8..f88b3a748d05e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/TranslatorHandler.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/TranslatorHandler.java @@ -33,13 +33,15 @@ private TranslatorHandler() {} public Query asQuery(LucenePushdownPredicates predicates, Expression e) { if (e instanceof TranslationAware ta) { Query query = ta.asQuery(predicates, this); - return ta instanceof TranslationAware.SingleValueTranslationAware sv ? 
wrapFunctionQuery(sv.singleValueField(), query) : query; + return ta instanceof TranslationAware.SingleValueTranslationAware sv + ? forceToSingleValueQuery(sv.singleValueField(), query) + : query; } throw new QlIllegalArgumentException("Don't know how to translate {} {}", e.nodeName(), e); } - private static Query wrapFunctionQuery(Expression field, Query query) { + public Query forceToSingleValueQuery(Expression field, Query query) { if (query instanceof SingleValueQuery) { // Already wrapped return query; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java index 89aa2402248b8..174f770aad21a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java @@ -343,6 +343,24 @@ public boolean canUseEqualityOnSyntheticSourceDelegate(FieldAttribute.FieldName return true; } + @Override + public boolean matchQueryYieldsCandidateMatchesForEquality(String name) { + for (SearchExecutionContext ctx : contexts) { + MappedFieldType type = ctx.getFieldType(name); + if (type == null) { + return false; + } + if (type instanceof TextFieldMapper.TextFieldType t) { + if (false == t.matchQueryYieldsCandidateMatchesForEquality()) { + return false; + } + } else { + return false; + } + } + return true; + } + @Override public String constantValue(FieldAttribute.FieldName name) { String val = null; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java index 5c7ab1fdd6242..3822831faa373 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java @@ -40,6 
+40,8 @@ public interface SearchStats { boolean canUseEqualityOnSyntheticSourceDelegate(FieldName name, String value); + boolean matchQueryYieldsCandidateMatchesForEquality(String name); + /** * Returns the value for a field if it's a constant (eg. a constant_keyword with only one value for the involved indices). * NULL if the field is not a constant. @@ -107,5 +109,10 @@ public boolean isSingleValue(FieldName field) { public boolean canUseEqualityOnSyntheticSourceDelegate(FieldName name, String value) { return false; } + + @Override + public boolean matchQueryYieldsCandidateMatchesForEquality(String name) { + return false; + } } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index a604e1d26d313..47a283c36fc1c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -806,7 +806,7 @@ public void testOutOfRangeFilterPushdown() { var query = "from test | where " + comparison; Source expectedSource = new Source(1, 18, comparison); - logger.info("Query: " + query); + logger.info("Query: {}", query); EsQueryExec actualQueryExec = doTestOutOfRangeFilterPushdown(query, allTypeMappingAnalyzer); assertThat(actualQueryExec.query(), is(instanceOf(SingleValueQuery.Builder.class))); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/DisabledSearchStats.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/DisabledSearchStats.java index 308d21da05c6d..9a7f3d6832c14 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/DisabledSearchStats.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/DisabledSearchStats.java 
@@ -66,4 +66,9 @@ public boolean isSingleValue(FieldName field) { public boolean canUseEqualityOnSyntheticSourceDelegate(FieldName name, String value) { return false; } + + @Override + public boolean matchQueryYieldsCandidateMatchesForEquality(String name) { + return false; + } }