diff --git a/docs/changelog/124001.yaml b/docs/changelog/124001.yaml new file mode 100644 index 0000000000000..374a7ad7efb58 --- /dev/null +++ b/docs/changelog/124001.yaml @@ -0,0 +1,7 @@ +pr: 124001 +summary: Use a must boolean statement when pushing down to Lucene when scoring is + also needed +area: ES|QL +type: bug +issues: + - 123967 diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java index dc75ac3a96248..a07e2d9589034 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java @@ -29,7 +29,7 @@ import static org.elasticsearch.core.Tuple.tuple; public class MetadataAttribute extends TypedAttribute { - public static final String TIMESTAMP_FIELD = "@timestamp"; + public static final String TIMESTAMP_FIELD = "@timestamp"; // this is not a true metadata attribute public static final String TSID_FIELD = "_tsid"; public static final String SCORE = "_score"; public static final String INDEX = "_index"; diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/MatchOperatorIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/MatchOperatorIT.java index c978dead8f4fd..216786798ad34 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/MatchOperatorIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/MatchOperatorIT.java @@ -11,12 +11,16 @@ import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; import org.junit.Before; import java.util.List; +import static org.elasticsearch.index.query.QueryBuilders.boolQuery; +import static org.elasticsearch.index.query.QueryBuilders.matchQuery; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.hamcrest.CoreMatchers.containsString; @@ -120,6 +124,119 @@ public void testWhereMatchWithScoring() { } } + /** + * Test for https://github.com/elastic/elasticsearch/issues/123967 + */ + public void testWhereMatchWithScoring_AndRequestFilter() { + var query = """ + FROM test METADATA _score + | WHERE content:"fox" + | SORT _score DESC + | KEEP content, _score + """; + + QueryBuilder filter = boolQuery().must(matchQuery("content", "brown")); + + try (var resp = run(query, randomPragmas(), filter)) { + assertColumnNames(resp.columns(), List.of("content", "_score")); + assertColumnTypes(resp.columns(), List.of("text", "double")); + assertValues( + resp.values(), + List.of( + List.of("This is a brown fox", 1.4274532794952393), + List.of("The quick brown fox jumps over the lazy dog", 1.1248724460601807) + ) + ); + } + } + + public void testWhereMatchWithScoring_AndNoScoreRequestFilter() { + var query = """ + FROM test METADATA _score + | WHERE content:"fox" + | SORT _score DESC + | KEEP content, _score + """; + + QueryBuilder filter = boolQuery().filter(matchQuery("content", "brown")); + + try (var resp = run(query, randomPragmas(), filter)) { + assertColumnNames(resp.columns(), List.of("content", "_score")); + assertColumnTypes(resp.columns(), List.of("text", "double")); + assertValues( + resp.values(), + List.of( + List.of("This is a brown fox", 1.156558871269226), + List.of("The quick brown fox jumps over the lazy dog", 0.9114001989364624) + ) + ); + } + } + + public void testWhereMatchWithScoring_And_MatchAllRequestFilter() { + var query = """ + FROM test METADATA _score + | WHERE content:"fox" + | SORT _score DESC + | KEEP content, _score + """; + + QueryBuilder filter = QueryBuilders.matchAllQuery(); + + try (var resp = run(query, randomPragmas(), filter)) { + assertColumnNames(resp.columns(), List.of("content", "_score")); + assertColumnTypes(resp.columns(), List.of("text", "double")); + assertValues( + resp.values(), + List.of( + List.of("This is a brown fox", 2.1565589904785156), + List.of("The quick brown fox jumps over the lazy dog", 1.9114001989364624) + ) + ); + } + } + + public void testScoringOutsideQuery() { + var query = """ + FROM test METADATA _score + | SORT _score DESC + | KEEP content, _score + """; + + QueryBuilder filter = boolQuery().must(matchQuery("content", "fox")); + + try (var resp = run(query, randomPragmas(), filter)) { + assertColumnNames(resp.columns(), List.of("content", "_score")); + assertColumnTypes(resp.columns(), List.of("text", "double")); + assertValues( + resp.values(), + List.of( + List.of("This is a brown fox", 1.156558871269226), + List.of("The quick brown fox jumps over the lazy dog", 0.9114001989364624) + ) + ); + } + } + + public void testScoring_Zero_OutsideQuery() { + var query = """ + FROM test METADATA _score + | SORT _score DESC + | KEEP content, _score + """; + + QueryBuilder filter = boolQuery().filter(matchQuery("content", "fox")); + + try (var resp = run(query, randomPragmas(), filter)) { + assertColumnNames(resp.columns(), List.of("content", "_score")); + assertColumnTypes(resp.columns(), List.of("text", "double")); + assertValues( + resp.values(), + List.of(List.of("This is a brown fox", 0.0), List.of("The quick brown fox jumps over the lazy dog", 0.0)) + ); + } + } + public void testWhereMatchWithScoringDifferentSort() { var query = """ FROM test diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushFiltersToSource.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushFiltersToSource.java index 2f28b1a0e41ba..f902f261e7dc9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushFiltersToSource.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushFiltersToSource.java @@ -101,7 +101,8 @@ private static PhysicalPlan rewrite( if (newPushable.size() > 0) { // update the executable with pushable conditions Query queryDSL = TRANSLATOR_HANDLER.asQuery(Predicates.combineAnd(newPushable)); QueryBuilder planQuery = queryDSL.asBuilder(); - var query = Queries.combine(Queries.Clause.FILTER, asList(queryExec.query(), planQuery)); + Queries.Clause combiningQueryClauseType = queryExec.hasScoring() ? Queries.Clause.MUST : Queries.Clause.FILTER; + var query = Queries.combine(combiningQueryClauseType, asList(queryExec.query(), planQuery)); queryExec = new EsQueryExec( queryExec.source(), queryExec.indexPattern(), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/ReplaceSourceAttributes.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/ReplaceSourceAttributes.java index 4f3358c539b05..4730f561348c9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/ReplaceSourceAttributes.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/ReplaceSourceAttributes.java @@ -32,27 +32,36 @@ protected PhysicalPlan rule(EsSourceExec plan) { var docId = new FieldAttribute(plan.source(), EsQueryExec.DOC_ID_FIELD.getName(), EsQueryExec.DOC_ID_FIELD); final List attributes = new ArrayList<>(); attributes.add(docId); - if (plan.indexMode() == IndexMode.TIME_SERIES) { - Attribute tsid = null, timestamp = null; - for (Attribute attr : plan.output()) { - String name = attr.name(); - if (name.equals(MetadataAttribute.TSID_FIELD)) { + + var outputIterator = plan.output().iterator(); + var isTimeSeries = plan.indexMode() == IndexMode.TIME_SERIES; + var keepIterating = true; + Attribute tsid = null, timestamp = null, score = null; + + while (keepIterating && outputIterator.hasNext()) { + Attribute attr = outputIterator.next(); + if (attr instanceof MetadataAttribute ma) { + if (ma.name().equals(MetadataAttribute.SCORE)) { + score = attr; + } else if (isTimeSeries && ma.name().equals(MetadataAttribute.TSID_FIELD)) { tsid = attr; - } else if (name.equals(MetadataAttribute.TIMESTAMP_FIELD)) { - timestamp = attr; } + } else if (attr.name().equals(MetadataAttribute.TIMESTAMP_FIELD)) { + timestamp = attr; } + keepIterating = score == null || (isTimeSeries && (tsid == null || timestamp == null)); + } + if (isTimeSeries) { if (tsid == null || timestamp == null) { throw new IllegalStateException("_tsid or @timestamp are missing from the time-series source"); } attributes.add(tsid); attributes.add(timestamp); } - plan.output().forEach(attr -> { - if (attr instanceof MetadataAttribute ma && ma.name().equals(MetadataAttribute.SCORE)) { - attributes.add(ma); - } - }); + if (score != null) { + attributes.add(score); + } + return new EsQueryExec(plan.source(), plan.indexPattern(), plan.indexMode(), plan.indexNameWithModes(), attributes, plan.query()); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExec.java index a3fc62d935795..60e7eb535f444 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExec.java @@ -21,6 +21,7 @@ import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.NodeUtils; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -204,6 +205,15 @@ public static boolean isSourceAttribute(Attribute attr) { return DOC_ID_FIELD.getName().equals(attr.name()); } + public boolean hasScoring() { + for (Attribute a : attrs()) { + if (a instanceof MetadataAttribute && a.name().equals(MetadataAttribute.SCORE)) { + return true; + } + } + return false; + } + @Override protected NodeInfo info() { return NodeInfo.create( diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsStatsQueryExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsStatsQueryExec.java index 96214652b87cb..5519e7fbc7083 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsStatsQueryExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsStatsQueryExec.java @@ -37,7 +37,6 @@ public enum StatsType { } public record Stat(String name, StatsType type, QueryBuilder query) { - public QueryBuilder filter(QueryBuilder sourceQuery) { return query == null ? sourceQuery : Queries.combine(Queries.Clause.FILTER, asList(sourceQuery, query)); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 10c380f2db56d..e1e296fc12de9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -49,7 +49,6 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.FoldContext; -import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.KeywordEsField; import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; @@ -186,9 +185,7 @@ public final PhysicalOperation sourcePhysicalOperation(EsQueryExec esQueryExec, assert esQueryExec.estimatedRowSize() != null : "estimated row size not initialized"; int rowEstimatedSize = esQueryExec.estimatedRowSize(); int limit = esQueryExec.limit() != null ? (Integer) esQueryExec.limit().fold(context.foldCtx()) : NO_LIMIT; - boolean scoring = esQueryExec.attrs() - .stream() - .anyMatch(a -> a instanceof MetadataAttribute && a.name().equals(MetadataAttribute.SCORE)); + boolean scoring = esQueryExec.hasScoring(); if ((sorts != null && sorts.isEmpty() == false)) { List> sortBuilders = new ArrayList<>(sorts.size()); for (Sort sort : sorts) {