Skip to content
7 changes: 7 additions & 0 deletions docs/changelog/124001.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
pr: 124001
summary: Use a must boolean statement when pushing down to Lucene when scoring is
also needed
area: ES|QL
type: bug
issues:
- 123967
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import static org.elasticsearch.core.Tuple.tuple;

public class MetadataAttribute extends TypedAttribute {
public static final String TIMESTAMP_FIELD = "@timestamp";
public static final String TIMESTAMP_FIELD = "@timestamp"; // this is not a true metadata attribute
public static final String TSID_FIELD = "_tsid";
public static final String SCORE = "_score";
public static final String INDEX = "_index";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,16 @@
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.support.WriteRequest;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.xpack.esql.VerificationException;
import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase;
import org.junit.Before;

import java.util.List;

import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.hamcrest.CoreMatchers.containsString;

Expand Down Expand Up @@ -120,6 +124,119 @@ public void testWhereMatchWithScoring() {
}
}

/**
* Test for https://github.com/elastic/elasticsearch/issues/123967
*/
public void testWhereMatchWithScoring_AndRequestFilter() {
var query = """
FROM test METADATA _score
| WHERE content:"fox"
| SORT _score DESC
| KEEP content, _score
""";

QueryBuilder filter = boolQuery().must(matchQuery("content", "brown"));

try (var resp = run(query, randomPragmas(), filter)) {
assertColumnNames(resp.columns(), List.of("content", "_score"));
assertColumnTypes(resp.columns(), List.of("text", "double"));
assertValues(
resp.values(),
List.of(
List.of("This is a brown fox", 1.4274532794952393),
List.of("The quick brown fox jumps over the lazy dog", 1.1248724460601807)
)
);
}
}

public void testWhereMatchWithScoring_AndNoScoreRequestFilter() {
var query = """
FROM test METADATA _score
| WHERE content:"fox"
| SORT _score DESC
| KEEP content, _score
""";

QueryBuilder filter = boolQuery().filter(matchQuery("content", "brown"));

try (var resp = run(query, randomPragmas(), filter)) {
assertColumnNames(resp.columns(), List.of("content", "_score"));
assertColumnTypes(resp.columns(), List.of("text", "double"));
assertValues(
resp.values(),
List.of(
List.of("This is a brown fox", 1.156558871269226),
List.of("The quick brown fox jumps over the lazy dog", 0.9114001989364624)
)
);
}
}

public void testWhereMatchWithScoring_And_MatchAllRequestFilter() {
var query = """
FROM test METADATA _score
| WHERE content:"fox"
| SORT _score DESC
| KEEP content, _score
""";

QueryBuilder filter = QueryBuilders.matchAllQuery();

try (var resp = run(query, randomPragmas(), filter)) {
assertColumnNames(resp.columns(), List.of("content", "_score"));
assertColumnTypes(resp.columns(), List.of("text", "double"));
assertValues(
resp.values(),
List.of(
List.of("This is a brown fox", 2.1565589904785156),
List.of("The quick brown fox jumps over the lazy dog", 1.9114001989364624)
)
);
}
}

public void testScoringOutsideQuery() {
var query = """
FROM test METADATA _score
| SORT _score DESC
| KEEP content, _score
""";

QueryBuilder filter = boolQuery().must(matchQuery("content", "fox"));

try (var resp = run(query, randomPragmas(), filter)) {
assertColumnNames(resp.columns(), List.of("content", "_score"));
assertColumnTypes(resp.columns(), List.of("text", "double"));
assertValues(
resp.values(),
List.of(
List.of("This is a brown fox", 1.156558871269226),
List.of("The quick brown fox jumps over the lazy dog", 0.9114001989364624)
)
);
}
}

public void testScoring_Zero_OutsideQuery() {
var query = """
FROM test METADATA _score
| SORT _score DESC
| KEEP content, _score
""";

QueryBuilder filter = boolQuery().filter(matchQuery("content", "fox"));

try (var resp = run(query, randomPragmas(), filter)) {
assertColumnNames(resp.columns(), List.of("content", "_score"));
assertColumnTypes(resp.columns(), List.of("text", "double"));
assertValues(
resp.values(),
List.of(List.of("This is a brown fox", 0.0), List.of("The quick brown fox jumps over the lazy dog", 0.0))
);
}
}

public void testWhereMatchWithScoringDifferentSort() {
var query = """
FROM test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ private static PhysicalPlan rewrite(
if (newPushable.size() > 0) { // update the executable with pushable conditions
Query queryDSL = TRANSLATOR_HANDLER.asQuery(Predicates.combineAnd(newPushable));
QueryBuilder planQuery = queryDSL.asBuilder();
var query = Queries.combine(Queries.Clause.FILTER, asList(queryExec.query(), planQuery));
Queries.Clause combiningQueryClauseType = queryExec.hasScoring() ? Queries.Clause.MUST : Queries.Clause.FILTER;
var query = Queries.combine(combiningQueryClauseType, asList(queryExec.query(), planQuery));
queryExec = new EsQueryExec(
queryExec.source(),
queryExec.indexPattern(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,27 +32,36 @@ protected PhysicalPlan rule(EsSourceExec plan) {
var docId = new FieldAttribute(plan.source(), EsQueryExec.DOC_ID_FIELD.getName(), EsQueryExec.DOC_ID_FIELD);
final List<Attribute> attributes = new ArrayList<>();
attributes.add(docId);
if (plan.indexMode() == IndexMode.TIME_SERIES) {
Attribute tsid = null, timestamp = null;
for (Attribute attr : plan.output()) {
String name = attr.name();
if (name.equals(MetadataAttribute.TSID_FIELD)) {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just refactoring to eliminate double iteration over plan.output() attributes.

var outputIterator = plan.output().iterator();
var isTimeSeries = plan.indexMode() == IndexMode.TIME_SERIES;
var keepIterating = true;
Attribute tsid = null, timestamp = null, score = null;

while (keepIterating && outputIterator.hasNext()) {
Attribute attr = outputIterator.next();
if (attr instanceof MetadataAttribute ma) {
if (ma.name().equals(MetadataAttribute.SCORE)) {
score = attr;
} else if (isTimeSeries && ma.name().equals(MetadataAttribute.TSID_FIELD)) {
tsid = attr;
} else if (name.equals(MetadataAttribute.TIMESTAMP_FIELD)) {
timestamp = attr;
}
} else if (attr.name().equals(MetadataAttribute.TIMESTAMP_FIELD)) {
timestamp = attr;
}
keepIterating = score == null || (isTimeSeries && (tsid == null || timestamp == null));
}
if (isTimeSeries) {
if (tsid == null || timestamp == null) {
throw new IllegalStateException("_tsid or @timestamp are missing from the time-series source");
}
attributes.add(tsid);
attributes.add(timestamp);
}
plan.output().forEach(attr -> {
if (attr instanceof MetadataAttribute ma && ma.name().equals(MetadataAttribute.SCORE)) {
attributes.add(ma);
}
});
if (score != null) {
attributes.add(score);
}

return new EsQueryExec(plan.source(), plan.indexPattern(), plan.indexMode(), plan.indexNameWithModes(), attributes, plan.query());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.NodeUtils;
import org.elasticsearch.xpack.esql.core.tree.Source;
Expand Down Expand Up @@ -204,6 +205,15 @@ public static boolean isSourceAttribute(Attribute attr) {
return DOC_ID_FIELD.getName().equals(attr.name());
}

public boolean hasScoring() {
for (Attribute a : attrs()) {
if (a instanceof MetadataAttribute && a.name().equals(MetadataAttribute.SCORE)) {
return true;
}
}
return false;
}

@Override
protected NodeInfo<EsQueryExec> info() {
return NodeInfo.create(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ public enum StatsType {
}

public record Stat(String name, StatsType type, QueryBuilder query) {

public QueryBuilder filter(QueryBuilder sourceQuery) {
return query == null ? sourceQuery : Queries.combine(Queries.Clause.FILTER, asList(sourceQuery, query));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.type.KeywordEsField;
import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField;
Expand Down Expand Up @@ -186,9 +185,7 @@ public final PhysicalOperation sourcePhysicalOperation(EsQueryExec esQueryExec,
assert esQueryExec.estimatedRowSize() != null : "estimated row size not initialized";
int rowEstimatedSize = esQueryExec.estimatedRowSize();
int limit = esQueryExec.limit() != null ? (Integer) esQueryExec.limit().fold(context.foldCtx()) : NO_LIMIT;
boolean scoring = esQueryExec.attrs()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Small refactoring.

.stream()
.anyMatch(a -> a instanceof MetadataAttribute && a.name().equals(MetadataAttribute.SCORE));
boolean scoring = esQueryExec.hasScoring();
if ((sorts != null && sorts.isEmpty() == false)) {
List<SortBuilder<?>> sortBuilders = new ArrayList<>(sorts.size());
for (Sort sort : sorts) {
Expand Down