Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/changelog/124001.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
pr: 124001
summary: Use a must boolean statement when pushing down to Lucene when scoring is
also needed
area: ES|QL
type: bug
issues:
- 123967
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import static org.elasticsearch.core.Tuple.tuple;

public class MetadataAttribute extends TypedAttribute {
public static final String TIMESTAMP_FIELD = "@timestamp";
public static final String TIMESTAMP_FIELD = "@timestamp"; // this is not a true metadata attribute
public static final String TSID_FIELD = "_tsid";
public static final String SCORE = "_score";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,17 @@
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.support.WriteRequest;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.xpack.esql.VerificationException;
import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase;
import org.elasticsearch.xpack.esql.action.EsqlCapabilities;
import org.junit.Before;

import java.util.List;

import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.hamcrest.CoreMatchers.containsString;

Expand Down Expand Up @@ -122,6 +126,119 @@ public void testWhereMatchWithScoring() {
}
}

/**
* Test for https://github.com/elastic/elasticsearch/issues/123967
*/
public void testWhereMatchWithScoring_AndRequestFilter() {
var query = """
FROM test METADATA _score
| WHERE content:"fox"
| SORT _score DESC
| KEEP content, _score
""";

QueryBuilder filter = boolQuery().must(matchQuery("content", "brown"));

try (var resp = run(query, randomPragmas(), filter)) {
assertColumnNames(resp.columns(), List.of("content", "_score"));
assertColumnTypes(resp.columns(), List.of("text", "double"));
assertValues(
resp.values(),
List.of(
List.of("This is a brown fox", 1.4274532794952393),
List.of("The quick brown fox jumps over the lazy dog", 1.1248724460601807)
)
);
}
}

public void testWhereMatchWithScoring_AndNoScoreRequestFilter() {
var query = """
FROM test METADATA _score
| WHERE content:"fox"
| SORT _score DESC
| KEEP content, _score
""";

QueryBuilder filter = boolQuery().filter(matchQuery("content", "brown"));

try (var resp = run(query, randomPragmas(), filter)) {
assertColumnNames(resp.columns(), List.of("content", "_score"));
assertColumnTypes(resp.columns(), List.of("text", "double"));
assertValues(
resp.values(),
List.of(
List.of("This is a brown fox", 1.156558871269226),
List.of("The quick brown fox jumps over the lazy dog", 0.9114001989364624)
)
);
}
}

public void testWhereMatchWithScoring_And_MatchAllRequestFilter() {
var query = """
FROM test METADATA _score
| WHERE content:"fox"
| SORT _score DESC
| KEEP content, _score
""";

QueryBuilder filter = QueryBuilders.matchAllQuery();

try (var resp = run(query, randomPragmas(), filter)) {
assertColumnNames(resp.columns(), List.of("content", "_score"));
assertColumnTypes(resp.columns(), List.of("text", "double"));
assertValues(
resp.values(),
List.of(
List.of("This is a brown fox", 2.1565589904785156),
List.of("The quick brown fox jumps over the lazy dog", 1.9114001989364624)
)
);
}
}

public void testScoringOutsideQuery() {
var query = """
FROM test METADATA _score
| SORT _score DESC
| KEEP content, _score
""";

QueryBuilder filter = boolQuery().must(matchQuery("content", "fox"));

try (var resp = run(query, randomPragmas(), filter)) {
assertColumnNames(resp.columns(), List.of("content", "_score"));
assertColumnTypes(resp.columns(), List.of("text", "double"));
assertValues(
resp.values(),
List.of(
List.of("This is a brown fox", 1.156558871269226),
List.of("The quick brown fox jumps over the lazy dog", 0.9114001989364624)
)
);
}
}

public void testScoring_Zero_OutsideQuery() {
var query = """
FROM test METADATA _score
| SORT _score DESC
| KEEP content, _score
""";

QueryBuilder filter = boolQuery().filter(matchQuery("content", "fox"));

try (var resp = run(query, randomPragmas(), filter)) {
assertColumnNames(resp.columns(), List.of("content", "_score"));
assertColumnTypes(resp.columns(), List.of("text", "double"));
assertValues(
resp.values(),
List.of(List.of("This is a brown fox", 0.0), List.of("The quick brown fox jumps over the lazy dog", 0.0))
);
}
}

public void testWhereMatchWithScoringDifferentSort() {
assumeTrue("'METADATA _score' is disabled", EsqlCapabilities.Cap.METADATA_SCORE.isEnabled());
var query = """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ private static PhysicalPlan rewrite(
if (newPushable.size() > 0) { // update the executable with pushable conditions
Query queryDSL = TRANSLATOR_HANDLER.asQuery(Predicates.combineAnd(newPushable));
QueryBuilder planQuery = queryDSL.asBuilder();
var query = Queries.combine(Queries.Clause.FILTER, asList(queryExec.query(), planQuery));
Queries.Clause combiningQueryClauseType = queryExec.hasScoring() ? Queries.Clause.MUST : Queries.Clause.FILTER;
var query = Queries.combine(combiningQueryClauseType, asList(queryExec.query(), planQuery));
queryExec = new EsQueryExec(
queryExec.source(),
queryExec.indexPattern(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,27 +32,36 @@ protected PhysicalPlan rule(EsSourceExec plan) {
var docId = new FieldAttribute(plan.source(), EsQueryExec.DOC_ID_FIELD.getName(), EsQueryExec.DOC_ID_FIELD);
final List<Attribute> attributes = new ArrayList<>();
attributes.add(docId);
if (plan.indexMode() == IndexMode.TIME_SERIES) {
Attribute tsid = null, timestamp = null;
for (Attribute attr : plan.output()) {
String name = attr.name();
if (name.equals(MetadataAttribute.TSID_FIELD)) {

var outputIterator = plan.output().iterator();
var isTimeSeries = plan.indexMode() == IndexMode.TIME_SERIES;
var keepIterating = true;
Attribute tsid = null, timestamp = null, score = null;

while (keepIterating && outputIterator.hasNext()) {
Attribute attr = outputIterator.next();
if (attr instanceof MetadataAttribute ma) {
if (ma.name().equals(MetadataAttribute.SCORE)) {
score = attr;
} else if (isTimeSeries && ma.name().equals(MetadataAttribute.TSID_FIELD)) {
tsid = attr;
} else if (name.equals(MetadataAttribute.TIMESTAMP_FIELD)) {
timestamp = attr;
}
} else if (attr.name().equals(MetadataAttribute.TIMESTAMP_FIELD)) {
timestamp = attr;
}
keepIterating = score == null || (isTimeSeries && (tsid == null || timestamp == null));
}
if (isTimeSeries) {
if (tsid == null || timestamp == null) {
throw new IllegalStateException("_tsid or @timestamp are missing from the time-series source");
}
attributes.add(tsid);
attributes.add(timestamp);
}
plan.output().forEach(attr -> {
if (attr instanceof MetadataAttribute ma && ma.name().equals(MetadataAttribute.SCORE)) {
attributes.add(ma);
}
});
if (score != null) {
attributes.add(score);
}

return new EsQueryExec(plan.source(), plan.indexPattern(), plan.indexMode(), plan.indexNameWithModes(), attributes, plan.query());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.NodeUtils;
import org.elasticsearch.xpack.esql.core.tree.Source;
Expand Down Expand Up @@ -204,6 +205,15 @@ public static boolean isSourceAttribute(Attribute attr) {
return DOC_ID_FIELD.getName().equals(attr.name());
}

public boolean hasScoring() {
for (Attribute a : attrs()) {
if (a instanceof MetadataAttribute && a.name().equals(MetadataAttribute.SCORE)) {
return true;
}
}
return false;
}

@Override
protected NodeInfo<EsQueryExec> info() {
return NodeInfo.create(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ public enum StatsType {
}

public record Stat(String name, StatsType type, QueryBuilder query) {

public QueryBuilder filter(QueryBuilder sourceQuery) {
return query == null ? sourceQuery : Queries.combine(Queries.Clause.FILTER, asList(sourceQuery, query));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction;
Expand Down Expand Up @@ -162,9 +161,7 @@ public final PhysicalOperation sourcePhysicalOperation(EsQueryExec esQueryExec,
assert esQueryExec.estimatedRowSize() != null : "estimated row size not initialized";
int rowEstimatedSize = esQueryExec.estimatedRowSize();
int limit = esQueryExec.limit() != null ? (Integer) esQueryExec.limit().fold(context.foldCtx()) : NO_LIMIT;
boolean scoring = esQueryExec.attrs()
.stream()
.anyMatch(a -> a instanceof MetadataAttribute && a.name().equals(MetadataAttribute.SCORE));
boolean scoring = esQueryExec.hasScoring();
if ((sorts != null && sorts.isEmpty() == false)) {
List<SortBuilder<?>> sortBuilders = new ArrayList<>(sorts.size());
for (Sort sort : sorts) {
Expand Down