Skip to content

Commit 137e684

Browse files
Mikep86 authored and mridula-s109 committed
Don't allow field caps to use semantic queries as index filters (elastic#131111)
1 parent 2e58400 commit 137e684

File tree

3 files changed

+191
-0
lines changed

3 files changed

+191
-0
lines changed

docs/changelog/131111.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 131111
2+
summary: Don't allow field caps to use semantic queries as index filters
3+
area: Search
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequest.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,13 @@
1818
import org.elasticsearch.common.Strings;
1919
import org.elasticsearch.common.io.stream.StreamInput;
2020
import org.elasticsearch.common.io.stream.StreamOutput;
21+
import org.elasticsearch.index.query.BoolQueryBuilder;
22+
import org.elasticsearch.index.query.BoostingQueryBuilder;
23+
import org.elasticsearch.index.query.ConstantScoreQueryBuilder;
24+
import org.elasticsearch.index.query.DisMaxQueryBuilder;
25+
import org.elasticsearch.index.query.NestedQueryBuilder;
2126
import org.elasticsearch.index.query.QueryBuilder;
27+
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
2228
import org.elasticsearch.tasks.CancellableTask;
2329
import org.elasticsearch.tasks.Task;
2430
import org.elasticsearch.tasks.TaskId;
@@ -268,9 +274,53 @@ public ActionRequestValidationException validate() {
268274
if (fields == null || fields.length == 0) {
269275
validationException = ValidateActions.addValidationError("no fields specified", validationException);
270276
}
277+
278+
// Band-aid fix for https://github.com/elastic/elasticsearch/issues/116106.
279+
// Semantic queries are high-recall queries, making them poor filters and effectively the same as an exists query when used in that
280+
// context.
281+
if (containsSemanticQuery(indexFilter)) {
282+
validationException = ValidateActions.addValidationError(
283+
"index filter cannot contain semantic queries. Use an exists query instead.",
284+
validationException
285+
);
286+
}
287+
271288
return validationException;
272289
}
273290

291+
/**
292+
* Recursively checks if a query builder contains any semantic queries
293+
*/
294+
private static boolean containsSemanticQuery(QueryBuilder queryBuilder) {
295+
boolean containsSemanticQuery = false;
296+
297+
if (queryBuilder == null) {
298+
return containsSemanticQuery;
299+
}
300+
301+
if ("semantic".equals(queryBuilder.getWriteableName())) {
302+
containsSemanticQuery = true;
303+
} else if (queryBuilder instanceof BoolQueryBuilder boolQuery) {
304+
containsSemanticQuery = boolQuery.must().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery)
305+
|| boolQuery.mustNot().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery)
306+
|| boolQuery.should().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery)
307+
|| boolQuery.filter().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery);
308+
} else if (queryBuilder instanceof DisMaxQueryBuilder disMaxQuery) {
309+
containsSemanticQuery = disMaxQuery.innerQueries().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery);
310+
} else if (queryBuilder instanceof NestedQueryBuilder nestedQuery) {
311+
containsSemanticQuery = containsSemanticQuery(nestedQuery.query());
312+
} else if (queryBuilder instanceof BoostingQueryBuilder boostingQuery) {
313+
containsSemanticQuery = containsSemanticQuery(boostingQuery.positiveQuery())
314+
|| containsSemanticQuery(boostingQuery.negativeQuery());
315+
} else if (queryBuilder instanceof ConstantScoreQueryBuilder constantScoreQuery) {
316+
containsSemanticQuery = containsSemanticQuery(constantScoreQuery.innerQuery());
317+
} else if (queryBuilder instanceof FunctionScoreQueryBuilder functionScoreQuery) {
318+
containsSemanticQuery = containsSemanticQuery(functionScoreQuery.query());
319+
}
320+
321+
return containsSemanticQuery;
322+
}
323+
274324
@Override
275325
public boolean equals(Object o) {
276326
if (this == o) return true;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.action.fieldcaps;
9+
10+
import org.apache.lucene.search.join.ScoreMode;
11+
import org.elasticsearch.action.ActionRequestValidationException;
12+
import org.elasticsearch.index.query.BoolQueryBuilder;
13+
import org.elasticsearch.index.query.BoostingQueryBuilder;
14+
import org.elasticsearch.index.query.ConstantScoreQueryBuilder;
15+
import org.elasticsearch.index.query.DisMaxQueryBuilder;
16+
import org.elasticsearch.index.query.MatchAllQueryBuilder;
17+
import org.elasticsearch.index.query.NestedQueryBuilder;
18+
import org.elasticsearch.index.query.QueryBuilder;
19+
import org.elasticsearch.index.query.TermQueryBuilder;
20+
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
21+
import org.elasticsearch.test.ESTestCase;
22+
import org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder;
23+
24+
import static org.hamcrest.Matchers.containsString;
25+
import static org.hamcrest.Matchers.notNullValue;
26+
27+
public class FieldCapabilitiesRequestSemanticIndexFilterTests extends ESTestCase {
28+
private static final String EXPECTED_ERROR_MESSAGE = "index filter cannot contain semantic queries. Use an exists query instead.";
29+
30+
public void testValidateWithoutIndexFilter() {
31+
FieldCapabilitiesRequest request = new FieldCapabilitiesRequest();
32+
request.fields("field1", "field2");
33+
34+
ActionRequestValidationException validationException = request.validate();
35+
assertNull(validationException);
36+
}
37+
38+
public void testValidateWithNonSemanticIndexFilter() {
39+
FieldCapabilitiesRequest request = new FieldCapabilitiesRequest();
40+
request.fields("field1", "field2");
41+
request.indexFilter(randomNonSemanticQuery());
42+
43+
ActionRequestValidationException validationException = request.validate();
44+
assertNull(validationException);
45+
}
46+
47+
public void testValidateWithDirectSemanticQuery() {
48+
FieldCapabilitiesRequest request = new FieldCapabilitiesRequest();
49+
request.fields("field1", "field2");
50+
request.indexFilter(randomSemanticQuery());
51+
52+
ActionRequestValidationException validationException = request.validate();
53+
assertThat(validationException, notNullValue());
54+
assertThat(validationException.getMessage(), containsString(EXPECTED_ERROR_MESSAGE));
55+
}
56+
57+
public void testValidateWithRandomCompoundQueryContainingSemantic() {
58+
for (int i = 0; i < 100; i++) {
59+
FieldCapabilitiesRequest request = new FieldCapabilitiesRequest();
60+
request.fields("field1", "field2");
61+
62+
// Create a randomly structured compound query containing semantic query
63+
QueryBuilder randomCompoundQuery = randomCompoundQueryWithSemantic(randomIntBetween(1, 3));
64+
request.indexFilter(randomCompoundQuery);
65+
66+
ActionRequestValidationException validationException = request.validate();
67+
assertThat(validationException, notNullValue());
68+
assertThat(validationException.getMessage(), containsString(EXPECTED_ERROR_MESSAGE));
69+
}
70+
}
71+
72+
private static SemanticQueryBuilder randomSemanticQuery() {
73+
return new SemanticQueryBuilder(randomAlphaOfLength(5), randomAlphaOfLength(10));
74+
}
75+
76+
private static QueryBuilder randomNonSemanticQuery() {
77+
return switch (randomIntBetween(0, 2)) {
78+
case 0 -> new TermQueryBuilder(randomAlphaOfLength(5), randomAlphaOfLength(5));
79+
case 1 -> new MatchAllQueryBuilder();
80+
case 2 -> {
81+
BoolQueryBuilder boolQuery = new BoolQueryBuilder();
82+
boolQuery.must(new TermQueryBuilder(randomAlphaOfLength(5), randomAlphaOfLength(5)));
83+
yield boolQuery;
84+
}
85+
default -> throw new IllegalStateException("Unexpected value");
86+
};
87+
}
88+
89+
private static QueryBuilder randomCompoundQueryWithSemantic(int depth) {
90+
if (depth <= 0) {
91+
return randomSemanticQuery();
92+
}
93+
94+
return switch (randomIntBetween(0, 5)) {
95+
case 0 -> {
96+
BoolQueryBuilder boolQuery = new BoolQueryBuilder();
97+
QueryBuilder clauseQuery = randomCompoundQueryWithSemantic(depth - 1);
98+
switch (randomIntBetween(0, 3)) {
99+
case 0 -> boolQuery.must(clauseQuery);
100+
case 1 -> boolQuery.mustNot(clauseQuery);
101+
case 2 -> boolQuery.should(clauseQuery);
102+
case 3 -> boolQuery.filter(clauseQuery);
103+
default -> throw new IllegalStateException("Unexpected value");
104+
}
105+
106+
if (randomBoolean()) {
107+
boolQuery.should(randomNonSemanticQuery());
108+
}
109+
110+
yield boolQuery;
111+
}
112+
case 1 -> {
113+
DisMaxQueryBuilder disMax = new DisMaxQueryBuilder();
114+
disMax.add(randomCompoundQueryWithSemantic(depth - 1));
115+
if (randomBoolean()) {
116+
disMax.add(randomNonSemanticQuery());
117+
}
118+
yield disMax;
119+
}
120+
case 2 -> new NestedQueryBuilder(randomAlphaOfLength(5), randomCompoundQueryWithSemantic(depth - 1), ScoreMode.Max);
121+
case 3 -> {
122+
boolean positiveSemanticQuery = randomBoolean();
123+
QueryBuilder semanticQuery = randomCompoundQueryWithSemantic(depth - 1);
124+
QueryBuilder nonSemanticQuery = randomNonSemanticQuery();
125+
126+
yield new BoostingQueryBuilder(
127+
positiveSemanticQuery ? semanticQuery : nonSemanticQuery,
128+
positiveSemanticQuery ? nonSemanticQuery : semanticQuery
129+
);
130+
}
131+
case 4 -> new ConstantScoreQueryBuilder(randomCompoundQueryWithSemantic(depth - 1));
132+
case 5 -> new FunctionScoreQueryBuilder(randomCompoundQueryWithSemantic(depth - 1));
133+
default -> throw new IllegalStateException("Unexpected value");
134+
};
135+
}
136+
}

0 commit comments

Comments
 (0)