From 0f154621012523f29e347cd2543ed9cd3c69b620 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 11 Jul 2025 10:57:39 -0400 Subject: [PATCH 1/6] Check if field caps uses a semantic query as a filter --- .../fieldcaps/FieldCapabilitiesRequest.java | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequest.java b/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequest.java index 2e24858d9781f..15ce402d3a062 100644 --- a/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequest.java +++ b/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequest.java @@ -18,7 +18,13 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.BoostingQueryBuilder; +import org.elasticsearch.index.query.ConstantScoreQueryBuilder; +import org.elasticsearch.index.query.DisMaxQueryBuilder; +import org.elasticsearch.index.query.NestedQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; import org.elasticsearch.tasks.CancellableTask; import org.elasticsearch.tasks.Task; import org.elasticsearch.tasks.TaskId; @@ -268,9 +274,53 @@ public ActionRequestValidationException validate() { if (fields == null || fields.length == 0) { validationException = ValidateActions.addValidationError("no fields specified", validationException); } + + // Band-aid fix for https://github.com/elastic/elasticsearch/issues/116106. + // Semantic queries are high-recall queries, making them poor filters and effectively the same as an exists query when used in that + // context. 
+ if (containsSemanticQuery(indexFilter)) { + validationException = ValidateActions.addValidationError( + "index filter cannot contain semantic queries. Use an exists query instead.", + validationException + ); + } + return validationException; } + /** + * Recursively checks whether a query builder, or any query nested within the compound queries handled here, is a semantic query + */ + private static boolean containsSemanticQuery(QueryBuilder queryBuilder) { + boolean containsSemanticQuery = false; + + if (queryBuilder == null) { + return containsSemanticQuery; + } + + if ("semantic".equals(queryBuilder.getWriteableName())) { + containsSemanticQuery = true; + } else if (queryBuilder instanceof BoolQueryBuilder boolQuery) { + return boolQuery.must().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery) + || boolQuery.mustNot().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery) + || boolQuery.should().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery) + || boolQuery.filter().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery); + } else if (queryBuilder instanceof DisMaxQueryBuilder disMaxQuery) { + containsSemanticQuery = disMaxQuery.innerQueries().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery); + } else if (queryBuilder instanceof NestedQueryBuilder nestedQuery) { + containsSemanticQuery = containsSemanticQuery(nestedQuery.query()); + } else if (queryBuilder instanceof BoostingQueryBuilder boostingQuery) { + containsSemanticQuery = containsSemanticQuery(boostingQuery.positiveQuery()) + || containsSemanticQuery(boostingQuery.negativeQuery()); + } else if (queryBuilder instanceof ConstantScoreQueryBuilder constantScoreQuery) { + containsSemanticQuery = containsSemanticQuery(constantScoreQuery.innerQuery()); + } else if (queryBuilder instanceof FunctionScoreQueryBuilder functionScoreQuery) { + containsSemanticQuery = containsSemanticQuery(functionScoreQuery.query()); + } + + return containsSemanticQuery; + } + @Override public boolean 
equals(Object o) { if (this == o) return true; From 624133e35bb284f5d31549de9980b38d297bf1a1 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 11 Jul 2025 11:45:58 -0400 Subject: [PATCH 2/6] Added unit tests --- ...litiesRequestSemanticIndexFilterTests.java | 143 ++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequestSemanticIndexFilterTests.java diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequestSemanticIndexFilterTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequestSemanticIndexFilterTests.java new file mode 100644 index 0000000000000..2e1a378a81230 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequestSemanticIndexFilterTests.java @@ -0,0 +1,143 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.action.fieldcaps; + +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.BoostingQueryBuilder; +import org.elasticsearch.index.query.ConstantScoreQueryBuilder; +import org.elasticsearch.index.query.DisMaxQueryBuilder; +import org.elasticsearch.index.query.MatchAllQueryBuilder; +import org.elasticsearch.index.query.NestedQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.TermQueryBuilder; +import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder; +import org.apache.lucene.search.join.ScoreMode; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.notNullValue; + +public class FieldCapabilitiesRequestSemanticIndexFilterTests extends ESTestCase { + private static final String EXPECTED_ERROR_MESSAGE = "index filter cannot contain semantic queries. 
Use an exists query instead."; + + public void testValidateWithoutIndexFilter() { + FieldCapabilitiesRequest request = new FieldCapabilitiesRequest(); + request.fields("field1", "field2"); + + ActionRequestValidationException validationException = request.validate(); + assertNull(validationException); + } + + public void testValidateWithNonSemanticIndexFilter() { + FieldCapabilitiesRequest request = new FieldCapabilitiesRequest(); + request.fields("field1", "field2"); + request.indexFilter(randomNonSemanticQuery()); + + ActionRequestValidationException validationException = request.validate(); + assertNull(validationException); + } + + public void testValidateWithDirectSemanticQuery() { + FieldCapabilitiesRequest request = new FieldCapabilitiesRequest(); + request.fields("field1", "field2"); + request.indexFilter(randomSemanticQuery()); + + ActionRequestValidationException validationException = request.validate(); + assertThat(validationException, notNullValue()); + assertThat(validationException.getMessage(), containsString(EXPECTED_ERROR_MESSAGE)); + } + + public void testValidateWithRandomCompoundQueryContainingSemantic() { + for (int i = 0; i < 100; i++) { + FieldCapabilitiesRequest request = new FieldCapabilitiesRequest(); + request.fields("field1", "field2"); + + // Create a randomly structured compound query containing semantic query + QueryBuilder randomCompoundQuery = randomCompoundQueryWithSemantic(randomIntBetween(1, 3)); + request.indexFilter(randomCompoundQuery); + + ActionRequestValidationException validationException = request.validate(); + assertThat(validationException, notNullValue()); + assertThat(validationException.getMessage(), containsString(EXPECTED_ERROR_MESSAGE)); + } + } + + private static SemanticQueryBuilder randomSemanticQuery() { + return new SemanticQueryBuilder( + randomAlphaOfLength(5), + randomAlphaOfLength(10) + ); + } + + private static QueryBuilder randomNonSemanticQuery() { + return switch (randomIntBetween(0, 2)) { + case 0 
-> new TermQueryBuilder(randomAlphaOfLength(5), randomAlphaOfLength(5)); + case 1 -> new MatchAllQueryBuilder(); + case 2 -> { + BoolQueryBuilder boolQuery = new BoolQueryBuilder(); + boolQuery.must(new TermQueryBuilder(randomAlphaOfLength(5), randomAlphaOfLength(5))); + yield boolQuery; + } + default -> throw new IllegalStateException("Unexpected value"); + }; + } + + private static QueryBuilder randomCompoundQueryWithSemantic(int depth) { + if (depth <= 0) { + return randomSemanticQuery(); + } + + return switch (randomIntBetween(0, 5)) { + case 0 -> { + BoolQueryBuilder boolQuery = new BoolQueryBuilder(); + QueryBuilder clauseQuery = randomCompoundQueryWithSemantic(depth - 1); + switch (randomIntBetween(0, 3)) { + case 0 -> boolQuery.must(clauseQuery); + case 1 -> boolQuery.mustNot(clauseQuery); + case 2 -> boolQuery.should(clauseQuery); + case 3 -> boolQuery.filter(clauseQuery); + default -> throw new IllegalStateException("Unexpected value"); + } + + if (randomBoolean()) { + boolQuery.should(randomNonSemanticQuery()); + } + + yield boolQuery; + } + case 1 -> { + DisMaxQueryBuilder disMax = new DisMaxQueryBuilder(); + disMax.add(randomCompoundQueryWithSemantic(depth - 1)); + if (randomBoolean()) { + disMax.add(randomNonSemanticQuery()); + } + yield disMax; + } + case 2 -> new NestedQueryBuilder( + randomAlphaOfLength(5), + randomCompoundQueryWithSemantic(depth - 1), + ScoreMode.Max + ); + case 3 -> { + boolean positiveSemanticQuery = randomBoolean(); + QueryBuilder semanticQuery = randomCompoundQueryWithSemantic(depth - 1); + QueryBuilder nonSemanticQuery = randomNonSemanticQuery(); + + yield new BoostingQueryBuilder( + positiveSemanticQuery ? semanticQuery : nonSemanticQuery, + positiveSemanticQuery ? 
nonSemanticQuery : semanticQuery + ); + } + case 4 -> new ConstantScoreQueryBuilder(randomCompoundQueryWithSemantic(depth - 1)); + case 5 -> new FunctionScoreQueryBuilder(randomCompoundQueryWithSemantic(depth - 1)); + default -> throw new IllegalStateException("Unexpected value"); + }; + } +} From 5d4d040793264079f6780c8bf8bbbeae8ef76ffa Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 11 Jul 2025 11:47:05 -0400 Subject: [PATCH 3/6] Spotless --- ...CapabilitiesRequestSemanticIndexFilterTests.java | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequestSemanticIndexFilterTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequestSemanticIndexFilterTests.java index 2e1a378a81230..9fd1c3921fcf6 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequestSemanticIndexFilterTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequestSemanticIndexFilterTests.java @@ -7,6 +7,7 @@ package org.elasticsearch.action.fieldcaps; +import org.apache.lucene.search.join.ScoreMode; import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.BoostingQueryBuilder; @@ -19,7 +20,6 @@ import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder; -import org.apache.lucene.search.join.ScoreMode; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.notNullValue; @@ -70,10 +70,7 @@ public void testValidateWithRandomCompoundQueryContainingSemantic() { } private static SemanticQueryBuilder randomSemanticQuery() { - return new SemanticQueryBuilder( - 
randomAlphaOfLength(5), - randomAlphaOfLength(10) - ); + return new SemanticQueryBuilder(randomAlphaOfLength(5), randomAlphaOfLength(10)); } private static QueryBuilder randomNonSemanticQuery() { @@ -120,11 +117,7 @@ private static QueryBuilder randomCompoundQueryWithSemantic(int depth) { } yield disMax; } - case 2 -> new NestedQueryBuilder( - randomAlphaOfLength(5), - randomCompoundQueryWithSemantic(depth - 1), - ScoreMode.Max - ); + case 2 -> new NestedQueryBuilder(randomAlphaOfLength(5), randomCompoundQueryWithSemantic(depth - 1), ScoreMode.Max); case 3 -> { boolean positiveSemanticQuery = randomBoolean(); QueryBuilder semanticQuery = randomCompoundQueryWithSemantic(depth - 1); From 3a58b2614c3e38d79d05d283b7cae5129ceff91f Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 11 Jul 2025 11:52:03 -0400 Subject: [PATCH 4/6] Don't return early --- .../action/fieldcaps/FieldCapabilitiesRequest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequest.java b/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequest.java index 15ce402d3a062..ec30886b1acbf 100644 --- a/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequest.java +++ b/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequest.java @@ -301,7 +301,7 @@ private static boolean containsSemanticQuery(QueryBuilder queryBuilder) { if ("semantic".equals(queryBuilder.getWriteableName())) { containsSemanticQuery = true; } else if (queryBuilder instanceof BoolQueryBuilder boolQuery) { - return boolQuery.must().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery) + containsSemanticQuery = boolQuery.must().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery) || boolQuery.mustNot().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery) || 
boolQuery.should().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery) || boolQuery.filter().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery); From 12ffc7659002e7c74add57273979a57675c8e034 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 11 Jul 2025 12:00:00 -0400 Subject: [PATCH 5/6] Update docs/changelog/131111.yaml --- docs/changelog/131111.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/131111.yaml diff --git a/docs/changelog/131111.yaml b/docs/changelog/131111.yaml new file mode 100644 index 0000000000000..7802b723d898a --- /dev/null +++ b/docs/changelog/131111.yaml @@ -0,0 +1,5 @@ +pr: 131111 +summary: Don't allow field caps to use semantic queries as index filters +area: "Relevance, Search" +type: bug +issues: [] From 0a98a8a62893e4e718b06cdbe8ea18460c04dd4b Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 11 Jul 2025 12:02:19 -0400 Subject: [PATCH 6/6] Fix changelog --- docs/changelog/131111.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/131111.yaml b/docs/changelog/131111.yaml index 7802b723d898a..ac5d950a3d31d 100644 --- a/docs/changelog/131111.yaml +++ b/docs/changelog/131111.yaml @@ -1,5 +1,5 @@ pr: 131111 summary: Don't allow field caps to use semantic queries as index filters -area: "Relevance, Search" +area: Search type: bug issues: []