From 9483cfecf73b884a95f7f954311ee7fce26c572f Mon Sep 17 00:00:00 2001 From: Dimitris Athanasiou Date: Wed, 10 Sep 2025 21:00:30 +0300 Subject: [PATCH 1/2] Prevent field caps from failing due to can match failure (#134134) `FieldCapabilitiesFetcher` performs a can-match in order to quickly return an empty response if no shard can match. However, if can-match fails for some reason, it can cause the field capabilities request to fail. An example of that is when a semantic query is used as filter. can-match will fail as it won't be able to expand the inference results of the query. In cases like that, it makes no sense to fail the field capabilities request. Instead, we should treat can-match as returning `true` to proceed. This change does that by following suit with other callers of can-match. Fixes #116106 (cherry picked from commit 7295189560dee0151019e46059d5b5e1fe0f5378) # Conflicts: # x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java # x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java # x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml # x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml --- docs/changelog/134134.yaml | 6 ++ .../fieldcaps/CCSFieldCapabilitiesIT.java | 48 ++++-------- .../search/fieldcaps/FieldCapabilitiesIT.java | 77 +++++++------------ .../fieldcaps/FieldCapabilitiesFetcher.java | 9 ++- .../xpack/inference/InferenceFeatures.java | 4 +- .../queries/SemanticQueryBuilder.java | 3 + .../10_semantic_text_field_mapping.yml | 27 +++++++ .../10_semantic_text_field_mapping_bwc.yml | 26 +++++++ 8 files changed, 117 insertions(+), 83 deletions(-) create mode 100644 docs/changelog/134134.yaml diff --git a/docs/changelog/134134.yaml b/docs/changelog/134134.yaml new file mode 100644 index 0000000000000..cbaff5bce55f6 --- /dev/null +++ b/docs/changelog/134134.yaml @@ -0,0 +1,6 @@ +pr: 134134 +summary: Prevent field caps from failing due to can match failure +area: Search +type: bug +issues: + - 116106 diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/fieldcaps/CCSFieldCapabilitiesIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/fieldcaps/CCSFieldCapabilitiesIT.java index f29cff98c6495..7fc6fd181d6b7 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/fieldcaps/CCSFieldCapabilitiesIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/fieldcaps/CCSFieldCapabilitiesIT.java @@ -14,15 +14,12 @@ import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; import org.elasticsearch.client.internal.Client; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.search.fieldcaps.FieldCapabilitiesIT.ExceptionOnRewriteQueryBuilder; -import org.elasticsearch.search.fieldcaps.FieldCapabilitiesIT.ExceptionOnRewriteQueryPlugin; +import org.elasticsearch.index.shard.IllegalIndexShardStateException; import org.elasticsearch.test.AbstractMultiClustersTestCase; import org.elasticsearch.transport.RemoteTransportException; -import java.util.ArrayList; +import java.io.IOException; import java.util.Arrays; -import java.util.Collection; import java.util.List; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; @@ -43,14 +40,7 @@ protected boolean reuseClusters() { return false; } - @Override - protected Collection> nodePlugins(String clusterAlias) { - final List> plugins = new ArrayList<>(super.nodePlugins(clusterAlias)); - plugins.add(ExceptionOnRewriteQueryPlugin.class); - return plugins; - } - - public void testFailuresFromRemote() { + public void testFailuresFromRemote() throws IOException { Settings indexSettings = Settings.builder().put("index.number_of_replicas", 0).build(); final Client localClient = client(LOCAL_CLUSTER); final Client remoteClient = client("remote_cluster"); @@ -68,11 +58,10 @@ public void testFailuresFromRemote() { FieldCapabilitiesResponse response = client().prepareFieldCaps("*", "remote_cluster:*").setFields("*").get(); assertThat(Arrays.asList(response.getIndices()), containsInAnyOrder(localIndex, "remote_cluster:" + remoteErrorIndex)); - // adding an index filter so remote call should fail - response = client().prepareFieldCaps("*", "remote_cluster:*") - .setFields("*") - .setIndexFilter(new ExceptionOnRewriteQueryBuilder()) - .get(); + // Closed shards will result to index error because shards must be in readable state + FieldCapabilitiesIT.closeShards(cluster("remote_cluster"), remoteErrorIndex); + + response = client().prepareFieldCaps("*", "remote_cluster:*").setFields("*").get(); assertThat(response.getIndices()[0], equalTo(localIndex)); assertThat(response.getFailedIndicesCount(), equalTo(1)); FieldCapabilitiesFailure failure = response.getFailures() @@ -83,25 +72,22 @@ public void testFailuresFromRemote() { Exception ex = failure.getException(); assertEquals(RemoteTransportException.class, ex.getClass()); Throwable cause = ExceptionsHelper.unwrapCause(ex); - assertEquals(IllegalArgumentException.class, cause.getClass()); - assertEquals("I throw because I choose to.", cause.getMessage()); + assertEquals(IllegalIndexShardStateException.class, cause.getClass()); + assertEquals( + "CurrentState[CLOSED] operations only allowed when shard state is one of [POST_RECOVERY, STARTED]", + cause.getMessage() + ); // if we only query the remote we should get back an exception only - ex = expectThrows( - IllegalArgumentException.class, - client().prepareFieldCaps("remote_cluster:*").setFields("*").setIndexFilter(new ExceptionOnRewriteQueryBuilder()) - ); - assertEquals("I throw because I choose to.", ex.getMessage()); + ex = expectThrows(IllegalIndexShardStateException.class, client().prepareFieldCaps("remote_cluster:*").setFields("*")); + assertEquals("CurrentState[CLOSED] operations only allowed when shard state is one of [POST_RECOVERY, STARTED]", ex.getMessage()); // add an index that doesn't fail to the remote assertAcked(remoteClient.admin().indices().prepareCreate("okay_remote_index")); remoteClient.prepareIndex("okay_remote_index").setId("2").setSource("foo", "bar").get(); remoteClient.admin().indices().prepareRefresh("okay_remote_index").get(); - response = client().prepareFieldCaps("*", "remote_cluster:*") - .setFields("*") - .setIndexFilter(new ExceptionOnRewriteQueryBuilder()) - .get(); + response = client().prepareFieldCaps("*", "remote_cluster:*").setFields("*").get(); assertThat(Arrays.asList(response.getIndices()), containsInAnyOrder(localIndex, "remote_cluster:okay_remote_index")); assertThat(response.getFailedIndicesCount(), equalTo(1)); failure = response.getFailures() @@ -110,8 +96,8 @@ public void testFailuresFromRemote() { .findFirst() .get(); ex = failure.getException(); - assertEquals(IllegalArgumentException.class, ex.getClass()); - assertEquals("I throw because I choose to.", ex.getMessage()); + assertEquals(IllegalIndexShardStateException.class, ex.getClass()); + assertEquals("CurrentState[CLOSED] operations only allowed when shard state is one of [POST_RECOVERY, STARTED]", ex.getMessage()); } public void testFailedToConnectToRemoteCluster() throws Exception { diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/fieldcaps/FieldCapabilitiesIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/fieldcaps/FieldCapabilitiesIT.java index cbd22856f09a2..62fb94f488225 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/fieldcaps/FieldCapabilitiesIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/fieldcaps/FieldCapabilitiesIT.java @@ -46,7 +46,7 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.QueryRewriteContext; -import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.index.shard.IllegalIndexShardStateException; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.indices.IndexClosedException; @@ -58,6 +58,7 @@ import org.elasticsearch.search.DummyQueryBuilder; import org.elasticsearch.tasks.TaskInfo; import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.InternalTestCluster; import org.elasticsearch.test.MockLog; import org.elasticsearch.test.junit.annotations.TestLogging; import org.elasticsearch.test.transport.MockTransportService; @@ -77,6 +78,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.CancellationException; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; @@ -85,7 +87,6 @@ import java.util.function.Function; import java.util.stream.IntStream; -import static java.util.Collections.singletonList; import static org.elasticsearch.action.support.ActionTestUtils.wrapAsRestResponseListener; import static org.elasticsearch.index.shard.IndexShardTestCase.closeShardNoCheck; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; @@ -190,7 +191,7 @@ public void setUp() throws Exception { @Override protected Collection> nodePlugins() { - return List.of(TestMapperPlugin.class, ExceptionOnRewriteQueryPlugin.class, BlockingOnRewriteQueryPlugin.class); + return List.of(TestMapperPlugin.class, BlockingOnRewriteQueryPlugin.class); } @Override @@ -477,30 +478,36 @@ public void testFieldMetricsAndDimensions() { assertThat(response.get().get("some_dimension").get("keyword").nonDimensionIndices(), array(equalTo("new_index"))); } - public void testFailures() throws InterruptedException { + public void testFailures() throws IOException { // in addition to the existing "old_index" and "new_index", create two where the test query throws an error on rewrite assertAcked(prepareCreate("index1-error"), prepareCreate("index2-error")); ensureGreen("index1-error", "index2-error"); - FieldCapabilitiesResponse response = client().prepareFieldCaps() + + // Closed shards will result to index error because shards must be in readable state + closeShards(internalCluster(), "index1-error", "index2-error"); + + FieldCapabilitiesResponse response = client().prepareFieldCaps("old_index", "new_index", "index1-error", "index2-error") .setFields("*") - .setIndexFilter(new ExceptionOnRewriteQueryBuilder()) .get(); assertEquals(1, response.getFailures().size()); assertEquals(2, response.getFailedIndicesCount()); assertThat(response.getFailures().get(0).getIndices(), arrayContainingInAnyOrder("index1-error", "index2-error")); Exception failure = response.getFailures().get(0).getException(); - assertEquals(IllegalArgumentException.class, failure.getClass()); - assertEquals("I throw because I choose to.", failure.getMessage()); + assertEquals(IllegalIndexShardStateException.class, failure.getClass()); + assertEquals( + "CurrentState[CLOSED] operations only allowed when shard state is one of [POST_RECOVERY, STARTED]", + failure.getMessage() + ); // the "indices" section should not include failed ones assertThat(Arrays.asList(response.getIndices()), containsInAnyOrder("old_index", "new_index")); // if all requested indices failed, we fail the request by throwing the exception - IllegalArgumentException ex = expectThrows( - IllegalArgumentException.class, - client().prepareFieldCaps("index1-error", "index2-error").setFields("*").setIndexFilter(new ExceptionOnRewriteQueryBuilder()) + IllegalIndexShardStateException ex = expectThrows( + IllegalIndexShardStateException.class, + client().prepareFieldCaps("index1-error", "index2-error").setFields("*") ); - assertEquals("I throw because I choose to.", ex.getMessage()); + assertEquals("CurrentState[CLOSED] operations only allowed when shard state is one of [POST_RECOVERY, STARTED]", ex.getMessage()); } private void populateTimeRangeIndices() throws Exception { @@ -852,45 +859,17 @@ private void assertIndices(FieldCapabilitiesResponse response, String... indices assertArrayEquals(indices, response.getIndices()); } - /** - * Adds an "exception" query that throws on rewrite if the index name contains the string "error" - */ - public static class ExceptionOnRewriteQueryPlugin extends Plugin implements SearchPlugin { - - public ExceptionOnRewriteQueryPlugin() {} - - @Override - public List> getQueries() { - return singletonList( - new QuerySpec<>("exception", ExceptionOnRewriteQueryBuilder::new, p -> new ExceptionOnRewriteQueryBuilder()) - ); - } - } - - static class ExceptionOnRewriteQueryBuilder extends DummyQueryBuilder { - - public static final String NAME = "exception"; - - ExceptionOnRewriteQueryBuilder() {} - - ExceptionOnRewriteQueryBuilder(StreamInput in) throws IOException { - super(in); - } - - @Override - protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws IOException { - SearchExecutionContext searchExecutionContext = queryRewriteContext.convertToSearchExecutionContext(); - if (searchExecutionContext != null) { - if (searchExecutionContext.indexMatches("*error*")) { - throw new IllegalArgumentException("I throw because I choose to."); + static void closeShards(InternalTestCluster cluster, String... indices) throws IOException { + final Set indicesToClose = Set.of(indices); + for (String node : cluster.getNodeNames()) { + final IndicesService indicesService = cluster.getInstance(IndicesService.class, node); + for (IndexService indexService : indicesService) { + if (indicesToClose.contains(indexService.getMetadata().getIndex().getName())) { + for (IndexShard indexShard : indexService) { + closeShardNoCheck(indexShard); + } } } - return this; - } - - @Override - public String getWriteableName() { - return NAME; } } diff --git a/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java b/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java index e6d7af11d06cc..bd4ab3c4e632c 100644 --- a/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java +++ b/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java @@ -237,12 +237,17 @@ private static boolean canMatchShard( QueryBuilder indexFilter, long nowInMillis, SearchExecutionContext searchExecutionContext - ) throws IOException { + ) { assert alwaysMatches(indexFilter) == false : "should not be called for always matching [" + indexFilter + "]"; assert nowInMillis != 0L; ShardSearchRequest searchRequest = new ShardSearchRequest(shardId, nowInMillis, AliasFilter.EMPTY); searchRequest.source(new SearchSourceBuilder().query(indexFilter)); - return SearchService.queryStillMatchesAfterRewrite(searchRequest, searchExecutionContext); + try { + return SearchService.queryStillMatchesAfterRewrite(searchRequest, searchExecutionContext); + } catch (Exception e) { + // treat as if shard is still a potential match + return true; + } } private static boolean alwaysMatches(QueryBuilder indexFilter) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index 17a31a4e0cb66..c28469b39130f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -11,6 +11,7 @@ import org.elasticsearch.features.NodeFeature; import org.elasticsearch.xpack.inference.mapper.SemanticInferenceMetadataFieldsMapper; import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper; +import org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder; import org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder; import java.util.Set; @@ -56,7 +57,8 @@ public Set getTestFeatures() { SEMANTIC_KNN_FILTER_FIX, SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER, TEST_RERANKING_SERVICE_PARSE_TEXT_AS_SCORE, - SEMANTIC_QUERY_REWRITE_INTERCEPTORS_PROPAGATE_BOOST_AND_QUERY_NAME_FIX + SEMANTIC_QUERY_REWRITE_INTERCEPTORS_PROPAGATE_BOOST_AND_QUERY_NAME_FIX, + SemanticQueryBuilder.SEMANTIC_QUERY_FILTER_FIELD_CAPS_FIX ); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index 285739fe0936f..c16e0c84937fa 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -16,6 +16,7 @@ import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.AbstractQueryBuilder; import org.elasticsearch.index.query.MatchNoneQueryBuilder; @@ -52,6 +53,8 @@ public class SemanticQueryBuilder extends AbstractQueryBuilder { public static final String NAME = "semantic"; + public static final NodeFeature SEMANTIC_QUERY_FILTER_FIELD_CAPS_FIX = new NodeFeature("semantic_query.filter_field_caps_fix"); + private static final ParseField FIELD_FIELD = new ParseField("field"); private static final ParseField QUERY_FIELD = new ParseField("query"); private static final ParseField LENIENT_FIELD = new ParseField("lenient"); diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml index fcbeab9262b20..5446c38faeeb4 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml @@ -359,3 +359,30 @@ setup: index: test-always-include-inference-id-index - exists: test-always-include-inference-id-index.mappings.properties.semantic_field.inference_id + +--- +"Field caps with semantic query does not fail": + - requires: + cluster_features: "semantic_query.filter_field_caps_fix" + reason: "fixed bug with semantic query filtering in field_caps (#116106)" + # We need at least one document present to exercise can-match phase + - do: + index: + index: test-index + id: doc_1 + body: + sparse_field: "This is a story about a cat and a dog." + refresh: true + + - do: + field_caps: + index: test-index + fields: "*" + body: + index_filter: + semantic: + field: "sparse_field" + query: "test" + + - match: { indices: [ "test-index" ] } + - match: { fields.sparse_field.text.searchable: true } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml index 7a0c5b912de26..58267f31eeaaf 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml @@ -307,3 +307,29 @@ setup: another_field: type: keyword +--- +"Field caps with semantic query does not fail": + - requires: + cluster_features: "semantic_query.filter_field_caps_fix" + reason: "fixed bug with semantic query filtering in field_caps (#116106)" + # We need at least one document present to exercise can-match phase + - do: + index: + index: test-index + id: doc_1 + body: + sparse_field: "This is a story about a cat and a dog." + refresh: true + + - do: + field_caps: + index: test-index + fields: "*" + body: + index_filter: + semantic: + field: "sparse_field" + query: "test" + + - match: { indices: [ "test-index" ] } + - match: { fields.sparse_field.text.searchable: true } From d7fb4622871616da328fa634065579351de64323 Mon Sep 17 00:00:00 2001 From: Dimitris Athanasiou Date: Thu, 11 Sep 2025 18:42:05 +0300 Subject: [PATCH 2/2] Missing YAML test setup --- .../10_semantic_text_field_mapping.yml | 32 +++++++++++++++++++ .../10_semantic_text_field_mapping_bwc.yml | 32 +++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml index 5446c38faeeb4..5b47fa6e40f08 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml @@ -3,6 +3,38 @@ setup: cluster_features: "gte_v8.15.0" reason: semantic_text introduced in 8.15.0 + - do: + inference.put: + task_type: sparse_embedding + inference_id: sparse-inference-id + body: > + { + "service": "test_service", + "service_settings": { + "model": "my_model", + "api_key": "abc64" + }, + "task_settings": { + } + } + + - do: + inference.put: + task_type: text_embedding + inference_id: dense-inference-id + body: > + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "dimensions": 4, + "similarity": "cosine", + "api_key": "abc64" + }, + "task_settings": { + } + } + - do: indices.create: index: test-index diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml index 58267f31eeaaf..ad2f0f9fb8b82 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml @@ -3,6 +3,38 @@ setup: cluster_features: "gte_v8.15.0" reason: semantic_text introduced in 8.15.0 + - do: + inference.put: + task_type: sparse_embedding + inference_id: sparse-inference-id + body: > + { + "service": "test_service", + "service_settings": { + "model": "my_model", + "api_key": "abc64" + }, + "task_settings": { + } + } + + - do: + inference.put: + task_type: text_embedding + inference_id: dense-inference-id + body: > + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "dimensions": 4, + "similarity": "cosine", + "api_key": "abc64" + }, + "task_settings": { + } + } + - do: indices.create: index: test-index