From 8a7069fb1957a334e25539621b5a50c0396c34ac Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 19 Sep 2025 08:37:58 -0400 Subject: [PATCH 01/52] Added transport version --- server/src/main/java/org/elasticsearch/TransportVersions.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index e3b97c13686a3..2ff0f0c543152 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -334,6 +334,7 @@ static TransportVersion def(int id) { public static final TransportVersion INFERENCE_REQUEST_ADAPTIVE_RATE_LIMITING_REMOVED = def(9_164_0_00); public static final TransportVersion SEARCH_SOURCE_EXCLUDE_INFERENCE_FIELDS_PARAM = def(9_165_0_00); public static final TransportVersion INFERENCE_RESULTS_MAP_WITH_CLUSTER_ALIAS = def(9_166_0_00); + public static final TransportVersion SEMANTIC_SEARCH_CCS_SUPPORT = def(9_167_0_00); /* * STOP! READ THIS FIRST! 
No, really, From f2e8d108c3f97989d8c0de4b431140a48ffb7c41 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 19 Sep 2025 11:07:18 -0400 Subject: [PATCH 02/52] Added skeleton for semantic cross-cluster search integration tests --- .../ccs/SemanticCrossClusterSearchIT.java | 192 ++++++++++++++++++ .../plugin-metadata/entitlement-policy.yaml | 2 + 2 files changed, 194 insertions(+) create mode 100644 x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java new file mode 100644 index 0000000000000..2b8a06eb28123 --- /dev/null +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java @@ -0,0 +1,192 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.search.ccs; + +import org.elasticsearch.action.DocWriteResponse; +import org.elasticsearch.action.search.OpenPointInTimeRequest; +import org.elasticsearch.action.search.OpenPointInTimeResponse; +import org.elasticsearch.action.search.TransportOpenPointInTimeAction; +import org.elasticsearch.action.support.broadcast.BroadcastResponse; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.MinimalServiceSettings; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.license.LicenseSettings; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.plugins.SearchPlugin; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.test.AbstractMultiClustersTestCase; +import org.elasticsearch.test.InternalTestCluster; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.core.inference.action.PutInferenceModelAction; +import org.elasticsearch.xpack.core.ml.inference.MlInferenceNamedXContentProvider; +import org.elasticsearch.xpack.core.ml.vectors.TextEmbeddingQueryVectorBuilder; +import org.elasticsearch.xpack.inference.LocalStateInferencePlugin; +import org.elasticsearch.xpack.inference.mock.TestDenseInferenceServiceExtension; +import org.elasticsearch.xpack.inference.mock.TestInferenceServicePlugin; +import org.elasticsearch.xpack.inference.mock.TestSparseInferenceServiceExtension; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static 
org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.Matchers.is; + +public class SemanticCrossClusterSearchIT extends AbstractMultiClustersTestCase { + private static final String REMOTE_CLUSTER = "cluster_a"; + + @Override + protected List remoteClusterAlias() { + return List.of(REMOTE_CLUSTER); + } + + @Override + protected Map skipUnavailableForRemoteClusters() { + return Map.of(REMOTE_CLUSTER, DEFAULT_SKIP_UNAVAILABLE); + } + + @Override + protected boolean reuseClusters() { + return false; + } + + @Override + protected Settings nodeSettings() { + return Settings.builder().put(LicenseSettings.SELF_GENERATED_LICENSE_TYPE.getKey(), "trial").build(); + } + + @Override + protected Collection> nodePlugins(String clusterAlias) { + return List.of(LocalStateInferencePlugin.class, TestInferenceServicePlugin.class, FakeMlPlugin.class); + } + + private void setupTwoClusters(TestIndexInfo localIndexInfo, TestIndexInfo remoteIndexInfo) throws IOException { + setupCluster(LOCAL_CLUSTER, localIndexInfo); + setupCluster(REMOTE_CLUSTER, remoteIndexInfo); + } + + private void setupCluster(String clusterAlias, TestIndexInfo indexInfo) throws IOException { + final Client client = client(clusterAlias); + final String indexName = indexInfo.name(); + + for (var entry : indexInfo.inferenceEndpoints().entrySet()) { + String inferenceId = entry.getKey(); + MinimalServiceSettings minimalServiceSettings = entry.getValue(); + + Map serviceSettings = new HashMap<>(); + serviceSettings.put("model", randomAlphaOfLength(5)); + serviceSettings.put("api_key", randomAlphaOfLength(5)); + if (minimalServiceSettings.taskType() == TaskType.TEXT_EMBEDDING) { + serviceSettings.put("dimensions", minimalServiceSettings.dimensions()); + serviceSettings.put("similarity", minimalServiceSettings.similarity()); + serviceSettings.put("element_type", minimalServiceSettings.elementType()); + } + + createInferenceEndpoint(client, minimalServiceSettings.taskType(), inferenceId, serviceSettings); + } + + 
InternalTestCluster cluster = cluster(clusterAlias); + cluster.ensureAtLeastNumDataNodes(randomIntBetween(1, 3)); + + Settings indexSettings = indexSettings(randomIntBetween(2, 5), randomIntBetween(0, 1)).build(); + assertAcked(client.admin().indices().prepareCreate(indexName).setSettings(indexSettings).setMapping(indexInfo.mappings())); + assertFalse( + client.admin() + .cluster() + .prepareHealth(TEST_REQUEST_TIMEOUT, indexName) + .setWaitForYellowStatus() + .setTimeout(TimeValue.timeValueSeconds(10)) + .get() + .isTimedOut() + ); + + for (var entry : indexInfo.docs().entrySet()) { + String docId = entry.getKey(); + Map doc = entry.getValue(); + + DocWriteResponse response = client.prepareIndex(indexName).setId(docId).setSource(doc).execute().actionGet(); + assertThat(response.getResult(), equalTo(DocWriteResponse.Result.CREATED)); + } + BroadcastResponse refreshResponse = client.admin().indices().prepareRefresh(indexName).execute().actionGet(); + assertThat(refreshResponse.getStatus(), is(RestStatus.OK)); + } + + private static void createInferenceEndpoint(Client client, TaskType taskType, String inferenceId, Map serviceSettings) + throws IOException { + final String service = switch (taskType) { + case TEXT_EMBEDDING -> TestDenseInferenceServiceExtension.TestInferenceService.NAME; + case SPARSE_EMBEDDING -> TestSparseInferenceServiceExtension.TestInferenceService.NAME; + default -> throw new IllegalArgumentException("Unhandled task type [" + taskType + "]"); + }; + + final BytesReference content; + try (XContentBuilder builder = XContentFactory.jsonBuilder()) { + builder.startObject(); + builder.field("service", service); + builder.field("service_settings", serviceSettings); + builder.endObject(); + + content = BytesReference.bytes(builder); + } + + PutInferenceModelAction.Request request = new PutInferenceModelAction.Request( + taskType, + inferenceId, + content, + XContentType.JSON, + TEST_REQUEST_TIMEOUT + ); + var responseFuture = 
client.execute(PutInferenceModelAction.INSTANCE, request); + assertThat(responseFuture.actionGet(TEST_REQUEST_TIMEOUT).getModel().getInferenceEntityId(), equalTo(inferenceId)); + } + + private BytesReference openPointInTime(String[] indices, TimeValue keepAlive) { + OpenPointInTimeRequest request = new OpenPointInTimeRequest(indices).keepAlive(keepAlive); + final OpenPointInTimeResponse response = client().execute(TransportOpenPointInTimeAction.TYPE, request).actionGet(); + return response.getPointInTimeId(); + } + + public static class FakeMlPlugin extends Plugin implements SearchPlugin { + @Override + public List getNamedWriteables() { + return new MlInferenceNamedXContentProvider().getNamedWriteables(); + } + + @Override + public List> getQueryVectorBuilders() { + return List.of( + new QueryVectorBuilderSpec<>( + TextEmbeddingQueryVectorBuilder.NAME, + TextEmbeddingQueryVectorBuilder::new, + TextEmbeddingQueryVectorBuilder.PARSER + ) + ); + } + } + + private record TestIndexInfo( + String name, + Map inferenceEndpoints, + Map mappings, + Map> docs + ) { + @Override + public Map mappings() { + return Map.of("properties", mappings); + } + } +} diff --git a/x-pack/plugin/inference/src/main/plugin-metadata/entitlement-policy.yaml b/x-pack/plugin/inference/src/main/plugin-metadata/entitlement-policy.yaml index 36ac851acf1ea..cd046c4a6cb23 100644 --- a/x-pack/plugin/inference/src/main/plugin-metadata/entitlement-policy.yaml +++ b/x-pack/plugin/inference/src/main/plugin-metadata/entitlement-policy.yaml @@ -12,6 +12,7 @@ software.amazon.awssdk.http.nio.netty: io.netty.common: - outbound_network - manage_threads + - inbound_network - files: - path: "/etc/os-release" mode: "read" @@ -22,3 +23,4 @@ io.netty.common: io.netty.transport: - manage_threads - outbound_network + - inbound_network From d44419908a8e3fbbac3ad03697541a12e8a87dc8 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 19 Sep 2025 12:00:46 -0400 Subject: [PATCH 03/52] Updated semantic query to 
support CCS --- .../queries/SemanticQueryBuilder.java | 40 ++++++++++++++++--- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index 007f52e86aa1e..dda606047003b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -51,6 +51,7 @@ import java.util.stream.Collectors; import static org.elasticsearch.TransportVersions.INFERENCE_RESULTS_MAP_WITH_CLUSTER_ALIAS; +import static org.elasticsearch.TransportVersions.SEMANTIC_SEARCH_CCS_SUPPORT; import static org.elasticsearch.transport.RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY; import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; @@ -91,6 +92,7 @@ public class SemanticQueryBuilder extends AbstractQueryBuilder inferenceResultsMap; private final Boolean lenient; + private final boolean ccsRequest; public SemanticQueryBuilder(String fieldName, String query) { this(fieldName, query, null); @@ -116,6 +118,7 @@ protected SemanticQueryBuilder( this.query = query; this.inferenceResultsMap = inferenceResultsMap != null ? 
Map.copyOf(inferenceResultsMap) : null; this.lenient = lenient; + this.ccsRequest = false; } public SemanticQueryBuilder(StreamInput in) throws IOException { @@ -140,6 +143,11 @@ public SemanticQueryBuilder(StreamInput in) throws IOException { } else { this.lenient = null; } + if (in.getTransportVersion().supports(SEMANTIC_SEARCH_CCS_SUPPORT)) { + this.ccsRequest = in.readBoolean(); + } else { + this.ccsRequest = false; + } } @Override @@ -174,9 +182,24 @@ protected void doWriteTo(StreamOutput out) throws IOException { if (out.getTransportVersion().onOrAfter(TransportVersions.SEMANTIC_QUERY_LENIENT)) { out.writeOptionalBoolean(lenient); } + if (out.getTransportVersion().supports(SEMANTIC_SEARCH_CCS_SUPPORT)) { + out.writeBoolean(ccsRequest); + } else if (ccsRequest) { + throw new IllegalArgumentException( + "One or more nodes does not support [" + + NAME + + "] query cross-cluster search. Please update all nodes to at least " + + SEMANTIC_SEARCH_CCS_SUPPORT.toReleaseVersion() + + "." + ); + } } - private SemanticQueryBuilder(SemanticQueryBuilder other, Map inferenceResultsMap) { + private SemanticQueryBuilder( + SemanticQueryBuilder other, + Map inferenceResultsMap, + boolean ccsRequest + ) { this.fieldName = other.fieldName; this.query = other.query; this.boost = other.boost; @@ -184,6 +207,7 @@ private SemanticQueryBuilder(SemanticQueryBuilder other, Map getInferenceIdsForForField(Collection protected boolean doEquals(SemanticQueryBuilder other) { return Objects.equals(fieldName, other.fieldName) && Objects.equals(query, other.query) - && Objects.equals(inferenceResultsMap, other.inferenceResultsMap); + && Objects.equals(inferenceResultsMap, other.inferenceResultsMap) + && Objects.equals(ccsRequest, other.ccsRequest); } @Override protected int doHashCode() { - return Objects.hash(fieldName, query, inferenceResultsMap); + return Objects.hash(fieldName, query, inferenceResultsMap, ccsRequest); } } From 3133d6f6d99ec5c8465c1485229c69e3fda9156f Mon Sep 17 00:00:00 
2001 From: Mike Pellegrini Date: Fri, 19 Sep 2025 13:55:40 -0400 Subject: [PATCH 04/52] Added semantic query CCS integration test --- .../ccs/SemanticCrossClusterSearchIT.java | 123 +++++++++++++++++- 1 file changed, 119 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java index 2b8a06eb28123..1e1ef7275d648 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java @@ -10,6 +10,7 @@ import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.action.search.OpenPointInTimeRequest; import org.elasticsearch.action.search.OpenPointInTimeResponse; +import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.TransportOpenPointInTimeAction; import org.elasticsearch.action.support.broadcast.BroadcastResponse; import org.elasticsearch.client.internal.Client; @@ -17,12 +18,17 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.inference.MinimalServiceSettings; +import org.elasticsearch.inference.SimilarityMeasure; import org.elasticsearch.inference.TaskType; import org.elasticsearch.license.LicenseSettings; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.SearchPlugin; import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.builder.SearchSourceBuilder; import 
org.elasticsearch.test.AbstractMultiClustersTestCase; import org.elasticsearch.test.InternalTestCluster; import org.elasticsearch.xcontent.XContentBuilder; @@ -32,17 +38,21 @@ import org.elasticsearch.xpack.core.ml.inference.MlInferenceNamedXContentProvider; import org.elasticsearch.xpack.core.ml.vectors.TextEmbeddingQueryVectorBuilder; import org.elasticsearch.xpack.inference.LocalStateInferencePlugin; +import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper; import org.elasticsearch.xpack.inference.mock.TestDenseInferenceServiceExtension; import org.elasticsearch.xpack.inference.mock.TestInferenceServicePlugin; import org.elasticsearch.xpack.inference.mock.TestSparseInferenceServiceExtension; +import org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder; import java.io.IOException; import java.util.Collection; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.Matchers.is; @@ -74,6 +84,68 @@ protected Collection> nodePlugins(String clusterAlias) { return List.of(LocalStateInferencePlugin.class, TestInferenceServicePlugin.class, FakeMlPlugin.class); } + public void testSemanticQuery() throws Exception { + final String localIndexName = "local-index"; + final String remoteIndexName = "remote-index"; + final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + + final String commonInferenceId = "common-inference-id"; + final String localInferenceId = "local-inference-id"; + final String remoteInferenceId = "remote-inference-id"; + + final String commonInferenceIdField = "common-inference-id-field"; + final String variableInferenceIdField = "variable-inference-id-field"; + + final 
TestIndexInfo localIndexInfo = new TestIndexInfo( + localIndexName, + Map.of(commonInferenceId, sparseEmbeddingServiceSettings(), localInferenceId, sparseEmbeddingServiceSettings()), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + variableInferenceIdField, + semanticTextMapping(localInferenceId) + ), + Map.of("local_doc_1", Map.of(commonInferenceIdField, "a"), "local_doc_2", Map.of(variableInferenceIdField, "b")) + ); + final TestIndexInfo remoteIndexInfo = new TestIndexInfo( + remoteIndexName, + Map.of( + commonInferenceId, + textEmbeddingServiceSettings(256, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT), + remoteInferenceId, + textEmbeddingServiceSettings(384, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT) + ), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + variableInferenceIdField, + semanticTextMapping(remoteInferenceId) + ), + Map.of("remote_doc_1", Map.of(commonInferenceIdField, "x"), "remote_doc_2", Map.of(variableInferenceIdField, "y")) + ); + setupTwoClusters(localIndexInfo, remoteIndexInfo); + + // Query a field has the same inference ID value across clusters, but with different backing inference services + assertSearchResponse( + new SemanticQueryBuilder(commonInferenceIdField, "a"), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_1"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_1") + ) + ); + + // Query a field that has different inference ID values across clusters + assertSearchResponse( + new SemanticQueryBuilder(variableInferenceIdField, "b"), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2") + ) + ); + } + private void setupTwoClusters(TestIndexInfo localIndexInfo, TestIndexInfo remoteIndexInfo) throws IOException { setupCluster(LOCAL_CLUSTER, localIndexInfo); 
setupCluster(REMOTE_CLUSTER, remoteIndexInfo); @@ -125,6 +197,12 @@ private void setupCluster(String clusterAlias, TestIndexInfo indexInfo) throws I assertThat(refreshResponse.getStatus(), is(RestStatus.OK)); } + private BytesReference openPointInTime(String[] indices, TimeValue keepAlive) { + OpenPointInTimeRequest request = new OpenPointInTimeRequest(indices).keepAlive(keepAlive); + final OpenPointInTimeResponse response = client().execute(TransportOpenPointInTimeAction.TYPE, request).actionGet(); + return response.getPointInTimeId(); + } + private static void createInferenceEndpoint(Client client, TaskType taskType, String inferenceId, Map serviceSettings) throws IOException { final String service = switch (taskType) { @@ -154,10 +232,45 @@ private static void createInferenceEndpoint(Client client, TaskType taskType, St assertThat(responseFuture.actionGet(TEST_REQUEST_TIMEOUT).getModel().getInferenceEntityId(), equalTo(inferenceId)); } - private BytesReference openPointInTime(String[] indices, TimeValue keepAlive) { - OpenPointInTimeRequest request = new OpenPointInTimeRequest(indices).keepAlive(keepAlive); - final OpenPointInTimeResponse response = client().execute(TransportOpenPointInTimeAction.TYPE, request).actionGet(); - return response.getPointInTimeId(); + private void assertSearchResponse(QueryBuilder queryBuilder, String[] indices, List expectedSearchResults) + throws Exception { + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder).size(expectedSearchResults.size()); + SearchRequest searchRequest = new SearchRequest(indices, searchSourceBuilder); + + assertResponse(client().search(searchRequest), response -> { + SearchHit[] hits = response.getHits().getHits(); + assertThat(hits.length, equalTo(expectedSearchResults.size())); + + Iterator searchResultIterator = expectedSearchResults.iterator(); + for (int i = 0; i < hits.length; i++) { + SearchResult expectedSearchResult = searchResultIterator.next(); + SearchHit 
actualSearchResult = hits[i]; + + assertThat(actualSearchResult.getClusterAlias(), equalTo(expectedSearchResult.clusterAlias())); + assertThat(actualSearchResult.getIndex(), equalTo(expectedSearchResult.index())); + assertThat(actualSearchResult.getId(), equalTo(expectedSearchResult.id())); + } + }); + } + + private static MinimalServiceSettings sparseEmbeddingServiceSettings() { + return new MinimalServiceSettings(null, TaskType.SPARSE_EMBEDDING, null, null, null); + } + + private static MinimalServiceSettings textEmbeddingServiceSettings( + int dimensions, + SimilarityMeasure similarity, + DenseVectorFieldMapper.ElementType elementType + ) { + return new MinimalServiceSettings(null, TaskType.TEXT_EMBEDDING, dimensions, similarity, elementType); + } + + private static Map semanticTextMapping(String inferenceId) { + return Map.of("type", SemanticTextFieldMapper.CONTENT_TYPE, "inference_id", inferenceId); + } + + private static String fullyQualifiedIndexName(String clusterAlias, String indexName) { + return clusterAlias + ":" + indexName; } public static class FakeMlPlugin extends Plugin implements SearchPlugin { @@ -189,4 +302,6 @@ public Map mappings() { return Map.of("properties", mappings); } } + + private record SearchResult(String clusterAlias, String index, String id) {} } From 679f3c0d3f1470eedd89c763b2a44cf81b23d476 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 19 Sep 2025 14:38:18 -0400 Subject: [PATCH 05/52] Added semantic query CCS serialization test --- ...InterceptedInferenceMatchQueryBuilder.java | 2 +- .../queries/SemanticQueryBuilder.java | 7 ++-- ...ceptedInferenceMatchQueryBuilderTests.java | 3 +- .../queries/SemanticQueryBuilderTests.java | 36 ++++++++++++++++--- 4 files changed, 39 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilder.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilder.java index eecc08acebb4d..ccd2435eff0a9 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilder.java @@ -78,7 +78,7 @@ protected QueryBuilder queryFields( if (fieldType == null) { rewritten = new MatchNoneQueryBuilder(); } else if (fieldType instanceof SemanticTextFieldMapper.SemanticTextFieldType) { - rewritten = new SemanticQueryBuilder(getField(), getQuery(), null, inferenceResultsMap).boost(originalQuery.boost()) + rewritten = new SemanticQueryBuilder(getField(), getQuery(), null, inferenceResultsMap, false).boost(originalQuery.boost()) .queryName(originalQuery.queryName()); } else { rewritten = originalQuery; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index dda606047003b..a2c187495b018 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -99,14 +99,15 @@ public SemanticQueryBuilder(String fieldName, String query) { } public SemanticQueryBuilder(String fieldName, String query, Boolean lenient) { - this(fieldName, query, lenient, null); + this(fieldName, query, lenient, null, false); } protected SemanticQueryBuilder( String fieldName, String query, Boolean lenient, - Map inferenceResultsMap + Map inferenceResultsMap, + boolean ccsRequest ) { if (fieldName == null) { throw new IllegalArgumentException("[" + NAME + "] requires a " + FIELD_FIELD.getPreferredName() + " value"); @@ -118,7 
+119,7 @@ protected SemanticQueryBuilder( this.query = query; this.inferenceResultsMap = inferenceResultsMap != null ? Map.copyOf(inferenceResultsMap) : null; this.lenient = lenient; - this.ccsRequest = false; + this.ccsRequest = ccsRequest; } public SemanticQueryBuilder(StreamInput in) throws IOException { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java index abf12b2c876d4..5d1dc4e6899e2 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java @@ -124,7 +124,8 @@ public void testInterceptAndRewrite() throws Exception { field, queryText, null, - coordinatorIntercepted.inferenceResultsMap + coordinatorIntercepted.inferenceResultsMap, + false ).boost(matchQuery.boost()).queryName(matchQuery.queryName()); // Perform data node rewrite on test index 1 diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java index 69f1aa934cf7b..c0229a3bd55c3 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java @@ -92,9 +92,11 @@ import static org.apache.lucene.search.BooleanClause.Occur.FILTER; import static org.apache.lucene.search.BooleanClause.Occur.MUST; import static org.elasticsearch.TransportVersions.INFERENCE_RESULTS_MAP_WITH_CLUSTER_ALIAS; +import static 
org.elasticsearch.TransportVersions.SEMANTIC_SEARCH_CCS_SUPPORT; import static org.elasticsearch.transport.RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY; import static org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceConfig.DEFAULT_RESULTS_FIELD; import static org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder.SEMANTIC_QUERY_MULTIPLE_INFERENCE_IDS_TV; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.notNullValue; @@ -393,7 +395,8 @@ public void testSerializationRemoteClusterInferenceResults() throws IOException randomAlphaOfLength(5), randomAlphaOfLength(5), null, - inferenceResultsMap + inferenceResultsMap, + false ); QueryBuilder deserializedQuery = copyNamedWriteable(originalQuery, namedWriteableRegistry(), QueryBuilder.class); @@ -421,13 +424,15 @@ public void testSerializationBwc() throws IOException { fieldName, query, null, - Map.of(new FullyQualifiedInferenceId(LOCAL_CLUSTER_GROUP_KEY, randomAlphaOfLength(5)), inferenceResults) + Map.of(new FullyQualifiedInferenceId(LOCAL_CLUSTER_GROUP_KEY, randomAlphaOfLength(5)), inferenceResults), + false ); SemanticQueryBuilder bwcQuery = new SemanticQueryBuilder( fieldName, query, null, - SemanticQueryBuilder.buildSingleResultInferenceResultsMap(inferenceResults) + SemanticQueryBuilder.buildSingleResultInferenceResultsMap(inferenceResults), + false ); QueryBuilder deserializedQuery = copyNamedWriteable(originalQuery, namedWriteableRegistry(), QueryBuilder.class, version); @@ -460,7 +465,8 @@ public void testSerializationBwc() throws IOException { randomAlphaOfLength(5), randomAlphaOfLength(5), null, - inferenceResultsMap + inferenceResultsMap, + false ); String expectedErrorMessage; @@ -496,6 +502,28 @@ public void testSerializationBwc() throws IOException { } } + public void testSerializationCcs() throws Exception { + SemanticQueryBuilder originalQuery = new 
SemanticQueryBuilder(randomAlphaOfLength(5), randomAlphaOfLength(5), null, Map.of(), true); + + // Deserialize using the current transport version. This simulates sending the query to a remote cluster that supports semantic + // search CCS. + QueryBuilder deserializedQuery = copyNamedWriteable(originalQuery, namedWriteableRegistry(), QueryBuilder.class); + assertThat(deserializedQuery, equalTo(originalQuery)); + + // Deserialize using a transport version prior to semantic search CCS support. This simulates sending the query to a remote cluster + // that does *not* support semantic search CCS. + TransportVersion ccsUnsupportedVersion = TransportVersionUtils.randomVersionBetween( + random(), + originalQuery.getMinimalSupportedVersion(), + TransportVersionUtils.getPreviousVersion(SEMANTIC_SEARCH_CCS_SUPPORT) + ); + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> copyNamedWriteable(originalQuery, namedWriteableRegistry(), QueryBuilder.class, ccsUnsupportedVersion) + ); + assertThat(e.getMessage(), containsString("One or more nodes does not support [semantic] query cross-cluster search")); + } + public void testToXContent() throws IOException { QueryBuilder queryBuilder = new SemanticQueryBuilder("foo", "bar"); checkGeneratedJson(""" From d02419c5ac65401f8fc6a33b37d671335d44ab91 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 19 Sep 2025 14:43:38 -0400 Subject: [PATCH 06/52] Deleted obsolete YAML test --- .../test/multi_cluster/110_semantic_query.yml | 37 ------------------- 1 file changed, 37 deletions(-) delete mode 100644 qa/multi-cluster-search/src/test/resources/rest-api-spec/test/multi_cluster/110_semantic_query.yml diff --git a/qa/multi-cluster-search/src/test/resources/rest-api-spec/test/multi_cluster/110_semantic_query.yml b/qa/multi-cluster-search/src/test/resources/rest-api-spec/test/multi_cluster/110_semantic_query.yml deleted file mode 100644 index 0155175f0e54a..0000000000000 --- 
a/qa/multi-cluster-search/src/test/resources/rest-api-spec/test/multi_cluster/110_semantic_query.yml +++ /dev/null @@ -1,37 +0,0 @@ ---- -setup: - - requires: - cluster_features: "gte_v8.15.0" - reason: semantic query introduced in 8.15.0 - - - do: - indices.create: - index: test-index - body: - settings: - index: - number_of_shards: 1 - number_of_replicas: 0 ---- -teardown: - - - do: - indices.delete: - index: test-index - ignore_unavailable: true - ---- -"Test that semantic query does not support cross-cluster search": - - do: - catch: bad_request - search: - index: "test-index,my_remote_cluster:test-index" - body: - query: - semantic: - field: "field" - query: "test query" - - - - match: { error.type: "illegal_argument_exception" } - - match: { error.reason: "semantic query does not support cross-cluster search" } From f9d0d510157fc5965b17d60d7cb5c3d216193cf3 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 19 Sep 2025 15:15:13 -0400 Subject: [PATCH 07/52] Updated legacy semantic query rewrite interceptors to detect CCS requests --- ...LegacySemanticQueryRewriteInterceptor.java | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/LegacySemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/LegacySemanticQueryRewriteInterceptor.java index 670d846c8d4a9..bc274743e9420 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/LegacySemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/LegacySemanticQueryRewriteInterceptor.java @@ -16,6 +16,7 @@ import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.TermsQueryBuilder; import org.elasticsearch.plugins.internal.rewriter.QueryRewriteInterceptor; +import 
org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper; import java.util.ArrayList; import java.util.Collection; @@ -46,15 +47,25 @@ public QueryBuilder interceptAndRewrite(QueryRewriteContext context, QueryBuilde if (indexInformation.getInferenceIndices().isEmpty()) { // No inference fields were identified, so return the original query. return queryBuilder; - } else if (indexInformation.nonInferenceIndices().isEmpty() == false) { - // Combined case where the field name requested by this query contains both - // semantic_text and non-inference fields, so we have to combine queries per index - // containing each field type. - return buildCombinedInferenceAndNonInferenceQuery(queryBuilder, indexInformation); + } else if (resolvedIndices.getRemoteClusterIndices().isEmpty()) { + if (indexInformation.nonInferenceIndices().isEmpty() == false) { + // Combined case where the field name requested by this query contains both + // semantic_text and non-inference fields, so we have to combine queries per index + // containing each field type. + return buildCombinedInferenceAndNonInferenceQuery(queryBuilder, indexInformation); + } else { + // The only fields we've identified are inference fields (e.g. semantic_text), + // so rewrite the entire query to work on a semantic_text field. + return buildInferenceQuery(queryBuilder, indexInformation); + } } else { - // The only fields we've identified are inference fields (e.g. semantic_text), - // so rewrite the entire query to work on a semantic_text field. 
- return buildInferenceQuery(queryBuilder, indexInformation); + throw new IllegalArgumentException( + "[" + + getQueryName() + + "] query does not support cross-cluster search when querying a [" + + SemanticTextFieldMapper.CONTENT_TYPE + + "] field in a mixed-version cluster" + ); } } From fc7fadcd9952a820c8fbc30c641c62ebf4adcb20 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 19 Sep 2025 15:57:43 -0400 Subject: [PATCH 08/52] Added semantic query CCS test when ccs_minimize_roundtrips=false --- .../ccs/SemanticCrossClusterSearchIT.java | 36 +++++++++++++++++++ .../queries/SemanticQueryBuilder.java | 4 +-- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java index 1e1ef7275d648..0cba2f5e49f5f 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java @@ -28,6 +28,7 @@ import org.elasticsearch.plugins.SearchPlugin; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.builder.PointInTimeBuilder; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.test.AbstractMultiClustersTestCase; import org.elasticsearch.test.InternalTestCluster; @@ -50,6 +51,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.function.Consumer; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; @@ -146,6 +148,40 @@ public void testSemanticQuery() throws Exception { ); } + public void 
testSemanticQueryWithCcMinimizeRoundTripsFalse() throws Exception { + final String localIndexName = "local-index"; + final String remoteIndexName = "remote-index"; + final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + final SemanticQueryBuilder queryBuilder = new SemanticQueryBuilder("foo", "bar"); + final Consumer assertCcsMinimizeRoundTripsFalseFailure = s -> { + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> client().search(s).actionGet(TEST_REQUEST_TIMEOUT) + ); + assertThat( + e.getMessage(), + equalTo("[semantic] query does not support cross-cluster search when [ccs_minimize_roundtrips] is false") + ); + }; + + final TestIndexInfo localIndexInfo = new TestIndexInfo(localIndexName, Map.of(), Map.of(), Map.of()); + final TestIndexInfo remoteIndexInfo = new TestIndexInfo(remoteIndexName, Map.of(), Map.of(), Map.of()); + setupTwoClusters(localIndexInfo, remoteIndexInfo); + + // Explicitly set ccs_minimize_roundtrips=false in the search request + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder); + SearchRequest searchRequestWithCcMinimizeRoundTripsFalse = new SearchRequest(queryIndices, searchSourceBuilder); + searchRequestWithCcMinimizeRoundTripsFalse.setCcsMinimizeRoundtrips(false); + assertCcsMinimizeRoundTripsFalseFailure.accept(searchRequestWithCcMinimizeRoundTripsFalse); + + // Using a point in time implicitly sets ccs_minimize_roundtrips=false + BytesReference pitId = openPointInTime(queryIndices, TimeValue.timeValueMinutes(2)); + SearchSourceBuilder searchSourceBuilderWithPit = new SearchSourceBuilder().query(queryBuilder) + .pointInTimeBuilder(new PointInTimeBuilder(pitId)); + SearchRequest searchRequestWithPit = new SearchRequest().source(searchSourceBuilderWithPit); + assertCcsMinimizeRoundTripsFalseFailure.accept(searchRequestWithPit); + } + private void setupTwoClusters(TestIndexInfo localIndexInfo, 
TestIndexInfo remoteIndexInfo) throws IOException { setupCluster(LOCAL_CLUSTER, localIndexInfo); setupCluster(REMOTE_CLUSTER, remoteIndexInfo); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index a2c187495b018..dca967c7d7b4c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -189,7 +189,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { throw new IllegalArgumentException( "One or more nodes does not support [" + NAME - + "] query cross-cluster search. Please update all nodes to at least " + + "] query cross-cluster search. Please update all nodes to at least Elasticsearch " + SEMANTIC_SEARCH_CCS_SUPPORT.toReleaseVersion() + "." 
); @@ -432,7 +432,7 @@ private SemanticQueryBuilder doRewriteGetInferenceResults(QueryRewriteContext qu boolean ccsRequest = resolvedIndices.getRemoteClusterIndices().isEmpty() == false; if (ccsRequest && queryRewriteContext.isCcsMinimizeRoundTrips() == false) { throw new IllegalArgumentException( - NAME + " query does not support cross-cluster search when [ccs_minimize_roundtrips] is false" + "[" + NAME + "] query does not support cross-cluster search when [ccs_minimize_roundtrips] is false" ); } From c439bd230d53c21b85c56adb613059b3cfcebd8c Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 19 Sep 2025 16:36:43 -0400 Subject: [PATCH 09/52] Fix random test failure --- .../search/ccs/SemanticCrossClusterSearchIT.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java index 0cba2f5e49f5f..56a17f3dc03b7 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java @@ -31,7 +31,6 @@ import org.elasticsearch.search.builder.PointInTimeBuilder; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.test.AbstractMultiClustersTestCase; -import org.elasticsearch.test.InternalTestCluster; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentType; @@ -207,9 +206,6 @@ private void setupCluster(String clusterAlias, TestIndexInfo indexInfo) throws I createInferenceEndpoint(client, minimalServiceSettings.taskType(), inferenceId, serviceSettings); } - InternalTestCluster cluster = cluster(clusterAlias); - 
cluster.ensureAtLeastNumDataNodes(randomIntBetween(1, 3)); - Settings indexSettings = indexSettings(randomIntBetween(2, 5), randomIntBetween(0, 1)).build(); assertAcked(client.admin().indices().prepareCreate(indexName).setSettings(indexSettings).setMapping(indexInfo.mappings())); assertFalse( From 7db5ff88794e0067952a534db5d589de5d609b54 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 19 Sep 2025 17:06:43 -0400 Subject: [PATCH 10/52] Adjust error message format --- .../search/ccs/SemanticCrossClusterSearchIT.java | 2 +- .../queries/LegacySemanticQueryRewriteInterceptor.java | 5 ++--- .../xpack/inference/queries/SemanticQueryBuilder.java | 6 +++--- .../xpack/inference/queries/SemanticQueryBuilderTests.java | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java index 56a17f3dc03b7..6dbc8ac8ce39b 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java @@ -159,7 +159,7 @@ public void testSemanticQueryWithCcMinimizeRoundTripsFalse() throws Exception { ); assertThat( e.getMessage(), - equalTo("[semantic] query does not support cross-cluster search when [ccs_minimize_roundtrips] is false") + equalTo("semantic query does not support cross-cluster search when [ccs_minimize_roundtrips] is false") ); }; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/LegacySemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/LegacySemanticQueryRewriteInterceptor.java index bc274743e9420..49fa9e7cfd805 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/LegacySemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/LegacySemanticQueryRewriteInterceptor.java @@ -60,9 +60,8 @@ public QueryBuilder interceptAndRewrite(QueryRewriteContext context, QueryBuilde } } else { throw new IllegalArgumentException( - "[" - + getQueryName() - + "] query does not support cross-cluster search when querying a [" + getQueryName() + + " query does not support cross-cluster search when querying a [" + SemanticTextFieldMapper.CONTENT_TYPE + "] field in a mixed-version cluster" ); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index dca967c7d7b4c..b2b266ff5915c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -187,9 +187,9 @@ protected void doWriteTo(StreamOutput out) throws IOException { out.writeBoolean(ccsRequest); } else if (ccsRequest) { throw new IllegalArgumentException( - "One or more nodes does not support [" + "One or more nodes does not support " + NAME - + "] query cross-cluster search. Please update all nodes to at least Elasticsearch " + + " query cross-cluster search. Please update all nodes to at least Elasticsearch " + SEMANTIC_SEARCH_CCS_SUPPORT.toReleaseVersion() + "." 
); @@ -432,7 +432,7 @@ private SemanticQueryBuilder doRewriteGetInferenceResults(QueryRewriteContext qu boolean ccsRequest = resolvedIndices.getRemoteClusterIndices().isEmpty() == false; if (ccsRequest && queryRewriteContext.isCcsMinimizeRoundTrips() == false) { throw new IllegalArgumentException( - "[" + NAME + "] query does not support cross-cluster search when [ccs_minimize_roundtrips] is false" + NAME + " query does not support cross-cluster search when [ccs_minimize_roundtrips] is false" ); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java index c0229a3bd55c3..c5179a7bc1025 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java @@ -521,7 +521,7 @@ public void testSerializationCcs() throws Exception { IllegalArgumentException.class, () -> copyNamedWriteable(originalQuery, namedWriteableRegistry(), QueryBuilder.class, ccsUnsupportedVersion) ); - assertThat(e.getMessage(), containsString("One or more nodes does not support [semantic] query cross-cluster search")); + assertThat(e.getMessage(), containsString("One or more nodes does not support semantic query cross-cluster search")); } public void testToXContent() throws IOException { From 455055af4e3241a8d03c597d820d5338196a80b9 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Mon, 22 Sep 2025 08:37:51 -0400 Subject: [PATCH 11/52] Updated InterceptedInferenceQueryBuilder to add CCS support --- ...rceptedInferenceKnnVectorQueryBuilder.java | 9 ++-- ...InterceptedInferenceMatchQueryBuilder.java | 9 ++-- .../InterceptedInferenceQueryBuilder.java | 42 +++++++++++++++---- ...ptedInferenceSparseVectorQueryBuilder.java | 9 ++-- 4 files changed, 49 
insertions(+), 20 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilder.java index bf6b9d534d52e..c242bde63f5cb 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilder.java @@ -50,9 +50,10 @@ public InterceptedInferenceKnnVectorQueryBuilder(StreamInput in) throws IOExcept InterceptedInferenceKnnVectorQueryBuilder( InterceptedInferenceQueryBuilder other, - Map inferenceResultsMap + Map inferenceResultsMap, + boolean ccsRequest ) { - super(other, inferenceResultsMap); + super(other, inferenceResultsMap, ccsRequest); } @Override @@ -114,8 +115,8 @@ protected QueryBuilder doRewriteBwC(QueryRewriteContext queryRewriteContext) { } @Override - protected QueryBuilder copy(Map inferenceResultsMap) { - return new InterceptedInferenceKnnVectorQueryBuilder(this, inferenceResultsMap); + protected QueryBuilder copy(Map inferenceResultsMap, boolean ccsRequest) { + return new InterceptedInferenceKnnVectorQueryBuilder(this, inferenceResultsMap, ccsRequest); } @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilder.java index ccd2435eff0a9..569d5bee3cfe6 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilder.java @@ -37,9 +37,10 @@ public 
InterceptedInferenceMatchQueryBuilder(StreamInput in) throws IOException InterceptedInferenceMatchQueryBuilder( InterceptedInferenceQueryBuilder other, - Map inferenceResultsMap + Map inferenceResultsMap, + boolean ccsRequest ) { - super(other, inferenceResultsMap); + super(other, inferenceResultsMap, ccsRequest); } @Override @@ -63,8 +64,8 @@ protected QueryBuilder doRewriteBwC(QueryRewriteContext queryRewriteContext) { } @Override - protected QueryBuilder copy(Map inferenceResultsMap) { - return new InterceptedInferenceMatchQueryBuilder(this, inferenceResultsMap); + protected QueryBuilder copy(Map inferenceResultsMap, boolean ccsRequest) { + return new InterceptedInferenceMatchQueryBuilder(this, inferenceResultsMap, ccsRequest); } @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceQueryBuilder.java index a864e1425f634..b6b9d7197f75a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceQueryBuilder.java @@ -39,6 +39,7 @@ import java.util.Set; import static org.elasticsearch.TransportVersions.INFERENCE_RESULTS_MAP_WITH_CLUSTER_ALIAS; +import static org.elasticsearch.TransportVersions.SEMANTIC_SEARCH_CCS_SUPPORT; import static org.elasticsearch.index.IndexSettings.DEFAULT_FIELD_SETTING; import static org.elasticsearch.transport.RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY; import static org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder.convertFromBwcInferenceResultsMap; @@ -63,11 +64,13 @@ public abstract class InterceptedInferenceQueryBuilder inferenceResultsMap; + protected final boolean ccsRequest; protected InterceptedInferenceQueryBuilder(T originalQuery) { 
Objects.requireNonNull(originalQuery, "original query must not be null"); this.originalQuery = originalQuery; this.inferenceResultsMap = null; + this.ccsRequest = false; } @SuppressWarnings("unchecked") @@ -83,14 +86,21 @@ protected InterceptedInferenceQueryBuilder(StreamInput in) throws IOException { in.readOptional(i1 -> i1.readImmutableMap(i2 -> i2.readNamedWriteable(InferenceResults.class))) ); } + if (in.getTransportVersion().supports(SEMANTIC_SEARCH_CCS_SUPPORT)) { + this.ccsRequest = in.readBoolean(); + } else { + this.ccsRequest = false; + } } protected InterceptedInferenceQueryBuilder( InterceptedInferenceQueryBuilder other, - Map inferenceResultsMap + Map inferenceResultsMap, + boolean ccsRequest ) { this.originalQuery = other.originalQuery; this.inferenceResultsMap = inferenceResultsMap; + this.ccsRequest = ccsRequest; } /** @@ -130,9 +140,10 @@ protected InterceptedInferenceQueryBuilder( * Generate a copy of {@code this} using the provided inference results map. * * @param inferenceResultsMap The inference results map + * @param ccsRequest Flag indicating if this is a CCS request * @return A copy of {@code this} with the provided inference results map */ - protected abstract QueryBuilder copy(Map inferenceResultsMap); + protected abstract QueryBuilder copy(Map inferenceResultsMap, boolean ccsRequest); /** * Rewrite to a {@link QueryBuilder} appropriate for a specific index's mappings. The implementation can use @@ -191,6 +202,19 @@ protected void doWriteTo(StreamOutput out) throws IOException { o2.writeString(id.inferenceId()); }, StreamOutput::writeNamedWriteable), inferenceResultsMap); } + if (out.getTransportVersion().supports(SEMANTIC_SEARCH_CCS_SUPPORT)) { + out.writeBoolean(ccsRequest); + } else if (ccsRequest) { + throw new IllegalArgumentException( + "One or more nodes does not support " + + originalQuery.getName() + + " query cross-cluster search when querying a [" + + SemanticTextFieldMapper.CONTENT_TYPE + + "] field. 
Please update all nodes to at least Elasticsearch " + + SEMANTIC_SEARCH_CCS_SUPPORT.toReleaseVersion() + + "." + ); + } } @Override @@ -205,12 +229,14 @@ protected Query doToQuery(SearchExecutionContext context) { @Override protected boolean doEquals(InterceptedInferenceQueryBuilder other) { - return Objects.equals(originalQuery, other.originalQuery) && Objects.equals(inferenceResultsMap, other.inferenceResultsMap); + return Objects.equals(originalQuery, other.originalQuery) + && Objects.equals(inferenceResultsMap, other.inferenceResultsMap) + && Objects.equals(ccsRequest, other.ccsRequest); } @Override protected int doHashCode() { - return Objects.hash(originalQuery, inferenceResultsMap); + return Objects.hash(originalQuery, inferenceResultsMap, ccsRequest); } @Override @@ -273,13 +299,13 @@ private QueryBuilder doRewriteGetInferenceResults(QueryRewriteContext queryRewri // Validate early to prevent partial failures coordinatorNodeValidate(resolvedIndices); - // TODO: Check for supported CCS mode here (once we support CCS) - if (resolvedIndices.getRemoteClusterIndices().isEmpty() == false) { + boolean ccsRequest = resolvedIndices.getRemoteClusterIndices().isEmpty() == false; + if (ccsRequest && queryRewriteContext.isCcsMinimizeRoundTrips() == false) { throw new IllegalArgumentException( originalQuery.getName() + " query does not support cross-cluster search when querying a [" + SemanticTextFieldMapper.CONTENT_TYPE - + "] field" + + "] field when [ccs_minimize_roundtrips] is false" ); } @@ -304,7 +330,7 @@ private QueryBuilder doRewriteGetInferenceResults(QueryRewriteContext queryRewri // The inference results map is fully populated, so we can perform error checking inferenceResultsErrorCheck(modifiedInferenceResultsMap); } else { - rewritten = copy(modifiedInferenceResultsMap); + rewritten = copy(modifiedInferenceResultsMap, ccsRequest); } } diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilder.java index 655fb3f790cd9..1ce803fd5c4d2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilder.java @@ -49,9 +49,10 @@ public InterceptedInferenceSparseVectorQueryBuilder(StreamInput in) throws IOExc InterceptedInferenceSparseVectorQueryBuilder( InterceptedInferenceQueryBuilder other, - Map inferenceResultsMap + Map inferenceResultsMap, + boolean ccsRequest ) { - super(other, inferenceResultsMap); + super(other, inferenceResultsMap, ccsRequest); } @Override @@ -96,8 +97,8 @@ protected QueryBuilder doRewriteBwC(QueryRewriteContext queryRewriteContext) { } @Override - protected QueryBuilder copy(Map inferenceResultsMap) { - return new InterceptedInferenceSparseVectorQueryBuilder(this, inferenceResultsMap); + protected QueryBuilder copy(Map inferenceResultsMap, boolean ccsRequest) { + return new InterceptedInferenceSparseVectorQueryBuilder(this, inferenceResultsMap, ccsRequest); } @Override From 990f69a23cd88992db80fc0af7af5cb5d2ea07f4 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Mon, 22 Sep 2025 09:08:05 -0400 Subject: [PATCH 12/52] Added match query CCS test --- .../core/LocalStateCompositeXPackPlugin.java | 12 +++ .../ccs/SemanticCrossClusterSearchIT.java | 92 +++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java index 117141708ed43..366b10e125604 100644 --- 
a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java @@ -79,6 +79,8 @@ import org.elasticsearch.plugins.ShutdownAwarePlugin; import org.elasticsearch.plugins.SystemIndexPlugin; import org.elasticsearch.plugins.interceptor.RestServerActionPlugin; +import org.elasticsearch.plugins.internal.InternalSearchPlugin; +import org.elasticsearch.plugins.internal.rewriter.QueryRewriteInterceptor; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.SnapshotMetrics; @@ -135,6 +137,7 @@ public class LocalStateCompositeXPackPlugin extends XPackPlugin IndexStorePlugin, SystemIndexPlugin, SearchPlugin, + InternalSearchPlugin, ShutdownAwarePlugin, RestServerActionPlugin { @@ -291,6 +294,15 @@ public List> getQueries() { return querySpecs; } + @Override + public List getQueryRewriteInterceptors() { + List interceptors = new ArrayList<>(); + filterPlugins(InternalSearchPlugin.class).stream() + .flatMap(p -> p.getQueryRewriteInterceptors().stream()) + .forEach(interceptors::add); + return interceptors; + } + @Override public List getNamedXContent() { List entries = new ArrayList<>(super.getNamedXContent()); diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java index 6dbc8ac8ce39b..ae84f0bb6d0e5 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java @@ -19,6 +19,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.TimeValue; import 
org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.inference.MinimalServiceSettings; import org.elasticsearch.inference.SimilarityMeasure; @@ -181,6 +182,97 @@ public void testSemanticQueryWithCcMinimizeRoundTripsFalse() throws Exception { assertCcsMinimizeRoundTripsFalseFailure.accept(searchRequestWithPit); } + public void testMatchQuery() throws Exception { + final String localIndexName = "local-index"; + final String remoteIndexName = "remote-index"; + final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + + final String commonInferenceId = "common-inference-id"; + final String localInferenceId = "local-inference-id"; + final String remoteInferenceId = "remote-inference-id"; + + final String commonInferenceIdField = "common-inference-id-field"; + final String variableInferenceIdField = "variable-inference-id-field"; + final String mixedTypeField = "mixed-type-field"; + + final TestIndexInfo localIndexInfo = new TestIndexInfo( + localIndexName, + Map.of(commonInferenceId, sparseEmbeddingServiceSettings(), localInferenceId, sparseEmbeddingServiceSettings()), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + variableInferenceIdField, + semanticTextMapping(localInferenceId), + mixedTypeField, + semanticTextMapping(localInferenceId) + ), + Map.of( + "local_doc_1", + Map.of(commonInferenceIdField, "a"), + "local_doc_2", + Map.of(variableInferenceIdField, "b"), + "local_doc_3", + Map.of(mixedTypeField, "c") + ) + ); + final TestIndexInfo remoteIndexInfo = new TestIndexInfo( + remoteIndexName, + Map.of( + commonInferenceId, + textEmbeddingServiceSettings(256, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT), + remoteInferenceId, + textEmbeddingServiceSettings(384, SimilarityMeasure.COSINE, 
DenseVectorFieldMapper.ElementType.FLOAT) + ), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + variableInferenceIdField, + semanticTextMapping(remoteInferenceId), + mixedTypeField, + Map.of("type", "text") + ), + Map.of( + "remote_doc_1", + Map.of(commonInferenceIdField, "x"), + "remote_doc_2", + Map.of(variableInferenceIdField, "y"), + "remote_doc_3", + Map.of(mixedTypeField, "z") + ) + ); + setupTwoClusters(localIndexInfo, remoteIndexInfo); + + // Query a field has the same inference ID value across clusters, but with different backing inference services + assertSearchResponse( + new MatchQueryBuilder(commonInferenceIdField, "a"), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_1"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_1") + ) + ); + + // Query a field that has different inference ID values across clusters + assertSearchResponse( + new MatchQueryBuilder(variableInferenceIdField, "b"), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2") + ) + ); + + // Query a field that has mixed types across clusters + assertSearchResponse( + new MatchQueryBuilder(mixedTypeField, "z"), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3") + ) + ); + } + private void setupTwoClusters(TestIndexInfo localIndexInfo, TestIndexInfo remoteIndexInfo) throws IOException { setupCluster(LOCAL_CLUSTER, localIndexInfo); setupCluster(REMOTE_CLUSTER, remoteIndexInfo); From 3fc3e5b383576783a0c557cc158f9939d20a7a3b Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Mon, 22 Sep 2025 09:08:39 -0400 Subject: [PATCH 13/52] Fix build error --- .../InterceptedInferenceKnnVectorQueryBuilderTests.java | 3 ++- .../queries/InterceptedInferenceMatchQueryBuilderTests.java | 6 +++++- 
.../InterceptedInferenceSparseVectorQueryBuilderTests.java | 3 ++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilderTests.java index 12ee911a125bd..ee844d238beae 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilderTests.java @@ -69,7 +69,8 @@ protected InterceptedInferenceQueryBuilder createIntercep ) { return new InterceptedInferenceKnnVectorQueryBuilder( new InterceptedInferenceKnnVectorQueryBuilder(originalQuery), - inferenceResultsMap + inferenceResultsMap, + false ); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java index 5d1dc4e6899e2..3d9c7ca2bf9dd 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java @@ -33,7 +33,11 @@ protected InterceptedInferenceQueryBuilder createInterceptedQ MatchQueryBuilder originalQuery, Map inferenceResultsMap ) { - return new InterceptedInferenceMatchQueryBuilder(new InterceptedInferenceMatchQueryBuilder(originalQuery), inferenceResultsMap); + return new InterceptedInferenceMatchQueryBuilder( + new InterceptedInferenceMatchQueryBuilder(originalQuery), + inferenceResultsMap, + false + ); } @Override diff --git 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilderTests.java index a0066f5da130d..9e0cfda289632 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilderTests.java @@ -63,7 +63,8 @@ protected InterceptedInferenceQueryBuilder createInter ) { return new InterceptedInferenceSparseVectorQueryBuilder( new InterceptedInferenceSparseVectorQueryBuilder(originalQuery), - inferenceResultsMap + inferenceResultsMap, + false ); } From 2ce408af19c6718fa29beaf2c33a6fc8d86a6f59 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Mon, 22 Sep 2025 13:49:37 -0400 Subject: [PATCH 14/52] Added knn query CCS test --- x-pack/plugin/inference/build.gradle | 1 + .../ccs/SemanticCrossClusterSearchIT.java | 179 +++++++++++++++++- 2 files changed, 175 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/inference/build.gradle b/x-pack/plugin/inference/build.gradle index 9486d239e5de5..eb9372e675831 100644 --- a/x-pack/plugin/inference/build.gradle +++ b/x-pack/plugin/inference/build.gradle @@ -36,6 +36,7 @@ dependencies { testImplementation(project(':x-pack:plugin:inference:qa:test-service-plugin')) testImplementation project(':modules:reindex') testImplementation project(':modules:mapper-extras') + testImplementation project(':x-pack:plugin:ml') clusterPlugins project(':x-pack:plugin:inference:qa:test-service-plugin') api "com.ibm.icu:icu4j:${versions.icu4j}" diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java 
b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java index ae84f0bb6d0e5..9d5785b003f96 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java @@ -11,6 +11,7 @@ import org.elasticsearch.action.search.OpenPointInTimeRequest; import org.elasticsearch.action.search.OpenPointInTimeResponse; import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.TransportOpenPointInTimeAction; import org.elasticsearch.action.support.broadcast.BroadcastResponse; import org.elasticsearch.client.internal.Client; @@ -25,17 +26,20 @@ import org.elasticsearch.inference.SimilarityMeasure; import org.elasticsearch.inference.TaskType; import org.elasticsearch.license.LicenseSettings; +import org.elasticsearch.plugins.ActionPlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.SearchPlugin; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.builder.PointInTimeBuilder; import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.vectors.KnnVectorQueryBuilder; import org.elasticsearch.test.AbstractMultiClustersTestCase; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.core.inference.action.PutInferenceModelAction; +import org.elasticsearch.xpack.core.ml.action.CoordinatedInferenceAction; import org.elasticsearch.xpack.core.ml.inference.MlInferenceNamedXContentProvider; import org.elasticsearch.xpack.core.ml.vectors.TextEmbeddingQueryVectorBuilder; import 
org.elasticsearch.xpack.inference.LocalStateInferencePlugin; @@ -44,6 +48,7 @@ import org.elasticsearch.xpack.inference.mock.TestInferenceServicePlugin; import org.elasticsearch.xpack.inference.mock.TestSparseInferenceServiceExtension; import org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder; +import org.elasticsearch.xpack.ml.action.TransportCoordinatedInferenceAction; import java.io.IOException; import java.util.Collection; @@ -53,9 +58,11 @@ import java.util.Map; import java.util.function.Consumer; +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.IVF_FORMAT; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; -import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; public class SemanticCrossClusterSearchIT extends AbstractMultiClustersTestCase { @@ -204,7 +211,7 @@ public void testMatchQuery() throws Exception { variableInferenceIdField, semanticTextMapping(localInferenceId), mixedTypeField, - semanticTextMapping(localInferenceId) + Map.of("type", "text") ), Map.of( "local_doc_1", @@ -229,7 +236,7 @@ public void testMatchQuery() throws Exception { variableInferenceIdField, semanticTextMapping(remoteInferenceId), mixedTypeField, - Map.of("type", "text") + semanticTextMapping(remoteInferenceId) ), Map.of( "remote_doc_1", @@ -264,13 +271,151 @@ public void testMatchQuery() throws Exception { // Query a field that has mixed types across clusters assertSearchResponse( - new MatchQueryBuilder(mixedTypeField, "z"), + new MatchQueryBuilder(mixedTypeField, "c"), + queryIndices, + List.of( + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3"), + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3") + ) + ); + } + + public void testKnnQuery() throws Exception { + final 
String localIndexName = "local-index"; + final String remoteIndexName = "remote-index"; + final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + + final String commonInferenceId = "common-inference-id"; + final String localInferenceId = "local-inference-id"; + + final String commonInferenceIdField = "common-inference-id-field"; + final String mixedTypeField1 = "mixed-type-field-1"; + final String mixedTypeField2 = "mixed-type-field-2"; + + final TestIndexInfo localIndexInfo = new TestIndexInfo( + localIndexName, + Map.of( + commonInferenceId, + textEmbeddingServiceSettings(256, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT), + localInferenceId, + textEmbeddingServiceSettings(384, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT) + ), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + mixedTypeField1, + denseVectorMapping(384), + mixedTypeField2, + semanticTextMapping(localInferenceId) + ), + Map.of( + "local_doc_1", + Map.of(commonInferenceIdField, "a"), + "local_doc_2", + Map.of(mixedTypeField1, generateDenseVectorFieldValue(384, DenseVectorFieldMapper.ElementType.FLOAT)), + "local_doc_3", + Map.of(mixedTypeField2, "c") + ) + ); + final TestIndexInfo remoteIndexInfo = new TestIndexInfo( + remoteIndexName, + Map.of( + commonInferenceId, + textEmbeddingServiceSettings(384, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT) + ), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + mixedTypeField1, + semanticTextMapping(commonInferenceId), + mixedTypeField2, + denseVectorMapping(384) + ), + Map.of( + "remote_doc_1", + Map.of(commonInferenceIdField, "x"), + "remote_doc_2", + Map.of(mixedTypeField1, "y"), + "remote_doc_3", + Map.of(mixedTypeField2, generateDenseVectorFieldValue(384, DenseVectorFieldMapper.ElementType.FLOAT)) + ) + ); + setupTwoClusters(localIndexInfo, remoteIndexInfo); + + // 
Query a field has the same inference ID value across clusters, but with different backing inference services + assertSearchResponse( + new KnnVectorQueryBuilder( + commonInferenceIdField, + new TextEmbeddingQueryVectorBuilder(null, "a"), + 10, + 100, + IVF_FORMAT.isEnabled() ? 10f : null, + null + ), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_1"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_1") + ) + ); + + // Query a field that has mixed types across clusters + assertSearchResponse( + new KnnVectorQueryBuilder( + mixedTypeField1, + new TextEmbeddingQueryVectorBuilder(localInferenceId, "y"), + 10, + 100, + IVF_FORMAT.isEnabled() ? 10f : null, + null + ), + queryIndices, + List.of( + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2"), + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2") + ) + ); + + assertSearchResponse( + new KnnVectorQueryBuilder( + mixedTypeField2, + new TextEmbeddingQueryVectorBuilder(localInferenceId, "c"), + 10, + 100, + IVF_FORMAT.isEnabled() ? 10f : null, + null + ), queryIndices, List.of( new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3") ) ); + + // Check that omitting the inference ID when querying a remote dense vector field leads to the expected partial failure + KnnVectorQueryBuilder queryBuilder = new KnnVectorQueryBuilder( + mixedTypeField2, + new TextEmbeddingQueryVectorBuilder(null, "c"), + 10, + 100, + IVF_FORMAT.isEnabled() ? 
10f : null, + null + ); + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder); + SearchRequest searchRequest = new SearchRequest(queryIndices, searchSourceBuilder); + assertResponse(client().search(searchRequest), response -> { + SearchHit[] hits = response.getHits().getHits(); + assertThat(hits.length, equalTo(1)); + assertThat(hits[0].getId(), equalTo("local_doc_3")); + + SearchResponse.Cluster remoteCluster = response.getClusters().getCluster(REMOTE_CLUSTER); + assertThat(remoteCluster.getStatus(), equalTo(SearchResponse.Cluster.Status.SKIPPED)); + assertThat(remoteCluster.getFailures().size(), equalTo(1)); + + Throwable cause = remoteCluster.getFailures().getFirst().getCause(); + assertThat(cause, instanceOf(IllegalArgumentException.class)); + assertThat(cause.getMessage(), equalTo("[model_id] must not be null.")); + }); } private void setupTwoClusters(TestIndexInfo localIndexInfo, TestIndexInfo remoteIndexInfo) throws IOException { @@ -393,11 +538,30 @@ private static Map semanticTextMapping(String inferenceId) { return Map.of("type", SemanticTextFieldMapper.CONTENT_TYPE, "inference_id", inferenceId); } + private static Map denseVectorMapping(int dimensions) { + return Map.of("type", DenseVectorFieldMapper.CONTENT_TYPE, "dims", dimensions); + } + private static String fullyQualifiedIndexName(String clusterAlias, String indexName) { return clusterAlias + ":" + indexName; } - public static class FakeMlPlugin extends Plugin implements SearchPlugin { + private static float[] generateDenseVectorFieldValue(int dimensions, DenseVectorFieldMapper.ElementType elementType) { + if (elementType == DenseVectorFieldMapper.ElementType.BIT) { + assert dimensions % 8 == 0; + dimensions /= 8; + } + + float[] vector = new float[dimensions]; + for (int i = 0; i < dimensions; i++) { + // Use a constant value so that relevance is consistent + vector[i] = -128.0f; + } + + return vector; + } + + public static class FakeMlPlugin extends Plugin 
implements ActionPlugin, SearchPlugin { @Override public List getNamedWriteables() { return new MlInferenceNamedXContentProvider().getNamedWriteables(); @@ -413,6 +577,11 @@ public List> getQueryVectorBuilders() { ) ); } + + @Override + public Collection getActions() { + return List.of(new ActionHandler(CoordinatedInferenceAction.INSTANCE, TransportCoordinatedInferenceAction.class)); + } } private record TestIndexInfo( From ec40327ac21893923c73d40546f23556bcc97240 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Mon, 22 Sep 2025 14:08:02 -0400 Subject: [PATCH 15/52] Use fully qualified inference ID to disambiguate inference ID overrides --- .../queries/FullyQualifiedInferenceId.java | 5 ++ ...rceptedInferenceKnnVectorQueryBuilder.java | 36 ++++++++------ .../InterceptedInferenceQueryBuilder.java | 39 ++++++++++----- ...ptedInferenceSparseVectorQueryBuilder.java | 36 ++++++++------ .../queries/SemanticQueryBuilder.java | 48 ++++++++++++------- 5 files changed, 108 insertions(+), 56 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/FullyQualifiedInferenceId.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/FullyQualifiedInferenceId.java index 5ee9d00da4abb..a1b7983847c68 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/FullyQualifiedInferenceId.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/FullyQualifiedInferenceId.java @@ -29,4 +29,9 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(clusterAlias); out.writeString(inferenceId); } + + @Override + public String toString() { + return "{" + "clusterAlias=" + clusterAlias + ", inferenceId=" + inferenceId + "}"; + } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilder.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilder.java index c242bde63f5cb..721bd3e5c1270 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilder.java @@ -34,6 +34,8 @@ import java.util.Collection; import java.util.Map; +import static org.elasticsearch.transport.RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY; + public class InterceptedInferenceKnnVectorQueryBuilder extends InterceptedInferenceQueryBuilder { public static final String NAME = "intercepted_inference_knn"; @@ -73,8 +75,14 @@ protected String getQuery() { } @Override - protected String getInferenceIdOverride() { - return getQueryVectorBuilderModelId(); + protected FullyQualifiedInferenceId getInferenceIdOverride() { + FullyQualifiedInferenceId override = null; + String modelId = getQueryVectorBuilderModelId(); + if (modelId != null) { + override = new FullyQualifiedInferenceId(LOCAL_CLUSTER_GROUP_KEY, modelId); + } + + return override; } @Override @@ -132,7 +140,7 @@ protected QueryBuilder queryFields( } else if (fieldType instanceof SemanticTextFieldMapper.SemanticTextFieldType semanticTextFieldType) { rewritten = querySemanticTextField(indexMetadataContext.getLocalClusterAlias(), semanticTextFieldType); } else { - rewritten = queryNonSemanticTextField(indexMetadataContext.getLocalClusterAlias()); + rewritten = queryNonSemanticTextField(); } return rewritten; @@ -178,12 +186,12 @@ private QueryBuilder querySemanticTextField(String clusterAlias, SemanticTextFie VectorData queryVector = originalQuery.queryVector(); if (queryVector == null) { - String inferenceId = getQueryVectorBuilderModelId(); - if (inferenceId == null) { - inferenceId = semanticTextFieldType.getSearchInferenceId(); + FullyQualifiedInferenceId 
fullyQualifiedInferenceId = getInferenceIdOverride(); + if (fullyQualifiedInferenceId == null) { + fullyQualifiedInferenceId = new FullyQualifiedInferenceId(clusterAlias, semanticTextFieldType.getSearchInferenceId()); } - MlTextEmbeddingResults textEmbeddingResults = getTextEmbeddingResults(clusterAlias, inferenceId); + MlTextEmbeddingResults textEmbeddingResults = getTextEmbeddingResults(fullyQualifiedInferenceId); queryVector = new VectorData(textEmbeddingResults.getInferenceAsFloat()); } @@ -203,18 +211,18 @@ private QueryBuilder querySemanticTextField(String clusterAlias, SemanticTextFie .queryName(originalQuery.queryName()); } - private QueryBuilder queryNonSemanticTextField(String clusterAlias) { + private QueryBuilder queryNonSemanticTextField() { VectorData queryVector = originalQuery.queryVector(); if (queryVector == null) { - String modelId = getQueryVectorBuilderModelId(); - if (modelId == null) { + FullyQualifiedInferenceId fullyQualifiedInferenceId = getInferenceIdOverride(); + if (fullyQualifiedInferenceId == null) { // This should never happen because we validate that either query vector or a valid query vector builder is specified in: // - The KnnVectorQueryBuilder constructor // - coordinatorNodeValidate throw new IllegalStateException("No query vector or query vector builder model ID specified"); } - MlTextEmbeddingResults textEmbeddingResults = getTextEmbeddingResults(clusterAlias, modelId); + MlTextEmbeddingResults textEmbeddingResults = getTextEmbeddingResults(fullyQualifiedInferenceId); queryVector = new VectorData(textEmbeddingResults.getInferenceAsFloat()); } @@ -232,10 +240,10 @@ private QueryBuilder queryNonSemanticTextField(String clusterAlias) { return knnQuery; } - private MlTextEmbeddingResults getTextEmbeddingResults(String clusterAlias, String inferenceId) { - InferenceResults inferenceResults = inferenceResultsMap.get(new FullyQualifiedInferenceId(clusterAlias, inferenceId)); + private MlTextEmbeddingResults 
getTextEmbeddingResults(FullyQualifiedInferenceId fullyQualifiedInferenceId) { + InferenceResults inferenceResults = inferenceResultsMap.get(fullyQualifiedInferenceId); if (inferenceResults == null) { - throw new IllegalStateException("Could not find inference results from inference endpoint [" + inferenceId + "]"); + throw new IllegalStateException("Could not find inference results from inference endpoint [" + fullyQualifiedInferenceId + "]"); } else if (inferenceResults instanceof MlTextEmbeddingResults == false) { throw new IllegalArgumentException( "Expected query inference results to be of type [" diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceQueryBuilder.java index b6b9d7197f75a..42dedab46a1d4 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceQueryBuilder.java @@ -175,7 +175,7 @@ protected abstract QueryBuilder queryFields( /** * Get the query-time inference ID override. If not applicable or available, {@code null} should be returned. */ - protected String getInferenceIdOverride() { + protected FullyQualifiedInferenceId getInferenceIdOverride() { return null; } @@ -284,14 +284,19 @@ private QueryBuilder doRewriteGetInferenceResults(QueryRewriteContext queryRewri // In this case, the remote data node will receive the original query, which will in turn result in an error about querying an // unsupported field type. 
ResolvedIndices resolvedIndices = queryRewriteContext.getResolvedIndices(); - Set inferenceIds = getInferenceIdsForFields( + Set inferenceIds = getInferenceIdsForFields( resolvedIndices.getConcreteLocalIndicesMetadata().values(), + queryRewriteContext.getLocalClusterAlias(), getFields(), resolveWildcards(), useDefaultFields() ); - if (inferenceIds.isEmpty()) { + // If we are handling a CCS request, always retain the intercepted query logic so that we can get inference results generated on + // the local cluster from the inference results map when rewriting on remote cluster data nodes. This can be necessary when: + // - A query specifies an inference ID override + // - Only non-inference fields are queried on the remote cluster + if (inferenceIds.isEmpty() && this.ccsRequest == false) { // Not querying a semantic text field return originalQuery; } @@ -299,7 +304,7 @@ private QueryBuilder doRewriteGetInferenceResults(QueryRewriteContext queryRewri // Validate early to prevent partial failures coordinatorNodeValidate(resolvedIndices); - boolean ccsRequest = resolvedIndices.getRemoteClusterIndices().isEmpty() == false; + boolean ccsRequest = this.ccsRequest || resolvedIndices.getRemoteClusterIndices().isEmpty() == false; if (ccsRequest && queryRewriteContext.isCcsMinimizeRoundTrips() == false) { throw new IllegalArgumentException( originalQuery.getName() @@ -309,7 +314,7 @@ private QueryBuilder doRewriteGetInferenceResults(QueryRewriteContext queryRewri ); } - String inferenceIdOverride = getInferenceIdOverride(); + FullyQualifiedInferenceId inferenceIdOverride = getInferenceIdOverride(); if (inferenceIdOverride != null) { inferenceIds = Set.of(inferenceIdOverride); } @@ -337,13 +342,14 @@ private QueryBuilder doRewriteGetInferenceResults(QueryRewriteContext queryRewri return rewritten; } - private static Set getInferenceIdsForFields( + private static Set getInferenceIdsForFields( Collection indexMetadataCollection, + String clusterAlias, Map fields, boolean 
resolveWildcards, boolean useDefaultFields ) { - Set inferenceIds = new HashSet<>(); + Set fullyQualifiedInferenceIds = new HashSet<>(); for (IndexMetadata indexMetadata : indexMetadataCollection) { final Map indexQueryFields = (useDefaultFields && fields.isEmpty()) ? getDefaultFields(indexMetadata.getSettings()) @@ -354,23 +360,34 @@ private static Set getInferenceIdsForFields( if (indexInferenceFields.containsKey(indexQueryField)) { // No wildcards in field name InferenceFieldMetadata inferenceFieldMetadata = indexInferenceFields.get(indexQueryField); - inferenceIds.add(inferenceFieldMetadata.getSearchInferenceId()); + fullyQualifiedInferenceIds.add( + new FullyQualifiedInferenceId(clusterAlias, inferenceFieldMetadata.getSearchInferenceId()) + ); continue; } if (resolveWildcards) { if (Regex.isMatchAllPattern(indexQueryField)) { - indexInferenceFields.values().forEach(ifm -> inferenceIds.add(ifm.getSearchInferenceId())); + indexInferenceFields.values() + .forEach( + ifm -> fullyQualifiedInferenceIds.add( + new FullyQualifiedInferenceId(clusterAlias, ifm.getSearchInferenceId()) + ) + ); } else if (Regex.isSimpleMatchPattern(indexQueryField)) { indexInferenceFields.values() .stream() .filter(ifm -> Regex.simpleMatch(indexQueryField, ifm.getName())) - .forEach(ifm -> inferenceIds.add(ifm.getSearchInferenceId())); + .forEach( + ifm -> fullyQualifiedInferenceIds.add( + new FullyQualifiedInferenceId(clusterAlias, ifm.getSearchInferenceId()) + ) + ); } } } } - return inferenceIds; + return fullyQualifiedInferenceIds; } private static Map getInferenceFieldsMap( diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilder.java index 1ce803fd5c4d2..ee85f9ab3f4ad 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilder.java @@ -33,6 +33,8 @@ import java.util.List; import java.util.Map; +import static org.elasticsearch.transport.RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY; + public class InterceptedInferenceSparseVectorQueryBuilder extends InterceptedInferenceQueryBuilder { public static final String NAME = "intercepted_inference_sparse_vector"; @@ -66,8 +68,14 @@ protected String getQuery() { } @Override - protected String getInferenceIdOverride() { - return originalQuery.getInferenceId(); + protected FullyQualifiedInferenceId getInferenceIdOverride() { + FullyQualifiedInferenceId override = null; + String originalInferenceId = originalQuery.getInferenceId(); + if (originalInferenceId != null) { + override = new FullyQualifiedInferenceId(LOCAL_CLUSTER_GROUP_KEY, originalInferenceId); + } + + return override; } @Override @@ -114,7 +122,7 @@ protected QueryBuilder queryFields( } else if (fieldType instanceof SemanticTextFieldMapper.SemanticTextFieldType semanticTextFieldType) { rewritten = querySemanticTextField(indexMetadataContext.getLocalClusterAlias(), semanticTextFieldType); } else { - rewritten = queryNonSemanticTextField(indexMetadataContext.getLocalClusterAlias()); + rewritten = queryNonSemanticTextField(); } return rewritten; @@ -150,12 +158,12 @@ private QueryBuilder querySemanticTextField(String clusterAlias, SemanticTextFie List queryVector = originalQuery.getQueryVectors(); if (queryVector == null) { - String inferenceId = originalQuery.getInferenceId(); - if (inferenceId == null) { - inferenceId = semanticTextFieldType.getSearchInferenceId(); + FullyQualifiedInferenceId fullyQualifiedInferenceId = getInferenceIdOverride(); + if (fullyQualifiedInferenceId == null) { + fullyQualifiedInferenceId = new 
FullyQualifiedInferenceId(clusterAlias, semanticTextFieldType.getSearchInferenceId()); } - queryVector = getQueryVector(clusterAlias, inferenceId); + queryVector = getQueryVector(fullyQualifiedInferenceId); } SparseVectorQueryBuilder innerSparseVectorQuery = new SparseVectorQueryBuilder( @@ -172,15 +180,15 @@ private QueryBuilder querySemanticTextField(String clusterAlias, SemanticTextFie .queryName(originalQuery.queryName()); } - private QueryBuilder queryNonSemanticTextField(String clusterAlias) { + private QueryBuilder queryNonSemanticTextField() { List queryVector = originalQuery.getQueryVectors(); if (queryVector == null) { - String inferenceId = originalQuery.getInferenceId(); - if (inferenceId == null) { + FullyQualifiedInferenceId fullyQualifiedInferenceId = getInferenceIdOverride(); + if (fullyQualifiedInferenceId == null) { throw new IllegalArgumentException("Either query vector or inference ID must be specified"); } - queryVector = getQueryVector(clusterAlias, inferenceId); + queryVector = getQueryVector(fullyQualifiedInferenceId); } return new SparseVectorQueryBuilder( @@ -193,10 +201,10 @@ private QueryBuilder queryNonSemanticTextField(String clusterAlias) { ).boost(originalQuery.boost()).queryName(originalQuery.queryName()); } - private List getQueryVector(String clusterAlias, String inferenceId) { - InferenceResults inferenceResults = inferenceResultsMap.get(new FullyQualifiedInferenceId(clusterAlias, inferenceId)); + private List getQueryVector(FullyQualifiedInferenceId fullyQualifiedInferenceId) { + InferenceResults inferenceResults = inferenceResultsMap.get(fullyQualifiedInferenceId); if (inferenceResults == null) { - throw new IllegalStateException("Could not find inference results from inference endpoint [" + inferenceId + "]"); + throw new IllegalStateException("Could not find inference results from inference endpoint [" + fullyQualifiedInferenceId + "]"); } else if (inferenceResults instanceof TextExpansionResults == false) { throw new 
IllegalArgumentException( "Expected query inference results to be of type [" diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index b2b266ff5915c..23eae0c4e498f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -235,8 +235,7 @@ public static SemanticQueryBuilder fromXContent(XContentParser parser) throws IO /** *

- * Get inference results for the provided query using the provided inference IDs. The inference IDs are fully qualified by the - * cluster alias in the provided {@link QueryRewriteContext}. + * Get inference results for the provided query using the provided fully qualified inference IDs. *

*

* This method will return an inference results map that will be asynchronously populated with inference results. If the provided @@ -245,14 +244,14 @@ public static SemanticQueryBuilder fromXContent(XContentParser parser) throws IO *

* * @param queryRewriteContext The query rewrite context - * @param inferenceIds The inference IDs to use to generate inference results + * @param fullyQualifiedInferenceIds The fully qualified inference IDs to use to generate inference results * @param inferenceResultsMap The initial inference results map * @param query The query to generate inference results for * @return An inference results map */ static Map getInferenceResults( QueryRewriteContext queryRewriteContext, - Set inferenceIds, + Set fullyQualifiedInferenceIds, @Nullable Map inferenceResultsMap, @Nullable String query ) { @@ -262,12 +261,19 @@ static Map getInferenceResults( : Map.of(); if (query != null) { - for (String inferenceId : inferenceIds) { - FullyQualifiedInferenceId fullyQualifiedInferenceId = new FullyQualifiedInferenceId( - queryRewriteContext.getLocalClusterAlias(), - inferenceId - ); + for (FullyQualifiedInferenceId fullyQualifiedInferenceId : fullyQualifiedInferenceIds) { if (currentInferenceResultsMap.containsKey(fullyQualifiedInferenceId) == false) { + if (fullyQualifiedInferenceId.clusterAlias().equals(queryRewriteContext.getLocalClusterAlias()) == false) { + // Catch if we are missing inference results that should have been generated on another cluster + throw new IllegalStateException( + "Cannot get inference results for cluster [" + + fullyQualifiedInferenceId.clusterAlias() + + "] on cluster [" + + queryRewriteContext.getLocalClusterAlias() + + "]" + ); + } + if (modifiedInferenceResultsMap == false) { // Copy the inference results map to ensure it is mutable and thread safe currentInferenceResultsMap = new ConcurrentHashMap<>(currentInferenceResultsMap); @@ -278,7 +284,7 @@ static Map getInferenceResults( queryRewriteContext, ((ConcurrentHashMap) currentInferenceResultsMap), query, - inferenceId + fullyQualifiedInferenceId.inferenceId() ); } } @@ -429,7 +435,7 @@ private QueryBuilder doRewriteBuildSemanticQuery(SearchExecutionContext searchEx private SemanticQueryBuilder 
doRewriteGetInferenceResults(QueryRewriteContext queryRewriteContext) { ResolvedIndices resolvedIndices = queryRewriteContext.getResolvedIndices(); - boolean ccsRequest = resolvedIndices.getRemoteClusterIndices().isEmpty() == false; + boolean ccsRequest = this.ccsRequest || resolvedIndices.getRemoteClusterIndices().isEmpty() == false; if (ccsRequest && queryRewriteContext.isCcsMinimizeRoundTrips() == false) { throw new IllegalArgumentException( NAME + " query does not support cross-cluster search when [ccs_minimize_roundtrips] is false" @@ -438,10 +444,14 @@ private SemanticQueryBuilder doRewriteGetInferenceResults(QueryRewriteContext qu SemanticQueryBuilder rewritten = this; if (queryRewriteContext.hasAsyncActions() == false) { - Set inferenceIds = getInferenceIdsForForField(resolvedIndices.getConcreteLocalIndicesMetadata().values(), fieldName); + Set fullyQualifiedInferenceIds = getInferenceIdsForForField( + resolvedIndices.getConcreteLocalIndicesMetadata().values(), + queryRewriteContext.getLocalClusterAlias(), + fieldName + ); Map modifiedInferenceResultsMap = getInferenceResults( queryRewriteContext, - inferenceIds, + fullyQualifiedInferenceIds, inferenceResultsMap, query ); @@ -528,17 +538,21 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException { throw new IllegalStateException(NAME + " should have been rewritten to another query type"); } - private static Set getInferenceIdsForForField(Collection indexMetadataCollection, String fieldName) { - Set inferenceIds = new HashSet<>(); + private static Set getInferenceIdsForForField( + Collection indexMetadataCollection, + String clusterAlias, + String fieldName + ) { + Set fullyQualifiedInferenceIds = new HashSet<>(); for (IndexMetadata indexMetadata : indexMetadataCollection) { InferenceFieldMetadata inferenceFieldMetadata = indexMetadata.getInferenceFields().get(fieldName); String indexInferenceId = inferenceFieldMetadata != null ? 
inferenceFieldMetadata.getSearchInferenceId() : null; if (indexInferenceId != null) { - inferenceIds.add(indexInferenceId); + fullyQualifiedInferenceIds.add(new FullyQualifiedInferenceId(clusterAlias, indexInferenceId)); } } - return inferenceIds; + return fullyQualifiedInferenceIds; } @Override From fe71bc7228565f769aad13c21b016d9414721573 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Mon, 22 Sep 2025 14:42:15 -0400 Subject: [PATCH 16/52] CCS test refactoring --- .../ccs/SemanticCrossClusterSearchIT.java | 66 ++++++++++++------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java index 9d5785b003f96..59e50a7c1e504 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java @@ -56,13 +56,14 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Consumer; +import java.util.stream.Collectors; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.IVF_FORMAT; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; public class SemanticCrossClusterSearchIT extends AbstractMultiClustersTestCase { @@ -393,29 +394,19 @@ public void testKnnQuery() throws Exception { ); // Check that omitting the inference ID when querying a remote dense vector field leads to the expected partial failure - 
KnnVectorQueryBuilder queryBuilder = new KnnVectorQueryBuilder( - mixedTypeField2, - new TextEmbeddingQueryVectorBuilder(null, "c"), - 10, - 100, - IVF_FORMAT.isEnabled() ? 10f : null, - null + assertSearchResponse( + new KnnVectorQueryBuilder( + mixedTypeField2, + new TextEmbeddingQueryVectorBuilder(null, "c"), + 10, + 100, + IVF_FORMAT.isEnabled() ? 10f : null, + null + ), + queryIndices, + List.of(new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3")), + Set.of(new FailureCause(IllegalArgumentException.class, "[model_id] must not be null.")) ); - SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder); - SearchRequest searchRequest = new SearchRequest(queryIndices, searchSourceBuilder); - assertResponse(client().search(searchRequest), response -> { - SearchHit[] hits = response.getHits().getHits(); - assertThat(hits.length, equalTo(1)); - assertThat(hits[0].getId(), equalTo("local_doc_3")); - - SearchResponse.Cluster remoteCluster = response.getClusters().getCluster(REMOTE_CLUSTER); - assertThat(remoteCluster.getStatus(), equalTo(SearchResponse.Cluster.Status.SKIPPED)); - assertThat(remoteCluster.getFailures().size(), equalTo(1)); - - Throwable cause = remoteCluster.getFailures().getFirst().getCause(); - assertThat(cause, instanceOf(IllegalArgumentException.class)); - assertThat(cause.getMessage(), equalTo("[model_id] must not be null.")); - }); } private void setupTwoClusters(TestIndexInfo localIndexInfo, TestIndexInfo remoteIndexInfo) throws IOException { @@ -503,6 +494,15 @@ private static void createInferenceEndpoint(Client client, TaskType taskType, St private void assertSearchResponse(QueryBuilder queryBuilder, String[] indices, List expectedSearchResults) throws Exception { + assertSearchResponse(queryBuilder, indices, expectedSearchResults, null); + } + + private void assertSearchResponse( + QueryBuilder queryBuilder, + String[] indices, + List expectedSearchResults, + Set expectedRemoteFailures + ) throws 
Exception { SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder).size(expectedSearchResults.size()); SearchRequest searchRequest = new SearchRequest(indices, searchSourceBuilder); @@ -519,6 +519,24 @@ private void assertSearchResponse(QueryBuilder queryBuilder, String[] indices, L assertThat(actualSearchResult.getIndex(), equalTo(expectedSearchResult.index())); assertThat(actualSearchResult.getId(), equalTo(expectedSearchResult.id())); } + + SearchResponse.Clusters clusters = response.getClusters(); + assertThat(clusters.getCluster(LOCAL_CLUSTER).getStatus(), equalTo(SearchResponse.Cluster.Status.SUCCESSFUL)); + assertThat(clusters.getCluster(LOCAL_CLUSTER).getFailures().isEmpty(), is(true)); + + SearchResponse.Cluster remoteCluster = clusters.getCluster(REMOTE_CLUSTER); + if (expectedRemoteFailures != null && expectedRemoteFailures.isEmpty() == false) { + assertThat(remoteCluster.getStatus(), equalTo(SearchResponse.Cluster.Status.SKIPPED)); + + Set actualFailures = remoteCluster.getFailures() + .stream() + .map(f -> new FailureCause(f.getCause().getClass(), f.getCause().getMessage())) + .collect(Collectors.toSet()); + assertThat(actualFailures, equalTo(expectedRemoteFailures)); + } else { + assertThat(remoteCluster.getStatus(), equalTo(SearchResponse.Cluster.Status.SUCCESSFUL)); + assertThat(remoteCluster.getFailures().isEmpty(), is(true)); + } }); } @@ -597,4 +615,6 @@ public Map mappings() { } private record SearchResult(String clusterAlias, String index, String id) {} + + private record FailureCause(Class causeClass, String message) {} } From e7269e6f3f5dbdf83c264e332119f235d4a3416b Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Mon, 22 Sep 2025 15:09:21 -0400 Subject: [PATCH 17/52] Extended match query CCS test --- .../ccs/SemanticCrossClusterSearchIT.java | 37 ++++++++++++++----- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git 
a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java index 59e50a7c1e504..b70f72e744e11 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java @@ -201,7 +201,8 @@ public void testMatchQuery() throws Exception { final String commonInferenceIdField = "common-inference-id-field"; final String variableInferenceIdField = "variable-inference-id-field"; - final String mixedTypeField = "mixed-type-field"; + final String mixedTypeField1 = "mixed-type-field-1"; + final String mixedTypeField2 = "mixed-type-field-2"; final TestIndexInfo localIndexInfo = new TestIndexInfo( localIndexName, @@ -211,7 +212,9 @@ public void testMatchQuery() throws Exception { semanticTextMapping(commonInferenceId), variableInferenceIdField, semanticTextMapping(localInferenceId), - mixedTypeField, + mixedTypeField1, + semanticTextMapping(localInferenceId), + mixedTypeField2, Map.of("type", "text") ), Map.of( @@ -220,7 +223,9 @@ public void testMatchQuery() throws Exception { "local_doc_2", Map.of(variableInferenceIdField, "b"), "local_doc_3", - Map.of(mixedTypeField, "c") + Map.of(mixedTypeField1, "c"), + "local_doc_4", + Map.of(mixedTypeField2, "d") ) ); final TestIndexInfo remoteIndexInfo = new TestIndexInfo( @@ -236,16 +241,20 @@ public void testMatchQuery() throws Exception { semanticTextMapping(commonInferenceId), variableInferenceIdField, semanticTextMapping(remoteInferenceId), - mixedTypeField, + mixedTypeField1, + Map.of("type", "text"), + mixedTypeField2, semanticTextMapping(remoteInferenceId) ), Map.of( "remote_doc_1", - Map.of(commonInferenceIdField, "x"), + Map.of(commonInferenceIdField, "w"), "remote_doc_2", - 
Map.of(variableInferenceIdField, "y"), + Map.of(variableInferenceIdField, "x"), "remote_doc_3", - Map.of(mixedTypeField, "z") + Map.of(mixedTypeField1, "y"), + "remote_doc_4", + Map.of(mixedTypeField2, "z") ) ); setupTwoClusters(localIndexInfo, remoteIndexInfo); @@ -272,11 +281,19 @@ public void testMatchQuery() throws Exception { // Query a field that has mixed types across clusters assertSearchResponse( - new MatchQueryBuilder(mixedTypeField, "c"), + new MatchQueryBuilder(mixedTypeField1, "y"), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3") + ) + ); + assertSearchResponse( + new MatchQueryBuilder(mixedTypeField2, "d"), queryIndices, List.of( - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3"), - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3") + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_4"), + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_4") ) ); } From deeefcb220cd79b669345a7029dd09e8f0ace4b7 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Mon, 22 Sep 2025 16:02:03 -0400 Subject: [PATCH 18/52] Added sparse vector query CCS test --- .../ccs/SemanticCrossClusterSearchIT.java | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java index b70f72e744e11..64af2abc878c9 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java @@ -20,6 +20,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.TimeValue; import 
org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.inference.MinimalServiceSettings; @@ -41,6 +42,7 @@ import org.elasticsearch.xpack.core.inference.action.PutInferenceModelAction; import org.elasticsearch.xpack.core.ml.action.CoordinatedInferenceAction; import org.elasticsearch.xpack.core.ml.inference.MlInferenceNamedXContentProvider; +import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilder; import org.elasticsearch.xpack.core.ml.vectors.TextEmbeddingQueryVectorBuilder; import org.elasticsearch.xpack.inference.LocalStateInferencePlugin; import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper; @@ -426,6 +428,96 @@ public void testKnnQuery() throws Exception { ); } + public void testSparseVectorQuery() throws Exception { + final String localIndexName = "local-index"; + final String remoteIndexName = "remote-index"; + final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + + final String commonInferenceId = "common-inference-id"; + + final String commonInferenceIdField = "common-inference-id-field"; + final String mixedTypeField1 = "mixed-type-field-1"; + final String mixedTypeField2 = "mixed-type-field-2"; + + final TestIndexInfo localIndexInfo = new TestIndexInfo( + localIndexName, + Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + mixedTypeField1, + sparseVectorMapping(), + mixedTypeField2, + semanticTextMapping(commonInferenceId) + ), + Map.of( + "local_doc_1", + Map.of(commonInferenceIdField, "a"), + "local_doc_2", + Map.of(mixedTypeField1, generateSparseVectorFieldValue()), + "local_doc_3", + Map.of(mixedTypeField2, "c") + ) + ); + final TestIndexInfo 
remoteIndexInfo = new TestIndexInfo( + remoteIndexName, + Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + mixedTypeField1, + semanticTextMapping(commonInferenceId), + mixedTypeField2, + sparseVectorMapping() + ), + Map.of( + "remote_doc_1", + Map.of(commonInferenceIdField, "x"), + "remote_doc_2", + Map.of(mixedTypeField1, "y"), + "remote_doc_3", + Map.of(mixedTypeField2, generateSparseVectorFieldValue()) + ) + ); + setupTwoClusters(localIndexInfo, remoteIndexInfo); + + // Query a field has the same inference ID value across clusters, but with different backing inference services + assertSearchResponse( + new SparseVectorQueryBuilder(commonInferenceIdField, null, "a"), + queryIndices, + List.of( + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_1"), + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_1") + ) + ); + + // Query a field that has mixed types across clusters + assertSearchResponse( + new SparseVectorQueryBuilder(mixedTypeField1, commonInferenceId, "b"), + queryIndices, + List.of( + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2"), + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2") + ) + ); + assertSearchResponse( + new SparseVectorQueryBuilder(mixedTypeField2, commonInferenceId, "c"), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3") + ) + ); + + // Check that omitting the inference ID when querying a remote sparse vector field leads to the expected partial failure + assertSearchResponse( + new SparseVectorQueryBuilder(mixedTypeField2, null, "c"), + queryIndices, + List.of(new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3")), + Set.of(new FailureCause(IllegalArgumentException.class, "inference_id required to perform vector search on query string")) + ); + } + private void 
setupTwoClusters(TestIndexInfo localIndexInfo, TestIndexInfo remoteIndexInfo) throws IOException { setupCluster(LOCAL_CLUSTER, localIndexInfo); setupCluster(REMOTE_CLUSTER, remoteIndexInfo); @@ -577,6 +669,10 @@ private static Map denseVectorMapping(int dimensions) { return Map.of("type", DenseVectorFieldMapper.CONTENT_TYPE, "dims", dimensions); } + private static Map sparseVectorMapping() { + return Map.of("type", SparseVectorFieldMapper.CONTENT_TYPE); + } + private static String fullyQualifiedIndexName(String clusterAlias, String indexName) { return clusterAlias + ":" + indexName; } @@ -596,6 +692,12 @@ private static float[] generateDenseVectorFieldValue(int dimensions, DenseVector return vector; } + private static Map generateSparseVectorFieldValue() { + // Generate values that have the same recall behavior as those produced by TestSparseInferenceServiceExtension. Use a constant token + // weight so that relevance is consistent. + return Map.of("feature_0", 1.0f); + } + public static class FakeMlPlugin extends Plugin implements ActionPlugin, SearchPlugin { @Override public List getNamedWriteables() { From 1d3896751f59055257720802a156e36424f80111 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Mon, 22 Sep 2025 16:15:29 -0400 Subject: [PATCH 19/52] Added test cases using query vectors --- .../ccs/SemanticCrossClusterSearchIT.java | 44 ++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java index 64af2abc878c9..e144ee5143e79 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java @@ -26,6 +26,7 @@ import 
org.elasticsearch.inference.MinimalServiceSettings; import org.elasticsearch.inference.SimilarityMeasure; import org.elasticsearch.inference.TaskType; +import org.elasticsearch.inference.WeightedToken; import org.elasticsearch.license.LicenseSettings; import org.elasticsearch.plugins.ActionPlugin; import org.elasticsearch.plugins.Plugin; @@ -35,6 +36,7 @@ import org.elasticsearch.search.builder.PointInTimeBuilder; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.vectors.KnnVectorQueryBuilder; +import org.elasticsearch.search.vectors.VectorData; import org.elasticsearch.test.AbstractMultiClustersTestCase; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; @@ -395,7 +397,6 @@ public void testKnnQuery() throws Exception { new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2") ) ); - assertSearchResponse( new KnnVectorQueryBuilder( mixedTypeField2, @@ -412,6 +413,25 @@ public void testKnnQuery() throws Exception { ) ); + // Query a field that has mixed types across clusters using a query vector + final VectorData queryVector = new VectorData(generateDenseVectorFieldValue(384, DenseVectorFieldMapper.ElementType.FLOAT)); + assertSearchResponse( + new KnnVectorQueryBuilder(mixedTypeField1, queryVector, 10, 100, IVF_FORMAT.isEnabled() ? 10f : null, null, null), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2") + ) + ); + assertSearchResponse( + new KnnVectorQueryBuilder(mixedTypeField2, queryVector, 10, 100, IVF_FORMAT.isEnabled() ? 
10f : null, null, null), + queryIndices, + List.of( + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3"), + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3") + ) + ); + // Check that omitting the inference ID when querying a remote dense vector field leads to the expected partial failure assertSearchResponse( new KnnVectorQueryBuilder( @@ -509,6 +529,28 @@ public void testSparseVectorQuery() throws Exception { ) ); + // Query a field that has mixed types across clusters using a query vector + final List queryVector = generateSparseVectorFieldValue().entrySet() + .stream() + .map(e -> new WeightedToken(e.getKey(), e.getValue())) + .toList(); + assertSearchResponse( + new SparseVectorQueryBuilder(mixedTypeField1, queryVector, null, null, null, null), + queryIndices, + List.of( + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2"), + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2") + ) + ); + assertSearchResponse( + new SparseVectorQueryBuilder(mixedTypeField2, queryVector, null, null, null, null), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3") + ) + ); + // Check that omitting the inference ID when querying a remote sparse vector field leads to the expected partial failure assertSearchResponse( new SparseVectorQueryBuilder(mixedTypeField2, null, "c"), From 3c1acdfe96a50c7d92a9f6b8b5fa1be185d246ab Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Mon, 22 Sep 2025 18:00:04 -0400 Subject: [PATCH 20/52] Added match query test with ccs_minimize_roundtrips=false --- .../ccs/SemanticCrossClusterSearchIT.java | 118 ++++++++++++++++-- 1 file changed, 111 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java 
b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java index e144ee5143e79..4413bb06d6572 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java @@ -18,11 +18,13 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryShardException; import org.elasticsearch.inference.MinimalServiceSettings; import org.elasticsearch.inference.SimilarityMeasure; import org.elasticsearch.inference.TaskType; @@ -219,7 +221,7 @@ public void testMatchQuery() throws Exception { mixedTypeField1, semanticTextMapping(localInferenceId), mixedTypeField2, - Map.of("type", "text") + textMapping() ), Map.of( "local_doc_1", @@ -246,7 +248,7 @@ public void testMatchQuery() throws Exception { variableInferenceIdField, semanticTextMapping(remoteInferenceId), mixedTypeField1, - Map.of("type", "text"), + textMapping(), mixedTypeField2, semanticTextMapping(remoteInferenceId) ), @@ -302,6 +304,98 @@ public void testMatchQuery() throws Exception { ); } + public void testMatchQueryWithCcMinimizeRoundTripsFalse() throws Exception { + final String localIndexName = "local-index"; + final String remoteIndexName = "remote-index"; + final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + final Consumer 
assertCcsMinimizeRoundTripsFalseFailure = q -> { + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(q); + SearchRequest searchRequest = new SearchRequest(queryIndices, searchSourceBuilder); + searchRequest.setCcsMinimizeRoundtrips(false); + + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> client().search(searchRequest).actionGet(TEST_REQUEST_TIMEOUT) + ); + assertThat( + e.getMessage(), + equalTo( + "match query does not support cross-cluster search when querying a [semantic_text] field when " + + "[ccs_minimize_roundtrips] is false" + ) + ); + }; + + final String commonInferenceId = "common-inference-id"; + + final String commonInferenceIdField = "common-inference-id-field"; + final String mixedTypeField1 = "mixed-type-field-1"; + final String mixedTypeField2 = "mixed-type-field-2"; + final String textField = "text-field"; + + final TestIndexInfo localIndexInfo = new TestIndexInfo( + localIndexName, + Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + mixedTypeField1, + semanticTextMapping(commonInferenceId), + mixedTypeField2, + textMapping(), + textField, + textMapping() + ), + Map.of(mixedTypeField2 + "_doc", Map.of(mixedTypeField2, "a"), textField + "_doc", Map.of(textField, "b b b")) + ); + final TestIndexInfo remoteIndexInfo = new TestIndexInfo( + remoteIndexName, + Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + mixedTypeField1, + textMapping(), + mixedTypeField2, + semanticTextMapping(commonInferenceId), + textField, + textMapping() + ), + Map.of(textField + "_doc", Map.of(textField, "b")) + ); + setupTwoClusters(localIndexInfo, remoteIndexInfo); + + // Validate that expected cases fail + assertCcsMinimizeRoundTripsFalseFailure.accept(new MatchQueryBuilder(commonInferenceIdField, randomAlphaOfLength(5))); + 
assertCcsMinimizeRoundTripsFalseFailure.accept(new MatchQueryBuilder(mixedTypeField1, randomAlphaOfLength(5))); + + // Validate the expected ccs_minimize_roundtrips=false detection gap and failure mode when querying non-inference fields locally + assertSearchResponse( + new MatchQueryBuilder(mixedTypeField2, "a"), + queryIndices, + List.of(new SearchResult(null, localIndexName, mixedTypeField2 + "_doc")), + Set.of( + new FailureCause( + QueryShardException.class, + "failed to create query: Field [mixed-type-field-2] of type [semantic_text] does not support match queries" + ) + ), + s -> s.setCcsMinimizeRoundtrips(false) + ); + + // Validate that a CCS match query functions when only text fields are queried + assertSearchResponse( + new MatchQueryBuilder(textField, "b"), + queryIndices, + List.of( + new SearchResult(null, localIndexName, textField + "_doc"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, textField + "_doc") + ), + Set.of(), + s -> s.setCcsMinimizeRoundtrips(false) + ); + } + public void testKnnQuery() throws Exception { final String localIndexName = "local-index"; final String remoteIndexName = "remote-index"; @@ -444,7 +538,8 @@ public void testKnnQuery() throws Exception { ), queryIndices, List.of(new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3")), - Set.of(new FailureCause(IllegalArgumentException.class, "[model_id] must not be null.")) + Set.of(new FailureCause(IllegalArgumentException.class, "[model_id] must not be null.")), + null ); } @@ -556,7 +651,8 @@ public void testSparseVectorQuery() throws Exception { new SparseVectorQueryBuilder(mixedTypeField2, null, "c"), queryIndices, List.of(new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3")), - Set.of(new FailureCause(IllegalArgumentException.class, "inference_id required to perform vector search on query string")) + Set.of(new FailureCause(IllegalArgumentException.class, "inference_id required to perform vector search on query string")), + null ); } @@ -645,17 
+741,21 @@ private static void createInferenceEndpoint(Client client, TaskType taskType, St private void assertSearchResponse(QueryBuilder queryBuilder, String[] indices, List expectedSearchResults) throws Exception { - assertSearchResponse(queryBuilder, indices, expectedSearchResults, null); + assertSearchResponse(queryBuilder, indices, expectedSearchResults, null, null); } private void assertSearchResponse( QueryBuilder queryBuilder, String[] indices, List expectedSearchResults, - Set expectedRemoteFailures + Set expectedRemoteFailures, + Consumer searchRequestModifier ) throws Exception { SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder).size(expectedSearchResults.size()); SearchRequest searchRequest = new SearchRequest(indices, searchSourceBuilder); + if (searchRequestModifier != null) { + searchRequestModifier.accept(searchRequest); + } assertResponse(client().search(searchRequest), response -> { SearchHit[] hits = response.getHits().getHits(); @@ -707,6 +807,10 @@ private static Map semanticTextMapping(String inferenceId) { return Map.of("type", SemanticTextFieldMapper.CONTENT_TYPE, "inference_id", inferenceId); } + private static Map textMapping() { + return Map.of("type", "text"); + } + private static Map denseVectorMapping(int dimensions) { return Map.of("type", DenseVectorFieldMapper.CONTENT_TYPE, "dims", dimensions); } @@ -775,7 +879,7 @@ public Map mappings() { } } - private record SearchResult(String clusterAlias, String index, String id) {} + private record SearchResult(@Nullable String clusterAlias, String index, String id) {} private record FailureCause(Class causeClass, String message) {} } From 9fd9c667fe8799c1b71b7583bb5fe4013d937610 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Tue, 23 Sep 2025 10:14:56 -0400 Subject: [PATCH 21/52] Added knn query test with ccs_minimize_roundtrips=false --- .../ccs/SemanticCrossClusterSearchIT.java | 181 ++++++++++++++++-- 1 file changed, 165 insertions(+), 16 
deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java index 4413bb06d6572..ad2dc0b0b9e92 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java @@ -63,6 +63,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.stream.Collectors; @@ -374,10 +375,13 @@ public void testMatchQueryWithCcMinimizeRoundTripsFalse() throws Exception { new MatchQueryBuilder(mixedTypeField2, "a"), queryIndices, List.of(new SearchResult(null, localIndexName, mixedTypeField2 + "_doc")), - Set.of( - new FailureCause( - QueryShardException.class, - "failed to create query: Field [mixed-type-field-2] of type [semantic_text] does not support match queries" + new ClusterFailure( + SearchResponse.Cluster.Status.SKIPPED, + Set.of( + new FailureCause( + QueryShardException.class, + "failed to create query: Field [mixed-type-field-2] of type [semantic_text] does not support match queries" + ) ) ), s -> s.setCcsMinimizeRoundtrips(false) @@ -391,7 +395,7 @@ public void testMatchQueryWithCcMinimizeRoundTripsFalse() throws Exception { new SearchResult(null, localIndexName, textField + "_doc"), new SearchResult(REMOTE_CLUSTER, remoteIndexName, textField + "_doc") ), - Set.of(), + null, s -> s.setCcsMinimizeRoundtrips(false) ); } @@ -428,7 +432,7 @@ public void testKnnQuery() throws Exception { "local_doc_1", Map.of(commonInferenceIdField, "a"), "local_doc_2", - Map.of(mixedTypeField1, generateDenseVectorFieldValue(384, DenseVectorFieldMapper.ElementType.FLOAT)), + Map.of(mixedTypeField1, 
generateDenseVectorFieldValue(384, DenseVectorFieldMapper.ElementType.FLOAT, -128.0f)), "local_doc_3", Map.of(mixedTypeField2, "c") ) @@ -453,7 +457,7 @@ public void testKnnQuery() throws Exception { "remote_doc_2", Map.of(mixedTypeField1, "y"), "remote_doc_3", - Map.of(mixedTypeField2, generateDenseVectorFieldValue(384, DenseVectorFieldMapper.ElementType.FLOAT)) + Map.of(mixedTypeField2, generateDenseVectorFieldValue(384, DenseVectorFieldMapper.ElementType.FLOAT, -128.0f)) ) ); setupTwoClusters(localIndexInfo, remoteIndexInfo); @@ -508,7 +512,9 @@ public void testKnnQuery() throws Exception { ); // Query a field that has mixed types across clusters using a query vector - final VectorData queryVector = new VectorData(generateDenseVectorFieldValue(384, DenseVectorFieldMapper.ElementType.FLOAT)); + final VectorData queryVector = new VectorData( + generateDenseVectorFieldValue(384, DenseVectorFieldMapper.ElementType.FLOAT, -128.0f) + ); assertSearchResponse( new KnnVectorQueryBuilder(mixedTypeField1, queryVector, 10, 100, IVF_FORMAT.isEnabled() ? 10f : null, null, null), queryIndices, @@ -538,11 +544,148 @@ public void testKnnQuery() throws Exception { ), queryIndices, List.of(new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3")), - Set.of(new FailureCause(IllegalArgumentException.class, "[model_id] must not be null.")), + new ClusterFailure( + SearchResponse.Cluster.Status.SKIPPED, + Set.of(new FailureCause(IllegalArgumentException.class, "[model_id] must not be null.")) + ), null ); } + public void testKnnQueryWithCcMinimizeRoundTripsFalse() throws Exception { + final String localIndexName = "local-index"; + final String remoteIndexName = "remote-index"; + final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + final BiConsumer assertCcsMinimizeRoundTripsFalseFailure = (f, qvb) -> { + KnnVectorQueryBuilder queryBuilder = new KnnVectorQueryBuilder(f, qvb, 10, 100, IVF_FORMAT.isEnabled() ? 
10f : null, null); + + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder); + SearchRequest searchRequest = new SearchRequest(queryIndices, searchSourceBuilder); + searchRequest.setCcsMinimizeRoundtrips(false); + + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> client().search(searchRequest).actionGet(TEST_REQUEST_TIMEOUT) + ); + assertThat( + e.getMessage(), + equalTo( + "knn query does not support cross-cluster search when querying a [semantic_text] field when " + + "[ccs_minimize_roundtrips] is false" + ) + ); + }; + + final int dimensions = 256; + final String commonInferenceId = "common-inference-id"; + final MinimalServiceSettings commonInferenceIdServiceSettings = textEmbeddingServiceSettings( + dimensions, + SimilarityMeasure.COSINE, + DenseVectorFieldMapper.ElementType.FLOAT + ); + + final String commonInferenceIdField = "common-inference-id-field"; + final String mixedTypeField1 = "mixed-type-field-1"; + final String mixedTypeField2 = "mixed-type-field-2"; + final String denseVectorField = "dense-vector-field"; + + final TestIndexInfo localIndexInfo = new TestIndexInfo( + localIndexName, + Map.of(commonInferenceId, commonInferenceIdServiceSettings), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + mixedTypeField1, + semanticTextMapping(commonInferenceId), + mixedTypeField2, + denseVectorMapping(dimensions), + denseVectorField, + denseVectorMapping(dimensions) + ), + Map.of( + mixedTypeField2 + "_doc", + Map.of(mixedTypeField2, generateDenseVectorFieldValue(dimensions, DenseVectorFieldMapper.ElementType.FLOAT, -128.0f)), + denseVectorField + "_doc", + Map.of(denseVectorField, generateDenseVectorFieldValue(dimensions, DenseVectorFieldMapper.ElementType.FLOAT, 1.0f)) + ) + ); + final TestIndexInfo remoteIndexInfo = new TestIndexInfo( + remoteIndexName, + Map.of(commonInferenceId, commonInferenceIdServiceSettings), + Map.of( + commonInferenceIdField, + 
semanticTextMapping(commonInferenceId), + mixedTypeField1, + denseVectorMapping(dimensions), + mixedTypeField2, + semanticTextMapping(commonInferenceId), + denseVectorField, + denseVectorMapping(dimensions) + ), + Map.of( + mixedTypeField2 + "_doc", + Map.of(mixedTypeField2, "a"), + denseVectorField + "_doc", + Map.of(denseVectorField, generateDenseVectorFieldValue(dimensions, DenseVectorFieldMapper.ElementType.FLOAT, -128.0f)) + ) + ); + setupTwoClusters(localIndexInfo, remoteIndexInfo); + + // Validate that expected cases fail + assertCcsMinimizeRoundTripsFalseFailure.accept( + commonInferenceIdField, + new TextEmbeddingQueryVectorBuilder(null, randomAlphaOfLength(5)) + ); + assertCcsMinimizeRoundTripsFalseFailure.accept( + mixedTypeField1, + new TextEmbeddingQueryVectorBuilder(commonInferenceId, randomAlphaOfLength(5)) + ); + + // Validate the expected ccs_minimize_roundtrips=false detection gap and failure mode when querying non-inference fields locally + assertSearchResponse( + new KnnVectorQueryBuilder( + mixedTypeField2, + new TextEmbeddingQueryVectorBuilder(commonInferenceId, "foo"), + 10, + 100, + IVF_FORMAT.isEnabled() ? 10f : null, + null + ), + queryIndices, + List.of(new SearchResult(null, localIndexName, mixedTypeField2 + "_doc")), + new ClusterFailure( + SearchResponse.Cluster.Status.SKIPPED, + Set.of( + new FailureCause( + QueryShardException.class, + "failed to create query: [knn] queries are only supported on [dense_vector] fields" + ) + ) + ), + s -> s.setCcsMinimizeRoundtrips(false) + ); + + // Validate that a CCS knn query functions when only dense vector fields are queried + assertSearchResponse( + new KnnVectorQueryBuilder( + denseVectorField, + generateDenseVectorFieldValue(dimensions, DenseVectorFieldMapper.ElementType.FLOAT, 1.0f), + 10, + 100, + IVF_FORMAT.isEnabled() ? 
10f : null, + null, + null + ), + queryIndices, + List.of( + new SearchResult(null, localIndexName, denseVectorField + "_doc"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, denseVectorField + "_doc") + ), + null, + s -> s.setCcsMinimizeRoundtrips(false) + ); + } + public void testSparseVectorQuery() throws Exception { final String localIndexName = "local-index"; final String remoteIndexName = "remote-index"; @@ -651,7 +794,10 @@ public void testSparseVectorQuery() throws Exception { new SparseVectorQueryBuilder(mixedTypeField2, null, "c"), queryIndices, List.of(new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3")), - Set.of(new FailureCause(IllegalArgumentException.class, "inference_id required to perform vector search on query string")), + new ClusterFailure( + SearchResponse.Cluster.Status.SKIPPED, + Set.of(new FailureCause(IllegalArgumentException.class, "inference_id required to perform vector search on query string")) + ), null ); } @@ -748,7 +894,7 @@ private void assertSearchResponse( QueryBuilder queryBuilder, String[] indices, List expectedSearchResults, - Set expectedRemoteFailures, + ClusterFailure expectedRemoteFailure, Consumer searchRequestModifier ) throws Exception { SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder).size(expectedSearchResults.size()); @@ -776,14 +922,15 @@ private void assertSearchResponse( assertThat(clusters.getCluster(LOCAL_CLUSTER).getFailures().isEmpty(), is(true)); SearchResponse.Cluster remoteCluster = clusters.getCluster(REMOTE_CLUSTER); - if (expectedRemoteFailures != null && expectedRemoteFailures.isEmpty() == false) { - assertThat(remoteCluster.getStatus(), equalTo(SearchResponse.Cluster.Status.SKIPPED)); + if (expectedRemoteFailure != null) { + assertThat(remoteCluster.getStatus(), equalTo(expectedRemoteFailure.status())); + Set expectedFailures = expectedRemoteFailure.failures(); Set actualFailures = remoteCluster.getFailures() .stream() .map(f -> new 
FailureCause(f.getCause().getClass(), f.getCause().getMessage())) .collect(Collectors.toSet()); - assertThat(actualFailures, equalTo(expectedRemoteFailures)); + assertThat(actualFailures, equalTo(expectedFailures)); } else { assertThat(remoteCluster.getStatus(), equalTo(SearchResponse.Cluster.Status.SUCCESSFUL)); assertThat(remoteCluster.getFailures().isEmpty(), is(true)); @@ -823,7 +970,7 @@ private static String fullyQualifiedIndexName(String clusterAlias, String indexN return clusterAlias + ":" + indexName; } - private static float[] generateDenseVectorFieldValue(int dimensions, DenseVectorFieldMapper.ElementType elementType) { + private static float[] generateDenseVectorFieldValue(int dimensions, DenseVectorFieldMapper.ElementType elementType, float value) { if (elementType == DenseVectorFieldMapper.ElementType.BIT) { assert dimensions % 8 == 0; dimensions /= 8; @@ -832,7 +979,7 @@ private static float[] generateDenseVectorFieldValue(int dimensions, DenseVector float[] vector = new float[dimensions]; for (int i = 0; i < dimensions; i++) { // Use a constant value so that relevance is consistent - vector[i] = -128.0f; + vector[i] = value; } return vector; @@ -882,4 +1029,6 @@ public Map mappings() { private record SearchResult(@Nullable String clusterAlias, String index, String id) {} private record FailureCause(Class causeClass, String message) {} + + private record ClusterFailure(SearchResponse.Cluster.Status status, Set failures) {} } From e92935570b30733232744a74b532d2fe2f688353 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Tue, 23 Sep 2025 10:46:21 -0400 Subject: [PATCH 22/52] Added sparse vector query test with ccs_minimize_roundtrips=false --- .../ccs/SemanticCrossClusterSearchIT.java | 121 +++++++++++++++++- 1 file changed, 114 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java 
b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java index ad2dc0b0b9e92..b0bff3ee0a848 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java @@ -305,7 +305,7 @@ public void testMatchQuery() throws Exception { ); } - public void testMatchQueryWithCcMinimizeRoundTripsFalse() throws Exception { + public void testMatchQueryWithCcsMinimizeRoundTripsFalse() throws Exception { final String localIndexName = "local-index"; final String remoteIndexName = "remote-index"; final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; @@ -552,7 +552,7 @@ public void testKnnQuery() throws Exception { ); } - public void testKnnQueryWithCcMinimizeRoundTripsFalse() throws Exception { + public void testKnnQueryWithCcsMinimizeRoundTripsFalse() throws Exception { final String localIndexName = "local-index"; final String remoteIndexName = "remote-index"; final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; @@ -712,7 +712,7 @@ public void testSparseVectorQuery() throws Exception { "local_doc_1", Map.of(commonInferenceIdField, "a"), "local_doc_2", - Map.of(mixedTypeField1, generateSparseVectorFieldValue()), + Map.of(mixedTypeField1, generateSparseVectorFieldValue(1.0f)), "local_doc_3", Map.of(mixedTypeField2, "c") ) @@ -734,7 +734,7 @@ public void testSparseVectorQuery() throws Exception { "remote_doc_2", Map.of(mixedTypeField1, "y"), "remote_doc_3", - Map.of(mixedTypeField2, generateSparseVectorFieldValue()) + Map.of(mixedTypeField2, generateSparseVectorFieldValue(1.0f)) ) ); setupTwoClusters(localIndexInfo, remoteIndexInfo); @@ -768,7 +768,7 @@ public void testSparseVectorQuery() throws Exception { ); // 
Query a field that has mixed types across clusters using a query vector - final List queryVector = generateSparseVectorFieldValue().entrySet() + final List queryVector = generateSparseVectorFieldValue(1.0f).entrySet() .stream() .map(e -> new WeightedToken(e.getKey(), e.getValue())) .toList(); @@ -802,6 +802,113 @@ public void testSparseVectorQuery() throws Exception { ); } + public void testSparseVectorQueryWithCcsMinimizeRoundTripsFalse() throws Exception { + final String localIndexName = "local-index"; + final String remoteIndexName = "remote-index"; + final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + final Consumer assertCcsMinimizeRoundTripsFalseFailure = q -> { + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(q); + SearchRequest searchRequest = new SearchRequest(queryIndices, searchSourceBuilder); + searchRequest.setCcsMinimizeRoundtrips(false); + + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> client().search(searchRequest).actionGet(TEST_REQUEST_TIMEOUT) + ); + assertThat( + e.getMessage(), + equalTo( + "sparse_vector query does not support cross-cluster search when querying a [semantic_text] field when " + + "[ccs_minimize_roundtrips] is false" + ) + ); + }; + + final String commonInferenceId = "common-inference-id"; + + final String commonInferenceIdField = "common-inference-id-field"; + final String mixedTypeField1 = "mixed-type-field-1"; + final String mixedTypeField2 = "mixed-type-field-2"; + final String sparseVectorField = "sparse-vector-field"; + + final TestIndexInfo localIndexInfo = new TestIndexInfo( + localIndexName, + Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + mixedTypeField1, + semanticTextMapping(commonInferenceId), + mixedTypeField2, + sparseVectorMapping(), + sparseVectorField, + sparseVectorMapping() + ), + 
Map.of( + mixedTypeField2 + "_doc", + Map.of(mixedTypeField2, generateSparseVectorFieldValue(1.0f)), + sparseVectorField + "_doc", + Map.of(sparseVectorField, generateSparseVectorFieldValue(1.0f)) + ) + ); + final TestIndexInfo remoteIndexInfo = new TestIndexInfo( + remoteIndexName, + Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + mixedTypeField1, + sparseVectorMapping(), + mixedTypeField2, + semanticTextMapping(commonInferenceId), + sparseVectorField, + sparseVectorMapping() + ), + Map.of( + mixedTypeField2 + "_doc", + Map.of(mixedTypeField2, "a"), + sparseVectorField + "_doc", + Map.of(sparseVectorField, generateSparseVectorFieldValue(0.5f)) + ) + ); + setupTwoClusters(localIndexInfo, remoteIndexInfo); + + // Validate that expected cases fail + assertCcsMinimizeRoundTripsFalseFailure.accept(new SparseVectorQueryBuilder(commonInferenceIdField, null, randomAlphaOfLength(5))); + assertCcsMinimizeRoundTripsFalseFailure.accept( + new SparseVectorQueryBuilder(mixedTypeField1, commonInferenceId, randomAlphaOfLength(5)) + ); + + // Validate the expected ccs_minimize_roundtrips=false detection gap and failure mode when querying non-inference fields locally + assertSearchResponse( + new SparseVectorQueryBuilder(mixedTypeField2, commonInferenceId, "foo"), + queryIndices, + List.of(new SearchResult(null, localIndexName, mixedTypeField2 + "_doc")), + new ClusterFailure( + SearchResponse.Cluster.Status.SKIPPED, + Set.of( + new FailureCause( + QueryShardException.class, + "failed to create query: field [mixed-type-field-2] must be type [sparse_vector] but is type [semantic_text]" + ) + ) + ), + s -> s.setCcsMinimizeRoundtrips(false) + ); + + // Validate that a CCS sparse vector query functions when only sparse vector fields are queried + assertSearchResponse( + new SparseVectorQueryBuilder(sparseVectorField, commonInferenceId, "foo"), + queryIndices, + List.of( + new SearchResult(null, 
localIndexName, sparseVectorField + "_doc"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, sparseVectorField + "_doc") + ), + null, + s -> s.setCcsMinimizeRoundtrips(false) + ); + } + private void setupTwoClusters(TestIndexInfo localIndexInfo, TestIndexInfo remoteIndexInfo) throws IOException { setupCluster(LOCAL_CLUSTER, localIndexInfo); setupCluster(REMOTE_CLUSTER, remoteIndexInfo); @@ -985,10 +1092,10 @@ private static float[] generateDenseVectorFieldValue(int dimensions, DenseVector return vector; } - private static Map generateSparseVectorFieldValue() { + private static Map generateSparseVectorFieldValue(float weight) { // Generate values that have the same recall behavior as those produced by TestSparseInferenceServiceExtension. Use a constant token // weight so that relevance is consistent. - return Map.of("feature_0", 1.0f); + return Map.of("feature_0", weight); } public static class FakeMlPlugin extends Plugin implements ActionPlugin, SearchPlugin { From 3815f26127b3520da6ff4da0b9bf4e2d0ff71d72 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Tue, 23 Sep 2025 14:28:55 -0400 Subject: [PATCH 23/52] Added CCS serialization test for intercepted queries --- ...erceptedInferenceQueryBuilderTestCase.java | 194 ++++++++++++++---- ...edInferenceKnnVectorQueryBuilderTests.java | 3 +- ...ceptedInferenceMatchQueryBuilderTests.java | 3 +- ...nferenceSparseVectorQueryBuilderTests.java | 3 +- 4 files changed, 165 insertions(+), 38 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java index 7316af4d5cffe..d0cff4ab98d22 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java @@ -65,12 +65,14 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.function.BiConsumer; import java.util.function.Supplier; +import static org.elasticsearch.TransportVersions.NEW_SEMANTIC_QUERY_INTERCEPTORS; +import static org.elasticsearch.TransportVersions.SEMANTIC_SEARCH_CCS_SUPPORT; +import static org.elasticsearch.TransportVersions.V_8_15_0; import static org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceConfig.DEFAULT_RESULTS_FIELD; -import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.spy; @@ -168,34 +170,96 @@ public void testBwCSerialization() throws Exception { } } - public void testCcs() throws Exception { - final String field = "semantic_field"; - final QueryRewriteContext queryRewriteContext = createQueryRewriteContext( - Map.of("local-index", Map.of(field, SPARSE_INFERENCE_ID)), - Map.of("remote-alias", "remote-index"), - TransportVersion.current() + public void testCcsSerialization() throws Exception { + final String inferenceField = "semantic_field"; + final var localIndexInferenceFields = Map.of("local-index", Map.of(inferenceField, SPARSE_INFERENCE_ID)); + final var remoteIndices = Map.of("remote-alias", "remote-index"); + final T inferenceFieldQuery = createQueryBuilder(inferenceField); + final T nonInferenceFieldQuery = createQueryBuilder("non_inference_field"); + + // Test with the current transport version. This simulates sending the query to a remote cluster that supports semantic search CCS. 
+ final QueryRewriteContext contextCurrent = createQueryRewriteContext( + localIndexInferenceFields, + remoteIndices, + TransportVersion.current(), + true ); - // Test querying a semantic text field - final T semanticFieldQuery = createQueryBuilder(field); - IllegalArgumentException e = assertThrows( - IllegalArgumentException.class, - () -> rewriteAndFetch(semanticFieldQuery, queryRewriteContext) + assertRewriteAndSerializeOnInferenceField(inferenceFieldQuery, contextCurrent, null, null); + assertRewriteAndSerializeOnNonInferenceField(nonInferenceFieldQuery, contextCurrent); + + // Test when ccs_minimize_roundtrips=false + final QueryRewriteContext minimizeRoundTripsFalseContext = createQueryRewriteContext( + localIndexInferenceFields, + remoteIndices, + TransportVersion.current(), + false + ); + + assertRewriteAndSerializeOnInferenceField( + inferenceFieldQuery, + minimizeRoundTripsFalseContext, + new IllegalArgumentException( + inferenceFieldQuery.getName() + + " query does not support cross-cluster search when querying a [" + + SemanticTextFieldMapper.CONTENT_TYPE + + "] field when [ccs_minimize_roundtrips] is false" + ), + null ); - assertThat( - e.getMessage(), - containsString( - semanticFieldQuery.getName() + " query does not support cross-cluster search when querying a [semantic_text] field" + assertRewriteAndSerializeOnNonInferenceField(nonInferenceFieldQuery, minimizeRoundTripsFalseContext); + + // Test with a transport version prior to semantic search CCS support, but still new enough to use the new interceptors. + // This simulates if one of the local or remote cluster data nodes is slightly out of date. 
+ final TransportVersion preCcsVersion = TransportVersionUtils.randomVersionBetween( + random(), + NEW_SEMANTIC_QUERY_INTERCEPTORS, + TransportVersionUtils.getPreviousVersion(SEMANTIC_SEARCH_CCS_SUPPORT) + ); + final QueryRewriteContext preCcsContext = createQueryRewriteContext(localIndexInferenceFields, remoteIndices, preCcsVersion, true); + + assertRewriteAndSerializeOnInferenceField( + inferenceFieldQuery, + preCcsContext, + null, + new IllegalArgumentException( + "One or more nodes does not support " + + inferenceFieldQuery.getName() + + " query cross-cluster search when querying a [" + + SemanticTextFieldMapper.CONTENT_TYPE + + "] field. Please update all nodes to at least Elasticsearch " + + SEMANTIC_SEARCH_CCS_SUPPORT.toReleaseVersion() + + "." ) ); + assertRewriteAndSerializeOnNonInferenceField(nonInferenceFieldQuery, preCcsContext); - // Test querying a non-inference field - final T nonInferenceFieldQuery = createQueryBuilder("non_inference_field"); - QueryBuilder coordinatorRewritten = rewriteAndFetch(nonInferenceFieldQuery, queryRewriteContext); + // Test with a transport version prior to the new query interceptors. This simulates if one of the local cluster data nodes is more + // out of date. 
+ final TransportVersion legacyInterceptorsVersion = TransportVersionUtils.randomVersionBetween( + random(), + V_8_15_0, + TransportVersionUtils.getPreviousVersion(NEW_SEMANTIC_QUERY_INTERCEPTORS) + ); + final QueryRewriteContext legacyInterceptorsContext = createQueryRewriteContext( + localIndexInferenceFields, + remoteIndices, + legacyInterceptorsVersion, + true + ); - // Use a serialization cycle to strip InterceptedQueryBuilderWrapper - coordinatorRewritten = copyNamedWriteable(coordinatorRewritten, writableRegistry(), QueryBuilder.class); - assertCoordinatorNodeRewriteOnNonInferenceField(nonInferenceFieldQuery, coordinatorRewritten); + assertRewriteAndSerializeOnInferenceField( + inferenceFieldQuery, + legacyInterceptorsContext, + new IllegalArgumentException( + inferenceFieldQuery.getName() + + " query does not support cross-cluster search when querying a [" + + SemanticTextFieldMapper.CONTENT_TYPE + + "] field in a mixed-version cluster" + ), + null + ); + assertRewriteAndSerializeOnNonInferenceField(nonInferenceFieldQuery, legacyInterceptorsContext); } public void testSerializationRemoteClusterInferenceResults() throws Exception { @@ -229,7 +293,7 @@ public void testSerializationRemoteClusterInferenceResults() throws Exception { // Test with a transport version prior to cluster alias support, which should fail TransportVersion transportVersion = TransportVersionUtils.randomVersionBetween( random(), - TransportVersions.NEW_SEMANTIC_QUERY_INTERCEPTORS, + NEW_SEMANTIC_QUERY_INTERCEPTORS, TransportVersionUtils.getPreviousVersion(TransportVersions.INFERENCE_RESULTS_MAP_WITH_CLUSTER_ALIAS) ); IllegalArgumentException e = assertThrows( @@ -290,24 +354,19 @@ protected void serializationTestCase(TransportVersion transportVersion) throws E final QueryRewriteContext queryRewriteContext = createQueryRewriteContext( Map.of(testIndex1.name(), testIndex1.semanticTextFields(), testIndex2.name(), testIndex2.semanticTextFields()), Map.of(), - transportVersion + 
transportVersion, + null ); // Disable query interception when checking the results of coordinator node rewrite so that the query rewrite context can be used // to populate inference results without triggering another query interception. In production this is achieved by wrapping with // InterceptedQueryBuilderWrapper, but we do not have access to that in this test. - final BiConsumer disableQueryInterception = (c, r) -> { - QueryRewriteInterceptor interceptor = c.getQueryRewriteInterceptor(); - c.setQueryRewriteInterceptor(null); - r.run(); - c.setQueryRewriteInterceptor(interceptor); - }; // Query a semantic text field in both indices QueryBuilder originalSemantic = createQueryBuilder(semanticField); QueryBuilder rewrittenSemantic = rewriteAndFetch(originalSemantic, queryRewriteContext); QueryBuilder serializedSemantic = copyNamedWriteable(rewrittenSemantic, writableRegistry(), QueryBuilder.class); - disableQueryInterception.accept( + disableQueryInterception( queryRewriteContext, () -> assertCoordinatorNodeRewriteOnInferenceField(originalSemantic, serializedSemantic, transportVersion, queryRewriteContext) ); @@ -316,7 +375,7 @@ protected void serializationTestCase(TransportVersion transportVersion) throws E QueryBuilder originalMixed = createQueryBuilder(mixedField); QueryBuilder rewrittenMixed = rewriteAndFetch(originalMixed, queryRewriteContext); QueryBuilder serializedMixed = copyNamedWriteable(rewrittenMixed, writableRegistry(), QueryBuilder.class); - disableQueryInterception.accept( + disableQueryInterception( queryRewriteContext, () -> assertCoordinatorNodeRewriteOnInferenceField(originalMixed, serializedMixed, transportVersion, queryRewriteContext) ); @@ -331,7 +390,8 @@ protected void serializationTestCase(TransportVersion transportVersion) throws E protected QueryRewriteContext createQueryRewriteContext( Map> localIndexInferenceFields, Map remoteIndexNames, - TransportVersion minTransportVersion + TransportVersion minTransportVersion, + Boolean 
ccsMinimizeRoundTrips ) { Map indexMetadata = new HashMap<>(); for (var indexEntry : localIndexInferenceFields.entrySet()) { @@ -384,7 +444,7 @@ protected QueryRewriteContext createQueryRewriteContext( resolvedIndices, null, QueryRewriteInterceptor.multi(interceptorMap), - null + ccsMinimizeRoundTrips ); } @@ -464,12 +524,76 @@ protected QueryRewriteContext createIndexMetadataContext( } } + protected void assertRewriteAndSerializeOnInferenceField( + QueryBuilder originalQuery, + QueryRewriteContext queryRewriteContext, + Exception expectedRewriteException, + Exception expectedSerializationException + ) throws IOException { + if (expectedRewriteException != null) { + Exception actualException = assertThrows(Exception.class, () -> rewriteAndFetch(originalQuery, queryRewriteContext)); + assertThat(actualException, instanceOf(expectedRewriteException.getClass())); + assertThat(actualException.getMessage(), equalTo(expectedRewriteException.getMessage())); + return; + } + QueryBuilder rewrittenQuery = rewriteAndFetch(originalQuery, queryRewriteContext); + + TransportVersion serializationTransportVersion = queryRewriteContext.getMinTransportVersion(); + if (expectedSerializationException != null) { + Exception actualException = assertThrows( + Exception.class, + () -> copyNamedWriteable(rewrittenQuery, writableRegistry(), QueryBuilder.class, serializationTransportVersion) + ); + assertThat(actualException, instanceOf(expectedSerializationException.getClass())); + assertThat(actualException.getMessage(), equalTo(expectedSerializationException.getMessage())); + return; + } + QueryBuilder serializedQuery = copyNamedWriteable( + rewrittenQuery, + writableRegistry(), + QueryBuilder.class, + serializationTransportVersion + ); + + // Disable query interception when checking the results of coordinator node rewrite so that the query rewrite context can be used + // to populate inference results without triggering another query interception. 
In production this is achieved by wrapping with + // InterceptedQueryBuilderWrapper, but we do not have access to that in this test. + disableQueryInterception( + queryRewriteContext, + () -> assertCoordinatorNodeRewriteOnInferenceField( + originalQuery, + serializedQuery, + queryRewriteContext.getMinTransportVersion(), + queryRewriteContext + ) + ); + } + + protected void assertRewriteAndSerializeOnNonInferenceField(QueryBuilder originalQuery, QueryRewriteContext queryRewriteContext) + throws IOException { + QueryBuilder rewrittenQuery = rewriteAndFetch(originalQuery, queryRewriteContext); + QueryBuilder serializedQuery = copyNamedWriteable( + rewrittenQuery, + writableRegistry(), + QueryBuilder.class, + queryRewriteContext.getMinTransportVersion() + ); + assertCoordinatorNodeRewriteOnNonInferenceField(originalQuery, serializedQuery); + } + protected static QueryBuilder rewriteAndFetch(QueryBuilder queryBuilder, QueryRewriteContext queryRewriteContext) { PlainActionFuture future = new PlainActionFuture<>(); Rewriteable.rewriteAndFetch(queryBuilder, queryRewriteContext, future); return future.actionGet(); } + protected static void disableQueryInterception(QueryRewriteContext queryRewriteContext, Runnable runnable) { + QueryRewriteInterceptor interceptor = queryRewriteContext.getQueryRewriteInterceptor(); + queryRewriteContext.setQueryRewriteInterceptor(null); + runnable.run(); + queryRewriteContext.setQueryRewriteInterceptor(interceptor); + } + private static ModelRegistry createModelRegistry(ThreadPool threadPool) { ClusterService clusterService = ClusterServiceUtils.createClusterService(threadPool); ModelRegistry modelRegistry = spy(new ModelRegistry(clusterService, new NoOpClient(threadPool))); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilderTests.java 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilderTests.java index ee844d238beae..b72b430a7ce8f 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilderTests.java @@ -167,7 +167,8 @@ public void testInterceptAndRewrite() throws Exception { final QueryRewriteContext queryRewriteContext = createQueryRewriteContext( Map.of(testIndex1.name(), testIndex1.semanticTextFields(), testIndex2.name(), testIndex2.semanticTextFields()), Map.of(), - TransportVersion.current() + TransportVersion.current(), + null ); QueryBuilder coordinatorRewritten = rewriteAndFetch(knnQuery, queryRewriteContext); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java index 3d9c7ca2bf9dd..27573e8ce0b1d 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java @@ -102,7 +102,8 @@ public void testInterceptAndRewrite() throws Exception { testIndex3.semanticTextFields() ), Map.of(), - TransportVersion.current() + TransportVersion.current(), + null ); QueryBuilder coordinatorRewritten = rewriteAndFetch(matchQuery, queryRewriteContext); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilderTests.java 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilderTests.java index 9e0cfda289632..f594a0f034ce9 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilderTests.java @@ -134,7 +134,8 @@ public void testInterceptAndRewrite() throws Exception { final QueryRewriteContext queryRewriteContext = createQueryRewriteContext( Map.of(testIndex1.name(), testIndex1.semanticTextFields(), testIndex2.name(), testIndex2.semanticTextFields()), Map.of(), - TransportVersion.current() + TransportVersion.current(), + null ); QueryBuilder coordinatorRewritten = rewriteAndFetch(sparseVectorQuery, queryRewriteContext); From c82d083d43941969912399508ef877540041ed4a Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Tue, 23 Sep 2025 15:04:20 -0400 Subject: [PATCH 24/52] Fixed test failures --- ...erceptedInferenceQueryBuilderTestCase.java | 47 +++++++++---------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java index d0cff4ab98d22..4004561c779a4 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java @@ -358,33 +358,17 @@ protected void serializationTestCase(TransportVersion transportVersion) throws E null ); - // Disable query interception when checking the results of coordinator node rewrite 
so that the query rewrite context can be used - // to populate inference results without triggering another query interception. In production this is achieved by wrapping with - // InterceptedQueryBuilderWrapper, but we do not have access to that in this test. - // Query a semantic text field in both indices QueryBuilder originalSemantic = createQueryBuilder(semanticField); - QueryBuilder rewrittenSemantic = rewriteAndFetch(originalSemantic, queryRewriteContext); - QueryBuilder serializedSemantic = copyNamedWriteable(rewrittenSemantic, writableRegistry(), QueryBuilder.class); - disableQueryInterception( - queryRewriteContext, - () -> assertCoordinatorNodeRewriteOnInferenceField(originalSemantic, serializedSemantic, transportVersion, queryRewriteContext) - ); + assertRewriteAndSerializeOnInferenceField(originalSemantic, queryRewriteContext, null, null); // Query a field that is a semantic text field in one index QueryBuilder originalMixed = createQueryBuilder(mixedField); - QueryBuilder rewrittenMixed = rewriteAndFetch(originalMixed, queryRewriteContext); - QueryBuilder serializedMixed = copyNamedWriteable(rewrittenMixed, writableRegistry(), QueryBuilder.class); - disableQueryInterception( - queryRewriteContext, - () -> assertCoordinatorNodeRewriteOnInferenceField(originalMixed, serializedMixed, transportVersion, queryRewriteContext) - ); + assertRewriteAndSerializeOnInferenceField(originalMixed, queryRewriteContext, null, null); // Query a text field in both indices QueryBuilder originalText = createQueryBuilder(textField); - QueryBuilder rewrittenText = rewriteAndFetch(originalText, queryRewriteContext); - QueryBuilder serializedText = copyNamedWriteable(rewrittenText, writableRegistry(), QueryBuilder.class); - assertCoordinatorNodeRewriteOnNonInferenceField(originalText, serializedText); + assertRewriteAndSerializeOnNonInferenceField(originalText, queryRewriteContext); } protected QueryRewriteContext createQueryRewriteContext( @@ -555,13 +539,21 @@ protected void 
assertRewriteAndSerializeOnInferenceField( serializationTransportVersion ); + // Run the original query through a serialization cycle to account for any BwC logic applied through the transport version + QueryBuilder originalSerializedQuery = copyNamedWriteable( + originalQuery, + writableRegistry(), + QueryBuilder.class, + serializationTransportVersion + ); + // Disable query interception when checking the results of coordinator node rewrite so that the query rewrite context can be used // to populate inference results without triggering another query interception. In production this is achieved by wrapping with // InterceptedQueryBuilderWrapper, but we do not have access to that in this test. disableQueryInterception( queryRewriteContext, () -> assertCoordinatorNodeRewriteOnInferenceField( - originalQuery, + originalSerializedQuery, serializedQuery, queryRewriteContext.getMinTransportVersion(), queryRewriteContext @@ -571,14 +563,19 @@ protected void assertRewriteAndSerializeOnInferenceField( protected void assertRewriteAndSerializeOnNonInferenceField(QueryBuilder originalQuery, QueryRewriteContext queryRewriteContext) throws IOException { - QueryBuilder rewrittenQuery = rewriteAndFetch(originalQuery, queryRewriteContext); - QueryBuilder serializedQuery = copyNamedWriteable( - rewrittenQuery, + TransportVersion serializationVersion = queryRewriteContext.getMinTransportVersion(); + + // Run the original query through a serialization cycle to account for any BwC logic applied through the transport version + QueryBuilder originalSerializedQuery = copyNamedWriteable( + originalQuery, writableRegistry(), QueryBuilder.class, - queryRewriteContext.getMinTransportVersion() + serializationVersion ); - assertCoordinatorNodeRewriteOnNonInferenceField(originalQuery, serializedQuery); + + QueryBuilder rewrittenQuery = rewriteAndFetch(originalQuery, queryRewriteContext); + QueryBuilder serializedQuery = copyNamedWriteable(rewrittenQuery, writableRegistry(), QueryBuilder.class, 
serializationVersion); + assertCoordinatorNodeRewriteOnNonInferenceField(originalSerializedQuery, serializedQuery); } protected static QueryBuilder rewriteAndFetch(QueryBuilder queryBuilder, QueryRewriteContext queryRewriteContext) { From 588eebd86718f5be13cb6d75c6883ca36b74cdd5 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Tue, 23 Sep 2025 16:29:22 -0400 Subject: [PATCH 25/52] Fixed test failures --- ...actInterceptedInferenceQueryBuilderTestCase.java | 8 +++++--- .../InterceptedInferenceMatchQueryBuilderTests.java | 13 +++++++++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java index 4004561c779a4..bd48e2517ca89 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java @@ -23,6 +23,7 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.CheckedRunnable; import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; @@ -333,7 +334,7 @@ protected abstract void assertCoordinatorNodeRewriteOnInferenceField( QueryBuilder rewritten, TransportVersion transportVersion, QueryRewriteContext queryRewriteContext - ); + ) throws Exception; protected abstract void assertCoordinatorNodeRewriteOnNonInferenceField(QueryBuilder original, QueryBuilder rewritten); @@ -513,7 +514,7 @@ protected void assertRewriteAndSerializeOnInferenceField( QueryRewriteContext 
queryRewriteContext, Exception expectedRewriteException, Exception expectedSerializationException - ) throws IOException { + ) throws Exception { if (expectedRewriteException != null) { Exception actualException = assertThrows(Exception.class, () -> rewriteAndFetch(originalQuery, queryRewriteContext)); assertThat(actualException, instanceOf(expectedRewriteException.getClass())); @@ -584,7 +585,8 @@ protected static QueryBuilder rewriteAndFetch(QueryBuilder queryBuilder, QueryRe return future.actionGet(); } - protected static void disableQueryInterception(QueryRewriteContext queryRewriteContext, Runnable runnable) { + protected static void disableQueryInterception(QueryRewriteContext queryRewriteContext, CheckedRunnable runnable) + throws Exception { QueryRewriteInterceptor interceptor = queryRewriteContext.getQueryRewriteInterceptor(); queryRewriteContext.setQueryRewriteInterceptor(null); runnable.run(); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java index 27573e8ce0b1d..63fbb5279eb94 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java @@ -56,7 +56,7 @@ protected void assertCoordinatorNodeRewriteOnInferenceField( QueryBuilder rewritten, TransportVersion transportVersion, QueryRewriteContext queryRewriteContext - ) { + ) throws Exception { assertThat(original, instanceOf(MatchQueryBuilder.class)); if (transportVersion.onOrAfter(TransportVersions.NEW_SEMANTIC_QUERY_INTERCEPTORS)) { assertThat(rewritten, instanceOf(InterceptedInferenceMatchQueryBuilder.class)); @@ -73,7 +73,16 @@ protected void 
assertCoordinatorNodeRewriteOnInferenceField( original ); QueryBuilder expectedLegacyRewritten = rewriteAndFetch(expectedLegacyIntercepted, queryRewriteContext); - assertThat(rewritten, equalTo(expectedLegacyRewritten)); + + // Run the expected query through a serialization cycle to align the inference results map representations + QueryBuilder expectedLegacySerialized = copyNamedWriteable( + expectedLegacyRewritten, + writableRegistry(), + QueryBuilder.class, + transportVersion + ); + + assertThat(rewritten, equalTo(expectedLegacySerialized)); } } From ba54ae831ead3f29e7d2eb05822731c7c5e4f782 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Tue, 23 Sep 2025 16:48:40 -0400 Subject: [PATCH 26/52] Update docs/changelog/135309.yaml --- docs/changelog/135309.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/135309.yaml diff --git a/docs/changelog/135309.yaml b/docs/changelog/135309.yaml new file mode 100644 index 0000000000000..11189e2f134e7 --- /dev/null +++ b/docs/changelog/135309.yaml @@ -0,0 +1,5 @@ +pr: 135309 +summary: Enable Semantic Search CCS When ccs_minimize_roundtrips=true +area: Relevance +type: enhancement +issues: [] From 5b91665b65b373cd2cab97432fb6e5f9d2bac151 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 23 Sep 2025 20:57:53 +0000 Subject: [PATCH 27/52] [CI] Update transport version definitions --- server/src/main/resources/transport/upper_bounds/8.18.csv | 2 +- server/src/main/resources/transport/upper_bounds/8.19.csv | 2 +- server/src/main/resources/transport/upper_bounds/9.0.csv | 2 +- server/src/main/resources/transport/upper_bounds/9.1.csv | 2 +- server/src/main/resources/transport/upper_bounds/9.2.csv | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/server/src/main/resources/transport/upper_bounds/8.18.csv b/server/src/main/resources/transport/upper_bounds/8.18.csv index 4eb5140004ea6..ffc592e1809ee 100644 --- a/server/src/main/resources/transport/upper_bounds/8.18.csv +++ 
b/server/src/main/resources/transport/upper_bounds/8.18.csv @@ -1 +1 @@ -initial_elasticsearch_8_18_6,8840008 +initial_elasticsearch_8_18_8,8840010 diff --git a/server/src/main/resources/transport/upper_bounds/8.19.csv b/server/src/main/resources/transport/upper_bounds/8.19.csv index 476468b203875..3cc6f439c5ea5 100644 --- a/server/src/main/resources/transport/upper_bounds/8.19.csv +++ b/server/src/main/resources/transport/upper_bounds/8.19.csv @@ -1 +1 @@ -initial_elasticsearch_8_19_3,8841067 +initial_elasticsearch_8_19_5,8841069 diff --git a/server/src/main/resources/transport/upper_bounds/9.0.csv b/server/src/main/resources/transport/upper_bounds/9.0.csv index f8f50cc6d7839..8ad2ed1a4cacf 100644 --- a/server/src/main/resources/transport/upper_bounds/9.0.csv +++ b/server/src/main/resources/transport/upper_bounds/9.0.csv @@ -1 +1 @@ -initial_elasticsearch_9_0_6,9000015 +initial_elasticsearch_9_0_8,9000017 diff --git a/server/src/main/resources/transport/upper_bounds/9.1.csv b/server/src/main/resources/transport/upper_bounds/9.1.csv index 5a65f2e578156..1cea5dc4d929b 100644 --- a/server/src/main/resources/transport/upper_bounds/9.1.csv +++ b/server/src/main/resources/transport/upper_bounds/9.1.csv @@ -1 +1 @@ -initial_elasticsearch_9_1_4,9112007 +initial_elasticsearch_9_1_5,9112008 diff --git a/server/src/main/resources/transport/upper_bounds/9.2.csv b/server/src/main/resources/transport/upper_bounds/9.2.csv index e24f914a1d1ca..b1209b927d8a5 100644 --- a/server/src/main/resources/transport/upper_bounds/9.2.csv +++ b/server/src/main/resources/transport/upper_bounds/9.2.csv @@ -1 +1 @@ -ml_inference_endpoint_cache,9157000 +inference_api_openai_embeddings_headers,9169000 From 27913a54577cdcaa2ffb64116e56d8bdfe60ebb3 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Tue, 23 Sep 2025 17:14:54 -0400 Subject: [PATCH 28/52] Fixed SEMANTIC_SEARCH_CCS_SUPPORT transport version --- server/src/main/java/org/elasticsearch/TransportVersions.java | 1 - 
.../inference/queries/InterceptedInferenceQueryBuilder.java | 2 +- .../xpack/inference/queries/SemanticQueryBuilder.java | 4 +++- .../AbstractInterceptedInferenceQueryBuilderTestCase.java | 2 +- .../xpack/inference/queries/SemanticQueryBuilderTests.java | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index b85c3a132f73c..1aa25a4214656 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -329,7 +329,6 @@ static TransportVersion def(int id) { public static final TransportVersion TIMESERIES_DEFAULT_LIMIT = def(9_160_0_00); public static final TransportVersion INFERENCE_API_OPENAI_HEADERS = def(9_161_0_00); public static final TransportVersion NEW_SEMANTIC_QUERY_INTERCEPTORS = def(9_162_0_00); - public static final TransportVersion SEMANTIC_SEARCH_CCS_SUPPORT = def(9_167_0_00); /* * STOP! READ THIS FIRST! 
No, really, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceQueryBuilder.java index 241d7faede35e..7e6fe8a0befe4 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceQueryBuilder.java @@ -38,9 +38,9 @@ import java.util.Objects; import java.util.Set; -import static org.elasticsearch.TransportVersions.SEMANTIC_SEARCH_CCS_SUPPORT; import static org.elasticsearch.index.IndexSettings.DEFAULT_FIELD_SETTING; import static org.elasticsearch.transport.RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY; +import static org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder.SEMANTIC_SEARCH_CCS_SUPPORT; import static org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder.convertFromBwcInferenceResultsMap; /** diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index 1eb29a66ab860..1ed3bb62bcc2d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -50,7 +50,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Collectors; -import static org.elasticsearch.TransportVersions.SEMANTIC_SEARCH_CCS_SUPPORT; import static org.elasticsearch.transport.RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY; import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; import static 
org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; @@ -69,6 +68,9 @@ public class SemanticQueryBuilder extends AbstractQueryBuilder Date: Tue, 23 Sep 2025 17:16:31 -0400 Subject: [PATCH 29/52] Updated changelog --- docs/changelog/135309.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/changelog/135309.yaml b/docs/changelog/135309.yaml index 11189e2f134e7..20c50553c2eb8 100644 --- a/docs/changelog/135309.yaml +++ b/docs/changelog/135309.yaml @@ -1,5 +1,5 @@ pr: 135309 -summary: Enable Semantic Search CCS When ccs_minimize_roundtrips=true -area: Relevance +summary: Enable semantic search CCS when ccs_minimize_roundtrips=true +area: Vector Search type: enhancement issues: [] From 66c0c09e227c55b33b63fa89987e9b843ae1356d Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Tue, 23 Sep 2025 17:25:03 -0400 Subject: [PATCH 30/52] Spotless --- .../xpack/inference/queries/SemanticQueryBuilder.java | 4 +--- .../AbstractInterceptedInferenceQueryBuilderTestCase.java | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index 1ed3bb62bcc2d..47fd02459a23f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -68,9 +68,7 @@ public class SemanticQueryBuilder extends AbstractQueryBuilder Date: Tue, 23 Sep 2025 21:32:09 +0000 Subject: [PATCH 31/52] [CI] Update transport version definitions --- .../definitions/referable/semantic_search_ccs_support.csv | 1 + server/src/main/resources/transport/upper_bounds/9.2.csv | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 
server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv diff --git a/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv b/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv new file mode 100644 index 0000000000000..e9954a5466961 --- /dev/null +++ b/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv @@ -0,0 +1 @@ +9170000 diff --git a/server/src/main/resources/transport/upper_bounds/9.2.csv b/server/src/main/resources/transport/upper_bounds/9.2.csv index b1209b927d8a5..6a17f45fe298c 100644 --- a/server/src/main/resources/transport/upper_bounds/9.2.csv +++ b/server/src/main/resources/transport/upper_bounds/9.2.csv @@ -1 +1 @@ -inference_api_openai_embeddings_headers,9169000 +semantic_search_ccs_support,9170000 From 9000d356af41fad07b05eef2f97b33445acfa843 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 24 Sep 2025 17:13:06 -0400 Subject: [PATCH 32/52] Refactored CCS integration tests into separate classes for each query type --- ...actSemanticCrossClusterSearchTestCase.java | 323 +++++ ...ectorQueryBuilderCrossClusterSearchIT.java | 315 +++++ ...MatchQueryBuilderCrossClusterSearchIT.java | 229 ++++ .../ccs/SemanticCrossClusterSearchIT.java | 1141 ----------------- ...anticQueryBuilderCrossClusterSearchIT.java | 121 ++ ...ectorQueryBuilderCrossClusterSearchIT.java | 248 ++++ 6 files changed, 1236 insertions(+), 1141 deletions(-) create mode 100644 x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/AbstractSemanticCrossClusterSearchTestCase.java create mode 100644 x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/KnnVectorQueryBuilderCrossClusterSearchIT.java create mode 100644 x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/MatchQueryBuilderCrossClusterSearchIT.java delete mode 100644 
x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java create mode 100644 x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticQueryBuilderCrossClusterSearchIT.java create mode 100644 x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SparseVectorQueryBuilderCrossClusterSearchIT.java diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/AbstractSemanticCrossClusterSearchTestCase.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/AbstractSemanticCrossClusterSearchTestCase.java new file mode 100644 index 0000000000000..1a9c87205c3d8 --- /dev/null +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/AbstractSemanticCrossClusterSearchTestCase.java @@ -0,0 +1,323 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.search.ccs; + +import org.elasticsearch.action.DocWriteResponse; +import org.elasticsearch.action.search.OpenPointInTimeRequest; +import org.elasticsearch.action.search.OpenPointInTimeResponse; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.action.search.TransportOpenPointInTimeAction; +import org.elasticsearch.action.support.broadcast.BroadcastResponse; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.inference.MinimalServiceSettings; +import org.elasticsearch.inference.SimilarityMeasure; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.license.LicenseSettings; +import org.elasticsearch.plugins.ActionPlugin; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.plugins.SearchPlugin; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.test.AbstractMultiClustersTestCase; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.core.inference.action.PutInferenceModelAction; +import org.elasticsearch.xpack.core.ml.action.CoordinatedInferenceAction; +import org.elasticsearch.xpack.core.ml.inference.MlInferenceNamedXContentProvider; +import 
org.elasticsearch.xpack.core.ml.vectors.TextEmbeddingQueryVectorBuilder; +import org.elasticsearch.xpack.inference.LocalStateInferencePlugin; +import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper; +import org.elasticsearch.xpack.inference.mock.TestDenseInferenceServiceExtension; +import org.elasticsearch.xpack.inference.mock.TestInferenceServicePlugin; +import org.elasticsearch.xpack.inference.mock.TestSparseInferenceServiceExtension; +import org.elasticsearch.xpack.ml.action.TransportCoordinatedInferenceAction; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Consumer; +import java.util.stream.Collectors; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; + +public abstract class AbstractSemanticCrossClusterSearchTestCase extends AbstractMultiClustersTestCase { + protected static final String REMOTE_CLUSTER = "cluster_a"; + + @Override + protected List remoteClusterAlias() { + return List.of(REMOTE_CLUSTER); + } + + @Override + protected Map skipUnavailableForRemoteClusters() { + return Map.of(REMOTE_CLUSTER, DEFAULT_SKIP_UNAVAILABLE); + } + + @Override + protected boolean reuseClusters() { + return false; + } + + @Override + protected Settings nodeSettings() { + return Settings.builder().put(LicenseSettings.SELF_GENERATED_LICENSE_TYPE.getKey(), "trial").build(); + } + + @Override + protected Collection> nodePlugins(String clusterAlias) { + return List.of(LocalStateInferencePlugin.class, TestInferenceServicePlugin.class, FakeMlPlugin.class); + } + + protected void setupTwoClusters(TestIndexInfo localIndexInfo, TestIndexInfo remoteIndexInfo) throws IOException { + 
setupCluster(LOCAL_CLUSTER, localIndexInfo); + setupCluster(REMOTE_CLUSTER, remoteIndexInfo); + } + + protected void setupCluster(String clusterAlias, TestIndexInfo indexInfo) throws IOException { + final Client client = client(clusterAlias); + final String indexName = indexInfo.name(); + + for (var entry : indexInfo.inferenceEndpoints().entrySet()) { + String inferenceId = entry.getKey(); + MinimalServiceSettings minimalServiceSettings = entry.getValue(); + + Map serviceSettings = new HashMap<>(); + serviceSettings.put("model", randomAlphaOfLength(5)); + serviceSettings.put("api_key", randomAlphaOfLength(5)); + if (minimalServiceSettings.taskType() == TaskType.TEXT_EMBEDDING) { + serviceSettings.put("dimensions", minimalServiceSettings.dimensions()); + serviceSettings.put("similarity", minimalServiceSettings.similarity()); + serviceSettings.put("element_type", minimalServiceSettings.elementType()); + } + + createInferenceEndpoint(client, minimalServiceSettings.taskType(), inferenceId, serviceSettings); + } + + Settings indexSettings = indexSettings(randomIntBetween(2, 5), randomIntBetween(0, 1)).build(); + assertAcked(client.admin().indices().prepareCreate(indexName).setSettings(indexSettings).setMapping(indexInfo.mappings())); + assertFalse( + client.admin() + .cluster() + .prepareHealth(TEST_REQUEST_TIMEOUT, indexName) + .setWaitForYellowStatus() + .setTimeout(TimeValue.timeValueSeconds(10)) + .get() + .isTimedOut() + ); + + for (var entry : indexInfo.docs().entrySet()) { + String docId = entry.getKey(); + Map doc = entry.getValue(); + + DocWriteResponse response = client.prepareIndex(indexName).setId(docId).setSource(doc).execute().actionGet(); + assertThat(response.getResult(), equalTo(DocWriteResponse.Result.CREATED)); + } + BroadcastResponse refreshResponse = client.admin().indices().prepareRefresh(indexName).execute().actionGet(); + assertThat(refreshResponse.getStatus(), is(RestStatus.OK)); + } + + protected BytesReference openPointInTime(String[] indices, 
TimeValue keepAlive) { + OpenPointInTimeRequest request = new OpenPointInTimeRequest(indices).keepAlive(keepAlive); + final OpenPointInTimeResponse response = client().execute(TransportOpenPointInTimeAction.TYPE, request).actionGet(); + return response.getPointInTimeId(); + } + + protected static void createInferenceEndpoint(Client client, TaskType taskType, String inferenceId, Map serviceSettings) + throws IOException { + final String service = switch (taskType) { + case TEXT_EMBEDDING -> TestDenseInferenceServiceExtension.TestInferenceService.NAME; + case SPARSE_EMBEDDING -> TestSparseInferenceServiceExtension.TestInferenceService.NAME; + default -> throw new IllegalArgumentException("Unhandled task type [" + taskType + "]"); + }; + + final BytesReference content; + try (XContentBuilder builder = XContentFactory.jsonBuilder()) { + builder.startObject(); + builder.field("service", service); + builder.field("service_settings", serviceSettings); + builder.endObject(); + + content = BytesReference.bytes(builder); + } + + PutInferenceModelAction.Request request = new PutInferenceModelAction.Request( + taskType, + inferenceId, + content, + XContentType.JSON, + TEST_REQUEST_TIMEOUT + ); + var responseFuture = client.execute(PutInferenceModelAction.INSTANCE, request); + assertThat(responseFuture.actionGet(TEST_REQUEST_TIMEOUT).getModel().getInferenceEntityId(), equalTo(inferenceId)); + } + + protected void assertSearchResponse(QueryBuilder queryBuilder, String[] indices, List expectedSearchResults) + throws Exception { + assertSearchResponse(queryBuilder, indices, expectedSearchResults, null, null); + } + + protected void assertSearchResponse( + QueryBuilder queryBuilder, + String[] indices, + List expectedSearchResults, + ClusterFailure expectedRemoteFailure, + Consumer searchRequestModifier + ) throws Exception { + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder).size(expectedSearchResults.size()); + SearchRequest searchRequest = 
new SearchRequest(indices, searchSourceBuilder); + if (searchRequestModifier != null) { + searchRequestModifier.accept(searchRequest); + } + + assertResponse(client().search(searchRequest), response -> { + SearchHit[] hits = response.getHits().getHits(); + assertThat(hits.length, equalTo(expectedSearchResults.size())); + + Iterator searchResultIterator = expectedSearchResults.iterator(); + for (int i = 0; i < hits.length; i++) { + SearchResult expectedSearchResult = searchResultIterator.next(); + SearchHit actualSearchResult = hits[i]; + + assertThat(actualSearchResult.getClusterAlias(), equalTo(expectedSearchResult.clusterAlias())); + assertThat(actualSearchResult.getIndex(), equalTo(expectedSearchResult.index())); + assertThat(actualSearchResult.getId(), equalTo(expectedSearchResult.id())); + } + + SearchResponse.Clusters clusters = response.getClusters(); + assertThat(clusters.getCluster(LOCAL_CLUSTER).getStatus(), equalTo(SearchResponse.Cluster.Status.SUCCESSFUL)); + assertThat(clusters.getCluster(LOCAL_CLUSTER).getFailures().isEmpty(), is(true)); + + SearchResponse.Cluster remoteCluster = clusters.getCluster(REMOTE_CLUSTER); + if (expectedRemoteFailure != null) { + assertThat(remoteCluster.getStatus(), equalTo(expectedRemoteFailure.status())); + + Set expectedFailures = expectedRemoteFailure.failures(); + Set actualFailures = remoteCluster.getFailures() + .stream() + .map(f -> new FailureCause(f.getCause().getClass(), f.getCause().getMessage())) + .collect(Collectors.toSet()); + assertThat(actualFailures, equalTo(expectedFailures)); + } else { + assertThat(remoteCluster.getStatus(), equalTo(SearchResponse.Cluster.Status.SUCCESSFUL)); + assertThat(remoteCluster.getFailures().isEmpty(), is(true)); + } + }); + } + + protected static MinimalServiceSettings sparseEmbeddingServiceSettings() { + return new MinimalServiceSettings(null, TaskType.SPARSE_EMBEDDING, null, null, null); + } + + protected static MinimalServiceSettings textEmbeddingServiceSettings( + int 
dimensions, + SimilarityMeasure similarity, + DenseVectorFieldMapper.ElementType elementType + ) { + return new MinimalServiceSettings(null, TaskType.TEXT_EMBEDDING, dimensions, similarity, elementType); + } + + protected static Map semanticTextMapping(String inferenceId) { + return Map.of("type", SemanticTextFieldMapper.CONTENT_TYPE, "inference_id", inferenceId); + } + + protected static Map textMapping() { + return Map.of("type", "text"); + } + + protected static Map denseVectorMapping(int dimensions) { + return Map.of("type", DenseVectorFieldMapper.CONTENT_TYPE, "dims", dimensions); + } + + protected static Map sparseVectorMapping() { + return Map.of("type", SparseVectorFieldMapper.CONTENT_TYPE); + } + + protected static String fullyQualifiedIndexName(String clusterAlias, String indexName) { + return clusterAlias + ":" + indexName; + } + + protected static float[] generateDenseVectorFieldValue(int dimensions, DenseVectorFieldMapper.ElementType elementType, float value) { + if (elementType == DenseVectorFieldMapper.ElementType.BIT) { + assert dimensions % 8 == 0; + dimensions /= 8; + } + + float[] vector = new float[dimensions]; + for (int i = 0; i < dimensions; i++) { + // Use a constant value so that relevance is consistent + vector[i] = value; + } + + return vector; + } + + protected static Map generateSparseVectorFieldValue(float weight) { + // Generate values that have the same recall behavior as those produced by TestSparseInferenceServiceExtension. Use a constant token + // weight so that relevance is consistent. 
+ return Map.of("feature_0", weight); + } + + public static class FakeMlPlugin extends Plugin implements ActionPlugin, SearchPlugin { + @Override + public List getNamedWriteables() { + return new MlInferenceNamedXContentProvider().getNamedWriteables(); + } + + @Override + public List> getQueryVectorBuilders() { + return List.of( + new QueryVectorBuilderSpec<>( + TextEmbeddingQueryVectorBuilder.NAME, + TextEmbeddingQueryVectorBuilder::new, + TextEmbeddingQueryVectorBuilder.PARSER + ) + ); + } + + @Override + public Collection getActions() { + return List.of(new ActionHandler(CoordinatedInferenceAction.INSTANCE, TransportCoordinatedInferenceAction.class)); + } + } + + protected record TestIndexInfo( + String name, + Map inferenceEndpoints, + Map mappings, + Map> docs + ) { + @Override + public Map mappings() { + return Map.of("properties", mappings); + } + } + + protected record SearchResult(@Nullable String clusterAlias, String index, String id) {} + + protected record FailureCause(Class causeClass, String message) {} + + protected record ClusterFailure(SearchResponse.Cluster.Status status, Set failures) {} +} diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/KnnVectorQueryBuilderCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/KnnVectorQueryBuilderCrossClusterSearchIT.java new file mode 100644 index 0000000000000..8ae0daa5d5c07 --- /dev/null +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/KnnVectorQueryBuilderCrossClusterSearchIT.java @@ -0,0 +1,315 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.search.ccs; + +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.index.query.QueryShardException; +import org.elasticsearch.inference.MinimalServiceSettings; +import org.elasticsearch.inference.SimilarityMeasure; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.vectors.KnnVectorQueryBuilder; +import org.elasticsearch.search.vectors.VectorData; +import org.elasticsearch.xpack.core.ml.vectors.TextEmbeddingQueryVectorBuilder; + +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.BiConsumer; + +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.IVF_FORMAT; +import static org.hamcrest.Matchers.equalTo; + +public class KnnVectorQueryBuilderCrossClusterSearchIT extends AbstractSemanticCrossClusterSearchTestCase { + public void testKnnQuery() throws Exception { + final String localIndexName = "local-index"; + final String remoteIndexName = "remote-index"; + final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + + final String commonInferenceId = "common-inference-id"; + final String localInferenceId = "local-inference-id"; + + final String commonInferenceIdField = "common-inference-id-field"; + final String mixedTypeField1 = "mixed-type-field-1"; + final String mixedTypeField2 = "mixed-type-field-2"; + + final TestIndexInfo localIndexInfo = new TestIndexInfo( + localIndexName, + Map.of( + commonInferenceId, + textEmbeddingServiceSettings(256, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT), + localInferenceId, + textEmbeddingServiceSettings(384, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT) + ), + Map.of( + commonInferenceIdField, + 
semanticTextMapping(commonInferenceId), + mixedTypeField1, + denseVectorMapping(384), + mixedTypeField2, + semanticTextMapping(localInferenceId) + ), + Map.of( + "local_doc_1", + Map.of(commonInferenceIdField, "a"), + "local_doc_2", + Map.of(mixedTypeField1, generateDenseVectorFieldValue(384, DenseVectorFieldMapper.ElementType.FLOAT, -128.0f)), + "local_doc_3", + Map.of(mixedTypeField2, "c") + ) + ); + final TestIndexInfo remoteIndexInfo = new TestIndexInfo( + remoteIndexName, + Map.of( + commonInferenceId, + textEmbeddingServiceSettings(384, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT) + ), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + mixedTypeField1, + semanticTextMapping(commonInferenceId), + mixedTypeField2, + denseVectorMapping(384) + ), + Map.of( + "remote_doc_1", + Map.of(commonInferenceIdField, "x"), + "remote_doc_2", + Map.of(mixedTypeField1, "y"), + "remote_doc_3", + Map.of(mixedTypeField2, generateDenseVectorFieldValue(384, DenseVectorFieldMapper.ElementType.FLOAT, -128.0f)) + ) + ); + setupTwoClusters(localIndexInfo, remoteIndexInfo); + + // Query a field that has the same inference ID value across clusters, but with different backing inference services + assertSearchResponse( + new KnnVectorQueryBuilder( + commonInferenceIdField, + new TextEmbeddingQueryVectorBuilder(null, "a"), + 10, + 100, + IVF_FORMAT.isEnabled() ? 
10f : null, + null + ), + queryIndices, + List.of( + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2"), + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2") + ) + ); + assertSearchResponse( + new KnnVectorQueryBuilder( + mixedTypeField2, + new TextEmbeddingQueryVectorBuilder(localInferenceId, "c"), + 10, + 100, + IVF_FORMAT.isEnabled() ? 10f : null, + null + ), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3") + ) + ); + + // Query a field that has mixed types across clusters using a query vector + final VectorData queryVector = new VectorData( + generateDenseVectorFieldValue(384, DenseVectorFieldMapper.ElementType.FLOAT, -128.0f) + ); + assertSearchResponse( + new KnnVectorQueryBuilder(mixedTypeField1, queryVector, 10, 100, IVF_FORMAT.isEnabled() ? 10f : null, null, null), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2") + ) + ); + assertSearchResponse( + new KnnVectorQueryBuilder(mixedTypeField2, queryVector, 10, 100, IVF_FORMAT.isEnabled() ? 10f : null, null, null), + queryIndices, + List.of( + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3"), + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3") + ) + ); + + // Check that omitting the inference ID when querying a remote dense vector field leads to the expected partial failure + assertSearchResponse( + new KnnVectorQueryBuilder( + mixedTypeField2, + new TextEmbeddingQueryVectorBuilder(null, "c"), + 10, + 100, + IVF_FORMAT.isEnabled() ? 
10f : null, + null + ), + queryIndices, + List.of(new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3")), + new ClusterFailure( + SearchResponse.Cluster.Status.SKIPPED, + Set.of(new FailureCause(IllegalArgumentException.class, "[model_id] must not be null.")) + ), + null + ); + } + + public void testKnnQueryWithCcsMinimizeRoundTripsFalse() throws Exception { + final String localIndexName = "local-index"; + final String remoteIndexName = "remote-index"; + final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + final BiConsumer<String, TextEmbeddingQueryVectorBuilder> assertCcsMinimizeRoundTripsFalseFailure = (f, qvb) -> { + KnnVectorQueryBuilder queryBuilder = new KnnVectorQueryBuilder(f, qvb, 10, 100, IVF_FORMAT.isEnabled() ? 10f : null, null); + + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder); + SearchRequest searchRequest = new SearchRequest(queryIndices, searchSourceBuilder); + searchRequest.setCcsMinimizeRoundtrips(false); + + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> client().search(searchRequest).actionGet(TEST_REQUEST_TIMEOUT) + ); + assertThat( + e.getMessage(), + equalTo( + "knn query does not support cross-cluster search when querying a [semantic_text] field when " + + "[ccs_minimize_roundtrips] is false" + ) + ); + }; + + final int dimensions = 256; + final String commonInferenceId = "common-inference-id"; + final MinimalServiceSettings commonInferenceIdServiceSettings = textEmbeddingServiceSettings( + dimensions, + SimilarityMeasure.COSINE, + DenseVectorFieldMapper.ElementType.FLOAT + ); + + final String commonInferenceIdField = "common-inference-id-field"; + final String mixedTypeField1 = "mixed-type-field-1"; + final String mixedTypeField2 = "mixed-type-field-2"; + final String denseVectorField = "dense-vector-field"; + + final TestIndexInfo localIndexInfo = new TestIndexInfo( + localIndexName, + Map.of(commonInferenceId, 
commonInferenceIdServiceSettings), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + mixedTypeField1, + semanticTextMapping(commonInferenceId), + mixedTypeField2, + denseVectorMapping(dimensions), + denseVectorField, + denseVectorMapping(dimensions) + ), + Map.of( + mixedTypeField2 + "_doc", + Map.of(mixedTypeField2, generateDenseVectorFieldValue(dimensions, DenseVectorFieldMapper.ElementType.FLOAT, -128.0f)), + denseVectorField + "_doc", + Map.of(denseVectorField, generateDenseVectorFieldValue(dimensions, DenseVectorFieldMapper.ElementType.FLOAT, 1.0f)) + ) + ); + final TestIndexInfo remoteIndexInfo = new TestIndexInfo( + remoteIndexName, + Map.of(commonInferenceId, commonInferenceIdServiceSettings), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + mixedTypeField1, + denseVectorMapping(dimensions), + mixedTypeField2, + semanticTextMapping(commonInferenceId), + denseVectorField, + denseVectorMapping(dimensions) + ), + Map.of( + mixedTypeField2 + "_doc", + Map.of(mixedTypeField2, "a"), + denseVectorField + "_doc", + Map.of(denseVectorField, generateDenseVectorFieldValue(dimensions, DenseVectorFieldMapper.ElementType.FLOAT, -128.0f)) + ) + ); + setupTwoClusters(localIndexInfo, remoteIndexInfo); + + // Validate that expected cases fail + assertCcsMinimizeRoundTripsFalseFailure.accept( + commonInferenceIdField, + new TextEmbeddingQueryVectorBuilder(null, randomAlphaOfLength(5)) + ); + assertCcsMinimizeRoundTripsFalseFailure.accept( + mixedTypeField1, + new TextEmbeddingQueryVectorBuilder(commonInferenceId, randomAlphaOfLength(5)) + ); + + // Validate the expected ccs_minimize_roundtrips=false detection gap and failure mode when querying non-inference fields locally + assertSearchResponse( + new KnnVectorQueryBuilder( + mixedTypeField2, + new TextEmbeddingQueryVectorBuilder(commonInferenceId, "foo"), + 10, + 100, + IVF_FORMAT.isEnabled() ? 
10f : null, + null + ), + queryIndices, + List.of(new SearchResult(null, localIndexName, mixedTypeField2 + "_doc")), + new ClusterFailure( + SearchResponse.Cluster.Status.SKIPPED, + Set.of( + new FailureCause( + QueryShardException.class, + "failed to create query: [knn] queries are only supported on [dense_vector] fields" + ) + ) + ), + s -> s.setCcsMinimizeRoundtrips(false) + ); + + // Validate that a CCS knn query functions when only dense vector fields are queried + assertSearchResponse( + new KnnVectorQueryBuilder( + denseVectorField, + generateDenseVectorFieldValue(dimensions, DenseVectorFieldMapper.ElementType.FLOAT, 1.0f), + 10, + 100, + IVF_FORMAT.isEnabled() ? 10f : null, + null, + null + ), + queryIndices, + List.of( + new SearchResult(null, localIndexName, denseVectorField + "_doc"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, denseVectorField + "_doc") + ), + null, + s -> s.setCcsMinimizeRoundtrips(false) + ); + } +} diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/MatchQueryBuilderCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/MatchQueryBuilderCrossClusterSearchIT.java new file mode 100644 index 0000000000000..6868032fd6fa0 --- /dev/null +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/MatchQueryBuilderCrossClusterSearchIT.java @@ -0,0 +1,229 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.search.ccs; + +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.index.query.MatchQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryShardException; +import org.elasticsearch.inference.SimilarityMeasure; +import org.elasticsearch.search.builder.SearchSourceBuilder; + +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Consumer; + +import static org.hamcrest.Matchers.equalTo; + +public class MatchQueryBuilderCrossClusterSearchIT extends AbstractSemanticCrossClusterSearchTestCase { + public void testMatchQuery() throws Exception { + final String localIndexName = "local-index"; + final String remoteIndexName = "remote-index"; + final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + + final String commonInferenceId = "common-inference-id"; + final String localInferenceId = "local-inference-id"; + final String remoteInferenceId = "remote-inference-id"; + + final String commonInferenceIdField = "common-inference-id-field"; + final String variableInferenceIdField = "variable-inference-id-field"; + final String mixedTypeField1 = "mixed-type-field-1"; + final String mixedTypeField2 = "mixed-type-field-2"; + + final TestIndexInfo localIndexInfo = new TestIndexInfo( + localIndexName, + Map.of(commonInferenceId, sparseEmbeddingServiceSettings(), localInferenceId, sparseEmbeddingServiceSettings()), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + variableInferenceIdField, + semanticTextMapping(localInferenceId), + mixedTypeField1, + semanticTextMapping(localInferenceId), + mixedTypeField2, + textMapping() + ), + Map.of( + "local_doc_1", + Map.of(commonInferenceIdField, "a"), + "local_doc_2", 
+ Map.of(variableInferenceIdField, "b"), + "local_doc_3", + Map.of(mixedTypeField1, "c"), + "local_doc_4", + Map.of(mixedTypeField2, "d") + ) + ); + final TestIndexInfo remoteIndexInfo = new TestIndexInfo( + remoteIndexName, + Map.of( + commonInferenceId, + textEmbeddingServiceSettings(256, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT), + remoteInferenceId, + textEmbeddingServiceSettings(384, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT) + ), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + variableInferenceIdField, + semanticTextMapping(remoteInferenceId), + mixedTypeField1, + textMapping(), + mixedTypeField2, + semanticTextMapping(remoteInferenceId) + ), + Map.of( + "remote_doc_1", + Map.of(commonInferenceIdField, "w"), + "remote_doc_2", + Map.of(variableInferenceIdField, "x"), + "remote_doc_3", + Map.of(mixedTypeField1, "y"), + "remote_doc_4", + Map.of(mixedTypeField2, "z") + ) + ); + setupTwoClusters(localIndexInfo, remoteIndexInfo); + + // Query a field has the same inference ID value across clusters, but with different backing inference services + assertSearchResponse( + new MatchQueryBuilder(commonInferenceIdField, "a"), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_1"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_1") + ) + ); + + // Query a field that has different inference ID values across clusters + assertSearchResponse( + new MatchQueryBuilder(variableInferenceIdField, "b"), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2") + ) + ); + + // Query a field that has mixed types across clusters + assertSearchResponse( + new MatchQueryBuilder(mixedTypeField1, "y"), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, 
"remote_doc_3") + ) + ); + assertSearchResponse( + new MatchQueryBuilder(mixedTypeField2, "d"), + queryIndices, + List.of( + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_4"), + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_4") + ) + ); + } + + public void testMatchQueryWithCcsMinimizeRoundTripsFalse() throws Exception { + final String localIndexName = "local-index"; + final String remoteIndexName = "remote-index"; + final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + final Consumer<QueryBuilder> assertCcsMinimizeRoundTripsFalseFailure = q -> { + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(q); + SearchRequest searchRequest = new SearchRequest(queryIndices, searchSourceBuilder); + searchRequest.setCcsMinimizeRoundtrips(false); + + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> client().search(searchRequest).actionGet(TEST_REQUEST_TIMEOUT) + ); + assertThat( + e.getMessage(), + equalTo( + "match query does not support cross-cluster search when querying a [semantic_text] field when " + + "[ccs_minimize_roundtrips] is false" + ) + ); + }; + + final String commonInferenceId = "common-inference-id"; + + final String commonInferenceIdField = "common-inference-id-field"; + final String mixedTypeField1 = "mixed-type-field-1"; + final String mixedTypeField2 = "mixed-type-field-2"; + final String textField = "text-field"; + + final TestIndexInfo localIndexInfo = new TestIndexInfo( + localIndexName, + Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + mixedTypeField1, + semanticTextMapping(commonInferenceId), + mixedTypeField2, + textMapping(), + textField, + textMapping() + ), + Map.of(mixedTypeField2 + "_doc", Map.of(mixedTypeField2, "a"), textField + "_doc", Map.of(textField, "b b b")) + ); + final TestIndexInfo remoteIndexInfo = new 
TestIndexInfo( + remoteIndexName, + Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + mixedTypeField1, + textMapping(), + mixedTypeField2, + semanticTextMapping(commonInferenceId), + textField, + textMapping() + ), + Map.of(textField + "_doc", Map.of(textField, "b")) + ); + setupTwoClusters(localIndexInfo, remoteIndexInfo); + + // Validate that expected cases fail + assertCcsMinimizeRoundTripsFalseFailure.accept(new MatchQueryBuilder(commonInferenceIdField, randomAlphaOfLength(5))); + assertCcsMinimizeRoundTripsFalseFailure.accept(new MatchQueryBuilder(mixedTypeField1, randomAlphaOfLength(5))); + + // Validate the expected ccs_minimize_roundtrips=false detection gap and failure mode when querying non-inference fields locally + assertSearchResponse( + new MatchQueryBuilder(mixedTypeField2, "a"), + queryIndices, + List.of(new SearchResult(null, localIndexName, mixedTypeField2 + "_doc")), + new ClusterFailure( + SearchResponse.Cluster.Status.SKIPPED, + Set.of( + new FailureCause( + QueryShardException.class, + "failed to create query: Field [mixed-type-field-2] of type [semantic_text] does not support match queries" + ) + ) + ), + s -> s.setCcsMinimizeRoundtrips(false) + ); + + // Validate that a CCS match query functions when only text fields are queried + assertSearchResponse( + new MatchQueryBuilder(textField, "b"), + queryIndices, + List.of( + new SearchResult(null, localIndexName, textField + "_doc"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, textField + "_doc") + ), + null, + s -> s.setCcsMinimizeRoundtrips(false) + ); + } +} diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java deleted file mode 100644 index b0bff3ee0a848..0000000000000 --- 
a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticCrossClusterSearchIT.java +++ /dev/null @@ -1,1141 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.search.ccs; - -import org.elasticsearch.action.DocWriteResponse; -import org.elasticsearch.action.search.OpenPointInTimeRequest; -import org.elasticsearch.action.search.OpenPointInTimeResponse; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.action.search.TransportOpenPointInTimeAction; -import org.elasticsearch.action.support.broadcast.BroadcastResponse; -import org.elasticsearch.client.internal.Client; -import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.common.io.stream.NamedWriteableRegistry; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.core.Nullable; -import org.elasticsearch.core.TimeValue; -import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; -import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper; -import org.elasticsearch.index.query.MatchQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryShardException; -import org.elasticsearch.inference.MinimalServiceSettings; -import org.elasticsearch.inference.SimilarityMeasure; -import org.elasticsearch.inference.TaskType; -import org.elasticsearch.inference.WeightedToken; -import org.elasticsearch.license.LicenseSettings; -import org.elasticsearch.plugins.ActionPlugin; -import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.plugins.SearchPlugin; -import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.search.SearchHit; -import 
org.elasticsearch.search.builder.PointInTimeBuilder; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.elasticsearch.search.vectors.KnnVectorQueryBuilder; -import org.elasticsearch.search.vectors.VectorData; -import org.elasticsearch.test.AbstractMultiClustersTestCase; -import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentFactory; -import org.elasticsearch.xcontent.XContentType; -import org.elasticsearch.xpack.core.inference.action.PutInferenceModelAction; -import org.elasticsearch.xpack.core.ml.action.CoordinatedInferenceAction; -import org.elasticsearch.xpack.core.ml.inference.MlInferenceNamedXContentProvider; -import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilder; -import org.elasticsearch.xpack.core.ml.vectors.TextEmbeddingQueryVectorBuilder; -import org.elasticsearch.xpack.inference.LocalStateInferencePlugin; -import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper; -import org.elasticsearch.xpack.inference.mock.TestDenseInferenceServiceExtension; -import org.elasticsearch.xpack.inference.mock.TestInferenceServicePlugin; -import org.elasticsearch.xpack.inference.mock.TestSparseInferenceServiceExtension; -import org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder; -import org.elasticsearch.xpack.ml.action.TransportCoordinatedInferenceAction; - -import java.io.IOException; -import java.util.Collection; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.function.BiConsumer; -import java.util.function.Consumer; -import java.util.stream.Collectors; - -import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.IVF_FORMAT; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; -import static org.hamcrest.Matchers.equalTo; -import static 
org.hamcrest.Matchers.is; - -public class SemanticCrossClusterSearchIT extends AbstractMultiClustersTestCase { - private static final String REMOTE_CLUSTER = "cluster_a"; - - @Override - protected List<String> remoteClusterAlias() { - return List.of(REMOTE_CLUSTER); - } - - @Override - protected Map<String, Boolean> skipUnavailableForRemoteClusters() { - return Map.of(REMOTE_CLUSTER, DEFAULT_SKIP_UNAVAILABLE); - } - - @Override - protected boolean reuseClusters() { - return false; - } - - @Override - protected Settings nodeSettings() { - return Settings.builder().put(LicenseSettings.SELF_GENERATED_LICENSE_TYPE.getKey(), "trial").build(); - } - - @Override - protected Collection<Class<? extends Plugin>> nodePlugins(String clusterAlias) { - return List.of(LocalStateInferencePlugin.class, TestInferenceServicePlugin.class, FakeMlPlugin.class); - } - - public void testSemanticQuery() throws Exception { - final String localIndexName = "local-index"; - final String remoteIndexName = "remote-index"; - final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; - - final String commonInferenceId = "common-inference-id"; - final String localInferenceId = "local-inference-id"; - final String remoteInferenceId = "remote-inference-id"; - - final String commonInferenceIdField = "common-inference-id-field"; - final String variableInferenceIdField = "variable-inference-id-field"; - - final TestIndexInfo localIndexInfo = new TestIndexInfo( - localIndexName, - Map.of(commonInferenceId, sparseEmbeddingServiceSettings(), localInferenceId, sparseEmbeddingServiceSettings()), - Map.of( - commonInferenceIdField, - semanticTextMapping(commonInferenceId), - variableInferenceIdField, - semanticTextMapping(localInferenceId) - ), - Map.of("local_doc_1", Map.of(commonInferenceIdField, "a"), "local_doc_2", Map.of(variableInferenceIdField, "b")) - ); - final TestIndexInfo remoteIndexInfo = new TestIndexInfo( - remoteIndexName, - Map.of( - commonInferenceId, - 
textEmbeddingServiceSettings(256, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT), - remoteInferenceId, - textEmbeddingServiceSettings(384, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT) - ), - Map.of( - commonInferenceIdField, - semanticTextMapping(commonInferenceId), - variableInferenceIdField, - semanticTextMapping(remoteInferenceId) - ), - Map.of("remote_doc_1", Map.of(commonInferenceIdField, "x"), "remote_doc_2", Map.of(variableInferenceIdField, "y")) - ); - setupTwoClusters(localIndexInfo, remoteIndexInfo); - - // Query a field has the same inference ID value across clusters, but with different backing inference services - assertSearchResponse( - new SemanticQueryBuilder(commonInferenceIdField, "a"), - queryIndices, - List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_1"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_1") - ) - ); - - // Query a field that has different inference ID values across clusters - assertSearchResponse( - new SemanticQueryBuilder(variableInferenceIdField, "b"), - queryIndices, - List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2") - ) - ); - } - - public void testSemanticQueryWithCcMinimizeRoundTripsFalse() throws Exception { - final String localIndexName = "local-index"; - final String remoteIndexName = "remote-index"; - final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; - final SemanticQueryBuilder queryBuilder = new SemanticQueryBuilder("foo", "bar"); - final Consumer<SearchRequest> assertCcsMinimizeRoundTripsFalseFailure = s -> { - IllegalArgumentException e = assertThrows( - IllegalArgumentException.class, - () -> client().search(s).actionGet(TEST_REQUEST_TIMEOUT) - ); - assertThat( - e.getMessage(), - equalTo("semantic query does not support cross-cluster search when [ccs_minimize_roundtrips] is false") - ); 
- }; - - final TestIndexInfo localIndexInfo = new TestIndexInfo(localIndexName, Map.of(), Map.of(), Map.of()); - final TestIndexInfo remoteIndexInfo = new TestIndexInfo(remoteIndexName, Map.of(), Map.of(), Map.of()); - setupTwoClusters(localIndexInfo, remoteIndexInfo); - - // Explicitly set ccs_minimize_roundtrips=false in the search request - SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder); - SearchRequest searchRequestWithCcMinimizeRoundTripsFalse = new SearchRequest(queryIndices, searchSourceBuilder); - searchRequestWithCcMinimizeRoundTripsFalse.setCcsMinimizeRoundtrips(false); - assertCcsMinimizeRoundTripsFalseFailure.accept(searchRequestWithCcMinimizeRoundTripsFalse); - - // Using a point in time implicitly sets ccs_minimize_roundtrips=false - BytesReference pitId = openPointInTime(queryIndices, TimeValue.timeValueMinutes(2)); - SearchSourceBuilder searchSourceBuilderWithPit = new SearchSourceBuilder().query(queryBuilder) - .pointInTimeBuilder(new PointInTimeBuilder(pitId)); - SearchRequest searchRequestWithPit = new SearchRequest().source(searchSourceBuilderWithPit); - assertCcsMinimizeRoundTripsFalseFailure.accept(searchRequestWithPit); - } - - public void testMatchQuery() throws Exception { - final String localIndexName = "local-index"; - final String remoteIndexName = "remote-index"; - final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; - - final String commonInferenceId = "common-inference-id"; - final String localInferenceId = "local-inference-id"; - final String remoteInferenceId = "remote-inference-id"; - - final String commonInferenceIdField = "common-inference-id-field"; - final String variableInferenceIdField = "variable-inference-id-field"; - final String mixedTypeField1 = "mixed-type-field-1"; - final String mixedTypeField2 = "mixed-type-field-2"; - - final TestIndexInfo localIndexInfo = new TestIndexInfo( - localIndexName, - 
Map.of(commonInferenceId, sparseEmbeddingServiceSettings(), localInferenceId, sparseEmbeddingServiceSettings()), - Map.of( - commonInferenceIdField, - semanticTextMapping(commonInferenceId), - variableInferenceIdField, - semanticTextMapping(localInferenceId), - mixedTypeField1, - semanticTextMapping(localInferenceId), - mixedTypeField2, - textMapping() - ), - Map.of( - "local_doc_1", - Map.of(commonInferenceIdField, "a"), - "local_doc_2", - Map.of(variableInferenceIdField, "b"), - "local_doc_3", - Map.of(mixedTypeField1, "c"), - "local_doc_4", - Map.of(mixedTypeField2, "d") - ) - ); - final TestIndexInfo remoteIndexInfo = new TestIndexInfo( - remoteIndexName, - Map.of( - commonInferenceId, - textEmbeddingServiceSettings(256, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT), - remoteInferenceId, - textEmbeddingServiceSettings(384, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT) - ), - Map.of( - commonInferenceIdField, - semanticTextMapping(commonInferenceId), - variableInferenceIdField, - semanticTextMapping(remoteInferenceId), - mixedTypeField1, - textMapping(), - mixedTypeField2, - semanticTextMapping(remoteInferenceId) - ), - Map.of( - "remote_doc_1", - Map.of(commonInferenceIdField, "w"), - "remote_doc_2", - Map.of(variableInferenceIdField, "x"), - "remote_doc_3", - Map.of(mixedTypeField1, "y"), - "remote_doc_4", - Map.of(mixedTypeField2, "z") - ) - ); - setupTwoClusters(localIndexInfo, remoteIndexInfo); - - // Query a field has the same inference ID value across clusters, but with different backing inference services - assertSearchResponse( - new MatchQueryBuilder(commonInferenceIdField, "a"), - queryIndices, - List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_1"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_1") - ) - ); - - // Query a field that has different inference ID values across clusters - assertSearchResponse( - new MatchQueryBuilder(variableInferenceIdField, "b"), - 
queryIndices, - List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2") - ) - ); - - // Query a field that has mixed types across clusters - assertSearchResponse( - new MatchQueryBuilder(mixedTypeField1, "y"), - queryIndices, - List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3") - ) - ); - assertSearchResponse( - new MatchQueryBuilder(mixedTypeField2, "d"), - queryIndices, - List.of( - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_4"), - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_4") - ) - ); - } - - public void testMatchQueryWithCcsMinimizeRoundTripsFalse() throws Exception { - final String localIndexName = "local-index"; - final String remoteIndexName = "remote-index"; - final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; - final Consumer<QueryBuilder> assertCcsMinimizeRoundTripsFalseFailure = q -> { - SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(q); - SearchRequest searchRequest = new SearchRequest(queryIndices, searchSourceBuilder); - searchRequest.setCcsMinimizeRoundtrips(false); - - IllegalArgumentException e = assertThrows( - IllegalArgumentException.class, - () -> client().search(searchRequest).actionGet(TEST_REQUEST_TIMEOUT) - ); - assertThat( - e.getMessage(), - equalTo( - "match query does not support cross-cluster search when querying a [semantic_text] field when " - + "[ccs_minimize_roundtrips] is false" - ) - ); - }; - - final String commonInferenceId = "common-inference-id"; - - final String commonInferenceIdField = "common-inference-id-field"; - final String mixedTypeField1 = "mixed-type-field-1"; - final String mixedTypeField2 = "mixed-type-field-2"; - final String textField = "text-field"; - - final TestIndexInfo localIndexInfo = new TestIndexInfo( - 
localIndexName, - Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), - Map.of( - commonInferenceIdField, - semanticTextMapping(commonInferenceId), - mixedTypeField1, - semanticTextMapping(commonInferenceId), - mixedTypeField2, - textMapping(), - textField, - textMapping() - ), - Map.of(mixedTypeField2 + "_doc", Map.of(mixedTypeField2, "a"), textField + "_doc", Map.of(textField, "b b b")) - ); - final TestIndexInfo remoteIndexInfo = new TestIndexInfo( - remoteIndexName, - Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), - Map.of( - commonInferenceIdField, - semanticTextMapping(commonInferenceId), - mixedTypeField1, - textMapping(), - mixedTypeField2, - semanticTextMapping(commonInferenceId), - textField, - textMapping() - ), - Map.of(textField + "_doc", Map.of(textField, "b")) - ); - setupTwoClusters(localIndexInfo, remoteIndexInfo); - - // Validate that expected cases fail - assertCcsMinimizeRoundTripsFalseFailure.accept(new MatchQueryBuilder(commonInferenceIdField, randomAlphaOfLength(5))); - assertCcsMinimizeRoundTripsFalseFailure.accept(new MatchQueryBuilder(mixedTypeField1, randomAlphaOfLength(5))); - - // Validate the expected ccs_minimize_roundtrips=false detection gap and failure mode when querying non-inference fields locally - assertSearchResponse( - new MatchQueryBuilder(mixedTypeField2, "a"), - queryIndices, - List.of(new SearchResult(null, localIndexName, mixedTypeField2 + "_doc")), - new ClusterFailure( - SearchResponse.Cluster.Status.SKIPPED, - Set.of( - new FailureCause( - QueryShardException.class, - "failed to create query: Field [mixed-type-field-2] of type [semantic_text] does not support match queries" - ) - ) - ), - s -> s.setCcsMinimizeRoundtrips(false) - ); - - // Validate that a CCS match query functions when only text fields are queried - assertSearchResponse( - new MatchQueryBuilder(textField, "b"), - queryIndices, - List.of( - new SearchResult(null, localIndexName, textField + "_doc"), - new 
SearchResult(REMOTE_CLUSTER, remoteIndexName, textField + "_doc") - ), - null, - s -> s.setCcsMinimizeRoundtrips(false) - ); - } - - public void testKnnQuery() throws Exception { - final String localIndexName = "local-index"; - final String remoteIndexName = "remote-index"; - final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; - - final String commonInferenceId = "common-inference-id"; - final String localInferenceId = "local-inference-id"; - - final String commonInferenceIdField = "common-inference-id-field"; - final String mixedTypeField1 = "mixed-type-field-1"; - final String mixedTypeField2 = "mixed-type-field-2"; - - final TestIndexInfo localIndexInfo = new TestIndexInfo( - localIndexName, - Map.of( - commonInferenceId, - textEmbeddingServiceSettings(256, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT), - localInferenceId, - textEmbeddingServiceSettings(384, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT) - ), - Map.of( - commonInferenceIdField, - semanticTextMapping(commonInferenceId), - mixedTypeField1, - denseVectorMapping(384), - mixedTypeField2, - semanticTextMapping(localInferenceId) - ), - Map.of( - "local_doc_1", - Map.of(commonInferenceIdField, "a"), - "local_doc_2", - Map.of(mixedTypeField1, generateDenseVectorFieldValue(384, DenseVectorFieldMapper.ElementType.FLOAT, -128.0f)), - "local_doc_3", - Map.of(mixedTypeField2, "c") - ) - ); - final TestIndexInfo remoteIndexInfo = new TestIndexInfo( - remoteIndexName, - Map.of( - commonInferenceId, - textEmbeddingServiceSettings(384, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT) - ), - Map.of( - commonInferenceIdField, - semanticTextMapping(commonInferenceId), - mixedTypeField1, - semanticTextMapping(commonInferenceId), - mixedTypeField2, - denseVectorMapping(384) - ), - Map.of( - "remote_doc_1", - Map.of(commonInferenceIdField, "x"), - "remote_doc_2", - Map.of(mixedTypeField1, 
"y"), - "remote_doc_3", - Map.of(mixedTypeField2, generateDenseVectorFieldValue(384, DenseVectorFieldMapper.ElementType.FLOAT, -128.0f)) - ) - ); - setupTwoClusters(localIndexInfo, remoteIndexInfo); - - // Query a field has the same inference ID value across clusters, but with different backing inference services - assertSearchResponse( - new KnnVectorQueryBuilder( - commonInferenceIdField, - new TextEmbeddingQueryVectorBuilder(null, "a"), - 10, - 100, - IVF_FORMAT.isEnabled() ? 10f : null, - null - ), - queryIndices, - List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_1"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_1") - ) - ); - - // Query a field that has mixed types across clusters - assertSearchResponse( - new KnnVectorQueryBuilder( - mixedTypeField1, - new TextEmbeddingQueryVectorBuilder(localInferenceId, "y"), - 10, - 100, - IVF_FORMAT.isEnabled() ? 10f : null, - null - ), - queryIndices, - List.of( - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2"), - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2") - ) - ); - assertSearchResponse( - new KnnVectorQueryBuilder( - mixedTypeField2, - new TextEmbeddingQueryVectorBuilder(localInferenceId, "c"), - 10, - 100, - IVF_FORMAT.isEnabled() ? 10f : null, - null - ), - queryIndices, - List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3") - ) - ); - - // Query a field that has mixed types across clusters using a query vector - final VectorData queryVector = new VectorData( - generateDenseVectorFieldValue(384, DenseVectorFieldMapper.ElementType.FLOAT, -128.0f) - ); - assertSearchResponse( - new KnnVectorQueryBuilder(mixedTypeField1, queryVector, 10, 100, IVF_FORMAT.isEnabled() ? 
10f : null, null, null), - queryIndices, - List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2") - ) - ); - assertSearchResponse( - new KnnVectorQueryBuilder(mixedTypeField2, queryVector, 10, 100, IVF_FORMAT.isEnabled() ? 10f : null, null, null), - queryIndices, - List.of( - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3"), - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3") - ) - ); - - // Check that omitting the inference ID when querying a remote dense vector field leads to the expected partial failure - assertSearchResponse( - new KnnVectorQueryBuilder( - mixedTypeField2, - new TextEmbeddingQueryVectorBuilder(null, "c"), - 10, - 100, - IVF_FORMAT.isEnabled() ? 10f : null, - null - ), - queryIndices, - List.of(new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3")), - new ClusterFailure( - SearchResponse.Cluster.Status.SKIPPED, - Set.of(new FailureCause(IllegalArgumentException.class, "[model_id] must not be null.")) - ), - null - ); - } - - public void testKnnQueryWithCcsMinimizeRoundTripsFalse() throws Exception { - final String localIndexName = "local-index"; - final String remoteIndexName = "remote-index"; - final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; - final BiConsumer assertCcsMinimizeRoundTripsFalseFailure = (f, qvb) -> { - KnnVectorQueryBuilder queryBuilder = new KnnVectorQueryBuilder(f, qvb, 10, 100, IVF_FORMAT.isEnabled() ? 
10f : null, null); - - SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder); - SearchRequest searchRequest = new SearchRequest(queryIndices, searchSourceBuilder); - searchRequest.setCcsMinimizeRoundtrips(false); - - IllegalArgumentException e = assertThrows( - IllegalArgumentException.class, - () -> client().search(searchRequest).actionGet(TEST_REQUEST_TIMEOUT) - ); - assertThat( - e.getMessage(), - equalTo( - "knn query does not support cross-cluster search when querying a [semantic_text] field when " - + "[ccs_minimize_roundtrips] is false" - ) - ); - }; - - final int dimensions = 256; - final String commonInferenceId = "common-inference-id"; - final MinimalServiceSettings commonInferenceIdServiceSettings = textEmbeddingServiceSettings( - dimensions, - SimilarityMeasure.COSINE, - DenseVectorFieldMapper.ElementType.FLOAT - ); - - final String commonInferenceIdField = "common-inference-id-field"; - final String mixedTypeField1 = "mixed-type-field-1"; - final String mixedTypeField2 = "mixed-type-field-2"; - final String denseVectorField = "dense-vector-field"; - - final TestIndexInfo localIndexInfo = new TestIndexInfo( - localIndexName, - Map.of(commonInferenceId, commonInferenceIdServiceSettings), - Map.of( - commonInferenceIdField, - semanticTextMapping(commonInferenceId), - mixedTypeField1, - semanticTextMapping(commonInferenceId), - mixedTypeField2, - denseVectorMapping(dimensions), - denseVectorField, - denseVectorMapping(dimensions) - ), - Map.of( - mixedTypeField2 + "_doc", - Map.of(mixedTypeField2, generateDenseVectorFieldValue(dimensions, DenseVectorFieldMapper.ElementType.FLOAT, -128.0f)), - denseVectorField + "_doc", - Map.of(denseVectorField, generateDenseVectorFieldValue(dimensions, DenseVectorFieldMapper.ElementType.FLOAT, 1.0f)) - ) - ); - final TestIndexInfo remoteIndexInfo = new TestIndexInfo( - remoteIndexName, - Map.of(commonInferenceId, commonInferenceIdServiceSettings), - Map.of( - commonInferenceIdField, - 
semanticTextMapping(commonInferenceId), - mixedTypeField1, - denseVectorMapping(dimensions), - mixedTypeField2, - semanticTextMapping(commonInferenceId), - denseVectorField, - denseVectorMapping(dimensions) - ), - Map.of( - mixedTypeField2 + "_doc", - Map.of(mixedTypeField2, "a"), - denseVectorField + "_doc", - Map.of(denseVectorField, generateDenseVectorFieldValue(dimensions, DenseVectorFieldMapper.ElementType.FLOAT, -128.0f)) - ) - ); - setupTwoClusters(localIndexInfo, remoteIndexInfo); - - // Validate that expected cases fail - assertCcsMinimizeRoundTripsFalseFailure.accept( - commonInferenceIdField, - new TextEmbeddingQueryVectorBuilder(null, randomAlphaOfLength(5)) - ); - assertCcsMinimizeRoundTripsFalseFailure.accept( - mixedTypeField1, - new TextEmbeddingQueryVectorBuilder(commonInferenceId, randomAlphaOfLength(5)) - ); - - // Validate the expected ccs_minimize_roundtrips=false detection gap and failure mode when querying non-inference fields locally - assertSearchResponse( - new KnnVectorQueryBuilder( - mixedTypeField2, - new TextEmbeddingQueryVectorBuilder(commonInferenceId, "foo"), - 10, - 100, - IVF_FORMAT.isEnabled() ? 10f : null, - null - ), - queryIndices, - List.of(new SearchResult(null, localIndexName, mixedTypeField2 + "_doc")), - new ClusterFailure( - SearchResponse.Cluster.Status.SKIPPED, - Set.of( - new FailureCause( - QueryShardException.class, - "failed to create query: [knn] queries are only supported on [dense_vector] fields" - ) - ) - ), - s -> s.setCcsMinimizeRoundtrips(false) - ); - - // Validate that a CCS knn query functions when only dense vector fields are queried - assertSearchResponse( - new KnnVectorQueryBuilder( - denseVectorField, - generateDenseVectorFieldValue(dimensions, DenseVectorFieldMapper.ElementType.FLOAT, 1.0f), - 10, - 100, - IVF_FORMAT.isEnabled() ? 
10f : null, - null, - null - ), - queryIndices, - List.of( - new SearchResult(null, localIndexName, denseVectorField + "_doc"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, denseVectorField + "_doc") - ), - null, - s -> s.setCcsMinimizeRoundtrips(false) - ); - } - - public void testSparseVectorQuery() throws Exception { - final String localIndexName = "local-index"; - final String remoteIndexName = "remote-index"; - final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; - - final String commonInferenceId = "common-inference-id"; - - final String commonInferenceIdField = "common-inference-id-field"; - final String mixedTypeField1 = "mixed-type-field-1"; - final String mixedTypeField2 = "mixed-type-field-2"; - - final TestIndexInfo localIndexInfo = new TestIndexInfo( - localIndexName, - Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), - Map.of( - commonInferenceIdField, - semanticTextMapping(commonInferenceId), - mixedTypeField1, - sparseVectorMapping(), - mixedTypeField2, - semanticTextMapping(commonInferenceId) - ), - Map.of( - "local_doc_1", - Map.of(commonInferenceIdField, "a"), - "local_doc_2", - Map.of(mixedTypeField1, generateSparseVectorFieldValue(1.0f)), - "local_doc_3", - Map.of(mixedTypeField2, "c") - ) - ); - final TestIndexInfo remoteIndexInfo = new TestIndexInfo( - remoteIndexName, - Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), - Map.of( - commonInferenceIdField, - semanticTextMapping(commonInferenceId), - mixedTypeField1, - semanticTextMapping(commonInferenceId), - mixedTypeField2, - sparseVectorMapping() - ), - Map.of( - "remote_doc_1", - Map.of(commonInferenceIdField, "x"), - "remote_doc_2", - Map.of(mixedTypeField1, "y"), - "remote_doc_3", - Map.of(mixedTypeField2, generateSparseVectorFieldValue(1.0f)) - ) - ); - setupTwoClusters(localIndexInfo, remoteIndexInfo); - - // Query a field has the same inference ID value across clusters, but with different 
backing inference services - assertSearchResponse( - new SparseVectorQueryBuilder(commonInferenceIdField, null, "a"), - queryIndices, - List.of( - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_1"), - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_1") - ) - ); - - // Query a field that has mixed types across clusters - assertSearchResponse( - new SparseVectorQueryBuilder(mixedTypeField1, commonInferenceId, "b"), - queryIndices, - List.of( - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2"), - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2") - ) - ); - assertSearchResponse( - new SparseVectorQueryBuilder(mixedTypeField2, commonInferenceId, "c"), - queryIndices, - List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3") - ) - ); - - // Query a field that has mixed types across clusters using a query vector - final List queryVector = generateSparseVectorFieldValue(1.0f).entrySet() - .stream() - .map(e -> new WeightedToken(e.getKey(), e.getValue())) - .toList(); - assertSearchResponse( - new SparseVectorQueryBuilder(mixedTypeField1, queryVector, null, null, null, null), - queryIndices, - List.of( - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2"), - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2") - ) - ); - assertSearchResponse( - new SparseVectorQueryBuilder(mixedTypeField2, queryVector, null, null, null, null), - queryIndices, - List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3") - ) - ); - - // Check that omitting the inference ID when querying a remote sparse vector field leads to the expected partial failure - assertSearchResponse( - new SparseVectorQueryBuilder(mixedTypeField2, null, "c"), - queryIndices, - List.of(new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3")), - new ClusterFailure( - 
SearchResponse.Cluster.Status.SKIPPED, - Set.of(new FailureCause(IllegalArgumentException.class, "inference_id required to perform vector search on query string")) - ), - null - ); - } - - public void testSparseVectorQueryWithCcsMinimizeRoundTripsFalse() throws Exception { - final String localIndexName = "local-index"; - final String remoteIndexName = "remote-index"; - final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; - final Consumer assertCcsMinimizeRoundTripsFalseFailure = q -> { - SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(q); - SearchRequest searchRequest = new SearchRequest(queryIndices, searchSourceBuilder); - searchRequest.setCcsMinimizeRoundtrips(false); - - IllegalArgumentException e = assertThrows( - IllegalArgumentException.class, - () -> client().search(searchRequest).actionGet(TEST_REQUEST_TIMEOUT) - ); - assertThat( - e.getMessage(), - equalTo( - "sparse_vector query does not support cross-cluster search when querying a [semantic_text] field when " - + "[ccs_minimize_roundtrips] is false" - ) - ); - }; - - final String commonInferenceId = "common-inference-id"; - - final String commonInferenceIdField = "common-inference-id-field"; - final String mixedTypeField1 = "mixed-type-field-1"; - final String mixedTypeField2 = "mixed-type-field-2"; - final String sparseVectorField = "sparse-vector-field"; - - final TestIndexInfo localIndexInfo = new TestIndexInfo( - localIndexName, - Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), - Map.of( - commonInferenceIdField, - semanticTextMapping(commonInferenceId), - mixedTypeField1, - semanticTextMapping(commonInferenceId), - mixedTypeField2, - sparseVectorMapping(), - sparseVectorField, - sparseVectorMapping() - ), - Map.of( - mixedTypeField2 + "_doc", - Map.of(mixedTypeField2, generateSparseVectorFieldValue(1.0f)), - sparseVectorField + "_doc", - Map.of(sparseVectorField, 
generateSparseVectorFieldValue(1.0f)) - ) - ); - final TestIndexInfo remoteIndexInfo = new TestIndexInfo( - remoteIndexName, - Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), - Map.of( - commonInferenceIdField, - semanticTextMapping(commonInferenceId), - mixedTypeField1, - sparseVectorMapping(), - mixedTypeField2, - semanticTextMapping(commonInferenceId), - sparseVectorField, - sparseVectorMapping() - ), - Map.of( - mixedTypeField2 + "_doc", - Map.of(mixedTypeField2, "a"), - sparseVectorField + "_doc", - Map.of(sparseVectorField, generateSparseVectorFieldValue(0.5f)) - ) - ); - setupTwoClusters(localIndexInfo, remoteIndexInfo); - - // Validate that expected cases fail - assertCcsMinimizeRoundTripsFalseFailure.accept(new SparseVectorQueryBuilder(commonInferenceIdField, null, randomAlphaOfLength(5))); - assertCcsMinimizeRoundTripsFalseFailure.accept( - new SparseVectorQueryBuilder(mixedTypeField1, commonInferenceId, randomAlphaOfLength(5)) - ); - - // Validate the expected ccs_minimize_roundtrips=false detection gap and failure mode when querying non-inference fields locally - assertSearchResponse( - new SparseVectorQueryBuilder(mixedTypeField2, commonInferenceId, "foo"), - queryIndices, - List.of(new SearchResult(null, localIndexName, mixedTypeField2 + "_doc")), - new ClusterFailure( - SearchResponse.Cluster.Status.SKIPPED, - Set.of( - new FailureCause( - QueryShardException.class, - "failed to create query: field [mixed-type-field-2] must be type [sparse_vector] but is type [semantic_text]" - ) - ) - ), - s -> s.setCcsMinimizeRoundtrips(false) - ); - - // Validate that a CCS sparse vector query functions when only sparse vector fields are queried - assertSearchResponse( - new SparseVectorQueryBuilder(sparseVectorField, commonInferenceId, "foo"), - queryIndices, - List.of( - new SearchResult(null, localIndexName, sparseVectorField + "_doc"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, sparseVectorField + "_doc") - ), - null, - s -> 
s.setCcsMinimizeRoundtrips(false) - ); - } - - private void setupTwoClusters(TestIndexInfo localIndexInfo, TestIndexInfo remoteIndexInfo) throws IOException { - setupCluster(LOCAL_CLUSTER, localIndexInfo); - setupCluster(REMOTE_CLUSTER, remoteIndexInfo); - } - - private void setupCluster(String clusterAlias, TestIndexInfo indexInfo) throws IOException { - final Client client = client(clusterAlias); - final String indexName = indexInfo.name(); - - for (var entry : indexInfo.inferenceEndpoints().entrySet()) { - String inferenceId = entry.getKey(); - MinimalServiceSettings minimalServiceSettings = entry.getValue(); - - Map serviceSettings = new HashMap<>(); - serviceSettings.put("model", randomAlphaOfLength(5)); - serviceSettings.put("api_key", randomAlphaOfLength(5)); - if (minimalServiceSettings.taskType() == TaskType.TEXT_EMBEDDING) { - serviceSettings.put("dimensions", minimalServiceSettings.dimensions()); - serviceSettings.put("similarity", minimalServiceSettings.similarity()); - serviceSettings.put("element_type", minimalServiceSettings.elementType()); - } - - createInferenceEndpoint(client, minimalServiceSettings.taskType(), inferenceId, serviceSettings); - } - - Settings indexSettings = indexSettings(randomIntBetween(2, 5), randomIntBetween(0, 1)).build(); - assertAcked(client.admin().indices().prepareCreate(indexName).setSettings(indexSettings).setMapping(indexInfo.mappings())); - assertFalse( - client.admin() - .cluster() - .prepareHealth(TEST_REQUEST_TIMEOUT, indexName) - .setWaitForYellowStatus() - .setTimeout(TimeValue.timeValueSeconds(10)) - .get() - .isTimedOut() - ); - - for (var entry : indexInfo.docs().entrySet()) { - String docId = entry.getKey(); - Map doc = entry.getValue(); - - DocWriteResponse response = client.prepareIndex(indexName).setId(docId).setSource(doc).execute().actionGet(); - assertThat(response.getResult(), equalTo(DocWriteResponse.Result.CREATED)); - } - BroadcastResponse refreshResponse = 
client.admin().indices().prepareRefresh(indexName).execute().actionGet(); - assertThat(refreshResponse.getStatus(), is(RestStatus.OK)); - } - - private BytesReference openPointInTime(String[] indices, TimeValue keepAlive) { - OpenPointInTimeRequest request = new OpenPointInTimeRequest(indices).keepAlive(keepAlive); - final OpenPointInTimeResponse response = client().execute(TransportOpenPointInTimeAction.TYPE, request).actionGet(); - return response.getPointInTimeId(); - } - - private static void createInferenceEndpoint(Client client, TaskType taskType, String inferenceId, Map serviceSettings) - throws IOException { - final String service = switch (taskType) { - case TEXT_EMBEDDING -> TestDenseInferenceServiceExtension.TestInferenceService.NAME; - case SPARSE_EMBEDDING -> TestSparseInferenceServiceExtension.TestInferenceService.NAME; - default -> throw new IllegalArgumentException("Unhandled task type [" + taskType + "]"); - }; - - final BytesReference content; - try (XContentBuilder builder = XContentFactory.jsonBuilder()) { - builder.startObject(); - builder.field("service", service); - builder.field("service_settings", serviceSettings); - builder.endObject(); - - content = BytesReference.bytes(builder); - } - - PutInferenceModelAction.Request request = new PutInferenceModelAction.Request( - taskType, - inferenceId, - content, - XContentType.JSON, - TEST_REQUEST_TIMEOUT - ); - var responseFuture = client.execute(PutInferenceModelAction.INSTANCE, request); - assertThat(responseFuture.actionGet(TEST_REQUEST_TIMEOUT).getModel().getInferenceEntityId(), equalTo(inferenceId)); - } - - private void assertSearchResponse(QueryBuilder queryBuilder, String[] indices, List expectedSearchResults) - throws Exception { - assertSearchResponse(queryBuilder, indices, expectedSearchResults, null, null); - } - - private void assertSearchResponse( - QueryBuilder queryBuilder, - String[] indices, - List expectedSearchResults, - ClusterFailure expectedRemoteFailure, - Consumer 
searchRequestModifier - ) throws Exception { - SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder).size(expectedSearchResults.size()); - SearchRequest searchRequest = new SearchRequest(indices, searchSourceBuilder); - if (searchRequestModifier != null) { - searchRequestModifier.accept(searchRequest); - } - - assertResponse(client().search(searchRequest), response -> { - SearchHit[] hits = response.getHits().getHits(); - assertThat(hits.length, equalTo(expectedSearchResults.size())); - - Iterator searchResultIterator = expectedSearchResults.iterator(); - for (int i = 0; i < hits.length; i++) { - SearchResult expectedSearchResult = searchResultIterator.next(); - SearchHit actualSearchResult = hits[i]; - - assertThat(actualSearchResult.getClusterAlias(), equalTo(expectedSearchResult.clusterAlias())); - assertThat(actualSearchResult.getIndex(), equalTo(expectedSearchResult.index())); - assertThat(actualSearchResult.getId(), equalTo(expectedSearchResult.id())); - } - - SearchResponse.Clusters clusters = response.getClusters(); - assertThat(clusters.getCluster(LOCAL_CLUSTER).getStatus(), equalTo(SearchResponse.Cluster.Status.SUCCESSFUL)); - assertThat(clusters.getCluster(LOCAL_CLUSTER).getFailures().isEmpty(), is(true)); - - SearchResponse.Cluster remoteCluster = clusters.getCluster(REMOTE_CLUSTER); - if (expectedRemoteFailure != null) { - assertThat(remoteCluster.getStatus(), equalTo(expectedRemoteFailure.status())); - - Set expectedFailures = expectedRemoteFailure.failures(); - Set actualFailures = remoteCluster.getFailures() - .stream() - .map(f -> new FailureCause(f.getCause().getClass(), f.getCause().getMessage())) - .collect(Collectors.toSet()); - assertThat(actualFailures, equalTo(expectedFailures)); - } else { - assertThat(remoteCluster.getStatus(), equalTo(SearchResponse.Cluster.Status.SUCCESSFUL)); - assertThat(remoteCluster.getFailures().isEmpty(), is(true)); - } - }); - } - - private static MinimalServiceSettings 
sparseEmbeddingServiceSettings() { - return new MinimalServiceSettings(null, TaskType.SPARSE_EMBEDDING, null, null, null); - } - - private static MinimalServiceSettings textEmbeddingServiceSettings( - int dimensions, - SimilarityMeasure similarity, - DenseVectorFieldMapper.ElementType elementType - ) { - return new MinimalServiceSettings(null, TaskType.TEXT_EMBEDDING, dimensions, similarity, elementType); - } - - private static Map semanticTextMapping(String inferenceId) { - return Map.of("type", SemanticTextFieldMapper.CONTENT_TYPE, "inference_id", inferenceId); - } - - private static Map textMapping() { - return Map.of("type", "text"); - } - - private static Map denseVectorMapping(int dimensions) { - return Map.of("type", DenseVectorFieldMapper.CONTENT_TYPE, "dims", dimensions); - } - - private static Map sparseVectorMapping() { - return Map.of("type", SparseVectorFieldMapper.CONTENT_TYPE); - } - - private static String fullyQualifiedIndexName(String clusterAlias, String indexName) { - return clusterAlias + ":" + indexName; - } - - private static float[] generateDenseVectorFieldValue(int dimensions, DenseVectorFieldMapper.ElementType elementType, float value) { - if (elementType == DenseVectorFieldMapper.ElementType.BIT) { - assert dimensions % 8 == 0; - dimensions /= 8; - } - - float[] vector = new float[dimensions]; - for (int i = 0; i < dimensions; i++) { - // Use a constant value so that relevance is consistent - vector[i] = value; - } - - return vector; - } - - private static Map generateSparseVectorFieldValue(float weight) { - // Generate values that have the same recall behavior as those produced by TestSparseInferenceServiceExtension. Use a constant token - // weight so that relevance is consistent. 
- return Map.of("feature_0", weight); - } - - public static class FakeMlPlugin extends Plugin implements ActionPlugin, SearchPlugin { - @Override - public List getNamedWriteables() { - return new MlInferenceNamedXContentProvider().getNamedWriteables(); - } - - @Override - public List> getQueryVectorBuilders() { - return List.of( - new QueryVectorBuilderSpec<>( - TextEmbeddingQueryVectorBuilder.NAME, - TextEmbeddingQueryVectorBuilder::new, - TextEmbeddingQueryVectorBuilder.PARSER - ) - ); - } - - @Override - public Collection getActions() { - return List.of(new ActionHandler(CoordinatedInferenceAction.INSTANCE, TransportCoordinatedInferenceAction.class)); - } - } - - private record TestIndexInfo( - String name, - Map inferenceEndpoints, - Map mappings, - Map> docs - ) { - @Override - public Map mappings() { - return Map.of("properties", mappings); - } - } - - private record SearchResult(@Nullable String clusterAlias, String index, String id) {} - - private record FailureCause(Class causeClass, String message) {} - - private record ClusterFailure(SearchResponse.Cluster.Status status, Set failures) {} -} diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticQueryBuilderCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticQueryBuilderCrossClusterSearchIT.java new file mode 100644 index 0000000000000..c4f4edb2ff543 --- /dev/null +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticQueryBuilderCrossClusterSearchIT.java @@ -0,0 +1,121 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.search.ccs; + +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.inference.SimilarityMeasure; +import org.elasticsearch.search.builder.PointInTimeBuilder; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder; + +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; + +import static org.hamcrest.Matchers.equalTo; + +public class SemanticQueryBuilderCrossClusterSearchIT extends AbstractSemanticCrossClusterSearchTestCase { + public void testSemanticQuery() throws Exception { + final String localIndexName = "local-index"; + final String remoteIndexName = "remote-index"; + final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + + final String commonInferenceId = "common-inference-id"; + final String localInferenceId = "local-inference-id"; + final String remoteInferenceId = "remote-inference-id"; + + final String commonInferenceIdField = "common-inference-id-field"; + final String variableInferenceIdField = "variable-inference-id-field"; + + final TestIndexInfo localIndexInfo = new TestIndexInfo( + localIndexName, + Map.of(commonInferenceId, sparseEmbeddingServiceSettings(), localInferenceId, sparseEmbeddingServiceSettings()), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + variableInferenceIdField, + semanticTextMapping(localInferenceId) + ), + Map.of("local_doc_1", Map.of(commonInferenceIdField, "a"), "local_doc_2", Map.of(variableInferenceIdField, "b")) + ); + final TestIndexInfo remoteIndexInfo = new TestIndexInfo( + remoteIndexName, + Map.of( + commonInferenceId, + textEmbeddingServiceSettings(256, SimilarityMeasure.COSINE, 
DenseVectorFieldMapper.ElementType.FLOAT), + remoteInferenceId, + textEmbeddingServiceSettings(384, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT) + ), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + variableInferenceIdField, + semanticTextMapping(remoteInferenceId) + ), + Map.of("remote_doc_1", Map.of(commonInferenceIdField, "x"), "remote_doc_2", Map.of(variableInferenceIdField, "y")) + ); + setupTwoClusters(localIndexInfo, remoteIndexInfo); + + // Query a field has the same inference ID value across clusters, but with different backing inference services + assertSearchResponse( + new SemanticQueryBuilder(commonInferenceIdField, "a"), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_1"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_1") + ) + ); + + // Query a field that has different inference ID values across clusters + assertSearchResponse( + new SemanticQueryBuilder(variableInferenceIdField, "b"), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2") + ) + ); + } + + public void testSemanticQueryWithCcMinimizeRoundTripsFalse() throws Exception { + final String localIndexName = "local-index"; + final String remoteIndexName = "remote-index"; + final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + final SemanticQueryBuilder queryBuilder = new SemanticQueryBuilder("foo", "bar"); + final Consumer assertCcsMinimizeRoundTripsFalseFailure = s -> { + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> client().search(s).actionGet(TEST_REQUEST_TIMEOUT) + ); + assertThat( + e.getMessage(), + equalTo("semantic query does not support cross-cluster search when [ccs_minimize_roundtrips] is false") + ); + }; + + final TestIndexInfo localIndexInfo = new 
TestIndexInfo(localIndexName, Map.of(), Map.of(), Map.of()); + final TestIndexInfo remoteIndexInfo = new TestIndexInfo(remoteIndexName, Map.of(), Map.of(), Map.of()); + setupTwoClusters(localIndexInfo, remoteIndexInfo); + + // Explicitly set ccs_minimize_roundtrips=false in the search request + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder); + SearchRequest searchRequestWithCcMinimizeRoundTripsFalse = new SearchRequest(queryIndices, searchSourceBuilder); + searchRequestWithCcMinimizeRoundTripsFalse.setCcsMinimizeRoundtrips(false); + assertCcsMinimizeRoundTripsFalseFailure.accept(searchRequestWithCcMinimizeRoundTripsFalse); + + // Using a point in time implicitly sets ccs_minimize_roundtrips=false + BytesReference pitId = openPointInTime(queryIndices, TimeValue.timeValueMinutes(2)); + SearchSourceBuilder searchSourceBuilderWithPit = new SearchSourceBuilder().query(queryBuilder) + .pointInTimeBuilder(new PointInTimeBuilder(pitId)); + SearchRequest searchRequestWithPit = new SearchRequest().source(searchSourceBuilderWithPit); + assertCcsMinimizeRoundTripsFalseFailure.accept(searchRequestWithPit); + } +} diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SparseVectorQueryBuilderCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SparseVectorQueryBuilderCrossClusterSearchIT.java new file mode 100644 index 0000000000000..30ed1d514627e --- /dev/null +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SparseVectorQueryBuilderCrossClusterSearchIT.java @@ -0,0 +1,248 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.search.ccs; + +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryShardException; +import org.elasticsearch.inference.WeightedToken; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilder; + +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Consumer; + +import static org.hamcrest.Matchers.equalTo; + +public class SparseVectorQueryBuilderCrossClusterSearchIT extends AbstractSemanticCrossClusterSearchTestCase { + public void testSparseVectorQuery() throws Exception { + final String localIndexName = "local-index"; + final String remoteIndexName = "remote-index"; + final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + + final String commonInferenceId = "common-inference-id"; + + final String commonInferenceIdField = "common-inference-id-field"; + final String mixedTypeField1 = "mixed-type-field-1"; + final String mixedTypeField2 = "mixed-type-field-2"; + + final TestIndexInfo localIndexInfo = new TestIndexInfo( + localIndexName, + Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + mixedTypeField1, + sparseVectorMapping(), + mixedTypeField2, + semanticTextMapping(commonInferenceId) + ), + Map.of( + "local_doc_1", + Map.of(commonInferenceIdField, "a"), + "local_doc_2", + Map.of(mixedTypeField1, generateSparseVectorFieldValue(1.0f)), + "local_doc_3", + Map.of(mixedTypeField2, "c") + ) + ); + final TestIndexInfo remoteIndexInfo = new TestIndexInfo( + remoteIndexName, + Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + 
mixedTypeField1, + semanticTextMapping(commonInferenceId), + mixedTypeField2, + sparseVectorMapping() + ), + Map.of( + "remote_doc_1", + Map.of(commonInferenceIdField, "x"), + "remote_doc_2", + Map.of(mixedTypeField1, "y"), + "remote_doc_3", + Map.of(mixedTypeField2, generateSparseVectorFieldValue(1.0f)) + ) + ); + setupTwoClusters(localIndexInfo, remoteIndexInfo); + + // Query a field has the same inference ID value across clusters, but with different backing inference services + assertSearchResponse( + new SparseVectorQueryBuilder(commonInferenceIdField, null, "a"), + queryIndices, + List.of( + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_1"), + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_1") + ) + ); + + // Query a field that has mixed types across clusters + assertSearchResponse( + new SparseVectorQueryBuilder(mixedTypeField1, commonInferenceId, "b"), + queryIndices, + List.of( + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2"), + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2") + ) + ); + assertSearchResponse( + new SparseVectorQueryBuilder(mixedTypeField2, commonInferenceId, "c"), + queryIndices, + List.of( + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3") + ) + ); + + // Query a field that has mixed types across clusters using a query vector + final List<WeightedToken> queryVector = generateSparseVectorFieldValue(1.0f).entrySet() + .stream() + .map(e -> new WeightedToken(e.getKey(), e.getValue())) + .toList(); + assertSearchResponse( + new SparseVectorQueryBuilder(mixedTypeField1, queryVector, null, null, null, null), + queryIndices, + List.of( + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2"), + new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2") + ) + ); + assertSearchResponse( + new SparseVectorQueryBuilder(mixedTypeField2, queryVector, null, null, null, null), + queryIndices, + List.of( + new
SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3") + ) + ); + + // Check that omitting the inference ID when querying a remote sparse vector field leads to the expected partial failure + assertSearchResponse( + new SparseVectorQueryBuilder(mixedTypeField2, null, "c"), + queryIndices, + List.of(new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3")), + new ClusterFailure( + SearchResponse.Cluster.Status.SKIPPED, + Set.of(new FailureCause(IllegalArgumentException.class, "inference_id required to perform vector search on query string")) + ), + null + ); + } + + public void testSparseVectorQueryWithCcsMinimizeRoundTripsFalse() throws Exception { + final String localIndexName = "local-index"; + final String remoteIndexName = "remote-index"; + final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + final Consumer<QueryBuilder> assertCcsMinimizeRoundTripsFalseFailure = q -> { + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(q); + SearchRequest searchRequest = new SearchRequest(queryIndices, searchSourceBuilder); + searchRequest.setCcsMinimizeRoundtrips(false); + + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> client().search(searchRequest).actionGet(TEST_REQUEST_TIMEOUT) + ); + assertThat( + e.getMessage(), + equalTo( + "sparse_vector query does not support cross-cluster search when querying a [semantic_text] field when " + + "[ccs_minimize_roundtrips] is false" + ) + ); + }; + + final String commonInferenceId = "common-inference-id"; + + final String commonInferenceIdField = "common-inference-id-field"; + final String mixedTypeField1 = "mixed-type-field-1"; + final String mixedTypeField2 = "mixed-type-field-2"; + final String sparseVectorField = "sparse-vector-field"; + + final TestIndexInfo localIndexInfo = new TestIndexInfo( + localIndexName, +
Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + mixedTypeField1, + semanticTextMapping(commonInferenceId), + mixedTypeField2, + sparseVectorMapping(), + sparseVectorField, + sparseVectorMapping() + ), + Map.of( + mixedTypeField2 + "_doc", + Map.of(mixedTypeField2, generateSparseVectorFieldValue(1.0f)), + sparseVectorField + "_doc", + Map.of(sparseVectorField, generateSparseVectorFieldValue(1.0f)) + ) + ); + final TestIndexInfo remoteIndexInfo = new TestIndexInfo( + remoteIndexName, + Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), + Map.of( + commonInferenceIdField, + semanticTextMapping(commonInferenceId), + mixedTypeField1, + sparseVectorMapping(), + mixedTypeField2, + semanticTextMapping(commonInferenceId), + sparseVectorField, + sparseVectorMapping() + ), + Map.of( + mixedTypeField2 + "_doc", + Map.of(mixedTypeField2, "a"), + sparseVectorField + "_doc", + Map.of(sparseVectorField, generateSparseVectorFieldValue(0.5f)) + ) + ); + setupTwoClusters(localIndexInfo, remoteIndexInfo); + + // Validate that expected cases fail + assertCcsMinimizeRoundTripsFalseFailure.accept(new SparseVectorQueryBuilder(commonInferenceIdField, null, randomAlphaOfLength(5))); + assertCcsMinimizeRoundTripsFalseFailure.accept( + new SparseVectorQueryBuilder(mixedTypeField1, commonInferenceId, randomAlphaOfLength(5)) + ); + + // Validate the expected ccs_minimize_roundtrips=false detection gap and failure mode when querying non-inference fields locally + assertSearchResponse( + new SparseVectorQueryBuilder(mixedTypeField2, commonInferenceId, "foo"), + queryIndices, + List.of(new SearchResult(null, localIndexName, mixedTypeField2 + "_doc")), + new ClusterFailure( + SearchResponse.Cluster.Status.SKIPPED, + Set.of( + new FailureCause( + QueryShardException.class, + "failed to create query: field [mixed-type-field-2] must be type [sparse_vector] but is type [semantic_text]" + ) + ) + ), + 
s -> s.setCcsMinimizeRoundtrips(false) + ); + + // Validate that a CCS sparse vector query functions when only sparse vector fields are queried + assertSearchResponse( + new SparseVectorQueryBuilder(sparseVectorField, commonInferenceId, "foo"), + queryIndices, + List.of( + new SearchResult(null, localIndexName, sparseVectorField + "_doc"), + new SearchResult(REMOTE_CLUSTER, remoteIndexName, sparseVectorField + "_doc") + ), + null, + s -> s.setCcsMinimizeRoundtrips(false) + ); + } +} From 88fafadaf683fa56fd34e574e58bdd300a0e0a36 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 24 Sep 2025 21:20:07 +0000 Subject: [PATCH 33/52] [CI] Update transport version definitions --- .../definitions/referable/semantic_search_ccs_support.csv | 2 +- server/src/main/resources/transport/upper_bounds/8.18.csv | 2 +- server/src/main/resources/transport/upper_bounds/8.19.csv | 2 +- server/src/main/resources/transport/upper_bounds/9.0.csv | 2 +- server/src/main/resources/transport/upper_bounds/9.1.csv | 2 +- server/src/main/resources/transport/upper_bounds/9.2.csv | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv b/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv index e9954a5466961..833feabb55a10 100644 --- a/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv +++ b/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv @@ -1 +1 @@ -9170000 +9171000 diff --git a/server/src/main/resources/transport/upper_bounds/8.18.csv b/server/src/main/resources/transport/upper_bounds/8.18.csv index ffc592e1809ee..266bfbbd3bf78 100644 --- a/server/src/main/resources/transport/upper_bounds/8.18.csv +++ b/server/src/main/resources/transport/upper_bounds/8.18.csv @@ -1 +1 @@ -initial_elasticsearch_8_18_8,8840010 +transform_check_for_dangling_tasks,8840011 diff --git 
a/server/src/main/resources/transport/upper_bounds/8.19.csv b/server/src/main/resources/transport/upper_bounds/8.19.csv index 3cc6f439c5ea5..3600b3f8c633a 100644 --- a/server/src/main/resources/transport/upper_bounds/8.19.csv +++ b/server/src/main/resources/transport/upper_bounds/8.19.csv @@ -1 +1 @@ -initial_elasticsearch_8_19_5,8841069 +transform_check_for_dangling_tasks,8841070 diff --git a/server/src/main/resources/transport/upper_bounds/9.0.csv b/server/src/main/resources/transport/upper_bounds/9.0.csv index 8ad2ed1a4cacf..c11e6837bb813 100644 --- a/server/src/main/resources/transport/upper_bounds/9.0.csv +++ b/server/src/main/resources/transport/upper_bounds/9.0.csv @@ -1 +1 @@ -initial_elasticsearch_9_0_8,9000017 +transform_check_for_dangling_tasks,9000018 diff --git a/server/src/main/resources/transport/upper_bounds/9.1.csv b/server/src/main/resources/transport/upper_bounds/9.1.csv index 1cea5dc4d929b..80b97d85f7511 100644 --- a/server/src/main/resources/transport/upper_bounds/9.1.csv +++ b/server/src/main/resources/transport/upper_bounds/9.1.csv @@ -1 +1 @@ -initial_elasticsearch_9_1_5,9112008 +transform_check_for_dangling_tasks,9112009 diff --git a/server/src/main/resources/transport/upper_bounds/9.2.csv b/server/src/main/resources/transport/upper_bounds/9.2.csv index 6a17f45fe298c..737e05227e381 100644 --- a/server/src/main/resources/transport/upper_bounds/9.2.csv +++ b/server/src/main/resources/transport/upper_bounds/9.2.csv @@ -1 +1 @@ -semantic_search_ccs_support,9170000 +semantic_search_ccs_support,9171000 From 60f9be4bf776370ea55a43f816fa11f3c398b4aa Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 09:38:53 -0400 Subject: [PATCH 34/52] Fix transport version --- .../definitions/referable/semantic_search_ccs_support.csv | 2 +- server/src/main/resources/transport/upper_bounds/9.2.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv b/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv index 833feabb55a10..42d1df167e3b9 100644 --- a/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv +++ b/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv @@ -1 +1 @@ -9171000 +9172000 diff --git a/server/src/main/resources/transport/upper_bounds/9.2.csv b/server/src/main/resources/transport/upper_bounds/9.2.csv index f69c2d7fac9a6..6f43da11fc85d 100644 --- a/server/src/main/resources/transport/upper_bounds/9.2.csv +++ b/server/src/main/resources/transport/upper_bounds/9.2.csv @@ -1 +1 @@ -esql_fuse_linear_operator_status,9171000 +semantic_search_ccs_support,9172000 From fdf1ccf744c92814f23681c563cd7ca6f5baae43 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 09:39:47 -0400 Subject: [PATCH 35/52] Remove IVF_FORMAT feature flag references --- ...ectorQueryBuilderCrossClusterSearchIT.java | 54 ++++--------------- 1 file changed, 9 insertions(+), 45 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/KnnVectorQueryBuilderCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/KnnVectorQueryBuilderCrossClusterSearchIT.java index 8ae0daa5d5c07..bc57db418e91e 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/KnnVectorQueryBuilderCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/KnnVectorQueryBuilderCrossClusterSearchIT.java @@ -23,7 +23,6 @@ import java.util.Set; import java.util.function.BiConsumer; -import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.IVF_FORMAT; import static org.hamcrest.Matchers.equalTo; public class 
KnnVectorQueryBuilderCrossClusterSearchIT extends AbstractSemanticCrossClusterSearchTestCase { @@ -91,14 +90,7 @@ public void testKnnQuery() throws Exception { // Query a field has the same inference ID value across clusters, but with different backing inference services assertSearchResponse( - new KnnVectorQueryBuilder( - commonInferenceIdField, - new TextEmbeddingQueryVectorBuilder(null, "a"), - 10, - 100, - IVF_FORMAT.isEnabled() ? 10f : null, - null - ), + new KnnVectorQueryBuilder(commonInferenceIdField, new TextEmbeddingQueryVectorBuilder(null, "a"), 10, 100, 10f, null), queryIndices, List.of( new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_1"), @@ -108,14 +100,7 @@ public void testKnnQuery() throws Exception { // Query a field that has mixed types across clusters assertSearchResponse( - new KnnVectorQueryBuilder( - mixedTypeField1, - new TextEmbeddingQueryVectorBuilder(localInferenceId, "y"), - 10, - 100, - IVF_FORMAT.isEnabled() ? 10f : null, - null - ), + new KnnVectorQueryBuilder(mixedTypeField1, new TextEmbeddingQueryVectorBuilder(localInferenceId, "y"), 10, 100, 10f, null), queryIndices, List.of( new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2"), @@ -123,14 +108,7 @@ public void testKnnQuery() throws Exception { ) ); assertSearchResponse( - new KnnVectorQueryBuilder( - mixedTypeField2, - new TextEmbeddingQueryVectorBuilder(localInferenceId, "c"), - 10, - 100, - IVF_FORMAT.isEnabled() ? 10f : null, - null - ), + new KnnVectorQueryBuilder(mixedTypeField2, new TextEmbeddingQueryVectorBuilder(localInferenceId, "c"), 10, 100, 10f, null), queryIndices, List.of( new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), @@ -143,7 +121,7 @@ public void testKnnQuery() throws Exception { generateDenseVectorFieldValue(384, DenseVectorFieldMapper.ElementType.FLOAT, -128.0f) ); assertSearchResponse( - new KnnVectorQueryBuilder(mixedTypeField1, queryVector, 10, 100, IVF_FORMAT.isEnabled() ? 
10f : null, null, null), + new KnnVectorQueryBuilder(mixedTypeField1, queryVector, 10, 100, 10f, null, null), queryIndices, List.of( new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2"), @@ -151,7 +129,7 @@ public void testKnnQuery() throws Exception { ) ); assertSearchResponse( - new KnnVectorQueryBuilder(mixedTypeField2, queryVector, 10, 100, IVF_FORMAT.isEnabled() ? 10f : null, null, null), + new KnnVectorQueryBuilder(mixedTypeField2, queryVector, 10, 100, 10f, null, null), queryIndices, List.of( new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3"), @@ -161,14 +139,7 @@ public void testKnnQuery() throws Exception { // Check that omitting the inference ID when querying a remote dense vector field leads to the expected partial failure assertSearchResponse( - new KnnVectorQueryBuilder( - mixedTypeField2, - new TextEmbeddingQueryVectorBuilder(null, "c"), - 10, - 100, - IVF_FORMAT.isEnabled() ? 10f : null, - null - ), + new KnnVectorQueryBuilder(mixedTypeField2, new TextEmbeddingQueryVectorBuilder(null, "c"), 10, 100, 10f, null), queryIndices, List.of(new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3")), new ClusterFailure( @@ -184,7 +155,7 @@ public void testKnnQueryWithCcsMinimizeRoundTripsFalse() throws Exception { final String remoteIndexName = "remote-index"; final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; final BiConsumer assertCcsMinimizeRoundTripsFalseFailure = (f, qvb) -> { - KnnVectorQueryBuilder queryBuilder = new KnnVectorQueryBuilder(f, qvb, 10, 100, IVF_FORMAT.isEnabled() ? 
10f : null, null); + KnnVectorQueryBuilder queryBuilder = new KnnVectorQueryBuilder(f, qvb, 10, 100, 10f, null); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder); SearchRequest searchRequest = new SearchRequest(queryIndices, searchSourceBuilder); @@ -270,14 +241,7 @@ public void testKnnQueryWithCcsMinimizeRoundTripsFalse() throws Exception { // Validate the expected ccs_minimize_roundtrips=false detection gap and failure mode when querying non-inference fields locally assertSearchResponse( - new KnnVectorQueryBuilder( - mixedTypeField2, - new TextEmbeddingQueryVectorBuilder(commonInferenceId, "foo"), - 10, - 100, - IVF_FORMAT.isEnabled() ? 10f : null, - null - ), + new KnnVectorQueryBuilder(mixedTypeField2, new TextEmbeddingQueryVectorBuilder(commonInferenceId, "foo"), 10, 100, 10f, null), queryIndices, List.of(new SearchResult(null, localIndexName, mixedTypeField2 + "_doc")), new ClusterFailure( @@ -299,7 +263,7 @@ public void testKnnQueryWithCcsMinimizeRoundTripsFalse() throws Exception { generateDenseVectorFieldValue(dimensions, DenseVectorFieldMapper.ElementType.FLOAT, 1.0f), 10, 100, - IVF_FORMAT.isEnabled() ? 
10f : null, + 10f, null, null ), From b4721fd930b2fa5d9fe3b9d73b3f5b52e4805d76 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 09:49:27 -0400 Subject: [PATCH 36/52] Knn CCS test refactoring --- ...ectorQueryBuilderCrossClusterSearchIT.java | 67 +++++++++---------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/KnnVectorQueryBuilderCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/KnnVectorQueryBuilderCrossClusterSearchIT.java index bc57db418e91e..e71acf63ed01d 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/KnnVectorQueryBuilderCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/KnnVectorQueryBuilderCrossClusterSearchIT.java @@ -26,11 +26,13 @@ import static org.hamcrest.Matchers.equalTo; public class KnnVectorQueryBuilderCrossClusterSearchIT extends AbstractSemanticCrossClusterSearchTestCase { - public void testKnnQuery() throws Exception { - final String localIndexName = "local-index"; - final String remoteIndexName = "remote-index"; - final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + private static final String LOCAL_INDEX_NAME = "local-index"; + private static final String REMOTE_INDEX_NAME = "remote-index"; + private static final String[] QUERY_INDICES = new String[] { + LOCAL_INDEX_NAME, + fullyQualifiedIndexName(REMOTE_CLUSTER, REMOTE_INDEX_NAME) }; + public void testKnnQuery() throws Exception { final String commonInferenceId = "common-inference-id"; final String localInferenceId = "local-inference-id"; @@ -39,7 +41,7 @@ public void testKnnQuery() throws Exception { final String mixedTypeField2 = "mixed-type-field-2"; final TestIndexInfo localIndexInfo = new TestIndexInfo( - localIndexName, + 
LOCAL_INDEX_NAME, Map.of( commonInferenceId, textEmbeddingServiceSettings(256, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT), @@ -64,7 +66,7 @@ public void testKnnQuery() throws Exception { ) ); final TestIndexInfo remoteIndexInfo = new TestIndexInfo( - remoteIndexName, + REMOTE_INDEX_NAME, Map.of( commonInferenceId, textEmbeddingServiceSettings(384, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT) @@ -91,28 +93,28 @@ public void testKnnQuery() throws Exception { // Query a field has the same inference ID value across clusters, but with different backing inference services assertSearchResponse( new KnnVectorQueryBuilder(commonInferenceIdField, new TextEmbeddingQueryVectorBuilder(null, "a"), 10, 100, 10f, null), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_1"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_1") + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_1"), + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_1") ) ); // Query a field that has mixed types across clusters assertSearchResponse( new KnnVectorQueryBuilder(mixedTypeField1, new TextEmbeddingQueryVectorBuilder(localInferenceId, "y"), 10, 100, 10f, null), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2"), - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2") + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_2"), + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_2") ) ); assertSearchResponse( new KnnVectorQueryBuilder(mixedTypeField2, new TextEmbeddingQueryVectorBuilder(localInferenceId, "c"), 10, 100, 10f, null), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3") + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_3"), + new 
SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_3") ) ); @@ -122,26 +124,26 @@ public void testKnnQuery() throws Exception { ); assertSearchResponse( new KnnVectorQueryBuilder(mixedTypeField1, queryVector, 10, 100, 10f, null, null), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2") + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_2"), + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_2") ) ); assertSearchResponse( new KnnVectorQueryBuilder(mixedTypeField2, queryVector, 10, 100, 10f, null, null), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3"), - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3") + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_3"), + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_3") ) ); // Check that omitting the inference ID when querying a remote dense vector field leads to the expected partial failure assertSearchResponse( new KnnVectorQueryBuilder(mixedTypeField2, new TextEmbeddingQueryVectorBuilder(null, "c"), 10, 100, 10f, null), - queryIndices, - List.of(new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3")), + QUERY_INDICES, + List.of(new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_3")), new ClusterFailure( SearchResponse.Cluster.Status.SKIPPED, Set.of(new FailureCause(IllegalArgumentException.class, "[model_id] must not be null.")) @@ -151,14 +153,11 @@ public void testKnnQuery() throws Exception { } public void testKnnQueryWithCcsMinimizeRoundTripsFalse() throws Exception { - final String localIndexName = "local-index"; - final String remoteIndexName = "remote-index"; - final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; final BiConsumer assertCcsMinimizeRoundTripsFalseFailure = 
(f, qvb) -> { KnnVectorQueryBuilder queryBuilder = new KnnVectorQueryBuilder(f, qvb, 10, 100, 10f, null); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder); - SearchRequest searchRequest = new SearchRequest(queryIndices, searchSourceBuilder); + SearchRequest searchRequest = new SearchRequest(QUERY_INDICES, searchSourceBuilder); searchRequest.setCcsMinimizeRoundtrips(false); IllegalArgumentException e = assertThrows( @@ -188,7 +187,7 @@ public void testKnnQueryWithCcsMinimizeRoundTripsFalse() throws Exception { final String denseVectorField = "dense-vector-field"; final TestIndexInfo localIndexInfo = new TestIndexInfo( - localIndexName, + LOCAL_INDEX_NAME, Map.of(commonInferenceId, commonInferenceIdServiceSettings), Map.of( commonInferenceIdField, @@ -208,7 +207,7 @@ public void testKnnQueryWithCcsMinimizeRoundTripsFalse() throws Exception { ) ); final TestIndexInfo remoteIndexInfo = new TestIndexInfo( - remoteIndexName, + REMOTE_INDEX_NAME, Map.of(commonInferenceId, commonInferenceIdServiceSettings), Map.of( commonInferenceIdField, @@ -242,8 +241,8 @@ public void testKnnQueryWithCcsMinimizeRoundTripsFalse() throws Exception { // Validate the expected ccs_minimize_roundtrips=false detection gap and failure mode when querying non-inference fields locally assertSearchResponse( new KnnVectorQueryBuilder(mixedTypeField2, new TextEmbeddingQueryVectorBuilder(commonInferenceId, "foo"), 10, 100, 10f, null), - queryIndices, - List.of(new SearchResult(null, localIndexName, mixedTypeField2 + "_doc")), + QUERY_INDICES, + List.of(new SearchResult(null, LOCAL_INDEX_NAME, mixedTypeField2 + "_doc")), new ClusterFailure( SearchResponse.Cluster.Status.SKIPPED, Set.of( @@ -267,10 +266,10 @@ public void testKnnQueryWithCcsMinimizeRoundTripsFalse() throws Exception { null, null ), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(null, localIndexName, denseVectorField + "_doc"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, 
denseVectorField + "_doc") + new SearchResult(null, LOCAL_INDEX_NAME, denseVectorField + "_doc"), + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, denseVectorField + "_doc") ), null, s -> s.setCcsMinimizeRoundtrips(false) From 00fe101df3d67dcdbf475118d2f2c1f22f6d742e Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 10:42:47 -0400 Subject: [PATCH 37/52] Adjusted and documented ccsRequest usage in SemanticQueryBuilder --- .../xpack/inference/queries/SemanticQueryBuilder.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index 47fd02459a23f..f97f7e3094972 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -94,6 +94,12 @@ public class SemanticQueryBuilder extends AbstractQueryBuilder inferenceResultsMap; private final Boolean lenient; + + // ccsRequest is only used on the local cluster coordinator node to detect when: + // - The request references a remote index + // - The remote cluster is too old to support semantic search CCS + // It doesn't technically need to be serialized since it is only used for this purpose, but we do so to keep its behavior in line with + // standard query member variables. 
private final boolean ccsRequest; public SemanticQueryBuilder(String fieldName, String query) { @@ -437,7 +443,7 @@ private QueryBuilder doRewriteBuildSemanticQuery(SearchExecutionContext searchEx private SemanticQueryBuilder doRewriteGetInferenceResults(QueryRewriteContext queryRewriteContext) { ResolvedIndices resolvedIndices = queryRewriteContext.getResolvedIndices(); - boolean ccsRequest = this.ccsRequest || resolvedIndices.getRemoteClusterIndices().isEmpty() == false; + boolean ccsRequest = resolvedIndices.getRemoteClusterIndices().isEmpty() == false; if (ccsRequest && queryRewriteContext.isCcsMinimizeRoundTrips() == false) { throw new IllegalArgumentException( NAME + " query does not support cross-cluster search when [ccs_minimize_roundtrips] is false" From cf70b708d0f160fb5400174341ee30b3e7ce2a84 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 10:44:19 -0400 Subject: [PATCH 38/52] Fix typo --- .../xpack/inference/queries/SemanticQueryBuilder.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index f97f7e3094972..361d44f86847a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -452,7 +452,7 @@ private SemanticQueryBuilder doRewriteGetInferenceResults(QueryRewriteContext qu SemanticQueryBuilder rewritten = this; if (queryRewriteContext.hasAsyncActions() == false) { - Set fullyQualifiedInferenceIds = getInferenceIdsForForField( + Set fullyQualifiedInferenceIds = getInferenceIdsForField( resolvedIndices.getConcreteLocalIndicesMetadata().values(), queryRewriteContext.getLocalClusterAlias(), fieldName @@ -546,7 +546,7 @@ 
protected Query doToQuery(SearchExecutionContext context) throws IOException { throw new IllegalStateException(NAME + " should have been rewritten to another query type"); } - private static Set getInferenceIdsForForField( + private static Set getInferenceIdsForField( Collection indexMetadataCollection, String clusterAlias, String fieldName From 9d5c68a8b84260e24b23988b710e2d99c865379c Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 12:05:33 -0400 Subject: [PATCH 39/52] Refactored intercepted query CCS serialization tests --- ...erceptedInferenceQueryBuilderTestCase.java | 117 ++++++++++-------- 1 file changed, 62 insertions(+), 55 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java index f6eb4e9746d0b..678af296a50d1 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java @@ -173,26 +173,29 @@ public void testBwCSerialization() throws Exception { public void testCcsSerialization() throws Exception { final String inferenceField = "semantic_field"; - final var localIndexInferenceFields = Map.of("local-index", Map.of(inferenceField, SPARSE_INFERENCE_ID)); - final var remoteIndices = Map.of("remote-alias", "remote-index"); final T inferenceFieldQuery = createQueryBuilder(inferenceField); final T nonInferenceFieldQuery = createQueryBuilder("non_inference_field"); // Test with the current transport version. This simulates sending the query to a remote cluster that supports semantic search CCS. 
final QueryRewriteContext contextCurrent = createQueryRewriteContext( - localIndexInferenceFields, - remoteIndices, + Map.of("local-index", Map.of(inferenceField, SPARSE_INFERENCE_ID)), + Map.of("remote-alias", "remote-index"), TransportVersion.current(), true ); assertRewriteAndSerializeOnInferenceField(inferenceFieldQuery, contextCurrent, null, null); assertRewriteAndSerializeOnNonInferenceField(nonInferenceFieldQuery, contextCurrent); + } + + public void testCcsSerializationWithMinimizeRoundTripsFalse() throws Exception { + final String inferenceField = "semantic_field"; + final T inferenceFieldQuery = createQueryBuilder(inferenceField); + final T nonInferenceFieldQuery = createQueryBuilder("non_inference_field"); - // Test when ccs_minimize_roundtrips=false final QueryRewriteContext minimizeRoundTripsFalseContext = createQueryRewriteContext( - localIndexInferenceFields, - remoteIndices, + Map.of("local-index", Map.of(inferenceField, SPARSE_INFERENCE_ID)), + Map.of("remote-alias", "remote-index"), TransportVersion.current(), false ); @@ -209,58 +212,62 @@ public void testCcsSerialization() throws Exception { null ); assertRewriteAndSerializeOnNonInferenceField(nonInferenceFieldQuery, minimizeRoundTripsFalseContext); + } - // Test with a transport version prior to semantic search CCS support, but still new enough to use the new interceptors. - // This simulates if one of the local or remote cluster data nodes is slightly out of date. 
- final TransportVersion preCcsVersion = TransportVersionUtils.randomVersionBetween( - random(), - NEW_SEMANTIC_QUERY_INTERCEPTORS, - TransportVersionUtils.getPreviousVersion(SEMANTIC_SEARCH_CCS_SUPPORT) - ); - final QueryRewriteContext preCcsContext = createQueryRewriteContext(localIndexInferenceFields, remoteIndices, preCcsVersion, true); + public void testCcsBwCSerialization() throws Exception { + final String inferenceField = "semantic_field"; + final T inferenceFieldQuery = createQueryBuilder(inferenceField); + final T nonInferenceFieldQuery = createQueryBuilder("non_inference_field"); - assertRewriteAndSerializeOnInferenceField( - inferenceFieldQuery, - preCcsContext, - null, - new IllegalArgumentException( - "One or more nodes does not support " - + inferenceFieldQuery.getName() - + " query cross-cluster search when querying a [" - + SemanticTextFieldMapper.CONTENT_TYPE - + "] field. Please update all nodes to at least Elasticsearch " - + SEMANTIC_SEARCH_CCS_SUPPORT.toReleaseVersion() - + "." - ) - ); - assertRewriteAndSerializeOnNonInferenceField(nonInferenceFieldQuery, preCcsContext); + for (int i = 0; i < 100; i++) { + TransportVersion transportVersion = TransportVersionUtils.randomVersionBetween( + random(), + V_8_15_0, + TransportVersionUtils.getPreviousVersion(TransportVersion.current()) + ); - // Test with a transport version prior to the new query interceptors. This simulates if one of the local cluster data nodes is more - // out of date. 
- final TransportVersion legacyInterceptorsVersion = TransportVersionUtils.randomVersionBetween( - random(), - V_8_15_0, - TransportVersionUtils.getPreviousVersion(NEW_SEMANTIC_QUERY_INTERCEPTORS) - ); - final QueryRewriteContext legacyInterceptorsContext = createQueryRewriteContext( - localIndexInferenceFields, - remoteIndices, - legacyInterceptorsVersion, - true - ); + QueryRewriteContext queryRewriteContext = createQueryRewriteContext( + Map.of("local-index", Map.of(inferenceField, SPARSE_INFERENCE_ID)), + Map.of("remote-alias", "remote-index"), + transportVersion, + true + ); - assertRewriteAndSerializeOnInferenceField( - inferenceFieldQuery, - legacyInterceptorsContext, - new IllegalArgumentException( - inferenceFieldQuery.getName() - + " query does not support cross-cluster search when querying a [" - + SemanticTextFieldMapper.CONTENT_TYPE - + "] field in a mixed-version cluster" - ), - null - ); - assertRewriteAndSerializeOnNonInferenceField(nonInferenceFieldQuery, legacyInterceptorsContext); + Exception expectedRewriteException = null; + Exception expectedSerializationException = null; + if (transportVersion.supports(SEMANTIC_SEARCH_CCS_SUPPORT) == false) { + if (transportVersion.supports(NEW_SEMANTIC_QUERY_INTERCEPTORS)) { + // Transport version is new enough to support the new interceptors, but not new enough to support CCS. This simulates if + // one of the local or remote cluster data nodes is out of date. + expectedSerializationException = new IllegalArgumentException( + "One or more nodes does not support " + + inferenceFieldQuery.getName() + + " query cross-cluster search when querying a [" + + SemanticTextFieldMapper.CONTENT_TYPE + + "] field. Please update all nodes to at least Elasticsearch " + + SEMANTIC_SEARCH_CCS_SUPPORT.toReleaseVersion() + + "." + ); + } else { + // Transport version indicates usage of the legacy interceptors. 
This simulates if one of the local cluster data nodes + // is out of date to the point that it can't use the new interceptors. + expectedRewriteException = new IllegalArgumentException( + inferenceFieldQuery.getName() + + " query does not support cross-cluster search when querying a [" + + SemanticTextFieldMapper.CONTENT_TYPE + + "] field in a mixed-version cluster" + ); + } + } + + assertRewriteAndSerializeOnInferenceField( + inferenceFieldQuery, + queryRewriteContext, + expectedRewriteException, + expectedSerializationException + ); + assertRewriteAndSerializeOnNonInferenceField(nonInferenceFieldQuery, queryRewriteContext); + } } public void testSerializationRemoteClusterInferenceResults() throws Exception { From bab63a538e0b87285e70195651b1a4ad14289316 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 12:18:46 -0400 Subject: [PATCH 40/52] Refactored semantic query CCS serialization tests --- .../queries/SemanticQueryBuilderTests.java | 42 ++++++++++++------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java index cc1182132fc3f..7804283cb5b91 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java @@ -504,24 +504,36 @@ public void testSerializationBwc() throws IOException { public void testSerializationCcs() throws Exception { SemanticQueryBuilder originalQuery = new SemanticQueryBuilder(randomAlphaOfLength(5), randomAlphaOfLength(5), null, Map.of(), true); - - // Deserialize using the current transport version. This simulates sending the query to a remote cluster that supports semantic - // search CCS. 
QueryBuilder deserializedQuery = copyNamedWriteable(originalQuery, namedWriteableRegistry(), QueryBuilder.class); assertThat(deserializedQuery, equalTo(originalQuery)); + } - // Deserialize using a transport version prior to semantic search CCS support. This simulates sending the query to a remote cluster - // that does *not* support semantic search CCS. - TransportVersion ccsUnsupportedVersion = TransportVersionUtils.randomVersionBetween( - random(), - originalQuery.getMinimalSupportedVersion(), - TransportVersionUtils.getPreviousVersion(SEMANTIC_SEARCH_CCS_SUPPORT) - ); - IllegalArgumentException e = assertThrows( - IllegalArgumentException.class, - () -> copyNamedWriteable(originalQuery, namedWriteableRegistry(), QueryBuilder.class, ccsUnsupportedVersion) - ); - assertThat(e.getMessage(), containsString("One or more nodes does not support semantic query cross-cluster search")); + public void testSerializationCcsBwc() throws Exception { + SemanticQueryBuilder originalQuery = new SemanticQueryBuilder(randomAlphaOfLength(5), randomAlphaOfLength(5), null, Map.of(), true); + + for (int i = 0; i < 100; i++) { + TransportVersion transportVersion = TransportVersionUtils.randomVersionBetween( + random(), + originalQuery.getMinimalSupportedVersion(), + TransportVersionUtils.getPreviousVersion(TransportVersion.current()) + ); + + if (transportVersion.supports(SEMANTIC_SEARCH_CCS_SUPPORT)) { + QueryBuilder deserializedQuery = copyNamedWriteable( + originalQuery, + namedWriteableRegistry(), + QueryBuilder.class, + transportVersion + ); + assertThat(deserializedQuery, equalTo(originalQuery)); + } else { + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> copyNamedWriteable(originalQuery, namedWriteableRegistry(), QueryBuilder.class, transportVersion) + ); + assertThat(e.getMessage(), containsString("One or more nodes does not support semantic query cross-cluster search")); + } + } } public void testToXContent() throws IOException { From 
d9be362b3762e615a529563470ce56d64f2e1f3e Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 12:31:05 -0400 Subject: [PATCH 41/52] Updated intercepted queries to add a constructor for setting only the inference results map --- .../InterceptedInferenceKnnVectorQueryBuilder.java | 9 ++++++++- .../queries/InterceptedInferenceMatchQueryBuilder.java | 9 ++++++++- .../queries/InterceptedInferenceQueryBuilder.java | 6 +++++- .../InterceptedInferenceSparseVectorQueryBuilder.java | 9 ++++++++- .../InterceptedInferenceKnnVectorQueryBuilderTests.java | 6 +----- .../InterceptedInferenceMatchQueryBuilderTests.java | 6 +----- ...nterceptedInferenceSparseVectorQueryBuilderTests.java | 6 +----- 7 files changed, 32 insertions(+), 19 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilder.java index 721bd3e5c1270..6b168d4f4c8a3 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilder.java @@ -46,11 +46,18 @@ public InterceptedInferenceKnnVectorQueryBuilder(KnnVectorQueryBuilder originalQ super(originalQuery); } + public InterceptedInferenceKnnVectorQueryBuilder( + KnnVectorQueryBuilder originalQuery, + Map inferenceResultsMap + ) { + super(originalQuery, inferenceResultsMap); + } + public InterceptedInferenceKnnVectorQueryBuilder(StreamInput in) throws IOException { super(in); } - InterceptedInferenceKnnVectorQueryBuilder( + private InterceptedInferenceKnnVectorQueryBuilder( InterceptedInferenceQueryBuilder other, Map inferenceResultsMap, boolean ccsRequest diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilder.java index 569d5bee3cfe6..39066ed40f19a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilder.java @@ -31,11 +31,18 @@ public InterceptedInferenceMatchQueryBuilder(MatchQueryBuilder originalQuery) { super(originalQuery); } + public InterceptedInferenceMatchQueryBuilder( + MatchQueryBuilder originalQuery, + Map inferenceResultsMap + ) { + super(originalQuery, inferenceResultsMap); + } + public InterceptedInferenceMatchQueryBuilder(StreamInput in) throws IOException { super(in); } - InterceptedInferenceMatchQueryBuilder( + private InterceptedInferenceMatchQueryBuilder( InterceptedInferenceQueryBuilder other, Map inferenceResultsMap, boolean ccsRequest diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceQueryBuilder.java index 7e6fe8a0befe4..8774d35f17ade 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceQueryBuilder.java @@ -70,9 +70,13 @@ public abstract class InterceptedInferenceQueryBuilder inferenceResultsMap) { Objects.requireNonNull(originalQuery, "original query must not be null"); this.originalQuery = originalQuery; - this.inferenceResultsMap = null; + this.inferenceResultsMap = inferenceResultsMap != null ? 
Map.copyOf(inferenceResultsMap) : null; this.ccsRequest = false; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilder.java index ee85f9ab3f4ad..dab789c0223e7 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilder.java @@ -45,11 +45,18 @@ public InterceptedInferenceSparseVectorQueryBuilder(SparseVectorQueryBuilder ori super(originalQuery); } + public InterceptedInferenceSparseVectorQueryBuilder( + SparseVectorQueryBuilder originalQuery, + Map inferenceResultsMap + ) { + super(originalQuery, inferenceResultsMap); + } + public InterceptedInferenceSparseVectorQueryBuilder(StreamInput in) throws IOException { super(in); } - InterceptedInferenceSparseVectorQueryBuilder( + private InterceptedInferenceSparseVectorQueryBuilder( InterceptedInferenceQueryBuilder other, Map inferenceResultsMap, boolean ccsRequest diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilderTests.java index ec92f41dbdb22..329444595ef3e 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilderTests.java @@ -61,11 +61,7 @@ protected InterceptedInferenceQueryBuilder createIntercep KnnVectorQueryBuilder originalQuery, Map inferenceResultsMap ) { - return 
new InterceptedInferenceKnnVectorQueryBuilder( - new InterceptedInferenceKnnVectorQueryBuilder(originalQuery), - inferenceResultsMap, - false - ); + return new InterceptedInferenceKnnVectorQueryBuilder(originalQuery, inferenceResultsMap); } @Override diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java index 63fbb5279eb94..ea7bfa2857f04 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java @@ -33,11 +33,7 @@ protected InterceptedInferenceQueryBuilder createInterceptedQ MatchQueryBuilder originalQuery, Map inferenceResultsMap ) { - return new InterceptedInferenceMatchQueryBuilder( - new InterceptedInferenceMatchQueryBuilder(originalQuery), - inferenceResultsMap, - false - ); + return new InterceptedInferenceMatchQueryBuilder(originalQuery, inferenceResultsMap); } @Override diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilderTests.java index f594a0f034ce9..9a44222b16cc3 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceSparseVectorQueryBuilderTests.java @@ -61,11 +61,7 @@ protected InterceptedInferenceQueryBuilder createInter SparseVectorQueryBuilder originalQuery, Map inferenceResultsMap ) { - return new 
InterceptedInferenceSparseVectorQueryBuilder( - new InterceptedInferenceSparseVectorQueryBuilder(originalQuery), - inferenceResultsMap, - false - ); + return new InterceptedInferenceSparseVectorQueryBuilder(originalQuery, inferenceResultsMap); } @Override From 441eba42a8af06e4d22eafb569047312b2bcc804 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 12:39:19 -0400 Subject: [PATCH 42/52] Simplify intercepted knn query getInferenceIdOverride implementation --- .../queries/InterceptedInferenceKnnVectorQueryBuilder.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilder.java index 6b168d4f4c8a3..0696185e11650 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceKnnVectorQueryBuilder.java @@ -83,13 +83,8 @@ protected String getQuery() { @Override protected FullyQualifiedInferenceId getInferenceIdOverride() { - FullyQualifiedInferenceId override = null; String modelId = getQueryVectorBuilderModelId(); - if (modelId != null) { - override = new FullyQualifiedInferenceId(LOCAL_CLUSTER_GROUP_KEY, modelId); - } - - return override; + return modelId != null ? 
new FullyQualifiedInferenceId(LOCAL_CLUSTER_GROUP_KEY, modelId) : null; } @Override From 407405270e40aed0cc1177e15b093d4df5ffd40d Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 12:59:52 -0400 Subject: [PATCH 43/52] Updated error message --- .../queries/LegacySemanticQueryRewriteInterceptor.java | 6 +++++- .../AbstractInterceptedInferenceQueryBuilderTestCase.java | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/LegacySemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/LegacySemanticQueryRewriteInterceptor.java index 49fa9e7cfd805..4052e93559437 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/LegacySemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/LegacySemanticQueryRewriteInterceptor.java @@ -25,6 +25,8 @@ import java.util.Map; import java.util.stream.Collectors; +import static org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder.SEMANTIC_SEARCH_CCS_SUPPORT; + /** * Intercepts and adapts a query to be rewritten to work seamlessly on a semantic_text field. */ @@ -63,7 +65,9 @@ public QueryBuilder interceptAndRewrite(QueryRewriteContext context, QueryBuilde getQueryName() + " query does not support cross-cluster search when querying a [" + SemanticTextFieldMapper.CONTENT_TYPE - + "] field in a mixed-version cluster" + + "] field in a mixed-version cluster. Please update all nodes to at least Elasticsearch " + + SEMANTIC_SEARCH_CCS_SUPPORT.toReleaseVersion() + + "." 
); } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java index 678af296a50d1..f999c0f89ae90 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/AbstractInterceptedInferenceQueryBuilderTestCase.java @@ -255,7 +255,9 @@ public void testCcsBwCSerialization() throws Exception { inferenceFieldQuery.getName() + " query does not support cross-cluster search when querying a [" + SemanticTextFieldMapper.CONTENT_TYPE - + "] field in a mixed-version cluster" + + "] field in a mixed-version cluster. Please update all nodes to at least Elasticsearch " + + SEMANTIC_SEARCH_CCS_SUPPORT.toReleaseVersion() + + "." 
); } } From 2e4979247ebe26fa3c2b88567ac3ef494a6a1835 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 13:07:21 -0400 Subject: [PATCH 44/52] Added convenience constructor for semantic query --- .../InterceptedInferenceMatchQueryBuilder.java | 2 +- .../inference/queries/SemanticQueryBuilder.java | 9 +++++++++ .../InterceptedInferenceMatchQueryBuilderTests.java | 3 +-- .../inference/queries/SemanticQueryBuilderTests.java | 12 ++++-------- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilder.java index 39066ed40f19a..69cbf665cc1f8 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilder.java @@ -86,7 +86,7 @@ protected QueryBuilder queryFields( if (fieldType == null) { rewritten = new MatchNoneQueryBuilder(); } else if (fieldType instanceof SemanticTextFieldMapper.SemanticTextFieldType) { - rewritten = new SemanticQueryBuilder(getField(), getQuery(), null, inferenceResultsMap, false).boost(originalQuery.boost()) + rewritten = new SemanticQueryBuilder(getField(), getQuery(), null, inferenceResultsMap).boost(originalQuery.boost()) .queryName(originalQuery.queryName()); } else { rewritten = originalQuery; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index 361d44f86847a..c0df149edc449 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -110,6 +110,15 @@ public SemanticQueryBuilder(String fieldName, String query, Boolean lenient) { this(fieldName, query, lenient, null, false); } + protected SemanticQueryBuilder( + String fieldName, + String query, + Boolean lenient, + Map inferenceResultsMap + ) { + this(fieldName, query, lenient, inferenceResultsMap, false); + } + protected SemanticQueryBuilder( String fieldName, String query, diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java index ea7bfa2857f04..c7a680f15d6d1 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/InterceptedInferenceMatchQueryBuilderTests.java @@ -134,8 +134,7 @@ public void testInterceptAndRewrite() throws Exception { field, queryText, null, - coordinatorIntercepted.inferenceResultsMap, - false + coordinatorIntercepted.inferenceResultsMap ).boost(matchQuery.boost()).queryName(matchQuery.queryName()); // Perform data node rewrite on test index 1 diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java index 7804283cb5b91..b2d7218720a57 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java @@ -395,8 +395,7 @@ public void testSerializationRemoteClusterInferenceResults() throws 
IOException randomAlphaOfLength(5), randomAlphaOfLength(5), null, - inferenceResultsMap, - false + inferenceResultsMap ); QueryBuilder deserializedQuery = copyNamedWriteable(originalQuery, namedWriteableRegistry(), QueryBuilder.class); @@ -424,15 +423,13 @@ public void testSerializationBwc() throws IOException { fieldName, query, null, - Map.of(new FullyQualifiedInferenceId(LOCAL_CLUSTER_GROUP_KEY, randomAlphaOfLength(5)), inferenceResults), - false + Map.of(new FullyQualifiedInferenceId(LOCAL_CLUSTER_GROUP_KEY, randomAlphaOfLength(5)), inferenceResults) ); SemanticQueryBuilder bwcQuery = new SemanticQueryBuilder( fieldName, query, null, - SemanticQueryBuilder.buildSingleResultInferenceResultsMap(inferenceResults), - false + SemanticQueryBuilder.buildSingleResultInferenceResultsMap(inferenceResults) ); QueryBuilder deserializedQuery = copyNamedWriteable(originalQuery, namedWriteableRegistry(), QueryBuilder.class, version); @@ -465,8 +462,7 @@ public void testSerializationBwc() throws IOException { randomAlphaOfLength(5), randomAlphaOfLength(5), null, - inferenceResultsMap, - false + inferenceResultsMap ); String expectedErrorMessage; From e0a71875eec915dd8e7f0032e0f8b569a5db6d45 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 13:14:00 -0400 Subject: [PATCH 45/52] Adjusted error message --- .../xpack/inference/queries/FullyQualifiedInferenceId.java | 2 +- .../xpack/inference/queries/SemanticQueryBuilder.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/FullyQualifiedInferenceId.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/FullyQualifiedInferenceId.java index a1b7983847c68..d1cc7aa3aa5c7 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/FullyQualifiedInferenceId.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/FullyQualifiedInferenceId.java @@ -32,6 +32,6 @@ public void writeTo(StreamOutput out) throws IOException { @Override public String toString() { - return "{" + "clusterAlias=" + clusterAlias + ", inferenceId=" + inferenceId + "}"; + return "{clusterAlias=" + clusterAlias + ", inferenceId=" + inferenceId + "}"; } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index c0df149edc449..97d0caef98d0e 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -283,8 +283,8 @@ static Map getInferenceResults( if (fullyQualifiedInferenceId.clusterAlias().equals(queryRewriteContext.getLocalClusterAlias()) == false) { // Catch if we are missing inference results that should have been generated on another cluster throw new IllegalStateException( - "Cannot get inference results for cluster [" - + fullyQualifiedInferenceId.clusterAlias() + "Cannot get inference results for inference endpoint [" + + fullyQualifiedInferenceId + "] on cluster [" + queryRewriteContext.getLocalClusterAlias() + "]" From da5ebb6002ed5719fcd5ba34f556b3b008384285 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 13:23:15 -0400 Subject: [PATCH 46/52] Update transport version --- .../definitions/referable/semantic_search_ccs_support.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv b/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv index 42d1df167e3b9..2c8dc4fc73614 100644 --- 
a/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv +++ b/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv @@ -1 +1 @@ -9172000 +9173000 From b6e6e6c85ba60dcfe958a1ba4ed33c4c8ee09c11 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 13:41:08 -0400 Subject: [PATCH 47/52] Match query CCS test refactoring --- ...MatchQueryBuilderCrossClusterSearchIT.java | 57 +++++++++---------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/MatchQueryBuilderCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/MatchQueryBuilderCrossClusterSearchIT.java index 6868032fd6fa0..6a38982c77da7 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/MatchQueryBuilderCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/MatchQueryBuilderCrossClusterSearchIT.java @@ -24,11 +24,13 @@ import static org.hamcrest.Matchers.equalTo; public class MatchQueryBuilderCrossClusterSearchIT extends AbstractSemanticCrossClusterSearchTestCase { - public void testMatchQuery() throws Exception { - final String localIndexName = "local-index"; - final String remoteIndexName = "remote-index"; - final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + private static final String LOCAL_INDEX_NAME = "local-index"; + private static final String REMOTE_INDEX_NAME = "remote-index"; + private static final String[] QUERY_INDICES = new String[] { + LOCAL_INDEX_NAME, + fullyQualifiedIndexName(REMOTE_CLUSTER, REMOTE_INDEX_NAME) }; + public void testMatchQuery() throws Exception { final String commonInferenceId = "common-inference-id"; final String localInferenceId = "local-inference-id"; final String 
remoteInferenceId = "remote-inference-id"; @@ -39,7 +41,7 @@ public void testMatchQuery() throws Exception { final String mixedTypeField2 = "mixed-type-field-2"; final TestIndexInfo localIndexInfo = new TestIndexInfo( - localIndexName, + LOCAL_INDEX_NAME, Map.of(commonInferenceId, sparseEmbeddingServiceSettings(), localInferenceId, sparseEmbeddingServiceSettings()), Map.of( commonInferenceIdField, @@ -63,7 +65,7 @@ public void testMatchQuery() throws Exception { ) ); final TestIndexInfo remoteIndexInfo = new TestIndexInfo( - remoteIndexName, + REMOTE_INDEX_NAME, Map.of( commonInferenceId, textEmbeddingServiceSettings(256, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT), @@ -96,49 +98,46 @@ public void testMatchQuery() throws Exception { // Query a field has the same inference ID value across clusters, but with different backing inference services assertSearchResponse( new MatchQueryBuilder(commonInferenceIdField, "a"), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_1"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_1") + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_1"), + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_1") ) ); // Query a field that has different inference ID values across clusters assertSearchResponse( new MatchQueryBuilder(variableInferenceIdField, "b"), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2") + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_2"), + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_2") ) ); // Query a field that has mixed types across clusters assertSearchResponse( new MatchQueryBuilder(mixedTypeField1, "y"), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), - new 
SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3") + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_3"), + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_3") ) ); assertSearchResponse( new MatchQueryBuilder(mixedTypeField2, "d"), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_4"), - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_4") + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_4"), + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_4") ) ); } public void testMatchQueryWithCcsMinimizeRoundTripsFalse() throws Exception { - final String localIndexName = "local-index"; - final String remoteIndexName = "remote-index"; - final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; final Consumer assertCcsMinimizeRoundTripsFalseFailure = q -> { SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(q); - SearchRequest searchRequest = new SearchRequest(queryIndices, searchSourceBuilder); + SearchRequest searchRequest = new SearchRequest(QUERY_INDICES, searchSourceBuilder); searchRequest.setCcsMinimizeRoundtrips(false); IllegalArgumentException e = assertThrows( @@ -162,7 +161,7 @@ public void testMatchQueryWithCcsMinimizeRoundTripsFalse() throws Exception { final String textField = "text-field"; final TestIndexInfo localIndexInfo = new TestIndexInfo( - localIndexName, + LOCAL_INDEX_NAME, Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), Map.of( commonInferenceIdField, @@ -177,7 +176,7 @@ public void testMatchQueryWithCcsMinimizeRoundTripsFalse() throws Exception { Map.of(mixedTypeField2 + "_doc", Map.of(mixedTypeField2, "a"), textField + "_doc", Map.of(textField, "b b b")) ); final TestIndexInfo remoteIndexInfo = new TestIndexInfo( - remoteIndexName, + REMOTE_INDEX_NAME, Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), Map.of( 
commonInferenceIdField, @@ -200,8 +199,8 @@ public void testMatchQueryWithCcsMinimizeRoundTripsFalse() throws Exception { // Validate the expected ccs_minimize_roundtrips=false detection gap and failure mode when querying non-inference fields locally assertSearchResponse( new MatchQueryBuilder(mixedTypeField2, "a"), - queryIndices, - List.of(new SearchResult(null, localIndexName, mixedTypeField2 + "_doc")), + QUERY_INDICES, + List.of(new SearchResult(null, LOCAL_INDEX_NAME, mixedTypeField2 + "_doc")), new ClusterFailure( SearchResponse.Cluster.Status.SKIPPED, Set.of( @@ -217,10 +216,10 @@ public void testMatchQueryWithCcsMinimizeRoundTripsFalse() throws Exception { // Validate that a CCS match query functions when only text fields are queried assertSearchResponse( new MatchQueryBuilder(textField, "b"), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(null, localIndexName, textField + "_doc"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, textField + "_doc") + new SearchResult(null, LOCAL_INDEX_NAME, textField + "_doc"), + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, textField + "_doc") ), null, s -> s.setCcsMinimizeRoundtrips(false) From c124e02665523df6308c8edd2f52709aab35e1d3 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 14:25:09 -0400 Subject: [PATCH 48/52] Integrated index boosts into CCS integration tests --- ...actSemanticCrossClusterSearchTestCase.java | 20 ++++++++++++++++--- ...ectorQueryBuilderCrossClusterSearchIT.java | 9 +++++---- ...MatchQueryBuilderCrossClusterSearchIT.java | 9 +++++---- ...anticQueryBuilderCrossClusterSearchIT.java | 14 +++++++++---- ...ectorQueryBuilderCrossClusterSearchIT.java | 12 ++++++++--- 5 files changed, 46 insertions(+), 18 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/AbstractSemanticCrossClusterSearchTestCase.java 
b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/AbstractSemanticCrossClusterSearchTestCase.java index 1a9c87205c3d8..685453fa77c78 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/AbstractSemanticCrossClusterSearchTestCase.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/AbstractSemanticCrossClusterSearchTestCase.java @@ -13,6 +13,7 @@ import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.TransportOpenPointInTimeAction; +import org.elasticsearch.action.support.IndicesOptions; import org.elasticsearch.action.support.broadcast.BroadcastResponse; import org.elasticsearch.client.internal.Client; import org.elasticsearch.common.bytes.BytesReference; @@ -174,20 +175,23 @@ protected static void createInferenceEndpoint(Client client, TaskType taskType, assertThat(responseFuture.actionGet(TEST_REQUEST_TIMEOUT).getModel().getInferenceEntityId(), equalTo(inferenceId)); } - protected void assertSearchResponse(QueryBuilder queryBuilder, String[] indices, List expectedSearchResults) + protected void assertSearchResponse(QueryBuilder queryBuilder, List indices, List expectedSearchResults) throws Exception { assertSearchResponse(queryBuilder, indices, expectedSearchResults, null, null); } protected void assertSearchResponse( QueryBuilder queryBuilder, - String[] indices, + List indices, List expectedSearchResults, ClusterFailure expectedRemoteFailure, Consumer searchRequestModifier ) throws Exception { SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder).size(expectedSearchResults.size()); - SearchRequest searchRequest = new SearchRequest(indices, searchSourceBuilder); + indices.forEach(i -> searchSourceBuilder.indexBoost(i.index(), i.boost())); + + SearchRequest searchRequest = new SearchRequest(convertToArray(indices), 
searchSourceBuilder); + searchRequest.indicesOptions(IndicesOptions.LENIENT_EXPAND_OPEN); if (searchRequestModifier != null) { searchRequestModifier.accept(searchRequest); } @@ -280,6 +284,10 @@ protected static Map generateSparseVectorFieldValue(float weight) return Map.of("feature_0", weight); } + protected static String[] convertToArray(List indices) { + return indices.stream().map(IndexWithBoost::index).toArray(String[]::new); + } + public static class FakeMlPlugin extends Plugin implements ActionPlugin, SearchPlugin { @Override public List getNamedWriteables() { @@ -320,4 +328,10 @@ protected record SearchResult(@Nullable String clusterAlias, String index, Strin protected record FailureCause(Class causeClass, String message) {} protected record ClusterFailure(SearchResponse.Cluster.Status status, Set failures) {} + + protected record IndexWithBoost(String index, float boost) { + public IndexWithBoost(String index) { + this(index, 1.0f); + } + } } diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/KnnVectorQueryBuilderCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/KnnVectorQueryBuilderCrossClusterSearchIT.java index e71acf63ed01d..b54d7afe08714 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/KnnVectorQueryBuilderCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/KnnVectorQueryBuilderCrossClusterSearchIT.java @@ -28,9 +28,10 @@ public class KnnVectorQueryBuilderCrossClusterSearchIT extends AbstractSemanticCrossClusterSearchTestCase { private static final String LOCAL_INDEX_NAME = "local-index"; private static final String REMOTE_INDEX_NAME = "remote-index"; - private static final String[] QUERY_INDICES = new String[] { - LOCAL_INDEX_NAME, - fullyQualifiedIndexName(REMOTE_CLUSTER, REMOTE_INDEX_NAME) }; + private static final List QUERY_INDICES = 
List.of( + new IndexWithBoost(LOCAL_INDEX_NAME), + new IndexWithBoost(fullyQualifiedIndexName(REMOTE_CLUSTER, REMOTE_INDEX_NAME)) + ); public void testKnnQuery() throws Exception { final String commonInferenceId = "common-inference-id"; @@ -157,7 +158,7 @@ public void testKnnQueryWithCcsMinimizeRoundTripsFalse() throws Exception { KnnVectorQueryBuilder queryBuilder = new KnnVectorQueryBuilder(f, qvb, 10, 100, 10f, null); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder); - SearchRequest searchRequest = new SearchRequest(QUERY_INDICES, searchSourceBuilder); + SearchRequest searchRequest = new SearchRequest(convertToArray(QUERY_INDICES), searchSourceBuilder); searchRequest.setCcsMinimizeRoundtrips(false); IllegalArgumentException e = assertThrows( diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/MatchQueryBuilderCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/MatchQueryBuilderCrossClusterSearchIT.java index 6a38982c77da7..0a065ffc54733 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/MatchQueryBuilderCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/MatchQueryBuilderCrossClusterSearchIT.java @@ -26,9 +26,10 @@ public class MatchQueryBuilderCrossClusterSearchIT extends AbstractSemanticCrossClusterSearchTestCase { private static final String LOCAL_INDEX_NAME = "local-index"; private static final String REMOTE_INDEX_NAME = "remote-index"; - private static final String[] QUERY_INDICES = new String[] { - LOCAL_INDEX_NAME, - fullyQualifiedIndexName(REMOTE_CLUSTER, REMOTE_INDEX_NAME) }; + private static final List QUERY_INDICES = List.of( + new IndexWithBoost(LOCAL_INDEX_NAME), + new IndexWithBoost(fullyQualifiedIndexName(REMOTE_CLUSTER, REMOTE_INDEX_NAME)) + ); public void testMatchQuery() throws Exception { final String 
commonInferenceId = "common-inference-id"; @@ -137,7 +138,7 @@ public void testMatchQuery() throws Exception { public void testMatchQueryWithCcsMinimizeRoundTripsFalse() throws Exception { final Consumer assertCcsMinimizeRoundTripsFalseFailure = q -> { SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(q); - SearchRequest searchRequest = new SearchRequest(QUERY_INDICES, searchSourceBuilder); + SearchRequest searchRequest = new SearchRequest(convertToArray(QUERY_INDICES), searchSourceBuilder); searchRequest.setCcsMinimizeRoundtrips(false); IllegalArgumentException e = assertThrows( diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticQueryBuilderCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticQueryBuilderCrossClusterSearchIT.java index c4f4edb2ff543..623dccd220ee9 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticQueryBuilderCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticQueryBuilderCrossClusterSearchIT.java @@ -26,7 +26,10 @@ public class SemanticQueryBuilderCrossClusterSearchIT extends AbstractSemanticCr public void testSemanticQuery() throws Exception { final String localIndexName = "local-index"; final String remoteIndexName = "remote-index"; - final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + final List queryIndices = List.of( + new IndexWithBoost(localIndexName), + new IndexWithBoost(fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName)) + ); final String commonInferenceId = "common-inference-id"; final String localInferenceId = "local-inference-id"; @@ -88,7 +91,10 @@ public void testSemanticQuery() throws Exception { public void testSemanticQueryWithCcMinimizeRoundTripsFalse() throws Exception { final String 
localIndexName = "local-index"; final String remoteIndexName = "remote-index"; - final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + final List queryIndices = List.of( + new IndexWithBoost(localIndexName), + new IndexWithBoost(fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName)) + ); final SemanticQueryBuilder queryBuilder = new SemanticQueryBuilder("foo", "bar"); final Consumer assertCcsMinimizeRoundTripsFalseFailure = s -> { IllegalArgumentException e = assertThrows( @@ -107,12 +113,12 @@ public void testSemanticQueryWithCcMinimizeRoundTripsFalse() throws Exception { // Explicitly set ccs_minimize_roundtrips=false in the search request SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder); - SearchRequest searchRequestWithCcMinimizeRoundTripsFalse = new SearchRequest(queryIndices, searchSourceBuilder); + SearchRequest searchRequestWithCcMinimizeRoundTripsFalse = new SearchRequest(convertToArray(queryIndices), searchSourceBuilder); searchRequestWithCcMinimizeRoundTripsFalse.setCcsMinimizeRoundtrips(false); assertCcsMinimizeRoundTripsFalseFailure.accept(searchRequestWithCcMinimizeRoundTripsFalse); // Using a point in time implicitly sets ccs_minimize_roundtrips=false - BytesReference pitId = openPointInTime(queryIndices, TimeValue.timeValueMinutes(2)); + BytesReference pitId = openPointInTime(convertToArray(queryIndices), TimeValue.timeValueMinutes(2)); SearchSourceBuilder searchSourceBuilderWithPit = new SearchSourceBuilder().query(queryBuilder) .pointInTimeBuilder(new PointInTimeBuilder(pitId)); SearchRequest searchRequestWithPit = new SearchRequest().source(searchSourceBuilderWithPit); diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SparseVectorQueryBuilderCrossClusterSearchIT.java 
b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SparseVectorQueryBuilderCrossClusterSearchIT.java index 30ed1d514627e..db1dfa57eb5e0 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SparseVectorQueryBuilderCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SparseVectorQueryBuilderCrossClusterSearchIT.java @@ -26,7 +26,10 @@ public class SparseVectorQueryBuilderCrossClusterSearchIT extends AbstractSemant public void testSparseVectorQuery() throws Exception { final String localIndexName = "local-index"; final String remoteIndexName = "remote-index"; - final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + final List queryIndices = List.of( + new IndexWithBoost(localIndexName), + new IndexWithBoost(fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName)) + ); final String commonInferenceId = "common-inference-id"; @@ -142,10 +145,13 @@ public void testSparseVectorQuery() throws Exception { public void testSparseVectorQueryWithCcsMinimizeRoundTripsFalse() throws Exception { final String localIndexName = "local-index"; final String remoteIndexName = "remote-index"; - final String[] queryIndices = new String[] { localIndexName, fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName) }; + final List queryIndices = List.of( + new IndexWithBoost(localIndexName), + new IndexWithBoost(fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName)) + ); final Consumer assertCcsMinimizeRoundTripsFalseFailure = q -> { SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(q); - SearchRequest searchRequest = new SearchRequest(queryIndices, searchSourceBuilder); + SearchRequest searchRequest = new SearchRequest(convertToArray(queryIndices), searchSourceBuilder); searchRequest.setCcsMinimizeRoundtrips(false); IllegalArgumentException e = assertThrows( From 
825bc9a4c84b623df858a3f13f3af91511fc17b2 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 14:58:24 -0400 Subject: [PATCH 49/52] Updated match query CCS tests to use a common cluster configuration --- ...MatchQueryBuilderCrossClusterSearchIT.java | 230 +++++++++--------- 1 file changed, 109 insertions(+), 121 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/MatchQueryBuilderCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/MatchQueryBuilderCrossClusterSearchIT.java index 0a065ffc54733..a83f7fa80e461 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/MatchQueryBuilderCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/MatchQueryBuilderCrossClusterSearchIT.java @@ -15,7 +15,9 @@ import org.elasticsearch.index.query.QueryShardException; import org.elasticsearch.inference.SimilarityMeasure; import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.junit.Before; +import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Set; @@ -26,111 +28,72 @@ public class MatchQueryBuilderCrossClusterSearchIT extends AbstractSemanticCrossClusterSearchTestCase { private static final String LOCAL_INDEX_NAME = "local-index"; private static final String REMOTE_INDEX_NAME = "remote-index"; + + // Boost the local index so that we can use the same doc values for local and remote indices and have consistent relevance private static final List QUERY_INDICES = List.of( - new IndexWithBoost(LOCAL_INDEX_NAME), + new IndexWithBoost(LOCAL_INDEX_NAME, 10.0f), new IndexWithBoost(fullyQualifiedIndexName(REMOTE_CLUSTER, REMOTE_INDEX_NAME)) ); - public void testMatchQuery() throws Exception { - final String commonInferenceId = "common-inference-id"; - final String localInferenceId = "local-inference-id"; - final 
String remoteInferenceId = "remote-inference-id"; + private static final String COMMON_INFERENCE_ID_FIELD = "common-inference-id-field"; + private static final String VARIABLE_INFERENCE_ID_FIELD = "variable-inference-id-field"; + private static final String MIXED_TYPE_FIELD_1 = "mixed-type-field-1"; + private static final String MIXED_TYPE_FIELD_2 = "mixed-type-field-2"; + private static final String TEXT_FIELD = "text-field"; - final String commonInferenceIdField = "common-inference-id-field"; - final String variableInferenceIdField = "variable-inference-id-field"; - final String mixedTypeField1 = "mixed-type-field-1"; - final String mixedTypeField2 = "mixed-type-field-2"; + boolean clustersConfigured = false; - final TestIndexInfo localIndexInfo = new TestIndexInfo( - LOCAL_INDEX_NAME, - Map.of(commonInferenceId, sparseEmbeddingServiceSettings(), localInferenceId, sparseEmbeddingServiceSettings()), - Map.of( - commonInferenceIdField, - semanticTextMapping(commonInferenceId), - variableInferenceIdField, - semanticTextMapping(localInferenceId), - mixedTypeField1, - semanticTextMapping(localInferenceId), - mixedTypeField2, - textMapping() - ), - Map.of( - "local_doc_1", - Map.of(commonInferenceIdField, "a"), - "local_doc_2", - Map.of(variableInferenceIdField, "b"), - "local_doc_3", - Map.of(mixedTypeField1, "c"), - "local_doc_4", - Map.of(mixedTypeField2, "d") - ) - ); - final TestIndexInfo remoteIndexInfo = new TestIndexInfo( - REMOTE_INDEX_NAME, - Map.of( - commonInferenceId, - textEmbeddingServiceSettings(256, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT), - remoteInferenceId, - textEmbeddingServiceSettings(384, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT) - ), - Map.of( - commonInferenceIdField, - semanticTextMapping(commonInferenceId), - variableInferenceIdField, - semanticTextMapping(remoteInferenceId), - mixedTypeField1, - textMapping(), - mixedTypeField2, - semanticTextMapping(remoteInferenceId) - ), - Map.of( - 
"remote_doc_1", - Map.of(commonInferenceIdField, "w"), - "remote_doc_2", - Map.of(variableInferenceIdField, "x"), - "remote_doc_3", - Map.of(mixedTypeField1, "y"), - "remote_doc_4", - Map.of(mixedTypeField2, "z") - ) - ); - setupTwoClusters(localIndexInfo, remoteIndexInfo); + @Override + protected boolean reuseClusters() { + return true; + } + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + if (clustersConfigured == false) { + configureClusters(); + clustersConfigured = true; + } + } + + public void testMatchQuery() throws Exception { // Query a field has the same inference ID value across clusters, but with different backing inference services assertSearchResponse( - new MatchQueryBuilder(commonInferenceIdField, "a"), + new MatchQueryBuilder(COMMON_INFERENCE_ID_FIELD, "a"), QUERY_INDICES, List.of( - new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_1"), - new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_1") + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, getDocId(COMMON_INFERENCE_ID_FIELD)), + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, getDocId(COMMON_INFERENCE_ID_FIELD)) ) ); // Query a field that has different inference ID values across clusters assertSearchResponse( - new MatchQueryBuilder(variableInferenceIdField, "b"), + new MatchQueryBuilder(VARIABLE_INFERENCE_ID_FIELD, "b"), QUERY_INDICES, List.of( - new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_2"), - new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_2") + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, getDocId(VARIABLE_INFERENCE_ID_FIELD)), + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, getDocId(VARIABLE_INFERENCE_ID_FIELD)) ) ); // Query a field that has mixed types across clusters assertSearchResponse( - new MatchQueryBuilder(mixedTypeField1, "y"), + new MatchQueryBuilder(MIXED_TYPE_FIELD_1, "c"), QUERY_INDICES, List.of( - new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_3"), - new 
SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_3") + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, getDocId(MIXED_TYPE_FIELD_1)), + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, getDocId(MIXED_TYPE_FIELD_1)) ) ); assertSearchResponse( - new MatchQueryBuilder(mixedTypeField2, "d"), + new MatchQueryBuilder(MIXED_TYPE_FIELD_2, "d"), QUERY_INDICES, List.of( - new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_4"), - new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_4") + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, getDocId(MIXED_TYPE_FIELD_2)), + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, getDocId(MIXED_TYPE_FIELD_2)) ) ); } @@ -154,54 +117,15 @@ public void testMatchQueryWithCcsMinimizeRoundTripsFalse() throws Exception { ); }; - final String commonInferenceId = "common-inference-id"; - - final String commonInferenceIdField = "common-inference-id-field"; - final String mixedTypeField1 = "mixed-type-field-1"; - final String mixedTypeField2 = "mixed-type-field-2"; - final String textField = "text-field"; - - final TestIndexInfo localIndexInfo = new TestIndexInfo( - LOCAL_INDEX_NAME, - Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), - Map.of( - commonInferenceIdField, - semanticTextMapping(commonInferenceId), - mixedTypeField1, - semanticTextMapping(commonInferenceId), - mixedTypeField2, - textMapping(), - textField, - textMapping() - ), - Map.of(mixedTypeField2 + "_doc", Map.of(mixedTypeField2, "a"), textField + "_doc", Map.of(textField, "b b b")) - ); - final TestIndexInfo remoteIndexInfo = new TestIndexInfo( - REMOTE_INDEX_NAME, - Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), - Map.of( - commonInferenceIdField, - semanticTextMapping(commonInferenceId), - mixedTypeField1, - textMapping(), - mixedTypeField2, - semanticTextMapping(commonInferenceId), - textField, - textMapping() - ), - Map.of(textField + "_doc", Map.of(textField, "b")) - ); - setupTwoClusters(localIndexInfo, 
remoteIndexInfo); - // Validate that expected cases fail - assertCcsMinimizeRoundTripsFalseFailure.accept(new MatchQueryBuilder(commonInferenceIdField, randomAlphaOfLength(5))); - assertCcsMinimizeRoundTripsFalseFailure.accept(new MatchQueryBuilder(mixedTypeField1, randomAlphaOfLength(5))); + assertCcsMinimizeRoundTripsFalseFailure.accept(new MatchQueryBuilder(COMMON_INFERENCE_ID_FIELD, randomAlphaOfLength(5))); + assertCcsMinimizeRoundTripsFalseFailure.accept(new MatchQueryBuilder(MIXED_TYPE_FIELD_1, randomAlphaOfLength(5))); // Validate the expected ccs_minimize_roundtrips=false detection gap and failure mode when querying non-inference fields locally assertSearchResponse( - new MatchQueryBuilder(mixedTypeField2, "a"), + new MatchQueryBuilder(MIXED_TYPE_FIELD_2, "d"), QUERY_INDICES, - List.of(new SearchResult(null, LOCAL_INDEX_NAME, mixedTypeField2 + "_doc")), + List.of(new SearchResult(null, LOCAL_INDEX_NAME, getDocId(MIXED_TYPE_FIELD_2))), new ClusterFailure( SearchResponse.Cluster.Status.SKIPPED, Set.of( @@ -216,14 +140,78 @@ public void testMatchQueryWithCcsMinimizeRoundTripsFalse() throws Exception { // Validate that a CCS match query functions when only text fields are queried assertSearchResponse( - new MatchQueryBuilder(textField, "b"), + new MatchQueryBuilder(TEXT_FIELD, "e"), QUERY_INDICES, List.of( - new SearchResult(null, LOCAL_INDEX_NAME, textField + "_doc"), - new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, textField + "_doc") + new SearchResult(null, LOCAL_INDEX_NAME, getDocId(TEXT_FIELD)), + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, getDocId(TEXT_FIELD)) ), null, s -> s.setCcsMinimizeRoundtrips(false) ); } + + private void configureClusters() throws IOException { + final String commonInferenceId = "common-inference-id"; + final String localInferenceId = "local-inference-id"; + final String remoteInferenceId = "remote-inference-id"; + + final Map> docs = Map.of( + getDocId(COMMON_INFERENCE_ID_FIELD), + 
Map.of(COMMON_INFERENCE_ID_FIELD, "a"), + getDocId(VARIABLE_INFERENCE_ID_FIELD), + Map.of(VARIABLE_INFERENCE_ID_FIELD, "b"), + getDocId(MIXED_TYPE_FIELD_1), + Map.of(MIXED_TYPE_FIELD_1, "c"), + getDocId(MIXED_TYPE_FIELD_2), + Map.of(MIXED_TYPE_FIELD_2, "d"), + getDocId(TEXT_FIELD), + Map.of(TEXT_FIELD, "e") + ); + + final TestIndexInfo localIndexInfo = new TestIndexInfo( + LOCAL_INDEX_NAME, + Map.of(commonInferenceId, sparseEmbeddingServiceSettings(), localInferenceId, sparseEmbeddingServiceSettings()), + Map.of( + COMMON_INFERENCE_ID_FIELD, + semanticTextMapping(commonInferenceId), + VARIABLE_INFERENCE_ID_FIELD, + semanticTextMapping(localInferenceId), + MIXED_TYPE_FIELD_1, + semanticTextMapping(localInferenceId), + MIXED_TYPE_FIELD_2, + textMapping(), + TEXT_FIELD, + textMapping() + ), + docs + ); + final TestIndexInfo remoteIndexInfo = new TestIndexInfo( + REMOTE_INDEX_NAME, + Map.of( + commonInferenceId, + textEmbeddingServiceSettings(256, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT), + remoteInferenceId, + textEmbeddingServiceSettings(384, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT) + ), + Map.of( + COMMON_INFERENCE_ID_FIELD, + semanticTextMapping(commonInferenceId), + VARIABLE_INFERENCE_ID_FIELD, + semanticTextMapping(remoteInferenceId), + MIXED_TYPE_FIELD_1, + textMapping(), + MIXED_TYPE_FIELD_2, + semanticTextMapping(remoteInferenceId), + TEXT_FIELD, + textMapping() + ), + docs + ); + setupTwoClusters(localIndexInfo, remoteIndexInfo); + } + + private static String getDocId(String field) { + return field + "_doc"; + } } From 3beb2d8d747487869ae6905f222d3bec0a69cba3 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 15:09:04 -0400 Subject: [PATCH 50/52] Semantic query CCS test refactoring --- ...anticQueryBuilderCrossClusterSearchIT.java | 44 ++++++++----------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git 
a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticQueryBuilderCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticQueryBuilderCrossClusterSearchIT.java index 623dccd220ee9..4b3b616f93bb0 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticQueryBuilderCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SemanticQueryBuilderCrossClusterSearchIT.java @@ -23,14 +23,14 @@ import static org.hamcrest.Matchers.equalTo; public class SemanticQueryBuilderCrossClusterSearchIT extends AbstractSemanticCrossClusterSearchTestCase { - public void testSemanticQuery() throws Exception { - final String localIndexName = "local-index"; - final String remoteIndexName = "remote-index"; - final List queryIndices = List.of( - new IndexWithBoost(localIndexName), - new IndexWithBoost(fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName)) - ); + private static final String LOCAL_INDEX_NAME = "local-index"; + private static final String REMOTE_INDEX_NAME = "remote-index"; + private static final List QUERY_INDICES = List.of( + new IndexWithBoost(LOCAL_INDEX_NAME), + new IndexWithBoost(fullyQualifiedIndexName(REMOTE_CLUSTER, REMOTE_INDEX_NAME)) + ); + public void testSemanticQuery() throws Exception { final String commonInferenceId = "common-inference-id"; final String localInferenceId = "local-inference-id"; final String remoteInferenceId = "remote-inference-id"; @@ -39,7 +39,7 @@ public void testSemanticQuery() throws Exception { final String variableInferenceIdField = "variable-inference-id-field"; final TestIndexInfo localIndexInfo = new TestIndexInfo( - localIndexName, + LOCAL_INDEX_NAME, Map.of(commonInferenceId, sparseEmbeddingServiceSettings(), localInferenceId, sparseEmbeddingServiceSettings()), Map.of( commonInferenceIdField, @@ -50,7 +50,7 @@ public void 
testSemanticQuery() throws Exception { Map.of("local_doc_1", Map.of(commonInferenceIdField, "a"), "local_doc_2", Map.of(variableInferenceIdField, "b")) ); final TestIndexInfo remoteIndexInfo = new TestIndexInfo( - remoteIndexName, + REMOTE_INDEX_NAME, Map.of( commonInferenceId, textEmbeddingServiceSettings(256, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT), @@ -70,31 +70,25 @@ public void testSemanticQuery() throws Exception { // Query a field has the same inference ID value across clusters, but with different backing inference services assertSearchResponse( new SemanticQueryBuilder(commonInferenceIdField, "a"), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_1"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_1") + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_1"), + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_1") ) ); // Query a field that has different inference ID values across clusters assertSearchResponse( new SemanticQueryBuilder(variableInferenceIdField, "b"), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2") + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_2"), + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_2") ) ); } public void testSemanticQueryWithCcMinimizeRoundTripsFalse() throws Exception { - final String localIndexName = "local-index"; - final String remoteIndexName = "remote-index"; - final List queryIndices = List.of( - new IndexWithBoost(localIndexName), - new IndexWithBoost(fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName)) - ); final SemanticQueryBuilder queryBuilder = new SemanticQueryBuilder("foo", "bar"); final Consumer assertCcsMinimizeRoundTripsFalseFailure = s -> { IllegalArgumentException e = assertThrows( @@ -107,18 +101,18 @@ public void 
testSemanticQueryWithCcMinimizeRoundTripsFalse() throws Exception { ); }; - final TestIndexInfo localIndexInfo = new TestIndexInfo(localIndexName, Map.of(), Map.of(), Map.of()); - final TestIndexInfo remoteIndexInfo = new TestIndexInfo(remoteIndexName, Map.of(), Map.of(), Map.of()); + final TestIndexInfo localIndexInfo = new TestIndexInfo(LOCAL_INDEX_NAME, Map.of(), Map.of(), Map.of()); + final TestIndexInfo remoteIndexInfo = new TestIndexInfo(REMOTE_INDEX_NAME, Map.of(), Map.of(), Map.of()); setupTwoClusters(localIndexInfo, remoteIndexInfo); // Explicitly set ccs_minimize_roundtrips=false in the search request SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder); - SearchRequest searchRequestWithCcMinimizeRoundTripsFalse = new SearchRequest(convertToArray(queryIndices), searchSourceBuilder); + SearchRequest searchRequestWithCcMinimizeRoundTripsFalse = new SearchRequest(convertToArray(QUERY_INDICES), searchSourceBuilder); searchRequestWithCcMinimizeRoundTripsFalse.setCcsMinimizeRoundtrips(false); assertCcsMinimizeRoundTripsFalseFailure.accept(searchRequestWithCcMinimizeRoundTripsFalse); // Using a point in time implicitly sets ccs_minimize_roundtrips=false - BytesReference pitId = openPointInTime(convertToArray(queryIndices), TimeValue.timeValueMinutes(2)); + BytesReference pitId = openPointInTime(convertToArray(QUERY_INDICES), TimeValue.timeValueMinutes(2)); SearchSourceBuilder searchSourceBuilderWithPit = new SearchSourceBuilder().query(queryBuilder) .pointInTimeBuilder(new PointInTimeBuilder(pitId)); SearchRequest searchRequestWithPit = new SearchRequest().source(searchSourceBuilderWithPit); From 7333107799af0306ddc5ca9fdba97dfda81224f4 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 25 Sep 2025 15:13:02 -0400 Subject: [PATCH 51/52] Sparse vector query CCS test refactoring --- ...ectorQueryBuilderCrossClusterSearchIT.java | 74 +++++++++---------- 1 file changed, 34 insertions(+), 40 deletions(-) diff --git 
a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SparseVectorQueryBuilderCrossClusterSearchIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SparseVectorQueryBuilderCrossClusterSearchIT.java index db1dfa57eb5e0..be9183722a48c 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SparseVectorQueryBuilderCrossClusterSearchIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/search/ccs/SparseVectorQueryBuilderCrossClusterSearchIT.java @@ -23,14 +23,14 @@ import static org.hamcrest.Matchers.equalTo; public class SparseVectorQueryBuilderCrossClusterSearchIT extends AbstractSemanticCrossClusterSearchTestCase { - public void testSparseVectorQuery() throws Exception { - final String localIndexName = "local-index"; - final String remoteIndexName = "remote-index"; - final List queryIndices = List.of( - new IndexWithBoost(localIndexName), - new IndexWithBoost(fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName)) - ); + private static final String LOCAL_INDEX_NAME = "local-index"; + private static final String REMOTE_INDEX_NAME = "remote-index"; + private static final List QUERY_INDICES = List.of( + new IndexWithBoost(LOCAL_INDEX_NAME), + new IndexWithBoost(fullyQualifiedIndexName(REMOTE_CLUSTER, REMOTE_INDEX_NAME)) + ); + public void testSparseVectorQuery() throws Exception { final String commonInferenceId = "common-inference-id"; final String commonInferenceIdField = "common-inference-id-field"; @@ -38,7 +38,7 @@ public void testSparseVectorQuery() throws Exception { final String mixedTypeField2 = "mixed-type-field-2"; final TestIndexInfo localIndexInfo = new TestIndexInfo( - localIndexName, + LOCAL_INDEX_NAME, Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), Map.of( commonInferenceIdField, @@ -58,7 +58,7 @@ public void testSparseVectorQuery() throws Exception { ) ); final TestIndexInfo remoteIndexInfo = new 
TestIndexInfo( - remoteIndexName, + REMOTE_INDEX_NAME, Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), Map.of( commonInferenceIdField, @@ -82,28 +82,28 @@ public void testSparseVectorQuery() throws Exception { // Query a field has the same inference ID value across clusters, but with different backing inference services assertSearchResponse( new SparseVectorQueryBuilder(commonInferenceIdField, null, "a"), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_1"), - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_1") + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_1"), + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_1") ) ); // Query a field that has mixed types across clusters assertSearchResponse( new SparseVectorQueryBuilder(mixedTypeField1, commonInferenceId, "b"), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2"), - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2") + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_2"), + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_2") ) ); assertSearchResponse( new SparseVectorQueryBuilder(mixedTypeField2, commonInferenceId, "c"), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3") + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_3"), + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_3") ) ); @@ -114,26 +114,26 @@ public void testSparseVectorQuery() throws Exception { .toList(); assertSearchResponse( new SparseVectorQueryBuilder(mixedTypeField1, queryVector, null, null, null, null), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_2"), - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_2") + new 
SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_2"), + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_2") ) ); assertSearchResponse( new SparseVectorQueryBuilder(mixedTypeField2, queryVector, null, null, null, null), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, "remote_doc_3") + new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_3"), + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, "remote_doc_3") ) ); // Check that omitting the inference ID when querying a remote sparse vector field leads to the expected partial failure assertSearchResponse( new SparseVectorQueryBuilder(mixedTypeField2, null, "c"), - queryIndices, - List.of(new SearchResult(LOCAL_CLUSTER, localIndexName, "local_doc_3")), + QUERY_INDICES, + List.of(new SearchResult(LOCAL_CLUSTER, LOCAL_INDEX_NAME, "local_doc_3")), new ClusterFailure( SearchResponse.Cluster.Status.SKIPPED, Set.of(new FailureCause(IllegalArgumentException.class, "inference_id required to perform vector search on query string")) @@ -143,15 +143,9 @@ public void testSparseVectorQuery() throws Exception { } public void testSparseVectorQueryWithCcsMinimizeRoundTripsFalse() throws Exception { - final String localIndexName = "local-index"; - final String remoteIndexName = "remote-index"; - final List queryIndices = List.of( - new IndexWithBoost(localIndexName), - new IndexWithBoost(fullyQualifiedIndexName(REMOTE_CLUSTER, remoteIndexName)) - ); final Consumer assertCcsMinimizeRoundTripsFalseFailure = q -> { SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(q); - SearchRequest searchRequest = new SearchRequest(convertToArray(queryIndices), searchSourceBuilder); + SearchRequest searchRequest = new SearchRequest(convertToArray(QUERY_INDICES), searchSourceBuilder); searchRequest.setCcsMinimizeRoundtrips(false); IllegalArgumentException e = assertThrows( @@ -175,7 
+169,7 @@ public void testSparseVectorQueryWithCcsMinimizeRoundTripsFalse() throws Excepti final String sparseVectorField = "sparse-vector-field"; final TestIndexInfo localIndexInfo = new TestIndexInfo( - localIndexName, + LOCAL_INDEX_NAME, Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), Map.of( commonInferenceIdField, @@ -195,7 +189,7 @@ public void testSparseVectorQueryWithCcsMinimizeRoundTripsFalse() throws Excepti ) ); final TestIndexInfo remoteIndexInfo = new TestIndexInfo( - remoteIndexName, + REMOTE_INDEX_NAME, Map.of(commonInferenceId, sparseEmbeddingServiceSettings()), Map.of( commonInferenceIdField, @@ -225,8 +219,8 @@ public void testSparseVectorQueryWithCcsMinimizeRoundTripsFalse() throws Excepti // Validate the expected ccs_minimize_roundtrips=false detection gap and failure mode when querying non-inference fields locally assertSearchResponse( new SparseVectorQueryBuilder(mixedTypeField2, commonInferenceId, "foo"), - queryIndices, - List.of(new SearchResult(null, localIndexName, mixedTypeField2 + "_doc")), + QUERY_INDICES, + List.of(new SearchResult(null, LOCAL_INDEX_NAME, mixedTypeField2 + "_doc")), new ClusterFailure( SearchResponse.Cluster.Status.SKIPPED, Set.of( @@ -242,10 +236,10 @@ public void testSparseVectorQueryWithCcsMinimizeRoundTripsFalse() throws Excepti // Validate that a CCS sparse vector query functions when only sparse vector fields are queried assertSearchResponse( new SparseVectorQueryBuilder(sparseVectorField, commonInferenceId, "foo"), - queryIndices, + QUERY_INDICES, List.of( - new SearchResult(null, localIndexName, sparseVectorField + "_doc"), - new SearchResult(REMOTE_CLUSTER, remoteIndexName, sparseVectorField + "_doc") + new SearchResult(null, LOCAL_INDEX_NAME, sparseVectorField + "_doc"), + new SearchResult(REMOTE_CLUSTER, REMOTE_INDEX_NAME, sparseVectorField + "_doc") ), null, s -> s.setCcsMinimizeRoundtrips(false) From 0794501022dbbf06fc944f14d6ae21f1e2dcd7be Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: 
Thu, 25 Sep 2025 16:07:59 -0400 Subject: [PATCH 52/52] Update transport version --- .../definitions/referable/semantic_search_ccs_support.csv | 2 +- server/src/main/resources/transport/upper_bounds/9.2.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv b/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv index 2c8dc4fc73614..35154103cd0da 100644 --- a/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv +++ b/server/src/main/resources/transport/definitions/referable/semantic_search_ccs_support.csv @@ -1 +1 @@ -9173000 +9174000 diff --git a/server/src/main/resources/transport/upper_bounds/9.2.csv b/server/src/main/resources/transport/upper_bounds/9.2.csv index e60434a3e2189..57900e0428e01 100644 --- a/server/src/main/resources/transport/upper_bounds/9.2.csv +++ b/server/src/main/resources/transport/upper_bounds/9.2.csv @@ -1 +1 @@ -sampling_configuration,9173000 +semantic_search_ccs_support,9174000