diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index fbde912e35cb4..46b700fb1afd5 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -16,6 +16,7 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; @@ -54,11 +55,14 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.elasticsearch.index.IndexVersions.NEW_SPARSE_VECTOR; -import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.NEW_SPARSE_VECTOR_INDEX_VERSION; -import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.PREVIOUS_SPARSE_VECTOR_INDEX_VERSION; -import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X; +import static org.elasticsearch.index.IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X; +import static org.elasticsearch.index.IndexVersions.V_8_0_0; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertToXContentEquivalent; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; import static org.hamcrest.Matchers.containsString; @@ -67,6 +71,22 @@ public class SparseVectorFieldMapperTests extends MapperTestCase { + public static final float STRICT_TOKENS_WEIGHT_THRESHOLD = 0.5f; + public static final float STRICT_TOKENS_FREQ_RATIO_THRESHOLD = 1; + + private static final Map COMMON_TOKENS = Map.of( + "common1_drop_default", + 0.1f, + "common2_drop_default", + 0.1f, + "common3_drop_default", + 0.1f + ); + + private static final Map MEDIUM_TOKENS = Map.of("medium1_keep_strict", 0.5f, "medium2_keep_default", 0.25f); + + private static final Map RARE_TOKENS = Map.of("rare1_keep_strict", 0.9f, "rare2_keep_strict", 0.85f); + @Override protected Object getSampleValueForDocument() { Map map = new LinkedHashMap<>(); @@ -122,7 +142,7 @@ protected void minimalMappingWithExplicitIndexOptions(XContentBuilder b) throws b.field("prune", true); b.startObject("pruning_config"); { - b.field("tokens_freq_ratio_threshold", 3.0f); + b.field("tokens_freq_ratio_threshold", 1.0f); b.field("tokens_weight_threshold", 0.5f); } b.endObject(); @@ -177,6 +197,13 @@ protected void mappingWithIndexOptionsPruneFalse(XContentBuilder b) throws IOExc b.endObject(); } + private void mapping(XContentBuilder b, @Nullable Boolean prune, PruningConfig pruningConfig) throws IOException { + b.field("type", "sparse_vector"); + if (prune != null) { + b.field("index_options", new SparseVectorFieldMapper.IndexOptions(prune, pruningConfig.tokenPruningConfig)); + } + } + @Override protected boolean supportsStoredFields() { return false; @@ -266,7 +293,7 @@ public void testDefaultsWithAndWithoutIncludeDefaultsOlderIndexVersion() throws IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween( random(), NEW_SPARSE_VECTOR, - IndexVersionUtils.getPreviousVersion(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X) + IndexVersionUtils.getPreviousVersion(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X) ); XContentBuilder orig = JsonXContent.contentBuilder().startObject(); @@ -487,7 +514,7 @@ protected IngestScriptSupport ingestScriptSupport() { @Override protected String[] getParseMinimalWarnings(IndexVersion indexVersion) { String[] additionalWarnings = null; - if (indexVersion.before(PREVIOUS_SPARSE_VECTOR_INDEX_VERSION)) { + if (indexVersion.before(V_8_0_0)) { additionalWarnings = new String[] { SparseVectorFieldMapper.ERROR_MESSAGE_7X }; } return Strings.concatStringArrays(super.getParseMinimalWarnings(indexVersion), additionalWarnings); @@ -495,11 +522,11 @@ protected String[] getParseMinimalWarnings(IndexVersion indexVersion) { @Override protected IndexVersion boostNotAllowedIndexVersion() { - return NEW_SPARSE_VECTOR_INDEX_VERSION; + return NEW_SPARSE_VECTOR; } public void testSparseVectorWith7xIndex() throws Exception { - IndexVersion version = IndexVersionUtils.randomPreviousCompatibleVersion(random(), PREVIOUS_SPARSE_VECTOR_INDEX_VERSION); + IndexVersion version = IndexVersionUtils.randomPreviousCompatibleVersion(random(), V_8_0_0); XContentBuilder builder = XContentFactory.jsonBuilder() .startObject() @@ -537,11 +564,7 @@ public void testSparseVectorWith7xIndex() throws Exception { } public void testSparseVectorUnsupportedIndex() throws Exception { - IndexVersion version = IndexVersionUtils.randomVersionBetween( - random(), - PREVIOUS_SPARSE_VECTOR_INDEX_VERSION, - IndexVersions.FIRST_DETACHED_INDEX_VERSION - ); + IndexVersion version = IndexVersionUtils.randomVersionBetween(random(), V_8_0_0, IndexVersions.FIRST_DETACHED_INDEX_VERSION); Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(version, fieldMapping(b -> { b.field("type", "sparse_vector"); }))); @@ -720,14 +743,60 @@ public void testTokensWeightThresholdCorrect() { ); } + private enum PruningScenario { + NO_PRUNING, // No pruning applied - all tokens preserved + DEFAULT_PRUNING, // Default pruning configuration + STRICT_PRUNING // Stricter pruning with higher thresholds + } + + private enum PruningConfig { + NULL(null), + EXPLICIT_DEFAULT(new TokenPruningConfig()), + STRICT(new TokenPruningConfig(STRICT_TOKENS_FREQ_RATIO_THRESHOLD, STRICT_TOKENS_WEIGHT_THRESHOLD, false)); + + public final @Nullable TokenPruningConfig tokenPruningConfig; + + PruningConfig(@Nullable TokenPruningConfig tokenPruningConfig) { + this.tokenPruningConfig = tokenPruningConfig; + } + } + + private final Set validIndexPruningScenarios = Set.of( + new PruningOptions(false, PruningConfig.NULL), + new PruningOptions(true, PruningConfig.NULL), + new PruningOptions(true, PruningConfig.EXPLICIT_DEFAULT), + new PruningOptions(true, PruningConfig.STRICT), + new PruningOptions(null, PruningConfig.NULL) + ); + + private record PruningOptions(@Nullable Boolean prune, PruningConfig pruningConfig) {} + private void withSearchExecutionContext(MapperService mapperService, CheckedConsumer consumer) throws IOException { var mapper = mapperService.documentMapper(); try (Directory directory = newDirectory()) { RandomIndexWriter iw = new RandomIndexWriter(random(), directory); - var sourceToParse = source(this::writeField); - ParsedDocument doc1 = mapper.parse(sourceToParse); - iw.addDocument(doc1.rootDoc()); + + int commonDocs = 20; + for (int i = 0; i < commonDocs; i++) { + iw.addDocument(mapper.parse(source(b -> b.field("field", COMMON_TOKENS))).rootDoc()); + } + + int mediumDocs = 5; + for (int i = 0; i < mediumDocs; i++) { + iw.addDocument(mapper.parse(source(b -> b.field("field", MEDIUM_TOKENS))).rootDoc()); + } + + iw.addDocument(mapper.parse(source(b -> b.field("field", RARE_TOKENS))).rootDoc()); + + // This will lower the averageTokenFreqRatio so that common tokens get pruned with default settings. + // Depending on how the index is created, we will have 30-37 numUniqueTokens + // this will result in an averageTokenFreqRatio of 0.1021 - 0.1259 + Map uniqueDoc = new TreeMap<>(); + for (int i = 0; i < 30; i++) { + uniqueDoc.put("unique" + i, 0.5f); + } + iw.addDocument(mapper.parse(source(b -> b.field("field", uniqueDoc))).rootDoc()); iw.close(); try (DirectoryReader reader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) { @@ -737,212 +806,129 @@ private void withSearchExecutionContext(MapperService mapperService, CheckedCons } } - public void testTypeQueryFinalizationWithRandomOptions() throws Exception { - for (int i = 0; i < 20; i++) { - runTestTypeQueryFinalization( - randomBoolean(), // useIndexVersionBeforeIndexOptions - randomBoolean(), // useMapperDefaultIndexOptions - randomBoolean(), // setMapperIndexOptionsPruneToFalse - randomBoolean(), // queryOverridesPruningConfig - randomBoolean() // queryOverridesPruneToBeFalse + public void testPruningScenarios() throws Exception { + for (int i = 0; i < 200; i++) { + assertPruningScenario( + randomFrom(validIndexPruningScenarios), + new PruningOptions(randomFrom(true, false, null), randomFrom(PruningConfig.values())) ); } } - public void testTypeQueryFinalizationDefaultsCurrentVersion() throws Exception { - IndexVersion version = IndexVersion.current(); - MapperService mapperService = createMapperService(version, fieldMapping(this::minimalMapping)); - - // query should be pruned by default on newer index versions - performTypeQueryFinalizationTest(mapperService, null, null, true); + private XContentBuilder getIndexMapping(PruningOptions pruningOptions) throws IOException { + return fieldMapping(b -> mapping(b, pruningOptions.prune, pruningOptions.pruningConfig)); } - public void testTypeQueryFinalizationDefaultsPreviousVersion() throws Exception { - IndexVersion version = IndexVersionUtils.randomVersionBetween( - random(), - NEW_SPARSE_VECTOR, - IndexVersionUtils.getPreviousVersion(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X) - ); - MapperService mapperService = createMapperService(version, fieldMapping(this::minimalMapping)); - - // query should _not_ be pruned by default on older index versions - performTypeQueryFinalizationTest(mapperService, null, null, false); - } - - public void testTypeQueryFinalizationWithIndexExplicit() throws Exception { - IndexVersion version = IndexVersion.current(); - MapperService mapperService = createMapperService(version, fieldMapping(this::minimalMapping)); - - // query should be pruned via explicit index options - performTypeQueryFinalizationTest(mapperService, null, null, true); - } - - public void testTypeQueryFinalizationWithIndexExplicitDoNotPrune() throws Exception { - IndexVersion version = IndexVersion.current(); - MapperService mapperService = createMapperService(version, fieldMapping(this::mappingWithIndexOptionsPruneFalse)); + private void assertQueryContains(List expectedClauses, Query query) { + SparseVectorQueryWrapper queryWrapper = (SparseVectorQueryWrapper) query; + var termsQuery = queryWrapper.getTermsQuery(); + assertNotNull(termsQuery); + var booleanQuery = (BooleanQuery) termsQuery; + List shouldClauses = booleanQuery.clauses() + .stream() + .filter(clause -> clause.getOccur() == BooleanClause.Occur.SHOULD) + .map(BooleanClause::getQuery) + .collect(Collectors.toList()); + assertThat(shouldClauses, Matchers.containsInAnyOrder(expectedClauses.toArray())); + } + + private PruningScenario getEffectivePruningScenario( + PruningOptions indexPruningOptions, + PruningOptions queryPruningOptions, + IndexVersion indexVersion + ) { + Boolean shouldPrune = queryPruningOptions.prune; + if (shouldPrune == null) { + shouldPrune = indexPruningOptions.prune; + } - // query should be pruned via explicit index options - performTypeQueryFinalizationTest(mapperService, null, null, false); - } + if (shouldPrune == null) { + shouldPrune = indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X); + } - public void testTypeQueryFinalizationQueryOverridesPruning() throws Exception { - IndexVersion version = IndexVersion.current(); - MapperService mapperService = createMapperService(version, fieldMapping(this::mappingWithIndexOptionsPruneFalse)); + PruningScenario pruningScenario = PruningScenario.NO_PRUNING; + if (shouldPrune) { + PruningConfig pruningConfig = queryPruningOptions.pruningConfig; + if (pruningConfig == PruningConfig.NULL) { + pruningConfig = indexPruningOptions.pruningConfig; + } + pruningScenario = switch (pruningConfig) { + case STRICT -> PruningScenario.STRICT_PRUNING; + case EXPLICIT_DEFAULT, NULL -> PruningScenario.DEFAULT_PRUNING; + }; + } - // query should still be pruned due to query builder setting it - performTypeQueryFinalizationTest(mapperService, true, new TokenPruningConfig(), true); + return pruningScenario; } - public void testTypeQueryFinalizationQueryOverridesPruningOff() throws Exception { - IndexVersion version = IndexVersion.current(); - MapperService mapperService = createMapperService(version, fieldMapping(this::mappingWithIndexOptionsPruneFalse)); + private List getExpectedQueryClauses( + SparseVectorFieldMapper.SparseVectorFieldType ft, + PruningScenario pruningScenario, + SearchExecutionContext searchExecutionContext + ) { + List tokens = switch (pruningScenario) { + case NO_PRUNING -> QUERY_VECTORS; + case DEFAULT_PRUNING -> QUERY_VECTORS.stream() + .filter(t -> t.token().startsWith("rare") || t.token().startsWith("medium")) + .toList(); + case STRICT_PRUNING -> QUERY_VECTORS.stream().filter(t -> t.token().endsWith("keep_strict")).toList(); + }; - // query should not pruned due to query builder setting it - performTypeQueryFinalizationTest(mapperService, false, null, false); + return tokens.stream().map(t -> { + Query termQuery = ft.termQuery(t.token(), searchExecutionContext); + return new BoostQuery(termQuery, t.weight()); + }).collect(Collectors.toUnmodifiableList()); } - private void performTypeQueryFinalizationTest( - MapperService mapperService, - @Nullable Boolean queryPrune, - @Nullable TokenPruningConfig queryTokenPruningConfig, - boolean queryShouldBePruned - ) throws IOException { + private void assertPruningScenario(PruningOptions indexPruningOptions, PruningOptions queryPruningOptions) throws IOException { + IndexVersion indexVersion = getIndexVersion(); + MapperService mapperService = createMapperService(indexVersion, getIndexMapping(indexPruningOptions)); + PruningScenario effectivePruningScenario = getEffectivePruningScenario(indexPruningOptions, queryPruningOptions, indexVersion); withSearchExecutionContext(mapperService, (context) -> { SparseVectorFieldMapper.SparseVectorFieldType ft = (SparseVectorFieldMapper.SparseVectorFieldType) mapperService.fieldType( "field" ); - Query finalizedQuery = ft.finalizeSparseVectorQuery(context, "field", QUERY_VECTORS, queryPrune, queryTokenPruningConfig); - - if (queryShouldBePruned) { - assertQueryWasPruned(finalizedQuery); - } else { - assertQueryWasNotPruned(finalizedQuery); - } + List expectedQueryClauses = getExpectedQueryClauses(ft, effectivePruningScenario, context); + Query finalizedQuery = ft.finalizeSparseVectorQuery( + context, + "field", + QUERY_VECTORS, + queryPruningOptions.prune, + queryPruningOptions.pruningConfig.tokenPruningConfig + ); + assertQueryContains(expectedQueryClauses, finalizedQuery); }); } - private void assertQueryWasPruned(Query query) { - assertQueryHasClauseCount(query, 0); - } - - private void assertQueryWasNotPruned(Query query) { - assertQueryHasClauseCount(query, QUERY_VECTORS.size()); + private static IndexVersion getIndexVersion() { + VersionRange versionRange = randomFrom(VersionRange.values()); + return versionRange.getRandomVersion(); } - private void assertQueryHasClauseCount(Query query, int clauseCount) { - SparseVectorQueryWrapper queryWrapper = (SparseVectorQueryWrapper) query; - var termsQuery = queryWrapper.getTermsQuery(); - assertNotNull(termsQuery); - var booleanQuery = (BooleanQuery) termsQuery; - - int occurShouldCount = 0; - for (var clause : booleanQuery.clauses()) { - if (clause != null && clause.getOccur() != null) { - var occur = clause.getOccur(); - if (occur.compareTo(BooleanClause.Occur.SHOULD) == 0) { - occurShouldCount++; - } - } - } - assertThat(occurShouldCount, equalTo(clauseCount)); - } + private enum VersionRange { + ES_V8X_WITHOUT_INDEX_OPTIONS_SUPPORT( + NEW_SPARSE_VECTOR, + IndexVersionUtils.getPreviousVersion(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X) + ), + ES_V8X_WITH_INDEX_OPTIONS_SUPPORT(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X, IndexVersion.current()); - /** - * Runs a test of the query finalization based on various parameters - * that provides - * @param useIndexVersionBeforeIndexOptions set to true to use a previous index version before mapper index_options - * @param useMapperDefaultIndexOptions set to false to use an explicit, non-default mapper index_options - * @param setMapperIndexOptionsPruneToFalse set to true to use prune:false in the mapper index_options - * @param queryOverridesPruningConfig set to true to designate the query will provide a pruning_config - * @param queryOverridesPruneToBeFalse if true and queryOverridesPruningConfig is true, the query will provide prune:false - * @throws IOException - */ - private void runTestTypeQueryFinalization( - boolean useIndexVersionBeforeIndexOptions, - boolean useMapperDefaultIndexOptions, - boolean setMapperIndexOptionsPruneToFalse, - boolean queryOverridesPruningConfig, - boolean queryOverridesPruneToBeFalse - ) throws IOException { - MapperService mapperService = getMapperServiceForTest( - useIndexVersionBeforeIndexOptions, - useMapperDefaultIndexOptions, - setMapperIndexOptionsPruneToFalse - ); + private final IndexVersion fromVersion; + private final IndexVersion toVersion; - // check and see if the query should explicitly override the index_options - Boolean shouldQueryPrune = queryOverridesPruningConfig ? (queryOverridesPruneToBeFalse == false) : null; - - // get the pruning configuration for the query if it's overriding - TokenPruningConfig queryPruningConfig = Boolean.TRUE.equals(shouldQueryPrune) ? new TokenPruningConfig() : null; - - // our logic if the results should be pruned or not - // we should _not_ prune if any of the following: - // - the query explicitly overrides the options and `prune` is set to false - // - the query does not override the pruning options and: - // - either we are using a previous index version - // - or the index_options explicitly sets `prune` to false - boolean resultShouldNotBePruned = ((queryOverridesPruningConfig && queryOverridesPruneToBeFalse) - || (queryOverridesPruningConfig == false && (useIndexVersionBeforeIndexOptions || setMapperIndexOptionsPruneToFalse))); - - try { - performTypeQueryFinalizationTest(mapperService, shouldQueryPrune, queryPruningConfig, resultShouldNotBePruned == false); - } catch (AssertionError e) { - String message = "performTypeQueryFinalizationTest failed using parameters: " - + "useIndexVersionBeforeIndexOptions: " - + useIndexVersionBeforeIndexOptions - + ", useMapperDefaultIndexOptions: " - + useMapperDefaultIndexOptions - + ", setMapperIndexOptionsPruneToFalse: " - + setMapperIndexOptionsPruneToFalse - + ", queryOverridesPruningConfig: " - + queryOverridesPruningConfig - + ", queryOverridesPruneToBeFalse: " - + queryOverridesPruneToBeFalse; - throw new AssertionError(message, e); + VersionRange(IndexVersion fromVersion, IndexVersion toVersion) { + this.fromVersion = fromVersion; + this.toVersion = toVersion; } - } - private IndexVersion getIndexVersionForTest(boolean usePreviousIndex) { - return usePreviousIndex - ? IndexVersionUtils.randomVersionBetween( - random(), - NEW_SPARSE_VECTOR, - IndexVersionUtils.getPreviousVersion(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X) - ) - : IndexVersionUtils.randomVersionBetween(random(), SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersion.current()); - } - - private MapperService getMapperServiceForTest( - boolean usePreviousIndex, - boolean useIndexOptionsDefaults, - boolean explicitIndexOptionsDoNotPrune - ) throws IOException { - // get the index version of the test to use - // either a current version that supports index options, or a previous version that does not - IndexVersion indexVersion = getIndexVersionForTest(usePreviousIndex); - - // if it's using the old index, we always use the minimal mapping without index_options - if (usePreviousIndex) { - return createMapperService(indexVersion, fieldMapping(this::minimalMapping)); + IndexVersion getRandomVersion() { + return IndexVersionUtils.randomVersionBetween(random(), fromVersion, toVersion); } - - // if we set explicitIndexOptionsDoNotPrune, the index_options (if present) will explicitly include "prune: false" - if (explicitIndexOptionsDoNotPrune) { - return createMapperService(indexVersion, fieldMapping(this::mappingWithIndexOptionsPruneFalse)); - } - - // either return the default (minimal) mapping or one with an explicit pruning_config - return useIndexOptionsDefaults - ? createMapperService(indexVersion, fieldMapping(this::minimalMapping)) - : createMapperService(indexVersion, fieldMapping(this::minimalMappingWithExplicitIndexOptions)); } - private static List QUERY_VECTORS = List.of( - new WeightedToken("pugs", 0.5f), - new WeightedToken("cats", 0.4f), - new WeightedToken("is", 0.1f) - ); + private static final List QUERY_VECTORS = Stream.of(RARE_TOKENS, MEDIUM_TOKENS, COMMON_TOKENS) + .flatMap(map -> map.entrySet().stream()) + .map(entry -> new WeightedToken(entry.getKey(), entry.getValue())) + .collect(Collectors.toList()); /** * Handles float/double conversion when reading/writing with xcontent by converting all numbers to floats. diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml index c3255467ff443..80e4103045489 100644 --- a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml +++ b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml @@ -622,7 +622,7 @@ teardown: {"index": { "_id": "11" }} {"content_embedding":{"is": 0.6, "pugs": 0.6 }} {"index": { "_id": "12" }} - {"content_embedding":{"is": 0.1891394, "pugs": 0.1 }} + {"content_embedding":{"cats": 0.1 }} - do: search: @@ -633,13 +633,25 @@ teardown: field: content_embedding query_vector: pugs: 0.5 - cats: 0.5 - is: 0.04600334 + cats: 0.18 + is: 0.20 - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "11" } - match: { hits.hits.1._id: "12" } + - do: + search: + index: test-sparse-vector-pruning-default + body: + query: + sparse_vector: + field: content_embedding + query_vector: + is: 0.21 # 0.2 is the weight threshold for the default pruning config + + - match: { hits.total.value: 11 } + - do: search: index: test-sparse-vector-pruning-default diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml index 3014883e5b42a..cb789260f4e69 100644 --- a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml +++ b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml @@ -620,7 +620,7 @@ teardown: {"index": { "_id": "11" }} {"content_embedding":{"is": 0.6, "pugs": 0.6 }} {"index": { "_id": "12" }} - {"content_embedding":{"is": 0.1891394, "pugs": 0.1 }} + {"content_embedding":{"cats": 0.1 }} - do: search: @@ -631,13 +631,26 @@ teardown: field: content_embedding query_vector: pugs: 0.5 - cats: 0.5 - is: 0.04600334 + cats: 0.18 + is: 0.20 - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "11" } - match: { hits.hits.1._id: "12" } + - do: + search: + index: test-sparse-vector-pruning-default + body: + query: + sparse_vector: + field: content_embedding + query_vector: + is: 0.21 # 0.2 is the weight threshold for the default pruning config + + - match: { hits.total.value: 11 } + + - do: search: index: test-sparse-vector-pruning-default diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml index 2d440f1cf5987..408ddd1ec50c6 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml @@ -944,7 +944,7 @@ teardown: {"index": { "_id": "11" }} {"content_embedding":{"is": 0.6, "pugs": 0.6 }} {"index": { "_id": "12" }} - {"content_embedding":{"is": 0.1891394, "pugs": 0.1 }} + {"content_embedding":{"cats": 0.1 }} - do: search: @@ -955,13 +955,25 @@ teardown: field: content_embedding query_vector: pugs: 0.5 - cats: 0.5 - is: 0.04600334 + cats: 0.18 + is: 0.2 - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "11" } - match: { hits.hits.1._id: "12" } + - do: + search: + index: test-sparse-vector-pruning-default + body: + query: + sparse_vector: + field: content_embedding + query_vector: + is: 0.21 # 0.2 is the weight threshold for the default pruning config + + - match: { hits.total.value: 11 } + - do: search: index: test-sparse-vector-pruning-default