diff --git a/docs/changelog/129693.yaml b/docs/changelog/129693.yaml new file mode 100644 index 0000000000000..8edab59b7d03f --- /dev/null +++ b/docs/changelog/129693.yaml @@ -0,0 +1,5 @@ +pr: 129693 +summary: Add top-level normalizer for linear retriever +area: Search +type: enhancement +issues: [] diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md b/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md index d7699f3c2c0ce..dd266b0092d6c 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md @@ -31,9 +31,16 @@ Combining `query` and `retrievers` is not supported. `normalizer` {applies_to}`stack: ga 9.1` : (Optional, String) - The normalizer to use when using the [multi-field query format](../retrievers.md#multi-field-query-format). + The top-level normalizer to use when combining results. See [normalizers](#linear-retriever-normalizers) for supported values. Required when `query` is specified. + + When used with the [multi-field query format](../retrievers.md#multi-field-query-format) (`query` parameter), normalizes scores per [field grouping](../retrievers.md#multi-field-field-grouping). + Otherwise serves as the default normalizer for any sub-retriever that doesn't specify its own normalizer. Per-retriever normalizers always take precedence over the top-level normalizer. + + :::{note} + **Top-level normalizer support for sub-retrievers**: The ability to use a top-level normalizer as a default for sub-retrievers was introduced in Elasticsearch 9.2. In earlier versions, only per-retriever normalizers are supported. + ::: ::::{warning} Avoid using `none` as that will disable normalization and may bias the result set towards lexical matches. 
@@ -74,9 +81,10 @@ Each entry in the `retrievers` array specifies the following parameters: `normalizer` : (Optional, String) - Specifies how the retriever’s score will be normalized before applying the specified `weight`. + Specifies how the retriever's score will be normalized before applying the specified `weight`. See [normalizers](#linear-retriever-normalizers) for supported values. - Defaults to `none`. + If not specified, uses the top-level `normalizer` or defaults to `none` if no top-level normalizer is set. + {applies_to}`stack: ga 9.2` See also [this hybrid search example](retrievers-examples.md#retrievers-examples-linear-retriever) using a linear retriever on how to independently configure and apply normalizers to retrievers. @@ -94,7 +102,7 @@ The `linear` retriever supports the following normalizers: ## Example -This example of a hybrid search weights KNN results five times more heavily than BM25 results in the final ranking. +This example of a hybrid search weights KNN results five times more heavily than BM25 results in the final ranking, with a top-level normalizer applied to all retrievers. ```console GET my_index/_search @@ -105,7 +113,10 @@ GET my_index/_search { "retriever": { "knn": { - ... + "field": "title_vector", + "query_vector": [0.1, 0.2, 0.3], + "k": 10, + "num_candidates": 100 } }, "weight": 5 # KNN query weighted 5x @@ -113,15 +124,22 @@ GET my_index/_search { "retriever": { "standard": { - ... + "query": { + "match": { + "title": "elasticsearch" + } + } } }, "weight": 1.5 # BM25 query weighted 1.5x } - ] + ], + "normalizer": "minmax" } } } ``` +In this example, the `minmax` normalizer is applied to both the kNN retriever and the standard retriever. The top-level normalizer serves as a default that can be overridden by individual sub-retrievers. When using the multi-field query format, the top-level normalizer is applied to all generated inner retrievers. 
+ See also [this hybrid search example](retrievers-examples.md#retrievers-examples-linear-retriever). diff --git a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java index f98231a647470..b00af1713dcb6 100644 --- a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java +++ b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java @@ -835,4 +835,37 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws ); assertThat(numAsyncCalls.get(), equalTo(4)); } + + public void testMixedNormalizerInheritance() throws IOException { + client().prepareIndex(INDEX).setId("1").setSource("field1", "elasticsearch only", "field2", "no technology here").get(); + client().prepareIndex(INDEX).setId("2").setSource("field1", "no elasticsearch", "field2", "technology only").get(); + client().prepareIndex(INDEX).setId("3").setSource("field1", "search term", "field2", "no technology").get(); + refresh(INDEX); + + LinearRetrieverBuilder linearRetriever = new LinearRetrieverBuilder( + List.of( + CompoundRetrieverBuilder.RetrieverSource.from( + new StandardRetrieverBuilder(QueryBuilders.matchQuery("field1", "elasticsearch")) + ), + CompoundRetrieverBuilder.RetrieverSource.from( + new StandardRetrieverBuilder(QueryBuilders.matchQuery("field2", "technology")) + ), + CompoundRetrieverBuilder.RetrieverSource.from(new StandardRetrieverBuilder(QueryBuilders.matchQuery("field1", "search"))) + ), + null, + null, + MinMaxScoreNormalizer.INSTANCE, + 10, + new float[] { 1.0f, 1.0f, 1.0f }, + new ScoreNormalizer[] { null, L2ScoreNormalizer.INSTANCE, null } + ); + + assertThat(linearRetriever.getNormalizers()[0], equalTo(MinMaxScoreNormalizer.INSTANCE)); + 
assertThat(linearRetriever.getNormalizers()[1], equalTo(L2ScoreNormalizer.INSTANCE)); + assertThat(linearRetriever.getNormalizers()[2], equalTo(MinMaxScoreNormalizer.INSTANCE)); + + assertResponse(client().prepareSearch(INDEX).setSource(new SearchSourceBuilder().retriever(linearRetriever)), searchResponse -> { + assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(3L)); + }); + } } diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java index 326a2f276fa6a..a6a193d62de88 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java @@ -22,6 +22,7 @@ public class RankRRFFeatures implements FeatureSpecification { public static final NodeFeature LINEAR_RETRIEVER_SUPPORTED = new NodeFeature("linear_retriever_supported"); + public static final NodeFeature LINEAR_RETRIEVER_TOP_LEVEL_NORMALIZER = new NodeFeature("linear_retriever.top_level_normalizer"); @Override public Set getFeatures() { @@ -37,7 +38,8 @@ public Set getTestFeatures() { LINEAR_RETRIEVER_MINSCORE_FIX, LinearRetrieverBuilder.MULTI_FIELDS_QUERY_FORMAT_SUPPORT, RRFRetrieverBuilder.MULTI_FIELDS_QUERY_FORMAT_SUPPORT, - RRFRetrieverBuilder.WEIGHTED_SUPPORT + RRFRetrieverBuilder.WEIGHTED_SUPPORT, + LINEAR_RETRIEVER_TOP_LEVEL_NORMALIZER ); } } diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index c1a3f7d174487..d5196c8078a75 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -43,6 +43,7 @@ import static 
org.elasticsearch.action.ValidateActions.addValidationError; import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; import static org.elasticsearch.xpack.rank.RankRRFFeatures.LINEAR_RETRIEVER_SUPPORTED; +import static org.elasticsearch.xpack.rank.linear.LinearRetrieverComponent.DEFAULT_NORMALIZER; import static org.elasticsearch.xpack.rank.linear.LinearRetrieverComponent.DEFAULT_WEIGHT; /** @@ -92,7 +93,7 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder innerRetrievers) { private static ScoreNormalizer[] getDefaultNormalizers(List innerRetrievers) { int size = innerRetrievers != null ? innerRetrievers.size() : 0; ScoreNormalizer[] normalizers = new ScoreNormalizer[size]; - Arrays.fill(normalizers, IdentityScoreNormalizer.INSTANCE); + Arrays.fill(normalizers, DEFAULT_NORMALIZER); return normalizers; } + private static ScoreNormalizer resolveNormalizer(ScoreNormalizer componentNormalizer, ScoreNormalizer topLevelNormalizer) { + if (componentNormalizer != null) { + return componentNormalizer; + } + if (topLevelNormalizer != null) { + return topLevelNormalizer; + } + return DEFAULT_NORMALIZER; + } + public static LinearRetrieverBuilder fromXContent(XContentParser parser, RetrieverParserContext context) throws IOException { if (context.clusterSupportsFeature(LINEAR_RETRIEVER_SUPPORTED) == false) { throw new ParsingException(parser.getTokenLocation(), "unknown retriever [" + NAME + "]"); @@ -167,7 +178,10 @@ public LinearRetrieverBuilder( this.query = query; this.normalizer = normalizer; this.weights = weights; - this.normalizers = normalizers; + this.normalizers = new ScoreNormalizer[normalizers.length]; + for (int i = 0; i < normalizers.length; i++) { + this.normalizers[i] = resolveNormalizer(normalizers[i], normalizer); + } } public LinearRetrieverBuilder( @@ -221,19 +235,7 @@ public ActionRequestValidationException validate( ), validationException ); - } else if (innerRetrievers.isEmpty() == false && 
normalizer != null) { - validationException = addValidationError( - String.format( - Locale.ROOT, - "[%s] [%s] cannot be provided when [%s] is specified", - getName(), - NORMALIZER_FIELD.getPreferredName(), - RETRIEVERS_FIELD.getPreferredName() - ), - validationException - ); } - return validationException; } diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java index bb0d79d3fe488..963ba6883e7c9 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java @@ -38,7 +38,7 @@ public LinearRetrieverComponent(RetrieverBuilder retrieverBuilder, Float weight, assert retrieverBuilder != null; this.retriever = retrieverBuilder; this.weight = weight == null ? DEFAULT_WEIGHT : weight; - this.normalizer = normalizer == null ? 
DEFAULT_NORMALIZER : normalizer; + this.normalizer = normalizer; // Don't default to identity, allow null for top-level fallback if (this.weight < 0) { throw new IllegalArgumentException("[weight] must be non-negative"); } diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java index 74e18bf12fffc..99b67773e9d05 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java @@ -19,6 +19,7 @@ import org.elasticsearch.xcontent.NamedXContentRegistry; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.XContentType; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -27,10 +28,17 @@ import java.util.List; import static java.util.Collections.emptyList; +import static org.hamcrest.Matchers.instanceOf; public class LinearRetrieverBuilderParsingTests extends AbstractXContentTestCase { private static List xContentRegistryEntries; + private static final ScoreNormalizer[] SCORE_NORMALIZERS = new ScoreNormalizer[] { + null, + MinMaxScoreNormalizer.INSTANCE, + L2ScoreNormalizer.INSTANCE, + IdentityScoreNormalizer.INSTANCE }; + @BeforeClass public static void init() { xContentRegistryEntries = new SearchModule(Settings.EMPTY, emptyList()).getNamedXContents(); @@ -108,10 +116,46 @@ protected NamedXContentRegistry xContentRegistry() { } private static ScoreNormalizer randomScoreNormalizer() { - if (randomBoolean()) { - return MinMaxScoreNormalizer.INSTANCE; - } else { - return IdentityScoreNormalizer.INSTANCE; + return randomFrom(SCORE_NORMALIZERS); + } + + public void testTopLevelNormalizer() throws IOException { + String json = 
""" + { + "linear": { + "retrievers": [ + { + "retriever": { + "test": { + "value": "test1" + } + }, + "weight": 1.0, + "normalizer": "none" + }, + { + "retriever": { + "test": { + "value": "test2" + } + }, + "weight": 1.0, + "normalizer": "none" + } + ], + "normalizer": "minmax" + } + }"""; + + try (XContentParser parser = createParser(XContentType.JSON.xContent(), json)) { + LinearRetrieverBuilder builder = doParseInstance(parser); + // Per-retriever normalizers take precedence over the top-level one: + // the explicit per-retriever 'none' must override the top-level 'minmax' + ScoreNormalizer[] normalizers = builder.getNormalizers(); + assertEquals(2, normalizers.length); + for (ScoreNormalizer normalizer : normalizers) { + assertThat(normalizer, instanceOf(IdentityScoreNormalizer.class)); + } } } } diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java index c211440d10bae..8264ccdb22e38 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java @@ -22,6 +22,7 @@ import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.search.builder.PointInTimeBuilder; import org.elasticsearch.search.retriever.CompoundRetrieverBuilder; +import org.elasticsearch.search.retriever.KnnRetrieverBuilder; import org.elasticsearch.search.retriever.RetrieverBuilder; import org.elasticsearch.search.retriever.StandardRetrieverBuilder; import org.elasticsearch.test.ESTestCase; @@ -326,4 +327,102 @@ public int hashCode() { return Objects.hash(retriever, weight, normalizer); } } + + public void testTopLevelNormalizerWithRetrieversArray() { + StandardRetrieverBuilder standardRetriever = new StandardRetrieverBuilder(new 
MatchQueryBuilder("title", "elasticsearch")); + KnnRetrieverBuilder knnRetriever = new KnnRetrieverBuilder( + "title_vector", + new float[] { 0.1f, 0.2f, 0.3f }, + null, + 10, + 100, + null, + null + ); + + LinearRetrieverBuilder retriever = new LinearRetrieverBuilder( + List.of( + CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever), + CompoundRetrieverBuilder.RetrieverSource.from(knnRetriever) + ), + null, + null, + MinMaxScoreNormalizer.INSTANCE, + DEFAULT_RANK_WINDOW_SIZE, + new float[] { 1.0f, 2.0f }, + new ScoreNormalizer[] { null, null } + ); + + assertEquals(MinMaxScoreNormalizer.INSTANCE, retriever.getNormalizers()[0]); + assertEquals(MinMaxScoreNormalizer.INSTANCE, retriever.getNormalizers()[1]); + } + + public void testTopLevelNormalizerWithPerRetrieverOverrides() { + StandardRetrieverBuilder standardRetriever = new StandardRetrieverBuilder(new MatchQueryBuilder("title", "elasticsearch")); + KnnRetrieverBuilder knnRetriever = new KnnRetrieverBuilder( + "title_vector", + new float[] { 0.1f, 0.2f, 0.3f }, + null, + 10, + 100, + null, + null + ); + + LinearRetrieverBuilder retriever = new LinearRetrieverBuilder( + List.of( + CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever), + CompoundRetrieverBuilder.RetrieverSource.from(knnRetriever) + ), + null, + null, + MinMaxScoreNormalizer.INSTANCE, + DEFAULT_RANK_WINDOW_SIZE, + new float[] { 1.0f, 2.0f }, + new ScoreNormalizer[] { L2ScoreNormalizer.INSTANCE, null } + ); + + assertEquals(L2ScoreNormalizer.INSTANCE, retriever.getNormalizers()[0]); + assertEquals(MinMaxScoreNormalizer.INSTANCE, retriever.getNormalizers()[1]); + } + + public void testNullNormalizersWithoutTopLevelUsesIdentity() { + StandardRetrieverBuilder standardRetriever = new StandardRetrieverBuilder(new MatchQueryBuilder("title", "elasticsearch")); + + LinearRetrieverBuilder retriever = new LinearRetrieverBuilder( + List.of(CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever)), + null, + null, + null, + 
DEFAULT_RANK_WINDOW_SIZE, + new float[] { 1.0f }, + new ScoreNormalizer[] { null } + ); + + assertEquals(IdentityScoreNormalizer.INSTANCE, retriever.getNormalizers()[0]); + } + + public void testMixedNormalizerInheritanceScenario() { + StandardRetrieverBuilder standardRetriever1 = new StandardRetrieverBuilder(new MatchQueryBuilder("title", "elasticsearch")); + StandardRetrieverBuilder standardRetriever2 = new StandardRetrieverBuilder(new MatchQueryBuilder("content", "search")); + StandardRetrieverBuilder standardRetriever3 = new StandardRetrieverBuilder(new MatchQueryBuilder("tags", "java")); + + LinearRetrieverBuilder retriever = new LinearRetrieverBuilder( + List.of( + CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever1), + CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever2), + CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever3) + ), + null, + null, + L2ScoreNormalizer.INSTANCE, + DEFAULT_RANK_WINDOW_SIZE, + new float[] { 1.0f, 2.0f, 3.0f }, + new ScoreNormalizer[] { null, MinMaxScoreNormalizer.INSTANCE, IdentityScoreNormalizer.INSTANCE } + ); + assertEquals(L2ScoreNormalizer.INSTANCE, retriever.getNormalizers()[0]); + assertEquals(MinMaxScoreNormalizer.INSTANCE, retriever.getNormalizers()[1]); + assertEquals(IdentityScoreNormalizer.INSTANCE, retriever.getNormalizers()[2]); + } + } diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml index f62c7e4987046..7822f470d9794 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml @@ -1333,3 +1333,549 @@ setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "1" } - close_to: { hits.hits.0._score: { value: 1.0, error: 0.001} } + +--- +"linear 
retriever with top-level normalizer - minmax": + - requires: + cluster_features: [ "linear_retriever.top_level_normalizer" ] + reason: "Support for top-level normalizer in linear retriever" + - do: + search: + index: test + body: + retriever: + linear: + normalizer: minmax + retrievers: [ + { + retriever: { + standard: { + query: { + bool: { + should: [ + { constant_score: { filter: { term: { keyword: { value: "one" } } }, boost: 10.0 } }, + { constant_score: { filter: { term: { keyword: { value: "two" } } }, boost: 5.0 } } + ] + } + } + } + }, + weight: 1.0 + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [4], + k: 2, + num_candidates: 10 + } + }, + weight: 2.0 + } + ] + + - match: { hits.total.value: 4 } + - match: { hits.hits.0._id: "4" } + - close_to: { hits.hits.0._score: { value: 2.0, error: 0.01} } + - match: { hits.hits.1._id: "1" } + - close_to: { hits.hits.1._score: { value: 1.0, error: 0.01} } + - match: { hits.hits.2._id: "2" } + - close_to: { hits.hits.2._score: { value: 0.0, error: 0.01} } + +--- +"linear retriever with top-level normalizer - l2_norm": + - requires: + cluster_features: [ "linear_retriever.top_level_normalizer" ] + reason: "Support for top-level normalizer in linear retriever" + - do: + search: + index: test + body: + retriever: + linear: + normalizer: l2_norm + retrievers: [ + { + retriever: { + standard: { + query: { + bool: { + should: [ + { constant_score: { filter: { term: { keyword: { value: "one" } } }, boost: 3.0 } }, + { constant_score: { filter: { term: { keyword: { value: "two" } } }, boost: 4.0 } } + ] + } + } + } + }, + weight: 1.0 + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [4], + k: 2, + num_candidates: 10 + } + }, + weight: 2.0 + } + ] + + - match: { hits.total.value: 4 } + - match: { hits.hits.0._id: "4" } + - close_to: { hits.hits.0._score: { value: 1.79, error: 0.01} } + - match: { hits.hits.1._id: "3" } + - close_to: { hits.hits.1._score: { value: 0.89, error: 0.01} } + - 
match: { hits.hits.2._id: "2" } + - close_to: { hits.hits.2._score: { value: 0.8, error: 0.01} } + +--- +"linear retriever with top-level normalizer and per-retriever override": + - requires: + cluster_features: [ "linear_retriever.top_level_normalizer" ] + reason: "Support for top-level normalizer in linear retriever" + - do: + search: + index: test + body: + retriever: + linear: + normalizer: minmax + retrievers: [ + { + retriever: { + standard: { + query: { + bool: { + should: [ + { constant_score: { filter: { term: { keyword: { value: "one" } } }, boost: 10.0 } }, + { constant_score: { filter: { term: { keyword: { value: "two" } } }, boost: 5.0 } } + ] + } + } + } + }, + weight: 1.0, + normalizer: l2_norm + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [4], + k: 2, + num_candidates: 10 + } + }, + weight: 2.0 + } + ] + + - match: { hits.total.value: 4 } + - match: { hits.hits.0._id: "4" } + - close_to: { hits.hits.0._score: { value: 2.0, error: 0.01} } + - match: { hits.hits.1._id: "1" } + - close_to: { hits.hits.1._score: { value: 0.89, error: 0.01} } + - match: { hits.hits.2._id: "2" } + - close_to: { hits.hits.2._score: { value: 0.45, error: 0.01} } + +--- +"linear retriever with top-level normalizer - empty results": + - requires: + cluster_features: [ "linear_retriever.top_level_normalizer" ] + reason: "Support for top-level normalizer in linear retriever" + - do: + search: + index: test + body: + retriever: + linear: + normalizer: minmax + retrievers: [ + { + retriever: { + standard: { + query: { + term: { + keyword: { + value: "nonexistent" + } + } + } + } + }, + weight: 1.0 + } + ] + + - match: { hits.total.value: 0 } + - length: { hits.hits: 0 } + +--- +"Linear retriever with mixed normalizers - inheritance test": + - requires: + cluster_features: [ "linear_retriever.top_level_normalizer" ] + reason: "Support for top-level normalizer in linear retriever" + + - do: + search: + index: test + body: + retriever: + linear: + normalizer: 
l2_norm # Top-level normalizer + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 4.0 + } + } + } + }, + weight: 1.0 + # No normalizer specified - should inherit top-level L2 norm + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "two" + } + } + }, + boost: 8.0 + } + } + } + }, + weight: 1.0, + normalizer: minmax # Override with minmax + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "three" + } + } + }, + boost: 6.0 + } + } + } + }, + weight: 2.0 + # No normalizer specified - should inherit top-level L2 norm + } + ] + + - match: { hits.total.value: 3 } + # Complex mixed normalization scenario with different weights + # First retriever: L2 norm on [4.0] = 1.0, final = 1.0 * 1.0 = 1.0 + # Second retriever: minmax on [8.0] = 1.0, final = 1.0 * 1.0 = 1.0 + # Third retriever: L2 norm on [6.0] = 1.0, final = 1.0 * 2.0 = 2.0 + - match: { hits.hits.0._id: "3" } # Highest score due to weight=2.0 + - close_to: { hits.hits.0._score: { value: 2.0, error: 0.001} } + - match: { hits.hits.1._id: "1" } # Second score = 1.0 + - close_to: { hits.hits.1._score: { value: 1.0, error: 0.001} } + - match: { hits.hits.2._id: "2" } # Third score = 1.0 + - close_to: { hits.hits.2._score: { value: 1.0, error: 0.001} } + +--- +"Linear retriever with identity normalizer (no normalization)": + - requires: + cluster_features: [ "linear_retriever.top_level_normalizer" ] + reason: "Support for top-level normalizer in linear retriever" + - do: + search: + index: test + body: + retriever: + linear: + normalizer: none + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 7.5 + } + } + } + }, + weight: 2.0 + }, + { + retriever: { + standard: { + query: { + constant_score: { + 
filter: { + term: { + keyword: { + value: "four" + } + } + }, + boost: 3.2 + } + } + } + }, + weight: 1.0 + } + ] + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "4" } + - close_to: { hits.hits.0._score: { value: 15.0, error: 0.001} } + - close_to: { hits.hits.1._score: { value: 3.2, error: 0.001} } + +--- +"Linear retriever with all normalizers applied to same data": + - requires: + cluster_features: [ "linear_retriever.l2_norm" ] + reason: "Support for L2 normalization" + + - do: + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + } + } + }, + weight: 1.0, + normalizer: none + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + } + } + }, + weight: 1.0, + normalizer: l2_norm + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + } + } + }, + weight: 1.0, + normalizer: minmax + } + ] + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "1" } + - close_to: { hits.hits.0._score: { value: 12.0, error: 0.001} } + +--- +"Linear retriever normalization with zero scores": + - requires: + cluster_features: [ "linear_retriever.top_level_normalizer" ] + reason: "Support for top-level normalizer in linear retriever" + - do: + search: + index: test + body: + retriever: + linear: + normalizer: minmax + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + nonexistent_field: "nonexistent_value" + } + }, + boost: 5.0 + } + } + } + }, + weight: 1.0 + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + } + } + }, + 
weight: 2.0 + } + ] + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "1" } + - close_to: { hits.hits.0._score: { value: 2.0, error: 0.001} } + +--- +"Linear retriever error handling - invalid normalizer": + - requires: + cluster_features: [ "linear_retriever.top_level_normalizer" ] + reason: "Support for top-level normalizer in linear retriever" + - do: + catch: /Unknown normalizer \[invalid\]/ + search: + index: test + body: + retriever: + linear: + normalizer: invalid + retrievers: [ + { + retriever: { + standard: { + query: { + match_all: {} + } + } + }, + weight: 1.0 + } + ] + +--- +"Linear retriever with large score differences and L2 normalization": + - requires: + cluster_features: [ "linear_retriever.top_level_normalizer" ] + reason: "Support for top-level normalizer in linear retriever" + - do: + search: + index: test + body: + retriever: + linear: + normalizer: l2_norm + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 1000.0 + } + } + } + }, + weight: 1.0 + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "four" + } + } + }, + boost: 1.0 + } + } + } + }, + weight: 1.0 + } + ] + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "4" } + - close_to: { hits.hits.0._score: { value: 1.0, error: 0.001} } + - close_to: { hits.hits.1._score: { value: 1.0, error: 0.001} } diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml index 7ab0d727a7383..a90354a8d8976 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml +++ 
b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml @@ -410,25 +410,6 @@ setup: - contains: { error.root_cause.0.reason: "[linear] cannot combine [retrievers] and [query]" } ---- -"Cannot set top-level normalizer when using custom sub-retrievers": - - do: - catch: bad_request - search: - index: test-index - body: - retriever: - linear: - normalizer: "minmax" - retrievers: - - retriever: - standard: - query: - match: - keyword: "bar" - - - contains: { error.root_cause.0.reason: "[linear] [normalizer] cannot be provided when [retrievers] is specified" } - --- "Missing required params": - do: