From a10e92dc2ecf980819aa37bc22128faac7e89b60 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Tue, 20 May 2025 15:32:07 -0400 Subject: [PATCH 01/47] Add simplified inner retriever parser --- .../SimplifiedInnerRetrieverInfo.java | 11 ++++++++ .../SimplifiedInnerRetrieverParser.java | 27 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverInfo.java create mode 100644 x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverParser.java diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverInfo.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverInfo.java new file mode 100644 index 0000000000000..d8bbe9f506c50 --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverInfo.java @@ -0,0 +1,11 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.rank.simplified; + +public record SimplifiedInnerRetrieverInfo(String field, float weight, String query) { +} diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverParser.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverParser.java new file mode 100644 index 0000000000000..0272e48d4d48b --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverParser.java @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.rank.simplified; + +import org.elasticsearch.index.search.QueryParserHelper; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +public class SimplifiedInnerRetrieverParser { + private SimplifiedInnerRetrieverParser() {} + + public static List parse(List fieldsAndWeights, String query) { + Map parsedFieldsAndWeights = QueryParserHelper.parseFieldsAndWeights(fieldsAndWeights); + List simplifiedInnerRetrieverInfo = new ArrayList<>(parsedFieldsAndWeights.size()); + for (Map.Entry entry : parsedFieldsAndWeights.entrySet()) { + simplifiedInnerRetrieverInfo.add(new SimplifiedInnerRetrieverInfo(entry.getKey(), entry.getValue(), query)); + } + return simplifiedInnerRetrieverInfo; + } +} From 07e368f842103a5395a53fe909609d06d096b04b Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Tue, 20 May 2025 16:41:43 -0400 Subject: [PATCH 02/47] Integrate simplified handling into RRF --- .../xpack/rank/rrf/RRFRetrieverBuilder.java | 44 ++++++++++++++++--- .../SimplifiedInnerRetrieverInfo.java | 3 +- .../SimplifiedInnerRetrieverParser.java | 28 +++++++++++- 3 files changed, 66 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java index 26ca35ccff9f5..874f6c5f719a0 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java @@ -21,6 +21,8 @@ import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xpack.core.XPackPlugin; +import 
org.elasticsearch.xpack.rank.simplified.SimplifiedInnerRetrieverInfo; +import org.elasticsearch.xpack.rank.simplified.SimplifiedInnerRetrieverParser; import java.io.IOException; import java.util.ArrayList; @@ -29,7 +31,6 @@ import java.util.Map; import java.util.Objects; -import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; /** @@ -45,6 +46,8 @@ public final class RRFRetrieverBuilder extends CompoundRetrieverBuilder { - List childRetrievers = (List) args[0]; - List innerRetrievers = childRetrievers.stream().map(r -> new RetrieverSource(r, null)).toList(); - int rankWindowSize = args[1] == null ? RankBuilder.DEFAULT_RANK_WINDOW_SIZE : (int) args[1]; - int rankConstant = args[2] == null ? DEFAULT_RANK_CONSTANT : (int) args[2]; + List childRetrievers = args[0] == null ? List.of() : (List) args[0]; + List fields = args[1] == null ? List.of() : (List) args[1]; + String query = (String) args[2]; + if (childRetrievers.isEmpty() == false && fields.isEmpty() == false) { + throw new IllegalArgumentException( + "Cannot specify both [" + RETRIEVERS_FIELD.getPreferredName() + "] and [" + FIELDS_FIELD.getPreferredName() + "]" + ); + } + + List innerRetrievers; + if (childRetrievers.isEmpty() == false) { + innerRetrievers = childRetrievers.stream().map(r -> new RetrieverSource(r, null)).toList(); + } else if (fields.isEmpty() == false) { + if (query == null) { + throw new IllegalArgumentException( + "[" + QUERY_FIELD.getPreferredName() + "] must be specified when [" + FIELDS_FIELD.getPreferredName() + "] is used" + ); + } + + List simplifiedInnerRetrieverInfo = SimplifiedInnerRetrieverParser.parse(fields, query, w -> { + if (w != 1.0f) { + throw new IllegalArgumentException("[" + NAME + "] does not support per-field weights"); + } + }); + innerRetrievers = SimplifiedInnerRetrieverParser.convertToRetrievers(simplifiedInnerRetrieverInfo); + } else { + 
innerRetrievers = List.of(); + } + + int rankWindowSize = args[3] == null ? RankBuilder.DEFAULT_RANK_WINDOW_SIZE : (int) args[3]; + int rankConstant = args[4] == null ? DEFAULT_RANK_CONSTANT : (int) args[4]; return new RRFRetrieverBuilder(innerRetrievers, rankWindowSize, rankConstant); } ); static { - PARSER.declareObjectArray(constructorArg(), (p, c) -> { + PARSER.declareObjectArray(optionalConstructorArg(), (p, c) -> { p.nextToken(); String name = p.currentName(); RetrieverBuilder retrieverBuilder = p.namedObject(RetrieverBuilder.class, name, c); @@ -69,6 +99,8 @@ public final class RRFRetrieverBuilder extends CompoundRetrieverBuilder parse(List fieldsAndWeights, String query) { + return parse(fieldsAndWeights, query, null); + } + + public static List parse(List fieldsAndWeights, String query, Consumer weightValidator) { Map parsedFieldsAndWeights = QueryParserHelper.parseFieldsAndWeights(fieldsAndWeights); List simplifiedInnerRetrieverInfo = new ArrayList<>(parsedFieldsAndWeights.size()); for (Map.Entry entry : parsedFieldsAndWeights.entrySet()) { - simplifiedInnerRetrieverInfo.add(new SimplifiedInnerRetrieverInfo(entry.getKey(), entry.getValue(), query)); + String field = entry.getKey(); + Float weight = entry.getValue(); + + if (weightValidator != null) { + weightValidator.accept(weight); + } + simplifiedInnerRetrieverInfo.add(new SimplifiedInnerRetrieverInfo(field, weight, query)); } return simplifiedInnerRetrieverInfo; } + + public static List convertToRetrievers( + List simplifiedInnerRetrieverInfo + ) { + List retrievers = new ArrayList<>(simplifiedInnerRetrieverInfo.size()); + for (SimplifiedInnerRetrieverInfo info : simplifiedInnerRetrieverInfo) { + RetrieverBuilder retrieverBuilder = new StandardRetrieverBuilder(new MatchQueryBuilder(info.field(), info.query())); + retrievers.add(new CompoundRetrieverBuilder.RetrieverSource(retrieverBuilder, null)); + } + return retrievers; + } } From 3fc2567d1aea795a34193fe80dbee6f0d82db8c5 Mon Sep 17 00:00:00 2001 
From: Mike Pellegrini Date: Tue, 20 May 2025 16:54:55 -0400 Subject: [PATCH 03/47] Refactoring --- .../xpack/rank/rrf/RRFRetrieverBuilder.java | 14 +++-- .../SimplifiedInnerRetrieverInfo.java | 10 ---- .../SimplifiedInnerRetrieverParser.java | 53 ------------------- .../SimplifiedInnerRetrieverUtils.java | 30 +++++++++++ 4 files changed, 39 insertions(+), 68 deletions(-) delete mode 100644 x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverInfo.java delete mode 100644 x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverParser.java create mode 100644 x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java index 874f6c5f719a0..06b0277288d8e 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java @@ -10,6 +10,7 @@ import org.apache.lucene.search.ScoreDoc; import org.elasticsearch.common.util.Maps; import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.search.QueryParserHelper; import org.elasticsearch.license.LicenseUtils; import org.elasticsearch.search.rank.RankBuilder; import org.elasticsearch.search.rank.RankDoc; @@ -21,8 +22,7 @@ import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xpack.core.XPackPlugin; -import org.elasticsearch.xpack.rank.simplified.SimplifiedInnerRetrieverInfo; -import org.elasticsearch.xpack.rank.simplified.SimplifiedInnerRetrieverParser; +import org.elasticsearch.xpack.rank.simplified.SimplifiedInnerRetrieverUtils; import 
java.io.IOException; import java.util.ArrayList; @@ -74,12 +74,16 @@ public final class RRFRetrieverBuilder extends CompoundRetrieverBuilder simplifiedInnerRetrieverInfo = SimplifiedInnerRetrieverParser.parse(fields, query, w -> { + Map fieldsAndWeights = QueryParserHelper.parseFieldsAndWeights(fields); + fieldsAndWeights.values().forEach(w -> { if (w != 1.0f) { - throw new IllegalArgumentException("[" + NAME + "] does not support per-field weights"); + throw new IllegalArgumentException( + "[" + NAME + "] does not support per-field weights in [" + FIELDS_FIELD.getPreferredName() + "]" + ); } }); - innerRetrievers = SimplifiedInnerRetrieverParser.convertToRetrievers(simplifiedInnerRetrieverInfo); + + innerRetrievers = SimplifiedInnerRetrieverUtils.convertToRetrievers(fieldsAndWeights.keySet(), query); } else { innerRetrievers = List.of(); } diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverInfo.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverInfo.java deleted file mode 100644 index fd1762d6f5510..0000000000000 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverInfo.java +++ /dev/null @@ -1,10 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -package org.elasticsearch.xpack.rank.simplified; - -public record SimplifiedInnerRetrieverInfo(String field, float weight, String query) {} diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverParser.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverParser.java deleted file mode 100644 index 5c5e165860239..0000000000000 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverParser.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.rank.simplified; - -import org.elasticsearch.index.query.MatchQueryBuilder; -import org.elasticsearch.index.search.QueryParserHelper; -import org.elasticsearch.search.retriever.CompoundRetrieverBuilder; -import org.elasticsearch.search.retriever.RetrieverBuilder; -import org.elasticsearch.search.retriever.StandardRetrieverBuilder; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.function.Consumer; - -public class SimplifiedInnerRetrieverParser { - private SimplifiedInnerRetrieverParser() {} - - public static List parse(List fieldsAndWeights, String query) { - return parse(fieldsAndWeights, query, null); - } - - public static List parse(List fieldsAndWeights, String query, Consumer weightValidator) { - Map parsedFieldsAndWeights = QueryParserHelper.parseFieldsAndWeights(fieldsAndWeights); - List simplifiedInnerRetrieverInfo = new ArrayList<>(parsedFieldsAndWeights.size()); - for (Map.Entry entry : parsedFieldsAndWeights.entrySet()) { - String field = entry.getKey(); - Float weight = entry.getValue(); - - if (weightValidator != null) { - 
weightValidator.accept(weight); - } - simplifiedInnerRetrieverInfo.add(new SimplifiedInnerRetrieverInfo(field, weight, query)); - } - return simplifiedInnerRetrieverInfo; - } - - public static List convertToRetrievers( - List simplifiedInnerRetrieverInfo - ) { - List retrievers = new ArrayList<>(simplifiedInnerRetrieverInfo.size()); - for (SimplifiedInnerRetrieverInfo info : simplifiedInnerRetrieverInfo) { - RetrieverBuilder retrieverBuilder = new StandardRetrieverBuilder(new MatchQueryBuilder(info.field(), info.query())); - retrievers.add(new CompoundRetrieverBuilder.RetrieverSource(retrieverBuilder, null)); - } - return retrievers; - } -} diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java new file mode 100644 index 0000000000000..71b88a5c2d060 --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java @@ -0,0 +1,30 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.rank.simplified; + +import org.elasticsearch.index.query.MatchQueryBuilder; +import org.elasticsearch.search.retriever.CompoundRetrieverBuilder; +import org.elasticsearch.search.retriever.RetrieverBuilder; +import org.elasticsearch.search.retriever.StandardRetrieverBuilder; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +public class SimplifiedInnerRetrieverUtils { + private SimplifiedInnerRetrieverUtils() {} + + public static List convertToRetrievers(Collection fields, String query) { + List retrievers = new ArrayList<>(fields.size()); + for (String field : fields) { + RetrieverBuilder retrieverBuilder = new StandardRetrieverBuilder(new MatchQueryBuilder(field, query)); + retrievers.add(new CompoundRetrieverBuilder.RetrieverSource(retrieverBuilder, null)); + } + return retrievers; + } +} From 822edace13fd4c832592b1872e3b25a242957c13 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 21 May 2025 15:08:22 -0400 Subject: [PATCH 04/47] Added doRewrite method --- .../search/retriever/CompoundRetrieverBuilder.java | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java index 6cf0af0ef1541..175ce5c224450 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java @@ -99,6 +99,10 @@ public final RetrieverBuilder rewrite(QueryRewriteContext ctx) throws IOExceptio throw new IllegalStateException("PIT is required"); } + if (doRewrite(ctx)) { + return clone(innerRetrievers, preFilterQueryBuilders); + } + // Rewrite prefilters // We eagerly rewrite prefilters, because some of the innerRetrievers // could be compound too, so we want to propagate all the necessary filter information 
to them @@ -316,6 +320,16 @@ protected SearchSourceBuilder finalizeSourceBuilder(SearchSourceBuilder sourceBu return sourceBuilder; } + /** + * Perform any custom rewrite logic necessary + * + * @param ctx The query rewrite context + * @return true if the retriever changed, false otherwise + */ + protected boolean doRewrite(QueryRewriteContext ctx) { + return false; + } + private RankDoc[] getRankDocs(SearchResponse searchResponse) { int size = searchResponse.getHits().getHits().length; RankDoc[] docs = new RankDoc[size]; From 4b066361dd48f6c2e1570e579190e274c806928f Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 22 May 2025 10:51:26 -0400 Subject: [PATCH 05/47] Add fields and query instance vars --- .../xpack/rank/rrf/RRFRetrieverBuilderIT.java | 2 ++ .../xpack/rank/rrf/RRFRetrieverBuilder.java | 28 +++++++++++++++++-- .../rrf/RRFRetrieverBuilderParsingTests.java | 2 ++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderIT.java b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderIT.java index 6854fc436038f..0e829b1254a9e 100644 --- a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderIT.java +++ b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderIT.java @@ -54,6 +54,8 @@ import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.lessThanOrEqualTo; +// TODO: Add simplified format tests + @ESIntegTestCase.ClusterScope(minNumDataNodes = 3) public class RRFRetrieverBuilderIT extends ESIntegTestCase { diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java index 06b0277288d8e..176446c37294d 100644 --- 
a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java @@ -117,6 +117,8 @@ public static RRFRetrieverBuilder fromXContent(XContentParser parser, RetrieverP return PARSER.apply(parser, context); } + private List fields; + private String query; private final int rankConstant; public RRFRetrieverBuilder(int rankWindowSize, int rankConstant) { @@ -124,7 +126,13 @@ public RRFRetrieverBuilder(int rankWindowSize, int rankConstant) { } RRFRetrieverBuilder(List childRetrievers, int rankWindowSize, int rankConstant) { + this(childRetrievers, null, null, rankWindowSize, rankConstant); + } + + RRFRetrieverBuilder(List childRetrievers, List fields, String query, int rankWindowSize, int rankConstant) { super(childRetrievers, rankWindowSize); + this.fields = fields == null ? List.of() : List.copyOf(fields); + this.query = query; this.rankConstant = rankConstant; } @@ -135,7 +143,7 @@ public String getName() { @Override protected RRFRetrieverBuilder clone(List newRetrievers, List newPreFilterQueryBuilders) { - RRFRetrieverBuilder clone = new RRFRetrieverBuilder(newRetrievers, this.rankWindowSize, this.rankConstant); + RRFRetrieverBuilder clone = new RRFRetrieverBuilder(newRetrievers, this.fields, this.query, this.rankWindowSize, this.rankConstant); clone.preFilterQueryBuilders = newPreFilterQueryBuilders; clone.retrieverName = retrieverName; return clone; @@ -203,12 +211,15 @@ protected RRFRankDoc[] combineInnerRetrieverResults(List rankResults @Override public boolean doEquals(Object o) { RRFRetrieverBuilder that = (RRFRetrieverBuilder) o; - return super.doEquals(o) && rankConstant == that.rankConstant; + return super.doEquals(o) + && Objects.equals(fields, that.fields) + && Objects.equals(query, that.query) + && rankConstant == that.rankConstant; } @Override public int doHashCode() { - return Objects.hash(super.doHashCode(), 
rankConstant); + return Objects.hash(super.doHashCode(), fields, query, rankConstant); } @Override @@ -222,6 +233,17 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept builder.endArray(); } + if (fields.isEmpty() == false) { + builder.startArray(FIELDS_FIELD.getPreferredName()); + for (String field : fields) { + builder.value(field); + } + builder.endArray(); + } + if (query != null) { + builder.field(QUERY_FIELD.getPreferredName(), query); + } + builder.field(RANK_WINDOW_SIZE_FIELD.getPreferredName(), rankWindowSize); builder.field(RANK_CONSTANT_FIELD.getPreferredName(), rankConstant); } diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderParsingTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderParsingTests.java index cae758457a2ac..0cfeea2905680 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderParsingTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderParsingTests.java @@ -29,6 +29,8 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; +// TODO: Add simplified format tests? 
+ public class RRFRetrieverBuilderParsingTests extends AbstractXContentTestCase { /** From 22121675acde841f320c56b53071f4adcdf9d50b Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 22 May 2025 11:13:48 -0400 Subject: [PATCH 06/47] RRF retriever parsing updates --- .../xpack/rank/rrf/RRFRetrieverBuilder.java | 47 ++++--------------- 1 file changed, 10 insertions(+), 37 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java index 176446c37294d..2483412e52751 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java @@ -10,7 +10,6 @@ import org.apache.lucene.search.ScoreDoc; import org.elasticsearch.common.util.Maps; import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.search.QueryParserHelper; import org.elasticsearch.license.LicenseUtils; import org.elasticsearch.search.rank.RankBuilder; import org.elasticsearch.search.rank.RankDoc; @@ -22,7 +21,6 @@ import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xpack.core.XPackPlugin; -import org.elasticsearch.xpack.rank.simplified.SimplifiedInnerRetrieverUtils; import java.io.IOException; import java.util.ArrayList; @@ -55,42 +53,16 @@ public final class RRFRetrieverBuilder extends CompoundRetrieverBuilder { - List childRetrievers = args[0] == null ? List.of() : (List) args[0]; - List fields = args[1] == null ? 
List.of() : (List) args[1]; + List childRetrievers = (List) args[0]; + List fields = (List) args[1]; String query = (String) args[2]; - if (childRetrievers.isEmpty() == false && fields.isEmpty() == false) { - throw new IllegalArgumentException( - "Cannot specify both [" + RETRIEVERS_FIELD.getPreferredName() + "] and [" + FIELDS_FIELD.getPreferredName() + "]" - ); - } - - List innerRetrievers; - if (childRetrievers.isEmpty() == false) { - innerRetrievers = childRetrievers.stream().map(r -> new RetrieverSource(r, null)).toList(); - } else if (fields.isEmpty() == false) { - if (query == null) { - throw new IllegalArgumentException( - "[" + QUERY_FIELD.getPreferredName() + "] must be specified when [" + FIELDS_FIELD.getPreferredName() + "] is used" - ); - } - - Map fieldsAndWeights = QueryParserHelper.parseFieldsAndWeights(fields); - fieldsAndWeights.values().forEach(w -> { - if (w != 1.0f) { - throw new IllegalArgumentException( - "[" + NAME + "] does not support per-field weights in [" + FIELDS_FIELD.getPreferredName() + "]" - ); - } - }); - - innerRetrievers = SimplifiedInnerRetrieverUtils.convertToRetrievers(fieldsAndWeights.keySet(), query); - } else { - innerRetrievers = List.of(); - } - int rankWindowSize = args[3] == null ? RankBuilder.DEFAULT_RANK_WINDOW_SIZE : (int) args[3]; int rankConstant = args[4] == null ? DEFAULT_RANK_CONSTANT : (int) args[4]; - return new RRFRetrieverBuilder(innerRetrievers, rankWindowSize, rankConstant); + + List innerRetrievers = childRetrievers != null ? 
+ childRetrievers.stream().map(r -> new RetrieverSource(r, null)).toList() : + List.of(); + return new RRFRetrieverBuilder(innerRetrievers, fields, query, rankWindowSize, rankConstant); } ); @@ -122,7 +94,7 @@ public static RRFRetrieverBuilder fromXContent(XContentParser parser, RetrieverP private final int rankConstant; public RRFRetrieverBuilder(int rankWindowSize, int rankConstant) { - this(new ArrayList<>(), rankWindowSize, rankConstant); + this(null, rankWindowSize, rankConstant); } RRFRetrieverBuilder(List childRetrievers, int rankWindowSize, int rankConstant) { @@ -130,7 +102,8 @@ public RRFRetrieverBuilder(int rankWindowSize, int rankConstant) { } RRFRetrieverBuilder(List childRetrievers, List fields, String query, int rankWindowSize, int rankConstant) { - super(childRetrievers, rankWindowSize); + // Use a mutable list for childRetrievers so that we can add more child retrievers during rewrite + super(childRetrievers == null ? new ArrayList<>() : new ArrayList<>(childRetrievers), rankWindowSize); this.fields = fields == null ? 
List.of() : List.copyOf(fields); this.query = query; this.rankConstant = rankConstant; From 40598945768d3d11cc13971c66a5301e8ad629ac Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 22 May 2025 17:03:41 -0400 Subject: [PATCH 07/47] Added custom rewrite logic for RRF retriever --- .../xpack/rank/rrf/RRFRetrieverBuilder.java | 60 ++++++++++++- .../SimplifiedInnerRetrieverUtils.java | 89 +++++++++++++++++++ 2 files changed, 146 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java index 2483412e52751..b1ec4e13cd12a 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java @@ -8,8 +8,10 @@ package org.elasticsearch.xpack.rank.rrf; import org.apache.lucene.search.ScoreDoc; +import org.elasticsearch.action.ResolvedIndices; import org.elasticsearch.common.util.Maps; import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.license.LicenseUtils; import org.elasticsearch.search.rank.RankBuilder; import org.elasticsearch.search.rank.RankDoc; @@ -21,6 +23,7 @@ import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xpack.core.XPackPlugin; +import org.elasticsearch.xpack.rank.simplified.SimplifiedInnerRetrieverUtils; import java.io.IOException; import java.util.ArrayList; @@ -59,9 +62,9 @@ public final class RRFRetrieverBuilder extends CompoundRetrieverBuilder innerRetrievers = childRetrievers != null ? - childRetrievers.stream().map(r -> new RetrieverSource(r, null)).toList() : - List.of(); + List innerRetrievers = childRetrievers != null + ? 
childRetrievers.stream().map(r -> new RetrieverSource(r, null)).toList() + : List.of(); return new RRFRetrieverBuilder(innerRetrievers, fields, query, rankWindowSize, rankConstant); } ); @@ -179,6 +182,57 @@ protected RRFRankDoc[] combineInnerRetrieverResults(List rankResults return topResults; } + @Override + protected boolean doRewrite(QueryRewriteContext ctx) { + // TODO: Review error messages + // TODO: Rewrite this to handle when only query is supplied by the user + boolean modified = false; + + if (innerRetrievers.isEmpty() == false && fields.isEmpty() == false) { + throw new IllegalArgumentException( + "Cannot specify both [" + RETRIEVERS_FIELD.getPreferredName() + "] and [" + FIELDS_FIELD.getPreferredName() + "]" + ); + } + + ResolvedIndices resolvedIndices = ctx.getResolvedIndices(); + if (resolvedIndices != null && fields.isEmpty() == false) { + if (query == null || query.isEmpty()) { + throw new IllegalArgumentException( + "Cannot specify [" + FIELDS_FIELD.getPreferredName() + "] without [" + QUERY_FIELD.getPreferredName() + "]" + ); + } + + var localIndicesMetadata = resolvedIndices.getConcreteLocalIndicesMetadata(); + if (localIndicesMetadata.size() > 1) { + throw new IllegalArgumentException( + "Cannot specify [" + FIELDS_FIELD.getPreferredName() + "] when querying multiple indices" + ); + } + + List fieldsInnerRetrievers = SimplifiedInnerRetrieverUtils.generateInnerRetrievers( + fields, + query, + localIndicesMetadata.values(), + (r, w) -> new RRFRetrieverBuilder(r, rankWindowSize, rankConstant), + w -> { + if (w != 1.0f) { + throw new IllegalArgumentException( + "[" + NAME + "] does not support per-field weights in [" + FIELDS_FIELD.getPreferredName() + "]" + ); + } + } + ); + fieldsInnerRetrievers.forEach(this::addChild); + + // Clear fields and query to indicate that this stage of the rewrite process is complete + fields = List.of(); + query = null; + modified = true; + } + + return modified; + } + // ---- FOR TESTING XCONTENT PARSING ---- 
@Override diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java index 71b88a5c2d060..9645b6adf32b7 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java @@ -7,7 +7,13 @@ package org.elasticsearch.xpack.rank.simplified; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.Nullable; import org.elasticsearch.index.query.MatchQueryBuilder; +import org.elasticsearch.index.query.MultiMatchQueryBuilder; +import org.elasticsearch.index.search.QueryParserHelper; import org.elasticsearch.search.retriever.CompoundRetrieverBuilder; import org.elasticsearch.search.retriever.RetrieverBuilder; import org.elasticsearch.search.retriever.StandardRetrieverBuilder; @@ -15,8 +21,15 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Map; +import java.util.function.BiFunction; +import java.util.function.Consumer; + +import static org.elasticsearch.index.IndexSettings.DEFAULT_FIELD_SETTING; public class SimplifiedInnerRetrieverUtils { + private static final String ALL_FIELDS_WILDCARD = "*"; + private SimplifiedInnerRetrieverUtils() {} public static List convertToRetrievers(Collection fields, String query) { @@ -27,4 +40,80 @@ public static List convertToRetrievers } return retrievers; } + + public static List generateInnerRetrievers( + List fieldsAndWeights, + String query, + Collection indicesMetadata, + BiFunction, List, CompoundRetrieverBuilder> innerNormalizerGenerator, + @Nullable Consumer weightValidator + ) { + 
Map parsedFieldsAndWeights = QueryParserHelper.parseFieldsAndWeights(fieldsAndWeights); + if (weightValidator != null) { + parsedFieldsAndWeights.values().forEach(weightValidator); + } + + // We expect up to 2 inner retrievers to be generated for each index queried + List innerRetrievers = new ArrayList<>(indicesMetadata.size() * 2); + for (IndexMetadata indexMetadata : indicesMetadata) { + innerRetrievers.addAll( + generateInnerRetrieversForIndex(parsedFieldsAndWeights, query, indexMetadata, innerNormalizerGenerator, weightValidator) + ); + } + return innerRetrievers; + } + + private static List generateInnerRetrieversForIndex( + Map parsedFieldsAndWeights, + String query, + IndexMetadata indexMetadata, + BiFunction, List, CompoundRetrieverBuilder> innerNormalizerGenerator, + @Nullable Consumer weightValidator + ) { + Map fieldsAndWeightsToQuery = parsedFieldsAndWeights; + if (fieldsAndWeightsToQuery.isEmpty()) { + Settings settings = indexMetadata.getSettings(); + List defaultFields = settings.getAsList(DEFAULT_FIELD_SETTING.getKey(), DEFAULT_FIELD_SETTING.getDefault(settings)); + fieldsAndWeightsToQuery = QueryParserHelper.parseFieldsAndWeights(defaultFields); + if (weightValidator != null) { + fieldsAndWeightsToQuery.values().forEach(weightValidator); + } + } + + if (fieldsAndWeightsToQuery.size() == 1 && fieldsAndWeightsToQuery.get(ALL_FIELDS_WILDCARD) != null) { + // TODO: Implement support for this case + throw new UnsupportedOperationException("All fields wildcard is not supported yet"); + } + + // TODO: Should we use a separate match query for each non-inference field, perform secondary normalization, + // and apply the boost after secondary normalization, like is done for inference fields? 
+ List inferenceFieldRetrievers = new ArrayList<>(); + List inferenceFieldWeights = new ArrayList<>(); + MultiMatchQueryBuilder nonInferenceFieldQueryBuilder = new MultiMatchQueryBuilder(query); + + Map indexInferenceFields = indexMetadata.getInferenceFields(); + for (Map.Entry entry : fieldsAndWeightsToQuery.entrySet()) { + String field = entry.getKey(); + Float weight = entry.getValue(); + + InferenceFieldMetadata inferenceFieldMetadata = indexInferenceFields.get(field); + if (inferenceFieldMetadata != null) { + RetrieverBuilder retrieverBuilder = new StandardRetrieverBuilder(new MatchQueryBuilder(field, query)); + inferenceFieldRetrievers.add(new CompoundRetrieverBuilder.RetrieverSource(retrieverBuilder, null)); + inferenceFieldWeights.add(weight); + } else { + nonInferenceFieldQueryBuilder.field(field, weight); + } + } + + // TODO: Set index pre-filters on returned retrievers when we want to implement multi-index support + List innerRetrievers = new ArrayList<>(2); + if (nonInferenceFieldQueryBuilder.fields().isEmpty() == false) { + innerRetrievers.add(new StandardRetrieverBuilder(nonInferenceFieldQueryBuilder)); + } + if (inferenceFieldRetrievers.isEmpty() == false) { + innerRetrievers.add(innerNormalizerGenerator.apply(inferenceFieldRetrievers, inferenceFieldWeights)); + } + return innerRetrievers; + } } From 1c8dab3a29ef291b4d722b2d5f8c22a7eb36a26d Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 22 May 2025 17:11:14 -0400 Subject: [PATCH 08/47] Remove unused method --- .../rank/simplified/SimplifiedInnerRetrieverUtils.java | 9 --------- 1 file changed, 9 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java index 9645b6adf32b7..f73541b6493fb 100644 --- 
a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java @@ -32,15 +32,6 @@ public class SimplifiedInnerRetrieverUtils { private SimplifiedInnerRetrieverUtils() {} - public static List convertToRetrievers(Collection fields, String query) { - List retrievers = new ArrayList<>(fields.size()); - for (String field : fields) { - RetrieverBuilder retrieverBuilder = new StandardRetrieverBuilder(new MatchQueryBuilder(field, query)); - retrievers.add(new CompoundRetrieverBuilder.RetrieverSource(retrieverBuilder, null)); - } - return retrievers; - } - public static List generateInnerRetrievers( List fieldsAndWeights, String query, From a217e528f69ebf44467223b6e689caf109c50399 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 23 May 2025 13:26:27 -0400 Subject: [PATCH 09/47] Use a most_fields multi-match query --- .../xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java index f73541b6493fb..ba42f2361862e 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java @@ -80,7 +80,8 @@ private static List generateInnerRetrieversForIndex( // and apply the boost after secondary normalization, like is done for inference fields? 
List inferenceFieldRetrievers = new ArrayList<>(); List inferenceFieldWeights = new ArrayList<>(); - MultiMatchQueryBuilder nonInferenceFieldQueryBuilder = new MultiMatchQueryBuilder(query); + MultiMatchQueryBuilder nonInferenceFieldQueryBuilder = new MultiMatchQueryBuilder(query) + .type(MultiMatchQueryBuilder.Type.MOST_FIELDS); Map indexInferenceFields = indexMetadata.getInferenceFields(); for (Map.Entry entry : fieldsAndWeightsToQuery.entrySet()) { From 11d14455cb5aaa92ca4a3c48e5f5772b7b0af77e Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 23 May 2025 13:34:26 -0400 Subject: [PATCH 10/47] Added TODO --- .../xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java index ba42f2361862e..8ed290863a915 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java @@ -88,6 +88,7 @@ private static List generateInnerRetrieversForIndex( String field = entry.getKey(); Float weight = entry.getValue(); + // TODO: Support glob matches for inference fields InferenceFieldMetadata inferenceFieldMetadata = indexInferenceFields.get(field); if (inferenceFieldMetadata != null) { RetrieverBuilder retrieverBuilder = new StandardRetrieverBuilder(new MatchQueryBuilder(field, query)); From 7d3e78e4272370fee3291db13e87b40152a4ab27 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 23 May 2025 16:03:25 -0400 Subject: [PATCH 11/47] Added support for wildcards --- .../xpack/rank/rrf/RRFRetrieverBuilder.java | 7 +- .../SimplifiedInnerRetrieverUtils.java | 69 +++++++++++-------- 2 files changed, 48 insertions(+), 28 
deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java index b1ec4e13cd12a..2d8e642d949b0 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java @@ -213,7 +213,12 @@ protected boolean doRewrite(QueryRewriteContext ctx) { fields, query, localIndicesMetadata.values(), - (r, w) -> new RRFRetrieverBuilder(r, rankWindowSize, rankConstant), + r -> { + List retrievers = r.stream() + .map(SimplifiedInnerRetrieverUtils.WeightedRetrieverSource::retrieverSource) + .toList(); + return new RRFRetrieverBuilder(retrievers, rankWindowSize, rankConstant); + }, w -> { if (w != 1.0f) { throw new IllegalArgumentException( diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java index 8ed290863a915..fc31ea6a6ff2a 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java @@ -9,6 +9,7 @@ import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; +import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Nullable; import org.elasticsearch.index.query.MatchQueryBuilder; @@ -20,23 +21,24 @@ import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.function.BiFunction; import java.util.function.Consumer; 
+import java.util.function.Function; import static org.elasticsearch.index.IndexSettings.DEFAULT_FIELD_SETTING; public class SimplifiedInnerRetrieverUtils { - private static final String ALL_FIELDS_WILDCARD = "*"; - private SimplifiedInnerRetrieverUtils() {} + public record WeightedRetrieverSource(CompoundRetrieverBuilder.RetrieverSource retrieverSource, float weight) {} + public static List generateInnerRetrievers( List fieldsAndWeights, String query, Collection indicesMetadata, - BiFunction, List, CompoundRetrieverBuilder> innerNormalizerGenerator, + Function, CompoundRetrieverBuilder> innerNormalizerGenerator, @Nullable Consumer weightValidator ) { Map parsedFieldsAndWeights = QueryParserHelper.parseFieldsAndWeights(fieldsAndWeights); @@ -58,7 +60,7 @@ private static List generateInnerRetrieversForIndex( Map parsedFieldsAndWeights, String query, IndexMetadata indexMetadata, - BiFunction, List, CompoundRetrieverBuilder> innerNormalizerGenerator, + Function, CompoundRetrieverBuilder> innerNormalizerGenerator, @Nullable Consumer weightValidator ) { Map fieldsAndWeightsToQuery = parsedFieldsAndWeights; @@ -71,42 +73,55 @@ private static List generateInnerRetrieversForIndex( } } - if (fieldsAndWeightsToQuery.size() == 1 && fieldsAndWeightsToQuery.get(ALL_FIELDS_WILDCARD) != null) { - // TODO: Implement support for this case - throw new UnsupportedOperationException("All fields wildcard is not supported yet"); - } - - // TODO: Should we use a separate match query for each non-inference field, perform secondary normalization, - // and apply the boost after secondary normalization, like is done for inference fields? 
- List inferenceFieldRetrievers = new ArrayList<>(); - List inferenceFieldWeights = new ArrayList<>(); - MultiMatchQueryBuilder nonInferenceFieldQueryBuilder = new MultiMatchQueryBuilder(query) - .type(MultiMatchQueryBuilder.Type.MOST_FIELDS); - + Map inferenceFields = new HashMap<>(); Map indexInferenceFields = indexMetadata.getInferenceFields(); for (Map.Entry entry : fieldsAndWeightsToQuery.entrySet()) { String field = entry.getKey(); Float weight = entry.getValue(); - // TODO: Support glob matches for inference fields - InferenceFieldMetadata inferenceFieldMetadata = indexInferenceFields.get(field); - if (inferenceFieldMetadata != null) { - RetrieverBuilder retrieverBuilder = new StandardRetrieverBuilder(new MatchQueryBuilder(field, query)); - inferenceFieldRetrievers.add(new CompoundRetrieverBuilder.RetrieverSource(retrieverBuilder, null)); - inferenceFieldWeights.add(weight); + if (Regex.isMatchAllPattern(field)) { + indexInferenceFields.keySet().forEach(f -> addToInferenceFieldsMap(inferenceFields, f, weight)); + } else if (Regex.isSimpleMatchPattern(field)) { + indexInferenceFields.keySet() + .stream() + .filter(f -> Regex.simpleMatch(field, f)) + .forEach(f -> addToInferenceFieldsMap(inferenceFields, f, weight)); } else { - nonInferenceFieldQueryBuilder.field(field, weight); + // No wildcards in field name + if (indexInferenceFields.containsKey(field)) { + addToInferenceFieldsMap(inferenceFields, field, weight); + } } } + Map nonInferenceFields = new HashMap<>(fieldsAndWeightsToQuery); + nonInferenceFields.keySet().removeAll(inferenceFields.keySet()); // Remove all inference fields from non-inference fields map + // TODO: Set index pre-filters on returned retrievers when we want to implement multi-index support + // TODO: Should we use a separate match query for each non-inference field, perform secondary normalization, + // and apply the boost after secondary normalization, like is done for inference fields? 
List innerRetrievers = new ArrayList<>(2); - if (nonInferenceFieldQueryBuilder.fields().isEmpty() == false) { + if (nonInferenceFields.isEmpty() == false) { + MultiMatchQueryBuilder nonInferenceFieldQueryBuilder = new MultiMatchQueryBuilder(query).type( + MultiMatchQueryBuilder.Type.MOST_FIELDS + ).fields(nonInferenceFields); innerRetrievers.add(new StandardRetrieverBuilder(nonInferenceFieldQueryBuilder)); } - if (inferenceFieldRetrievers.isEmpty() == false) { - innerRetrievers.add(innerNormalizerGenerator.apply(inferenceFieldRetrievers, inferenceFieldWeights)); + if (inferenceFields.isEmpty() == false) { + List inferenceFieldRetrievers = new ArrayList<>(inferenceFields.size()); + inferenceFields.forEach((f, w) -> { + RetrieverBuilder retrieverBuilder = new StandardRetrieverBuilder(new MatchQueryBuilder(f, query)); + inferenceFieldRetrievers.add( + new WeightedRetrieverSource(new CompoundRetrieverBuilder.RetrieverSource(retrieverBuilder, null), w) + ); + }); + + innerRetrievers.add(innerNormalizerGenerator.apply(inferenceFieldRetrievers)); } return innerRetrievers; } + + private static void addToInferenceFieldsMap(Map inferenceFields, String field, Float weight) { + inferenceFields.compute(field, (k, v) -> v == null ? 
weight : v * weight); + } } From c6bf0139010db78e02cd9c8d0c3240d3daf2df98 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 23 May 2025 16:50:24 -0400 Subject: [PATCH 12/47] Handle when only query is supplied by the user --- .../xpack/rank/rrf/RRFRetrieverBuilder.java | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java index 2d8e642d949b0..738a8a87a2562 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java @@ -184,28 +184,27 @@ protected RRFRankDoc[] combineInnerRetrieverResults(List rankResults @Override protected boolean doRewrite(QueryRewriteContext ctx) { - // TODO: Review error messages - // TODO: Rewrite this to handle when only query is supplied by the user boolean modified = false; - if (innerRetrievers.isEmpty() == false && fields.isEmpty() == false) { - throw new IllegalArgumentException( - "Cannot specify both [" + RETRIEVERS_FIELD.getPreferredName() + "] and [" + FIELDS_FIELD.getPreferredName() + "]" - ); - } - ResolvedIndices resolvedIndices = ctx.getResolvedIndices(); - if (resolvedIndices != null && fields.isEmpty() == false) { + if (resolvedIndices != null && (query != null || fields.isEmpty() == false)) { + // Using the simplified query format if (query == null || query.isEmpty()) { throw new IllegalArgumentException( - "Cannot specify [" + FIELDS_FIELD.getPreferredName() + "] without [" + QUERY_FIELD.getPreferredName() + "]" + "[" + NAME + "] [" + QUERY_FIELD.getPreferredName() + "] must be provided when using the simplified query format" + ); + } + + if (innerRetrievers.isEmpty() == false) { + throw new IllegalArgumentException( + "[" + NAME + "] does not support 
[" + RETRIEVERS_FIELD.getPreferredName() + "] and the simplified query format combined" ); } var localIndicesMetadata = resolvedIndices.getConcreteLocalIndicesMetadata(); if (localIndicesMetadata.size() > 1) { throw new IllegalArgumentException( - "Cannot specify [" + FIELDS_FIELD.getPreferredName() + "] when querying multiple indices" + "[" + NAME + "] does not support the simplified query format when querying multiple indices" ); } From 510fe548c545289391c95f22cfb5fb4a539ffb56 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Tue, 27 May 2025 16:41:57 -0400 Subject: [PATCH 13/47] Added fields, query, and normalizer params to linear retriever --- .../rank/linear/LinearRetrieverBuilder.java | 80 ++++++++++++++++--- 1 file changed, 67 insertions(+), 13 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 436096523a1ec..2fa8936655f9e 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -31,7 +31,6 @@ import java.util.List; import java.util.Map; -import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; import static org.elasticsearch.xpack.rank.RankRRFFeatures.LINEAR_RETRIEVER_SUPPORTED; import static org.elasticsearch.xpack.rank.linear.LinearRetrieverComponent.DEFAULT_WEIGHT; @@ -49,11 +48,17 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder fields; + private String query; @SuppressWarnings("unchecked") static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( @@ -61,34 +66,43 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder { 
List retrieverComponents = (List) args[0]; - int rankWindowSize = args[1] == null ? RankBuilder.DEFAULT_RANK_WINDOW_SIZE : (int) args[1]; - List innerRetrievers = new ArrayList<>(); + List fields = (List) args[1]; + String query = (String) args[2]; + String normalizer = (String) args[3]; + int rankWindowSize = args[4] == null ? RankBuilder.DEFAULT_RANK_WINDOW_SIZE : (int) args[4]; + + int index = 0; float[] weights = new float[retrieverComponents.size()]; ScoreNormalizer[] normalizers = new ScoreNormalizer[retrieverComponents.size()]; - int index = 0; + List innerRetrievers = new ArrayList<>(); for (LinearRetrieverComponent component : retrieverComponents) { innerRetrievers.add(new RetrieverSource(component.retriever, null)); weights[index] = component.weight; normalizers[index] = component.normalizer; index++; } - return new LinearRetrieverBuilder(innerRetrievers, rankWindowSize, weights, normalizers); + return new LinearRetrieverBuilder(innerRetrievers, fields, query, normalizer, rankWindowSize, weights, normalizers); } ); static { - PARSER.declareObjectArray(constructorArg(), LinearRetrieverComponent::fromXContent, RETRIEVERS_FIELD); + PARSER.declareObjectArray(optionalConstructorArg(), LinearRetrieverComponent::fromXContent, RETRIEVERS_FIELD); + PARSER.declareStringArray(optionalConstructorArg(), FIELDS_FIELD); + PARSER.declareString(optionalConstructorArg(), QUERY_FIELD); + PARSER.declareString(optionalConstructorArg(), NORMALIZER_FIELD); PARSER.declareInt(optionalConstructorArg(), RANK_WINDOW_SIZE_FIELD); RetrieverBuilder.declareBaseParserFields(PARSER); } - private static float[] getDefaultWeight(int size) { + private static float[] getDefaultWeight(List innerRetrievers) { + int size = innerRetrievers != null ? 
innerRetrievers.size() : 0; float[] weights = new float[size]; Arrays.fill(weights, DEFAULT_WEIGHT); return weights; } - private static ScoreNormalizer[] getDefaultNormalizers(int size) { + private static ScoreNormalizer[] getDefaultNormalizers(List innerRetrievers) { + int size = innerRetrievers != null ? innerRetrievers.size() : 0; ScoreNormalizer[] normalizers = new ScoreNormalizer[size]; Arrays.fill(normalizers, IdentityScoreNormalizer.INSTANCE); return normalizers; @@ -105,7 +119,7 @@ public static LinearRetrieverBuilder fromXContent(XContentParser parser, Retriev } LinearRetrieverBuilder(List innerRetrievers, int rankWindowSize) { - this(innerRetrievers, rankWindowSize, getDefaultWeight(innerRetrievers.size()), getDefaultNormalizers(innerRetrievers.size())); + this(innerRetrievers, null, null, null, rankWindowSize, getDefaultWeight(innerRetrievers), getDefaultNormalizers(innerRetrievers)); } public LinearRetrieverBuilder( @@ -114,20 +128,45 @@ public LinearRetrieverBuilder( float[] weights, ScoreNormalizer[] normalizers ) { - super(innerRetrievers, rankWindowSize); - if (weights.length != innerRetrievers.size()) { + this(innerRetrievers, null, null, null, rankWindowSize, weights, normalizers); + } + + public LinearRetrieverBuilder( + List innerRetrievers, + List fields, + String query, + String normalizer, + int rankWindowSize, + float[] weights, + ScoreNormalizer[] normalizers + ) { + // Use a mutable list for innerRetrievers so that we can add more child retrievers during rewrite + super(innerRetrievers == null ? 
new ArrayList<>() : new ArrayList<>(innerRetrievers), rankWindowSize); + if (weights.length != this.innerRetrievers.size()) { throw new IllegalArgumentException("The number of weights must match the number of inner retrievers"); } - if (normalizers.length != innerRetrievers.size()) { + if (normalizers.length != this.innerRetrievers.size()) { throw new IllegalArgumentException("The number of normalizers must match the number of inner retrievers"); } + + this.fields = fields == null ? List.of() : List.copyOf(fields); + this.query = query; + this.normalizer = normalizer; this.weights = weights; this.normalizers = normalizers; } @Override protected LinearRetrieverBuilder clone(List newChildRetrievers, List newPreFilterQueryBuilders) { - LinearRetrieverBuilder clone = new LinearRetrieverBuilder(newChildRetrievers, rankWindowSize, weights, normalizers); + LinearRetrieverBuilder clone = new LinearRetrieverBuilder( + newChildRetrievers, + fields, + query, + normalizer, + rankWindowSize, + weights, + normalizers + ); clone.preFilterQueryBuilders = newPreFilterQueryBuilders; clone.retrieverName = retrieverName; return clone; @@ -203,6 +242,21 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept } builder.endArray(); } + + if (fields.isEmpty() == false) { + builder.startArray(FIELDS_FIELD.getPreferredName()); + for (String field : fields) { + builder.value(field); + } + builder.endArray(); + } + if (query != null) { + builder.field(QUERY_FIELD.getPreferredName(), query); + } + if (normalizer != null) { + builder.field(NORMALIZER_FIELD.getPreferredName(), normalizer); + } + builder.field(RANK_WINDOW_SIZE_FIELD.getPreferredName(), rankWindowSize); } } From 187758f14a28baca748c7c77b6eb1bfb6eb84e84 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 28 May 2025 14:21:03 -0400 Subject: [PATCH 14/47] Added custom rewrite logic for linear retriever --- .../rank/linear/LinearRetrieverBuilder.java | 73 +++++++++++++++++++ 1 file changed, 73 
insertions(+) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 2fa8936655f9e..8a378d25b9721 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -8,9 +8,11 @@ package org.elasticsearch.xpack.rank.linear; import org.apache.lucene.search.ScoreDoc; +import org.elasticsearch.action.ResolvedIndices; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.util.Maps; import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.license.LicenseUtils; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.rank.RankBuilder; @@ -24,6 +26,7 @@ import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xpack.core.XPackPlugin; import org.elasticsearch.xpack.rank.rrf.RRFRankPlugin; +import org.elasticsearch.xpack.rank.simplified.SimplifiedInnerRetrieverUtils; import java.io.IOException; import java.util.ArrayList; @@ -223,6 +226,76 @@ protected RankDoc[] combineInnerRetrieverResults(List rankResults, b return topResults; } + @Override + protected boolean doRewrite(QueryRewriteContext ctx) { + boolean modified = false; + + ResolvedIndices resolvedIndices = ctx.getResolvedIndices(); + if (resolvedIndices != null && (query != null || fields.isEmpty() == false)) { + // Using the simplified query format + if (query == null || query.isEmpty()) { + throw new IllegalArgumentException( + "[" + NAME + "] [" + QUERY_FIELD.getPreferredName() + "] must be provided when using the simplified query format" + ); + } + + if (normalizer == null || normalizer.isEmpty()) { + throw new IllegalArgumentException( + 
"[" + NAME + "] [" + NORMALIZER_FIELD.getPreferredName() + "] must be provided when using the simplified query format" + ); + } + ScoreNormalizer fieldsNormalizer = ScoreNormalizer.valueOf(normalizer); + + if (innerRetrievers.isEmpty() == false) { + throw new IllegalArgumentException( + "[" + NAME + "] does not support [" + RETRIEVERS_FIELD.getPreferredName() + "] and the simplified query format combined" + ); + } + + var localIndicesMetadata = resolvedIndices.getConcreteLocalIndicesMetadata(); + if (localIndicesMetadata.size() > 1) { + throw new IllegalArgumentException( + "[" + NAME + "] does not support the simplified query format when querying multiple indices" + ); + } + + List fieldsInnerRetrievers = SimplifiedInnerRetrieverUtils.generateInnerRetrievers( + fields, + query, + localIndicesMetadata.values(), + r -> { + List retrievers = new ArrayList<>(r.size()); + float[] weights = new float[r.size()]; + ScoreNormalizer[] normalizers = new ScoreNormalizer[r.size()]; + + int index = 0; + for (var weightedRetriever : r) { + retrievers.add(weightedRetriever.retrieverSource()); + weights[index] = weightedRetriever.weight(); + normalizers[index] = fieldsNormalizer; + index++; + } + + return new LinearRetrieverBuilder(retrievers, rankWindowSize, weights, normalizers); + }, + w -> { + if (w < 0) { + throw new IllegalArgumentException("[" + NAME + "] per-field weights must be non-negative"); + } + } + ); + fieldsInnerRetrievers.forEach(this::addChild); + // TODO: Set weight and normalizer for each child retriever added + + // Clear fields and query to indicate that this stage of the rewrite process is complete + fields = List.of(); + query = null; + modified = true; + } + + return modified; + } + @Override public String getName() { return NAME; From 51a3dbe3bc5eee06ba57a54811730183478d579f Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 28 May 2025 14:56:56 -0400 Subject: [PATCH 15/47] Updated doRewrite method to return rewritten retriever --- 
.../retriever/CompoundRetrieverBuilder.java | 18 +++++++---- .../rank/linear/LinearRetrieverBuilder.java | 31 ++++++++++--------- .../xpack/rank/rrf/RRFRetrieverBuilder.java | 20 +++++------- 3 files changed, 37 insertions(+), 32 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java index 175ce5c224450..46061b03c4272 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java @@ -65,7 +65,7 @@ protected CompoundRetrieverBuilder(List innerRetrievers, int ra @SuppressWarnings("unchecked") public T addChild(RetrieverBuilder retrieverBuilder) { - innerRetrievers.add(new RetrieverSource(retrieverBuilder, null)); + innerRetrievers.add(convertToRetrieverSource(retrieverBuilder)); return (T) this; } @@ -99,8 +99,9 @@ public final RetrieverBuilder rewrite(QueryRewriteContext ctx) throws IOExceptio throw new IllegalStateException("PIT is required"); } - if (doRewrite(ctx)) { - return clone(innerRetrievers, preFilterQueryBuilders); + T rewritten = doRewrite(ctx); + if (rewritten != this) { + return rewritten; } // Rewrite prefilters @@ -324,10 +325,15 @@ protected SearchSourceBuilder finalizeSourceBuilder(SearchSourceBuilder sourceBu * Perform any custom rewrite logic necessary * * @param ctx The query rewrite context - * @return true if the retriever changed, false otherwise + * @return T the rewritten retriever */ - protected boolean doRewrite(QueryRewriteContext ctx) { - return false; + @SuppressWarnings("unchecked") + protected T doRewrite(QueryRewriteContext ctx) { + return (T) this; + } + + protected static RetrieverSource convertToRetrieverSource(RetrieverBuilder retrieverBuilder) { + return new RetrieverSource(retrieverBuilder, null); } private RankDoc[] getRankDocs(SearchResponse searchResponse) 
{ diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 8a378d25b9721..c3282ceb1f491 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -38,6 +38,8 @@ import static org.elasticsearch.xpack.rank.RankRRFFeatures.LINEAR_RETRIEVER_SUPPORTED; import static org.elasticsearch.xpack.rank.linear.LinearRetrieverComponent.DEFAULT_WEIGHT; +// TODO: Add toEquals method + /** * The {@code LinearRetrieverBuilder} supports the combination of different retrievers through a weighted linear combination. * For example, assume that we have retrievers r1 and r2, the final score of the {@code LinearRetrieverBuilder} is defined as @@ -59,9 +61,9 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder fields; + private final String query; private final String normalizer; - private List fields; - private String query; @SuppressWarnings("unchecked") static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( @@ -227,8 +229,8 @@ protected RankDoc[] combineInnerRetrieverResults(List rankResults, b } @Override - protected boolean doRewrite(QueryRewriteContext ctx) { - boolean modified = false; + protected LinearRetrieverBuilder doRewrite(QueryRewriteContext ctx) { + LinearRetrieverBuilder rewritten = this; ResolvedIndices resolvedIndices = ctx.getResolvedIndices(); if (resolvedIndices != null && (query != null || fields.isEmpty() == false)) { @@ -259,7 +261,7 @@ protected boolean doRewrite(QueryRewriteContext ctx) { ); } - List fieldsInnerRetrievers = SimplifiedInnerRetrieverUtils.generateInnerRetrievers( + List fieldsInnerRetrievers = SimplifiedInnerRetrieverUtils.generateInnerRetrievers( fields, query, 
localIndicesMetadata.values(), @@ -283,17 +285,18 @@ protected boolean doRewrite(QueryRewriteContext ctx) { throw new IllegalArgumentException("[" + NAME + "] per-field weights must be non-negative"); } } - ); - fieldsInnerRetrievers.forEach(this::addChild); - // TODO: Set weight and normalizer for each child retriever added - - // Clear fields and query to indicate that this stage of the rewrite process is complete - fields = List.of(); - query = null; - modified = true; + ).stream().map(CompoundRetrieverBuilder::convertToRetrieverSource).toList(); + + float[] weights = new float[fieldsInnerRetrievers.size()]; + Arrays.fill(weights, DEFAULT_WEIGHT); + + ScoreNormalizer[] normalizers = new ScoreNormalizer[fieldsInnerRetrievers.size()]; + Arrays.fill(normalizers, fieldsNormalizer); + + rewritten = new LinearRetrieverBuilder(fieldsInnerRetrievers, null, null, normalizer, rankWindowSize, weights, normalizers); } - return modified; + return rewritten; } @Override diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java index 738a8a87a2562..60b72c3df6541 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java @@ -92,8 +92,8 @@ public static RRFRetrieverBuilder fromXContent(XContentParser parser, RetrieverP return PARSER.apply(parser, context); } - private List fields; - private String query; + private final List fields; + private final String query; private final int rankConstant; public RRFRetrieverBuilder(int rankWindowSize, int rankConstant) { @@ -183,8 +183,8 @@ protected RRFRankDoc[] combineInnerRetrieverResults(List rankResults } @Override - protected boolean doRewrite(QueryRewriteContext ctx) { - boolean modified = false; + protected RRFRetrieverBuilder 
doRewrite(QueryRewriteContext ctx) { + RRFRetrieverBuilder rewritten = this; ResolvedIndices resolvedIndices = ctx.getResolvedIndices(); if (resolvedIndices != null && (query != null || fields.isEmpty() == false)) { @@ -208,7 +208,7 @@ protected boolean doRewrite(QueryRewriteContext ctx) { ); } - List fieldsInnerRetrievers = SimplifiedInnerRetrieverUtils.generateInnerRetrievers( + List fieldsInnerRetrievers = SimplifiedInnerRetrieverUtils.generateInnerRetrievers( fields, query, localIndicesMetadata.values(), @@ -225,16 +225,12 @@ protected boolean doRewrite(QueryRewriteContext ctx) { ); } } - ); - fieldsInnerRetrievers.forEach(this::addChild); + ).stream().map(CompoundRetrieverBuilder::convertToRetrieverSource).toList(); - // Clear fields and query to indicate that this stage of the rewrite process is complete - fields = List.of(); - query = null; - modified = true; + rewritten = new RRFRetrieverBuilder(fieldsInnerRetrievers, rankWindowSize, rankConstant); } - return modified; + return rewritten; } // ---- FOR TESTING XCONTENT PARSING ---- From e33cebf203c9d05eb5b8cbe55500abb6787d6b3a Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 28 May 2025 15:12:32 -0400 Subject: [PATCH 16/47] Fix NPE --- .../elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index c3282ceb1f491..7df02d9fdcef7 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -70,7 +70,7 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder { - List retrieverComponents = (List) args[0]; + List retrieverComponents = args[0] == null ? 
List.of() : (List) args[0]; List fields = (List) args[1]; String query = (String) args[2]; String normalizer = (String) args[3]; From a7776012b20a62bd5f97a9117af5c021364b5912 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 29 May 2025 09:26:38 -0400 Subject: [PATCH 17/47] Temporary fix for minmax bug --- .../elasticsearch/xpack/rank/linear/MinMaxScoreNormalizer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/MinMaxScoreNormalizer.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/MinMaxScoreNormalizer.java index 56b42b48a5d47..a9a038f41a1dd 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/MinMaxScoreNormalizer.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/MinMaxScoreNormalizer.java @@ -54,7 +54,7 @@ public ScoreDoc[] normalizeScores(ScoreDoc[] docs) { for (int i = 0; i < docs.length; i++) { float score; if (minEqualsMax) { - score = min; + score = 1.0f; // TODO: Address bug in separate PR } else { score = (docs[i].score - min) / (max - min); } From 34250dc64e57ce16c27f3a7fd882d0d285bcdb2a Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 29 May 2025 14:06:35 -0400 Subject: [PATCH 18/47] Validation improvements --- .../rank/linear/LinearRetrieverBuilder.java | 87 +++++++++++++------ .../xpack/rank/rrf/RRFRetrieverBuilder.java | 41 ++++++--- .../SimplifiedInnerRetrieverUtils.java | 52 +++++++++++ 3 files changed, 139 insertions(+), 41 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 7df02d9fdcef7..65c8a6bfdfb1a 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ 
b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.rank.linear; import org.apache.lucene.search.ScoreDoc; +import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.action.ResolvedIndices; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.util.Maps; @@ -32,8 +33,10 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Locale; import java.util.Map; +import static org.elasticsearch.action.ValidateActions.addValidationError; import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; import static org.elasticsearch.xpack.rank.RankRRFFeatures.LINEAR_RETRIEVER_SUPPORTED; import static org.elasticsearch.xpack.rank.linear.LinearRetrieverComponent.DEFAULT_WEIGHT; @@ -63,7 +66,7 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder fields; private final String query; - private final String normalizer; + private final ScoreNormalizer normalizer; @SuppressWarnings("unchecked") static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( @@ -73,7 +76,7 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder retrieverComponents = args[0] == null ? List.of() : (List) args[0]; List fields = (List) args[1]; String query = (String) args[2]; - String normalizer = (String) args[3]; + ScoreNormalizer normalizer = args[3] == null ? null : ScoreNormalizer.valueOf((String) args[3]); int rankWindowSize = args[4] == null ? 
RankBuilder.DEFAULT_RANK_WINDOW_SIZE : (int) args[4]; int index = 0; @@ -140,12 +143,12 @@ public LinearRetrieverBuilder( List innerRetrievers, List fields, String query, - String normalizer, + ScoreNormalizer normalizer, int rankWindowSize, float[] weights, ScoreNormalizer[] normalizers ) { - // Use a mutable list for innerRetrievers so that we can add more child retrievers during rewrite + // Use a mutable list for innerRetrievers so that we can use addChild super(innerRetrievers == null ? new ArrayList<>() : new ArrayList<>(innerRetrievers), rankWindowSize); if (weights.length != this.innerRetrievers.size()) { throw new IllegalArgumentException("The number of weights must match the number of inner retrievers"); @@ -159,6 +162,55 @@ public LinearRetrieverBuilder( this.normalizer = normalizer; this.weights = weights; this.normalizers = normalizers; + + // TODO: Validate simplified query format args here? + // Otherwise some of the validation is skipped when creating the retriever programmatically. 
+ } + + @Override + public ActionRequestValidationException validate( + SearchSourceBuilder source, + ActionRequestValidationException validationException, + boolean isScroll, + boolean allowPartialSearchResults + ) { + validationException = super.validate(source, validationException, isScroll, allowPartialSearchResults); + validationException = SimplifiedInnerRetrieverUtils.validateSimplifiedFormatParams( + innerRetrievers, + fields, + query, + getName(), + RETRIEVERS_FIELD.getPreferredName(), + FIELDS_FIELD.getPreferredName(), + QUERY_FIELD.getPreferredName(), + validationException + ); + + if (query != null && normalizer == null) { + validationException = addValidationError( + String.format( + Locale.ROOT, + "[%s] [%s] must be provided when [%s] is specified", + getName(), + NORMALIZER_FIELD.getPreferredName(), + QUERY_FIELD.getPreferredName() + ), + validationException + ); + } else if (innerRetrievers.isEmpty() == false && normalizer != null) { + validationException = addValidationError( + String.format( + Locale.ROOT, + "[%s] [%s] cannot be provided when [%s] is specified", + getName(), + NORMALIZER_FIELD.getPreferredName(), + RETRIEVERS_FIELD.getPreferredName() + ), + validationException + ); + } + + return validationException; } @Override @@ -233,27 +285,8 @@ protected LinearRetrieverBuilder doRewrite(QueryRewriteContext ctx) { LinearRetrieverBuilder rewritten = this; ResolvedIndices resolvedIndices = ctx.getResolvedIndices(); - if (resolvedIndices != null && (query != null || fields.isEmpty() == false)) { + if (resolvedIndices != null && query != null) { // Using the simplified query format - if (query == null || query.isEmpty()) { - throw new IllegalArgumentException( - "[" + NAME + "] [" + QUERY_FIELD.getPreferredName() + "] must be provided when using the simplified query format" - ); - } - - if (normalizer == null || normalizer.isEmpty()) { - throw new IllegalArgumentException( - "[" + NAME + "] [" + NORMALIZER_FIELD.getPreferredName() + "] must be 
provided when using the simplified query format" - ); - } - ScoreNormalizer fieldsNormalizer = ScoreNormalizer.valueOf(normalizer); - - if (innerRetrievers.isEmpty() == false) { - throw new IllegalArgumentException( - "[" + NAME + "] does not support [" + RETRIEVERS_FIELD.getPreferredName() + "] and the simplified query format combined" - ); - } - var localIndicesMetadata = resolvedIndices.getConcreteLocalIndicesMetadata(); if (localIndicesMetadata.size() > 1) { throw new IllegalArgumentException( @@ -274,7 +307,7 @@ protected LinearRetrieverBuilder doRewrite(QueryRewriteContext ctx) { for (var weightedRetriever : r) { retrievers.add(weightedRetriever.retrieverSource()); weights[index] = weightedRetriever.weight(); - normalizers[index] = fieldsNormalizer; + normalizers[index] = normalizer; index++; } @@ -291,7 +324,7 @@ protected LinearRetrieverBuilder doRewrite(QueryRewriteContext ctx) { Arrays.fill(weights, DEFAULT_WEIGHT); ScoreNormalizer[] normalizers = new ScoreNormalizer[fieldsInnerRetrievers.size()]; - Arrays.fill(normalizers, fieldsNormalizer); + Arrays.fill(normalizers, normalizer); rewritten = new LinearRetrieverBuilder(fieldsInnerRetrievers, null, null, normalizer, rankWindowSize, weights, normalizers); } @@ -330,7 +363,7 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept builder.field(QUERY_FIELD.getPreferredName(), query); } if (normalizer != null) { - builder.field(NORMALIZER_FIELD.getPreferredName(), normalizer); + builder.field(NORMALIZER_FIELD.getPreferredName(), normalizer.getName()); } builder.field(RANK_WINDOW_SIZE_FIELD.getPreferredName(), rankWindowSize); diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java index 60b72c3df6541..c4cac8ed39cfa 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java +++ 
b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java @@ -8,11 +8,13 @@ package org.elasticsearch.xpack.rank.rrf; import org.apache.lucene.search.ScoreDoc; +import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.action.ResolvedIndices; import org.elasticsearch.common.util.Maps; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.license.LicenseUtils; +import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.rank.RankBuilder; import org.elasticsearch.search.rank.RankDoc; import org.elasticsearch.search.retriever.CompoundRetrieverBuilder; @@ -105,11 +107,14 @@ public RRFRetrieverBuilder(int rankWindowSize, int rankConstant) { } RRFRetrieverBuilder(List childRetrievers, List fields, String query, int rankWindowSize, int rankConstant) { - // Use a mutable list for childRetrievers so that we can add more child retrievers during rewrite + // Use a mutable list for childRetrievers so that we can use addChild super(childRetrievers == null ? new ArrayList<>() : new ArrayList<>(childRetrievers), rankWindowSize); this.fields = fields == null ? List.of() : List.copyOf(fields); this.query = query; this.rankConstant = rankConstant; + + // TODO: Validate simplified query format args here? + // Otherwise some of the validation is skipped when creating the retriever programmatically. 
} @Override @@ -117,6 +122,26 @@ public String getName() { return NAME; } + @Override + public ActionRequestValidationException validate( + SearchSourceBuilder source, + ActionRequestValidationException validationException, + boolean isScroll, + boolean allowPartialSearchResults + ) { + validationException = super.validate(source, validationException, isScroll, allowPartialSearchResults); + return SimplifiedInnerRetrieverUtils.validateSimplifiedFormatParams( + innerRetrievers, + fields, + query, + getName(), + RETRIEVERS_FIELD.getPreferredName(), + FIELDS_FIELD.getPreferredName(), + QUERY_FIELD.getPreferredName(), + validationException + ); + } + @Override protected RRFRetrieverBuilder clone(List newRetrievers, List newPreFilterQueryBuilders) { RRFRetrieverBuilder clone = new RRFRetrieverBuilder(newRetrievers, this.fields, this.query, this.rankWindowSize, this.rankConstant); @@ -187,20 +212,8 @@ protected RRFRetrieverBuilder doRewrite(QueryRewriteContext ctx) { RRFRetrieverBuilder rewritten = this; ResolvedIndices resolvedIndices = ctx.getResolvedIndices(); - if (resolvedIndices != null && (query != null || fields.isEmpty() == false)) { + if (resolvedIndices != null && query != null) { // Using the simplified query format - if (query == null || query.isEmpty()) { - throw new IllegalArgumentException( - "[" + NAME + "] [" + QUERY_FIELD.getPreferredName() + "] must be provided when using the simplified query format" - ); - } - - if (innerRetrievers.isEmpty() == false) { - throw new IllegalArgumentException( - "[" + NAME + "] does not support [" + RETRIEVERS_FIELD.getPreferredName() + "] and the simplified query format combined" - ); - } - var localIndicesMetadata = resolvedIndices.getConcreteLocalIndicesMetadata(); if (localIndicesMetadata.size() > 1) { throw new IllegalArgumentException( diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java 
b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java index fc31ea6a6ff2a..99e5b8086b5f9 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.rank.simplified; +import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; import org.elasticsearch.common.regex.Regex; @@ -23,10 +24,12 @@ import java.util.Collection; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.function.Consumer; import java.util.function.Function; +import static org.elasticsearch.action.ValidateActions.addValidationError; import static org.elasticsearch.index.IndexSettings.DEFAULT_FIELD_SETTING; public class SimplifiedInnerRetrieverUtils { @@ -34,6 +37,55 @@ private SimplifiedInnerRetrieverUtils() {} public record WeightedRetrieverSource(CompoundRetrieverBuilder.RetrieverSource retrieverSource, float weight) {} + public static ActionRequestValidationException validateSimplifiedFormatParams( + List innerRetrievers, + List fields, + @Nullable String query, + String retrieverName, + String retrieversParamName, + String fieldsParamName, + String queryParamName, + ActionRequestValidationException validationException + ) { + if (fields.isEmpty() == false || query != null) { + // Using the simplified query format + if (query == null) { + // Return early here because the following validation checks assume a query param value is provided + return addValidationError( + String.format( + Locale.ROOT, + "[%s] [%s] must be provided when [%s] is specified", + retrieverName, + queryParamName, + fieldsParamName + ), + 
validationException + ); + } + + if (query.isEmpty()) { + validationException = addValidationError( + String.format(Locale.ROOT, "[%s] [%s] cannot be empty", retrieverName, queryParamName), + validationException + ); + } + + if (innerRetrievers.isEmpty() == false) { + validationException = addValidationError( + String.format(Locale.ROOT, "[%s] cannot combine [%s] and [%s]", retrieverName, retrieversParamName, queryParamName), + validationException + ); + } + } else if (innerRetrievers.isEmpty()) { + validationException = addValidationError( + String.format(Locale.ROOT, "[%s] must provide [%s] or [%s]", retrieverName, retrieversParamName, queryParamName), + validationException + ); + } + + return validationException; + } + public static List generateInnerRetrievers( List fieldsAndWeights, String query, From 551e7f59265f398d395e33c2771dc798353e315d Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 29 May 2025 14:39:55 -0400 Subject: [PATCH 19/47] Update docs/changelog/128633.yaml --- docs/changelog/128633.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/128633.yaml diff --git a/docs/changelog/128633.yaml b/docs/changelog/128633.yaml new file mode 100644 index 0000000000000..dbfdcdaffbcb9 --- /dev/null +++ b/docs/changelog/128633.yaml @@ -0,0 +1,5 @@ +pr: 128633 +summary: Simplified linear and RRF retrievers +area: "Relevance, Search" +type: enhancement +issues: [] From 7267b591e1d23844d80918ad963271fad275af4a Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 29 May 2025 14:47:11 -0400 Subject: [PATCH 20/47] Fix changelog --- docs/changelog/128633.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/128633.yaml b/docs/changelog/128633.yaml index dbfdcdaffbcb9..926384f7fcdf6 100644 --- a/docs/changelog/128633.yaml +++ b/docs/changelog/128633.yaml @@ -1,5 +1,5 @@ pr: 128633 summary: Simplified linear and RRF retrievers -area: "Relevance, Search" +area: Search type: enhancement issues: [] 
From 397a1b8c006a167e7c273b74789f118e9aba8b76 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Mon, 2 Jun 2025 15:13:36 -0400 Subject: [PATCH 21/47] Updated RRFRetrieverBuilderParsingTests --- .../retriever/CompoundRetrieverBuilder.java | 8 +-- .../rrf/RRFRetrieverBuilderParsingTests.java | 67 +++++++++++-------- 2 files changed, 44 insertions(+), 31 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java index 46061b03c4272..6f023f78b9b54 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java @@ -295,6 +295,10 @@ public int rankWindowSize() { return rankWindowSize; } + public static RetrieverSource convertToRetrieverSource(RetrieverBuilder retrieverBuilder) { + return new RetrieverSource(retrieverBuilder, null); + } + protected final SearchSourceBuilder createSearchSourceBuilder(PointInTimeBuilder pit, RetrieverBuilder retrieverBuilder) { var sourceBuilder = new SearchSourceBuilder().pointInTimeBuilder(pit) .trackTotalHits(false) @@ -332,10 +336,6 @@ protected T doRewrite(QueryRewriteContext ctx) { return (T) this; } - protected static RetrieverSource convertToRetrieverSource(RetrieverBuilder retrieverBuilder) { - return new RetrieverSource(retrieverBuilder, null); - } - private RankDoc[] getRankDocs(SearchResponse searchResponse) { int size = searchResponse.getHits().getHits().length; RankDoc[] docs = new RankDoc[size]; diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderParsingTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderParsingTests.java index 0cfeea2905680..2cf4caa80ccad 100644 --- 
a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderParsingTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderParsingTests.java @@ -10,6 +10,7 @@ import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.common.Strings; import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.retriever.CompoundRetrieverBuilder; import org.elasticsearch.search.retriever.RetrieverBuilder; import org.elasticsearch.search.retriever.RetrieverParserContext; import org.elasticsearch.search.retriever.TestRetrieverBuilder; @@ -26,11 +27,10 @@ import java.util.ArrayList; import java.util.List; +import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.convertToRetrieverSource; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; -// TODO: Add simplified format tests? - public class RRFRetrieverBuilderParsingTests extends AbstractXContentTestCase { /** @@ -47,13 +47,22 @@ public static RRFRetrieverBuilder createRandomRRFRetrieverBuilder() { if (randomBoolean()) { rankConstant = randomIntBetween(1, 1000000); } - var ret = new RRFRetrieverBuilder(rankWindowSize, rankConstant); + + List fields = null; + String query = null; + if (randomBoolean()) { + fields = randomList(1, 10, () -> randomAlphaOfLengthBetween(1, 10)); + query = randomAlphaOfLengthBetween(1, 10); + } + int retrieverCount = randomIntBetween(2, 50); + List innerRetrievers = new ArrayList<>(retrieverCount); while (retrieverCount > 0) { - ret.addChild(TestRetrieverBuilder.createRandomTestRetrieverBuilder()); + innerRetrievers.add(convertToRetrieverSource(TestRetrieverBuilder.createRandomTestRetrieverBuilder())); --retrieverCount; } - return ret; + + return new RRFRetrieverBuilder(innerRetrievers, fields, query, rankWindowSize, rankConstant); } @Override @@ -96,28 +105,32 @@ protected NamedXContentRegistry xContentRegistry() { } 
public void testRRFRetrieverParsing() throws IOException { - String restContent = "{" - + " \"retriever\": {" - + " \"rrf\": {" - + " \"retrievers\": [" - + " {" - + " \"test\": {" - + " \"value\": \"foo\"" - + " }" - + " }," - + " {" - + " \"test\": {" - + " \"value\": \"bar\"" - + " }" - + " }" - + " ]," - + " \"rank_window_size\": 100," - + " \"rank_constant\": 10," - + " \"min_score\": 20.0," - + " \"_name\": \"foo_rrf\"" - + " }" - + " }" - + "}"; + String restContent = """ + { + "retriever": { + "rrf": { + "retrievers": [ + { + "test": { + "value": "foo" + } + }, + { + "test": { + "value": "bar" + } + } + ], + "fields": ["field1", "field2"], + "query": "baz", + "rank_window_size": 100, + "rank_constant": 10, + "min_score": 20.0, + "_name": "foo_rrf" + } + } + } + """; SearchUsageHolder searchUsageHolder = new UsageService().getSearchUsageHolder(); try (XContentParser jsonParser = createParser(JsonXContent.jsonXContent, restContent)) { SearchSourceBuilder source = new SearchSourceBuilder().parseXContent(jsonParser, true, searchUsageHolder, nf -> true); From 1703f1f7491d7dca0fcaeefea05b9030f31a1421 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Tue, 3 Jun 2025 16:52:24 -0400 Subject: [PATCH 22/47] _almost_ working version of rewrite tests --- .../xpack/rank/rrf/RRFRetrieverBuilderIT.java | 2 - .../xpack/rank/rrf/RRFRetrieverBuilder.java | 4 + .../rank/rrf/RRFRetrieverBuilderTests.java | 111 ++++++++++++++++++ 3 files changed, 115 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderIT.java b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderIT.java index 0e829b1254a9e..6854fc436038f 100644 --- a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderIT.java +++ 
b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderIT.java @@ -54,8 +54,6 @@ import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.lessThanOrEqualTo; -// TODO: Add simplified format tests - @ESIntegTestCase.ClusterScope(minNumDataNodes = 3) public class RRFRetrieverBuilderIT extends ESIntegTestCase { diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java index c4cac8ed39cfa..9b1cbbd9b8e09 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java @@ -117,6 +117,10 @@ public RRFRetrieverBuilder(int rankWindowSize, int rankConstant) { // Otherwise some of the validation is skipped when creating the retriever programmatically. 
} + public int rankConstant() { + return rankConstant; + } + @Override public String getName() { return NAME; diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java index 3a77b733d6129..4293d12f3517b 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java @@ -7,14 +7,26 @@ package org.elasticsearch.xpack.rank.rrf; +import org.elasticsearch.action.MockResolvedIndices; +import org.elasticsearch.action.OriginalIndices; +import org.elasticsearch.action.ResolvedIndices; +import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.query.MatchQueryBuilder; +import org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.search.SearchModule; import org.elasticsearch.search.builder.PointInTimeBuilder; import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.retriever.CompoundRetrieverBuilder; import org.elasticsearch.search.retriever.RetrieverBuilder; import org.elasticsearch.search.retriever.RetrieverParserContext; +import org.elasticsearch.search.retriever.StandardRetrieverBuilder; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.NamedXContentRegistry; import org.elasticsearch.xcontent.ParseField; @@ -23,6 +35,9 @@ import java.io.IOException; import java.util.List; +import 
java.util.Map; + +import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.convertToRetrieverSource; /** Tests for the rrf retriever. */ public class RRFRetrieverBuilderTests extends ESTestCase { @@ -66,6 +81,37 @@ public void testRetrieverExtractionErrors() throws IOException { } } + public void testSimplifiedParamsRewrite() { + final String indexName = "test-index"; + final List testInferenceFields = List.of("semantic_field_1", "semantic_field_2"); + final ResolvedIndices resolvedIndices = createMockResolvedIndices(indexName, testInferenceFields); + final QueryRewriteContext queryRewriteContext = new QueryRewriteContext( + parserConfig(), + null, + null, + resolvedIndices, + new PointInTimeBuilder(new BytesArray("pitid")), + null + ); + + RRFRetrieverBuilder rrfRetrieverBuilder = new RRFRetrieverBuilder( + null, + List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), + "foo", + 10, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT + ); + assertSimplifiedParamsRewrite( + rrfRetrieverBuilder, + queryRewriteContext, + Map.of("field_1", 1.0f, "field_2", 1.0f), + Map.of("semantic_field_1", 1.0f, "semantic_field_2", 1.0f), + "foo" + ); + + // TODO: Test with wildcard resolution + } + @Override protected NamedXContentRegistry xContentRegistry() { List entries = new SearchModule(Settings.EMPTY, List.of()).getNamedXContents(); @@ -86,4 +132,69 @@ protected NamedXContentRegistry xContentRegistry() { ); return new NamedXContentRegistry(entries); } + + private static ResolvedIndices createMockResolvedIndices(String indexName, List inferenceFields) { + Index index = new Index(indexName, randomAlphaOfLength(10)); + IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(index.getName()) + .settings( + Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current()) + .put(IndexMetadata.SETTING_INDEX_UUID, index.getUUID()) + ) + .numberOfShards(1) + .numberOfReplicas(0); + + for (String inferenceField : 
inferenceFields) { + indexMetadataBuilder.putInferenceField( + new InferenceFieldMetadata(inferenceField, randomAlphaOfLengthBetween(3, 5), new String[] { inferenceField }, null) + ); + } + + return new MockResolvedIndices( + Map.of(), + new OriginalIndices(new String[] { indexName }, IndicesOptions.DEFAULT), + Map.of(index, indexMetadataBuilder.build()) + ); + } + + private static void assertSimplifiedParamsRewrite( + RRFRetrieverBuilder retriever, + QueryRewriteContext ctx, + Map expectedNonInferenceFields, + Map expectedInferenceFields, + String expectedQuery + ) { + List expectedInnerRetrievers = List.of( + convertToRetrieverSource( + new StandardRetrieverBuilder( + new MultiMatchQueryBuilder(expectedQuery).type(MultiMatchQueryBuilder.Type.MOST_FIELDS) + .fields(expectedNonInferenceFields) + ) + ), + convertToRetrieverSource( + new RRFRetrieverBuilder( + expectedInferenceFields.entrySet() + .stream() + .map(e -> { + if (e.getValue() != 1.0f) { + throw new IllegalArgumentException("Cannot apply per-field weights in RRF"); + } + return convertToRetrieverSource(new StandardRetrieverBuilder(new MatchQueryBuilder(e.getKey(), expectedQuery))); + }) + .toList(), + retriever.rankWindowSize(), + retriever.rankConstant() + ) + ) + ); + RRFRetrieverBuilder expectedRewritten = new RRFRetrieverBuilder( + expectedInnerRetrievers, + retriever.rankWindowSize(), + retriever.rankConstant() + ); + + RRFRetrieverBuilder rewritten = retriever.doRewrite(ctx); + assertNotSame(retriever, rewritten); + assertEquals(expectedRewritten, rewritten); + } } From e7b547646a60e9d965b6e09a0acf432c17b1c0a8 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 3 Jun 2025 21:01:20 +0000 Subject: [PATCH 23/47] [CI] Auto commit changes from spotless --- .../rank/rrf/RRFRetrieverBuilderTests.java | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java 
b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java index 4293d12f3517b..1fd96ae2aab60 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java @@ -171,21 +171,12 @@ private static void assertSimplifiedParamsRewrite( .fields(expectedNonInferenceFields) ) ), - convertToRetrieverSource( - new RRFRetrieverBuilder( - expectedInferenceFields.entrySet() - .stream() - .map(e -> { - if (e.getValue() != 1.0f) { - throw new IllegalArgumentException("Cannot apply per-field weights in RRF"); - } - return convertToRetrieverSource(new StandardRetrieverBuilder(new MatchQueryBuilder(e.getKey(), expectedQuery))); - }) - .toList(), - retriever.rankWindowSize(), - retriever.rankConstant() - ) - ) + convertToRetrieverSource(new RRFRetrieverBuilder(expectedInferenceFields.entrySet().stream().map(e -> { + if (e.getValue() != 1.0f) { + throw new IllegalArgumentException("Cannot apply per-field weights in RRF"); + } + return convertToRetrieverSource(new StandardRetrieverBuilder(new MatchQueryBuilder(e.getKey(), expectedQuery))); + }).toList(), retriever.rankWindowSize(), retriever.rankConstant())) ); RRFRetrieverBuilder expectedRewritten = new RRFRetrieverBuilder( expectedInnerRetrievers, From d43ac884065d1aeb339fd646e3c86029c3836cf8 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 4 Jun 2025 10:09:13 -0400 Subject: [PATCH 24/47] Fixed rewrite tests --- .../retriever/CompoundRetrieverBuilder.java | 5 ++++ .../rank/rrf/RRFRetrieverBuilderTests.java | 28 +++++++++++++------ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java index 6f023f78b9b54..1daed5dc72185 100644 --- 
a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java @@ -36,6 +36,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Locale; import java.util.Objects; @@ -295,6 +296,10 @@ public int rankWindowSize() { return rankWindowSize; } + public List innerRetrievers() { + return Collections.unmodifiableList(innerRetrievers); + } + public static RetrieverSource convertToRetrieverSource(RetrieverBuilder retrieverBuilder) { return new RetrieverSource(retrieverBuilder, null); } diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java index 1fd96ae2aab60..da528a7ffebb4 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java @@ -34,8 +34,10 @@ import org.elasticsearch.xcontent.json.JsonXContent; import java.io.IOException; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.convertToRetrieverSource; @@ -164,28 +166,36 @@ private static void assertSimplifiedParamsRewrite( Map expectedInferenceFields, String expectedQuery ) { - List expectedInnerRetrievers = List.of( + Set expectedInnerRetrievers = Set.of( convertToRetrieverSource( new StandardRetrieverBuilder( new MultiMatchQueryBuilder(expectedQuery).type(MultiMatchQueryBuilder.Type.MOST_FIELDS) .fields(expectedNonInferenceFields) ) ), - convertToRetrieverSource(new RRFRetrieverBuilder(expectedInferenceFields.entrySet().stream().map(e -> { + Set.of(expectedInferenceFields.entrySet().stream().map(e 
-> { if (e.getValue() != 1.0f) { throw new IllegalArgumentException("Cannot apply per-field weights in RRF"); } return convertToRetrieverSource(new StandardRetrieverBuilder(new MatchQueryBuilder(e.getKey(), expectedQuery))); - }).toList(), retriever.rankWindowSize(), retriever.rankConstant())) - ); - RRFRetrieverBuilder expectedRewritten = new RRFRetrieverBuilder( - expectedInnerRetrievers, - retriever.rankWindowSize(), - retriever.rankConstant() + }).toArray()) ); RRFRetrieverBuilder rewritten = retriever.doRewrite(ctx); assertNotSame(retriever, rewritten); - assertEquals(expectedRewritten, rewritten); + assertEquals(expectedInnerRetrievers, getInnerRetrieversAsSet(rewritten)); + } + + private static Set getInnerRetrieversAsSet(RRFRetrieverBuilder retriever) { + Set innerRetrieversSet = new HashSet<>(); + for (CompoundRetrieverBuilder.RetrieverSource innerRetriever : retriever.innerRetrievers()) { + if (innerRetriever.retriever() instanceof RRFRetrieverBuilder innerRrfRetriever) { + innerRetrieversSet.add(getInnerRetrieversAsSet(innerRrfRetriever)); + } else { + innerRetrieversSet.add(innerRetriever); + } + } + + return innerRetrieversSet; } } From e8715b214a3e3acc0abed10c9ffdcde1ff8ebaa0 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 4 Jun 2025 10:24:01 -0400 Subject: [PATCH 25/47] Add wildcards to rewrite test --- .../rank/rrf/RRFRetrieverBuilderTests.java | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java index da528a7ffebb4..211104ef43ecd 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java @@ -96,6 +96,7 @@ public void testSimplifiedParamsRewrite() { 
null ); + // No wildcards RRFRetrieverBuilder rrfRetrieverBuilder = new RRFRetrieverBuilder( null, List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), @@ -111,7 +112,31 @@ public void testSimplifiedParamsRewrite() { "foo" ); - // TODO: Test with wildcard resolution + // Glob matching on inference and non-inference fields + rrfRetrieverBuilder = new RRFRetrieverBuilder( + null, + List.of("field_*", "*_field_1"), + "bar", + 10, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT + ); + assertSimplifiedParamsRewrite( + rrfRetrieverBuilder, + queryRewriteContext, + Map.of("field_*", 1.0f, "*_field_1", 1.0f), + Map.of("semantic_field_1", 1.0f), + "bar" + ); + + // All-fields wildcard + rrfRetrieverBuilder = new RRFRetrieverBuilder(null, List.of("*"), "baz", 10, RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT); + assertSimplifiedParamsRewrite( + rrfRetrieverBuilder, + queryRewriteContext, + Map.of("*", 1.0f), + Map.of("semantic_field_1", 1.0f, "semantic_field_2", 1.0f), + "baz" + ); } @Override From a014e02239346de9dedc1d90e78eda7f7da9c6de Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 4 Jun 2025 10:42:19 -0400 Subject: [PATCH 26/47] Updated LinearRetrieverBuilderParsingTests --- .../linear/LinearRetrieverBuilderParsingTests.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java index 5cc66c6f50d3c..0b85635cd00f0 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java @@ -43,6 +43,16 @@ public static void afterClass() throws Exception { @Override protected LinearRetrieverBuilder createTestInstance() { int 
rankWindowSize = randomInt(100); + + List fields = null; + String query = null; + ScoreNormalizer normalizer = null; + if (randomBoolean()) { + fields = randomList(1, 10, () -> randomAlphaOfLengthBetween(1, 10)); + query = randomAlphaOfLengthBetween(1, 10); + normalizer = randomScoreNormalizer(); + } + int num = randomIntBetween(1, 3); List innerRetrievers = new ArrayList<>(); float[] weights = new float[num]; @@ -54,7 +64,8 @@ protected LinearRetrieverBuilder createTestInstance() { weights[i] = randomFloat(); normalizers[i] = randomScoreNormalizer(); } - return new LinearRetrieverBuilder(innerRetrievers, rankWindowSize, weights, normalizers); + + return new LinearRetrieverBuilder(innerRetrievers, fields, query, normalizer, rankWindowSize, weights, normalizers); } @Override From 0050ad5ddd1330dae0d1822a5d1b4056015d64d0 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 4 Jun 2025 10:45:11 -0400 Subject: [PATCH 27/47] Clean up RRFRetrieverBuilder constructors --- .../xpack/rank/rrf/RRFRetrieverBuilder.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java index 9b1cbbd9b8e09..58574667da13f 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java @@ -98,15 +98,17 @@ public static RRFRetrieverBuilder fromXContent(XContentParser parser, RetrieverP private final String query; private final int rankConstant; - public RRFRetrieverBuilder(int rankWindowSize, int rankConstant) { - this(null, rankWindowSize, rankConstant); - } - - RRFRetrieverBuilder(List childRetrievers, int rankWindowSize, int rankConstant) { + public RRFRetrieverBuilder(List childRetrievers, int rankWindowSize, int rankConstant) { 
this(childRetrievers, null, null, rankWindowSize, rankConstant); } - RRFRetrieverBuilder(List childRetrievers, List fields, String query, int rankWindowSize, int rankConstant) { + public RRFRetrieverBuilder( + List childRetrievers, + List fields, + String query, + int rankWindowSize, + int rankConstant + ) { // Use a mutable list for childRetrievers so that we can use addChild super(childRetrievers == null ? new ArrayList<>() : new ArrayList<>(childRetrievers), rankWindowSize); this.fields = fields == null ? List.of() : List.copyOf(fields); From 6b4c04baa6da133538cedad32722e4c76191d380 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 4 Jun 2025 11:10:38 -0400 Subject: [PATCH 28/47] Added equals and hash code implementations to LinearRetrieverBuilder --- .../rank/linear/LinearRetrieverBuilder.java | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 65c8a6bfdfb1a..a3a032b14b2f7 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -35,14 +35,13 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Objects; import static org.elasticsearch.action.ValidateActions.addValidationError; import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; import static org.elasticsearch.xpack.rank.RankRRFFeatures.LINEAR_RETRIEVER_SUPPORTED; import static org.elasticsearch.xpack.rank.linear.LinearRetrieverComponent.DEFAULT_WEIGHT; -// TODO: Add toEquals method - /** * The {@code LinearRetrieverBuilder} supports the combination of different retrievers through a weighted linear combination. 
* For example, assume that we have retrievers r1 and r2, the final score of the {@code LinearRetrieverBuilder} is defined as @@ -368,4 +367,20 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept builder.field(RANK_WINDOW_SIZE_FIELD.getPreferredName(), rankWindowSize); } + + @Override + public boolean doEquals(Object o) { + LinearRetrieverBuilder that = (LinearRetrieverBuilder) o; + return super.doEquals(o) + && Arrays.equals(weights, that.weights) + && Arrays.equals(normalizers, that.normalizers) + && Objects.equals(fields, that.fields) + && Objects.equals(query, that.query) + && Objects.equals(normalizer, that.normalizer); + } + + @Override + public int doHashCode() { + return Objects.hash(super.doHashCode(), Arrays.hashCode(weights), Arrays.hashCode(normalizers), fields, query, normalizer); + } } From 1ce5a9a0bd25b520b80cc89bfbacd2001ce45e32 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 4 Jun 2025 16:27:25 -0400 Subject: [PATCH 29/47] Added linear retriever rewrite tests --- .../rank/linear/LinearRetrieverBuilder.java | 8 + .../linear/LinearRetrieverBuilderTests.java | 243 ++++++++++++++++++ 2 files changed, 251 insertions(+) create mode 100644 x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index a3a032b14b2f7..2d9b8396d7b0e 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -336,6 +336,14 @@ public String getName() { return NAME; } + float[] getWeights() { + return weights; + } + + ScoreNormalizer[] getNormalizers() { + return normalizers; + } + public void 
doToXContent(XContentBuilder builder, Params params) throws IOException { int index = 0; if (innerRetrievers.isEmpty() == false) { diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java new file mode 100644 index 0000000000000..7b08618da6c10 --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java @@ -0,0 +1,243 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.rank.linear; + +import org.elasticsearch.action.MockResolvedIndices; +import org.elasticsearch.action.OriginalIndices; +import org.elasticsearch.action.ResolvedIndices; +import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.query.MatchQueryBuilder; +import org.elasticsearch.index.query.MultiMatchQueryBuilder; +import org.elasticsearch.index.query.QueryRewriteContext; +import org.elasticsearch.search.builder.PointInTimeBuilder; +import org.elasticsearch.search.retriever.CompoundRetrieverBuilder; +import org.elasticsearch.search.retriever.RetrieverBuilder; +import org.elasticsearch.search.retriever.StandardRetrieverBuilder; +import org.elasticsearch.test.ESTestCase; + +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; 
+import java.util.stream.Collectors; + +public class LinearRetrieverBuilderTests extends ESTestCase { + public void testSimplifiedParamsRewrite() { + final String indexName = "test-index"; + final List testInferenceFields = List.of("semantic_field_1", "semantic_field_2"); + final ResolvedIndices resolvedIndices = createMockResolvedIndices(indexName, testInferenceFields); + final QueryRewriteContext queryRewriteContext = new QueryRewriteContext( + parserConfig(), + null, + null, + resolvedIndices, + new PointInTimeBuilder(new BytesArray("pitid")), + null + ); + + // No wildcards, no per-field boosting + LinearRetrieverBuilder retriever = new LinearRetrieverBuilder( + null, + List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), + "foo", + MinMaxScoreNormalizer.INSTANCE, + 10, + new float[0], + new ScoreNormalizer[0] + ); + assertSimplifiedParamsRewrite( + retriever, + queryRewriteContext, + Map.of("field_1", 1.0f, "field_2", 1.0f), + Map.of("semantic_field_1", 1.0f, "semantic_field_2", 1.0f), + "foo", + MinMaxScoreNormalizer.INSTANCE + ); + + // No wildcards, per-field boosting + retriever = new LinearRetrieverBuilder( + null, + List.of("field_1", "field_2^1.5", "semantic_field_1", "semantic_field_2^2"), + "bar", + MinMaxScoreNormalizer.INSTANCE, + 10, + new float[0], + new ScoreNormalizer[0] + ); + assertSimplifiedParamsRewrite( + retriever, + queryRewriteContext, + Map.of("field_1", 1.0f, "field_2", 1.5f), + Map.of("semantic_field_1", 1.0f, "semantic_field_2", 2.0f), + "bar", + MinMaxScoreNormalizer.INSTANCE + ); + + // Glob matching on inference and non-inference fields with per-field boosting + retriever = new LinearRetrieverBuilder( + null, + List.of("field_*^1.5", "*_field_1^2.5"), + "baz", + MinMaxScoreNormalizer.INSTANCE, + 10, + new float[0], + new ScoreNormalizer[0] + ); + assertSimplifiedParamsRewrite( + retriever, + queryRewriteContext, + Map.of("field_*", 1.5f, "*_field_1", 2.5f), + Map.of("semantic_field_1", 2.5f), + "baz", + 
MinMaxScoreNormalizer.INSTANCE + ); + + // All-fields wildcard + retriever = new LinearRetrieverBuilder( + null, + List.of("*"), + "qux", + MinMaxScoreNormalizer.INSTANCE, + 10, + new float[0], + new ScoreNormalizer[0] + ); + assertSimplifiedParamsRewrite( + retriever, + queryRewriteContext, + Map.of("*", 1.0f), + Map.of("semantic_field_1", 1.0f, "semantic_field_2", 1.0f), + "qux", + MinMaxScoreNormalizer.INSTANCE + ); + } + + private static ResolvedIndices createMockResolvedIndices(String indexName, List inferenceFields) { + Index index = new Index(indexName, randomAlphaOfLength(10)); + IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(index.getName()) + .settings( + Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current()) + .put(IndexMetadata.SETTING_INDEX_UUID, index.getUUID()) + ) + .numberOfShards(1) + .numberOfReplicas(0); + + for (String inferenceField : inferenceFields) { + indexMetadataBuilder.putInferenceField( + new InferenceFieldMetadata(inferenceField, randomAlphaOfLengthBetween(3, 5), new String[] { inferenceField }, null) + ); + } + + return new MockResolvedIndices( + Map.of(), + new OriginalIndices(new String[] { indexName }, IndicesOptions.DEFAULT), + Map.of(index, indexMetadataBuilder.build()) + ); + } + + private static void assertSimplifiedParamsRewrite( + LinearRetrieverBuilder retriever, + QueryRewriteContext ctx, + Map expectedNonInferenceFields, + Map expectedInferenceFields, + String expectedQuery, + ScoreNormalizer expectedNormalizer + ) { + Set expectedInnerRetrievers = Set.of( + new InnerRetriever( + new StandardRetrieverBuilder( + new MultiMatchQueryBuilder(expectedQuery).type(MultiMatchQueryBuilder.Type.MOST_FIELDS) + .fields(expectedNonInferenceFields) + ), + 1.0f, + expectedNormalizer + ), + new InnerRetriever( + expectedInferenceFields.entrySet() + .stream() + .map( + e -> new InnerRetriever( + new StandardRetrieverBuilder(new MatchQueryBuilder(e.getKey(), expectedQuery)), + 
e.getValue(), + expectedNormalizer + ) + ) + .collect(Collectors.toSet()), + 1.0f, + expectedNormalizer + ) + ); + + LinearRetrieverBuilder rewritten = retriever.doRewrite(ctx); + assertNotSame(retriever, rewritten); + assertEquals(expectedInnerRetrievers, getInnerRetrieversAsSet(rewritten)); + } + + private static Set getInnerRetrieversAsSet(LinearRetrieverBuilder retriever) { + float[] weights = retriever.getWeights(); + ScoreNormalizer[] normalizers = retriever.getNormalizers(); + + int i = 0; + Set innerRetrieversSet = new HashSet<>(); + for (CompoundRetrieverBuilder.RetrieverSource innerRetriever : retriever.innerRetrievers()) { + float weight = weights[i]; + ScoreNormalizer normalizer = normalizers[i]; + + if (innerRetriever.retriever() instanceof LinearRetrieverBuilder innerLinearRetriever) { + innerRetrieversSet.add(new InnerRetriever(getInnerRetrieversAsSet(innerLinearRetriever), weight, normalizer)); + } else { + innerRetrieversSet.add(new InnerRetriever(innerRetriever.retriever(), weight, normalizer)); + } + + i++; + } + + return innerRetrieversSet; + } + + private static class InnerRetriever { + private final Object retriever; + private final float weight; + private final ScoreNormalizer normalizer; + + InnerRetriever(RetrieverBuilder retriever, float weight, ScoreNormalizer normalizer) { + this.retriever = retriever; + this.weight = weight; + this.normalizer = normalizer; + } + + InnerRetriever(Set innerRetrievers, float weight, ScoreNormalizer normalizer) { + this.retriever = innerRetrievers; + this.weight = weight; + this.normalizer = normalizer; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + InnerRetriever that = (InnerRetriever) o; + return Float.compare(weight, that.weight) == 0 + && Objects.equals(retriever, that.retriever) + && Objects.equals(normalizer, that.normalizer); + } + + @Override + public int hashCode() { + return Objects.hash(retriever, 
weight, normalizer); + } + } +} From 2915332566ae8d026401c4d0e54da80dca7a0625 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 4 Jun 2025 16:29:44 -0400 Subject: [PATCH 30/47] Fix inappropriate reference to RRFRankPlugin.NAME --- .../xpack/rank/rrf/RRFRetrieverBuilderTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java index 211104ef43ecd..5d78c7991075b 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java @@ -145,7 +145,7 @@ protected NamedXContentRegistry xContentRegistry() { entries.add( new NamedXContentRegistry.Entry( RetrieverBuilder.class, - new ParseField(RRFRankPlugin.NAME), + new ParseField(RRFRetrieverBuilder.NAME), (p, c) -> RRFRetrieverBuilder.fromXContent(p, (RetrieverParserContext) c) ) ); @@ -153,7 +153,7 @@ protected NamedXContentRegistry xContentRegistry() { entries.add( new NamedXContentRegistry.Entry( RetrieverBuilder.class, - new ParseField(RRFRankPlugin.NAME + "_nl"), + new ParseField(RRFRetrieverBuilder.NAME + "_nl"), (p, c) -> RRFRetrieverBuilder.PARSER.apply(p, (RetrieverParserContext) c) ) ); From dc5467395bbe737ec9ade2dd4a1d1bbf18dbdc74 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 6 Jun 2025 08:20:28 -0400 Subject: [PATCH 31/47] Resolve TODO --- .../xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java index 99e5b8086b5f9..1fc7970dfd0c3 100644 --- 
a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/simplified/SimplifiedInnerRetrieverUtils.java @@ -150,8 +150,6 @@ private static List generateInnerRetrieversForIndex( nonInferenceFields.keySet().removeAll(inferenceFields.keySet()); // Remove all inference fields from non-inference fields map // TODO: Set index pre-filters on returned retrievers when we want to implement multi-index support - // TODO: Should we use a separate match query for each non-inference field, perform secondary normalization, - // and apply the boost after secondary normalization, like is done for inference fields? List innerRetrievers = new ArrayList<>(2); if (nonInferenceFields.isEmpty() == false) { MultiMatchQueryBuilder nonInferenceFieldQueryBuilder = new MultiMatchQueryBuilder(query).type( From 7adb091a1692480fe6981e073a3c505f692d97fd Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 6 Jun 2025 12:22:18 -0400 Subject: [PATCH 32/47] Added linear retriever YAML tests --- .../xpack/rank/RankRRFFeatures.java | 3 +- .../linear/20_linear_retriever_simplified.yml | 122 ++++++++++++++++++ 2 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java index 5119c4ee3e7e0..f1ce30a68960f 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java @@ -18,6 +18,7 @@ public class RankRRFFeatures implements FeatureSpecification { public static final NodeFeature LINEAR_RETRIEVER_SUPPORTED = new 
NodeFeature("linear_retriever_supported"); + public static final NodeFeature SIMPLIFIED_RETRIEVER_FORMAT = new NodeFeature("simplified_retriever_format"); @Override public Set getFeatures() { @@ -26,6 +27,6 @@ public Set getFeatures() { @Override public Set getTestFeatures() { - return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX); + return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX, SIMPLIFIED_RETRIEVER_FORMAT); } } diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml new file mode 100644 index 0000000000000..97139d905bff3 --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml @@ -0,0 +1,122 @@ +setup: + - requires: + cluster_features: [ "simplified_retriever_format" ] + reason: "Simplified retriever format" + test_runner_features: [ "close_to", "headers" ] + + - do: + inference.put: + task_type: sparse_embedding + inference_id: sparse-inference-id + body: > + { + "service": "test_service", + "service_settings": { + "model": "my_model", + "api_key": "abc64" + }, + "task_settings": { + } + } + + - do: + inference.put: + task_type: text_embedding + inference_id: dense-inference-id + body: > + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "dimensions": 10, + "similarity": "cosine", + "api_key": "abc64" + }, + "task_settings": { + } + } + + - do: + indices.create: + index: test-index + body: + mappings: + properties: + keyword: + type: keyword + inference_1: + type: semantic_text + inference_id: dense-inference-id + inference_2: + type: semantic_text + inference_id: sparse-inference-id + text_1: + type: text + text_2: + type: text + timestamp: + type: date + vector: + type: dense_vector + dims: 1 
+ index: true + similarity: l2_norm + index_options: + type: flat + + - do: + bulk: + index: test-index + refresh: true + body: | + {"index": {"_id": "1"}} + {"keyword": "keyword match 1", "inference_1": "inference_1 match 1", "inference_2": "inference_2 match 1", "text_1": "foo match 1", "text_2": "x match 1", "timestamp": "2000-03-30", "vector": [1]} + {"index": {"_id": "2"}} + {"keyword": "keyword match 2", "inference_1": "inference_1 match 2", "inference_2": "inference_2 match 2", "text_1": "bar match 2", "text_2": "y match 2", "timestamp": "2010-02-08", "vector": [2]} + {"index": {"_id": "3"}} + {"keyword": "keyword match 3", "inference_1": "inference_1 match 3", "inference_2": "inference_2 match 3", "text_1": "baz match 3", "text_2": "z match 3", "timestamp": "2024-08-08", "vector": [3]} + +--- +"Query all fields using the simplified format": + - do: + headers: + Content-Type: application/json + search: + index: test-index + body: + retriever: + linear: + query: "match" + normalizer: "minmax" + + - match: { hits.total.value: 3 } + - length: { hits.hits: 3 } + - match: { hits.hits.0._id: "1" } + - lte: { hits.hits.0._score: 2.0 } + - match: { hits.hits.1._id: "2" } + - lte: { hits.hits.1._score: 2.0 } + - match: { hits.hits.2._id: "3" } + - lte: { hits.hits.2._score: 2.0 } + +--- +"Lexical match per-field boosting using the simplified format": + - do: + headers: + Content-Type: application/json + search: + index: test-index + body: + retriever: + linear: + fields: ["text_1", "text_2^2"] + query: "foo z" + normalizer: "minmax" + + # Lexical-only match, so max score is 1 + - match: { hits.total.value: 2 } + - length: { hits.hits: 2 } + - match: { hits.hits.0._id: "3" } + - close_to: { hits.hits.0._score: { value: 1.0, error: 0.0001 } } + - match: { hits.hits.1._id: "1" } + - lt: { hits.hits.1._score: 1.0 } + From 0655c1c3278fdf093479766f9101caf0b16f0cd2 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 6 Jun 2025 13:47:09 -0400 Subject: [PATCH 33/47] 
Adjust test --- .../test/linear/20_linear_retriever_simplified.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml index 97139d905bff3..801f8e1221157 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml @@ -70,11 +70,11 @@ setup: refresh: true body: | {"index": {"_id": "1"}} - {"keyword": "keyword match 1", "inference_1": "inference_1 match 1", "inference_2": "inference_2 match 1", "text_1": "foo match 1", "text_2": "x match 1", "timestamp": "2000-03-30", "vector": [1]} + {"keyword": "keyword match 1", "inference_1": "inference_1 match 1", "inference_2": "inference_2 match 1", "text_1": "foo match 1", "text_2": "x match 2", "timestamp": "2000-03-30", "vector": [1]} {"index": {"_id": "2"}} - {"keyword": "keyword match 2", "inference_1": "inference_1 match 2", "inference_2": "inference_2 match 2", "text_1": "bar match 2", "text_2": "y match 2", "timestamp": "2010-02-08", "vector": [2]} + {"keyword": "keyword match 2", "inference_1": "inference_1 match 2", "inference_2": "inference_2 match 2", "text_1": "bar match 3", "text_2": "y match 4", "timestamp": "2010-02-08", "vector": [2]} {"index": {"_id": "3"}} - {"keyword": "keyword match 3", "inference_1": "inference_1 match 3", "inference_2": "inference_2 match 3", "text_1": "baz match 3", "text_2": "z match 3", "timestamp": "2024-08-08", "vector": [3]} + {"keyword": "keyword match 3", "inference_1": "inference_1 match 3", "inference_2": "inference_2 match 3", "text_1": "baz match 5", "text_2": "z match 6", "timestamp": "2024-08-08", "vector": [3]} --- "Query all fields using the 
simplified format": @@ -108,8 +108,8 @@ setup: body: retriever: linear: - fields: ["text_1", "text_2^2"] - query: "foo z" + fields: ["text_1", "text_2^3"] + query: "foo 1 z" normalizer: "minmax" # Lexical-only match, so max score is 1 From 54b67c24312d849a953c48249046661c6aab3f5b Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 6 Jun 2025 13:48:55 -0400 Subject: [PATCH 34/47] Fix inference API calls in YAML tests --- x-pack/plugin/rank-rrf/build.gradle | 4 ++++ .../xpack/rank/rrf/LinearRankClientYamlTestSuiteIT.java | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/x-pack/plugin/rank-rrf/build.gradle b/x-pack/plugin/rank-rrf/build.gradle index fa598c6ef677a..bf8cbba1390a2 100644 --- a/x-pack/plugin/rank-rrf/build.gradle +++ b/x-pack/plugin/rank-rrf/build.gradle @@ -30,3 +30,7 @@ dependencies { clusterPlugins project(':x-pack:plugin:inference:qa:test-service-plugin') } + +tasks.named('yamlRestTest') { + usesDefaultDistribution("Uses the inference API") +} diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/java/org/elasticsearch/xpack/rank/rrf/LinearRankClientYamlTestSuiteIT.java b/x-pack/plugin/rank-rrf/src/yamlRestTest/java/org/elasticsearch/xpack/rank/rrf/LinearRankClientYamlTestSuiteIT.java index 8af4ae307a51a..00f756ff6ee3f 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/java/org/elasticsearch/xpack/rank/rrf/LinearRankClientYamlTestSuiteIT.java +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/java/org/elasticsearch/xpack/rank/rrf/LinearRankClientYamlTestSuiteIT.java @@ -11,6 +11,7 @@ import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.cluster.local.distribution.DistributionType; import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase; import org.junit.ClassRule; @@ -25,8 +26,12 @@ public class LinearRankClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase { 
.module("rank-rrf") .module("lang-painless") .module("x-pack-inference") + .systemProperty("tests.seed", System.getProperty("tests.seed")) + .setting("xpack.security.enabled", "false") + .setting("xpack.security.http.ssl.enabled", "false") .setting("xpack.license.self_generated.type", "trial") .plugin("inference-service-test") + .distribution(DistributionType.DEFAULT) .build(); public LinearRankClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { From c9f7a84a339c8e69cc62932fbff809020b2a1cec Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 6 Jun 2025 13:49:25 -0400 Subject: [PATCH 35/47] Update comment --- x-pack/plugin/inference/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/inference/build.gradle b/x-pack/plugin/inference/build.gradle index 58aa9b29f8565..b58e1e941b168 100644 --- a/x-pack/plugin/inference/build.gradle +++ b/x-pack/plugin/inference/build.gradle @@ -403,5 +403,5 @@ tasks.named("thirdPartyAudit").configure { } tasks.named('yamlRestTest') { - usesDefaultDistribution("to be triaged") + usesDefaultDistribution("Uses the inference API") } From e3f7a542630204b0eeda377dd73675eb47b21f0f Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 6 Jun 2025 14:29:19 -0400 Subject: [PATCH 36/47] Linear boosting test development --- .../linear/20_linear_retriever_simplified.yml | 81 +++++++++++++++++-- 1 file changed, 75 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml index 801f8e1221157..6d3e4436f1d40 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml @@ -28,7 +28,7 @@ setup: 
"service": "text_embedding_test_service", "service_settings": { "model": "my_model", - "dimensions": 10, + "dimensions": 128, "similarity": "cosine", "api_key": "abc64" }, @@ -70,11 +70,11 @@ setup: refresh: true body: | {"index": {"_id": "1"}} - {"keyword": "keyword match 1", "inference_1": "inference_1 match 1", "inference_2": "inference_2 match 1", "text_1": "foo match 1", "text_2": "x match 2", "timestamp": "2000-03-30", "vector": [1]} + {"keyword": "keyword match 1", "inference_1": "you know", "inference_2": "for testing", "text_1": "foo match 1", "text_2": "x match 2", "timestamp": "2000-03-30", "vector": [1]} {"index": {"_id": "2"}} - {"keyword": "keyword match 2", "inference_1": "inference_1 match 2", "inference_2": "inference_2 match 2", "text_1": "bar match 3", "text_2": "y match 4", "timestamp": "2010-02-08", "vector": [2]} + {"keyword": "keyword match 2", "inference_1": "ElasticSearch is an open source", "inference_2": "distributed, RESTful, search engine", "text_1": "bar match 3", "text_2": "y match 4", "timestamp": "2010-02-08", "vector": [2]} {"index": {"_id": "3"}} - {"keyword": "keyword match 3", "inference_1": "inference_1 match 3", "inference_2": "inference_2 match 3", "text_1": "baz match 5", "text_2": "z match 6", "timestamp": "2024-08-08", "vector": [3]} + {"keyword": "keyword match 3", "inference_1": "which is built on top of Lucene internally", "inference_2": "and enjoys all the features it provides", "text_1": "baz match 5", "text_2": "z match 6", "timestamp": "2024-08-08", "vector": [3]} --- "Query all fields using the simplified format": @@ -91,15 +91,35 @@ setup: - match: { hits.total.value: 3 } - length: { hits.hits: 3 } - - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._id: "3" } - lte: { hits.hits.0._score: 2.0 } - match: { hits.hits.1._id: "2" } - lte: { hits.hits.1._score: 2.0 } - - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._id: "1" } - lte: { hits.hits.2._score: 2.0 } --- "Lexical match per-field boosting 
using the simplified format": + - do: + headers: + Content-Type: application/json + search: + index: test-index + body: + retriever: + linear: + fields: [ "text_1", "text_2" ] + query: "foo 1 z" + normalizer: "minmax" + + # Lexical-only match, so max score is 1 + - match: { hits.total.value: 2 } + - length: { hits.hits: 2 } + - match: { hits.hits.0._id: "1" } + - close_to: { hits.hits.0._score: { value: 1.0, error: 0.0001 } } + - match: { hits.hits.1._id: "3" } + - lt: { hits.hits.1._score: 1.0 } + - do: headers: Content-Type: application/json @@ -120,3 +140,52 @@ setup: - match: { hits.hits.1._id: "1" } - lt: { hits.hits.1._score: 1.0 } +--- +"Semantic match per-field boosting using the simplified format": + # The mock inference services generate synthetic vectors that don't accurately represent similarity to non-identical + # input, so it's hard to create a test that produces intuitive results. Instead, we rely on the fact that the inference + # services generate consistent vectors (i.e. same input -> same output) to demonstrate that per-field boosting on + # a semantic_text field can change the result order. 
+ - do: + headers: + Content-Type: application/json + search: + index: test-index + body: + retriever: + linear: + fields: [ "inference_1", "inference_2" ] + query: "distributed, RESTful, search engine" + normalizer: "minmax" + + # Semantic-only match, so max score is 1 + - match: { hits.total.value: 3 } + - length: { hits.hits: 3 } + - match: { hits.hits.0._id: "2" } + - close_to: { hits.hits.0._score: { value: 1.0, error: 0.0001 } } + - match: { hits.hits.1._id: "3" } + - lt: { hits.hits.1._score: 1.0 } + - match: { hits.hits.2._id: "1" } + - lt: { hits.hits.2._score: 1.0 } + + - do: + headers: + Content-Type: application/json + search: + index: test-index + body: + retriever: + linear: + fields: [ "inference_1^3", "inference_2" ] + query: "distributed, RESTful, search engine" + normalizer: "minmax" + + # Semantic-only match, so max score is 1 + - match: { hits.total.value: 3 } + - length: { hits.hits: 3 } + - match: { hits.hits.0._id: "3" } + - close_to: { hits.hits.0._score: { value: 1.0, error: 0.0001 } } + - match: { hits.hits.1._id: "2" } + - lt: { hits.hits.1._score: 1.0 } + - match: { hits.hits.2._id: "1" } + - lt: { hits.hits.2._score: 1.0 } From 1240aa1ce2f3dec782a29a8f4bb0ab318fb295e4 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 6 Jun 2025 14:41:42 -0400 Subject: [PATCH 37/47] Formatting --- .../linear/20_linear_retriever_simplified.yml | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml index 6d3e4436f1d40..32a9418d2466a 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml @@ -70,11 +70,35 @@ setup: 
refresh: true body: | {"index": {"_id": "1"}} - {"keyword": "keyword match 1", "inference_1": "you know", "inference_2": "for testing", "text_1": "foo match 1", "text_2": "x match 2", "timestamp": "2000-03-30", "vector": [1]} + { + "keyword": "keyword match 1", + "inference_1": "you know", + "inference_2": "for testing", + "text_1": "foo match 1", + "text_2": "x match 2", + "timestamp": "2000-03-30", + "vector": [1] + } {"index": {"_id": "2"}} - {"keyword": "keyword match 2", "inference_1": "ElasticSearch is an open source", "inference_2": "distributed, RESTful, search engine", "text_1": "bar match 3", "text_2": "y match 4", "timestamp": "2010-02-08", "vector": [2]} + { + "keyword": "keyword match 2", + "inference_1": "ElasticSearch is an open source", + "inference_2": "distributed, RESTful, search engine", + "text_1": "bar match 3", + "text_2": "y match 4", + "timestamp": "2010-02-08", + "vector": [2] + } {"index": {"_id": "3"}} - {"keyword": "keyword match 3", "inference_1": "which is built on top of Lucene internally", "inference_2": "and enjoys all the features it provides", "text_1": "baz match 5", "text_2": "z match 6", "timestamp": "2024-08-08", "vector": [3]} + { + "keyword": "keyword match 3", + "inference_1": "which is built on top of Lucene internally", + "inference_2": "and enjoys all the features it provides", + "text_1": "baz match 5", + "text_2": "z match 6", + "timestamp": "2024-08-08", + "vector": [3] + } --- "Query all fields using the simplified format": From 1cd8797506e48654ec9c5eaddc92b2fd56ee40ae Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 6 Jun 2025 14:43:37 -0400 Subject: [PATCH 38/47] Check linear retriever rank window size propagation --- .../linear/LinearRetrieverBuilderTests.java | 31 ++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java 
b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java index 7b08618da6c10..ff0576c43c4b5 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java @@ -33,6 +33,8 @@ import java.util.Set; import java.util.stream.Collectors; +import static org.elasticsearch.search.rank.RankBuilder.DEFAULT_RANK_WINDOW_SIZE; + public class LinearRetrieverBuilderTests extends ESTestCase { public void testSimplifiedParamsRewrite() { final String indexName = "test-index"; @@ -53,7 +55,7 @@ public void testSimplifiedParamsRewrite() { List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), "foo", MinMaxScoreNormalizer.INSTANCE, - 10, + DEFAULT_RANK_WINDOW_SIZE, new float[0], new ScoreNormalizer[0] ); @@ -66,13 +68,32 @@ public void testSimplifiedParamsRewrite() { MinMaxScoreNormalizer.INSTANCE ); + // Non-default rank window size + retriever = new LinearRetrieverBuilder( + null, + List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), + "foo2", + MinMaxScoreNormalizer.INSTANCE, + DEFAULT_RANK_WINDOW_SIZE * 2, + new float[0], + new ScoreNormalizer[0] + ); + assertSimplifiedParamsRewrite( + retriever, + queryRewriteContext, + Map.of("field_1", 1.0f, "field_2", 1.0f), + Map.of("semantic_field_1", 1.0f, "semantic_field_2", 1.0f), + "foo2", + MinMaxScoreNormalizer.INSTANCE + ); + // No wildcards, per-field boosting retriever = new LinearRetrieverBuilder( null, List.of("field_1", "field_2^1.5", "semantic_field_1", "semantic_field_2^2"), "bar", MinMaxScoreNormalizer.INSTANCE, - 10, + DEFAULT_RANK_WINDOW_SIZE, new float[0], new ScoreNormalizer[0] ); @@ -91,7 +112,7 @@ public void testSimplifiedParamsRewrite() { List.of("field_*^1.5", "*_field_1^2.5"), "baz", MinMaxScoreNormalizer.INSTANCE, - 10, + DEFAULT_RANK_WINDOW_SIZE, new float[0], new 
ScoreNormalizer[0] ); @@ -110,7 +131,7 @@ public void testSimplifiedParamsRewrite() { List.of("*"), "qux", MinMaxScoreNormalizer.INSTANCE, - 10, + DEFAULT_RANK_WINDOW_SIZE, new float[0], new ScoreNormalizer[0] ); @@ -183,6 +204,7 @@ private static void assertSimplifiedParamsRewrite( LinearRetrieverBuilder rewritten = retriever.doRewrite(ctx); assertNotSame(retriever, rewritten); + assertEquals(retriever.rankWindowSize(), rewritten.rankWindowSize()); assertEquals(expectedInnerRetrievers, getInnerRetrieversAsSet(rewritten)); } @@ -197,6 +219,7 @@ private static Set getInnerRetrieversAsSet(LinearRetrieverBuilde ScoreNormalizer normalizer = normalizers[i]; if (innerRetriever.retriever() instanceof LinearRetrieverBuilder innerLinearRetriever) { + assertEquals(retriever.rankWindowSize(), innerLinearRetriever.rankWindowSize()); innerRetrieversSet.add(new InnerRetriever(getInnerRetrieversAsSet(innerLinearRetriever), weight, normalizer)); } else { innerRetrieversSet.add(new InnerRetriever(innerRetriever.retriever(), weight, normalizer)); From f8923c333c74f7467457c1bd388f3620f6bcf347 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 6 Jun 2025 14:49:39 -0400 Subject: [PATCH 39/47] Check RRF retriever rank window size and rank constant propagation --- .../rank/rrf/RRFRetrieverBuilderTests.java | 33 +++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java index 5d78c7991075b..8b7f5cb1cfaf8 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java @@ -39,6 +39,7 @@ import java.util.Map; import java.util.Set; +import static org.elasticsearch.search.rank.RankBuilder.DEFAULT_RANK_WINDOW_SIZE; 
import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.convertToRetrieverSource; /** Tests for the rrf retriever. */ @@ -101,7 +102,7 @@ public void testSimplifiedParamsRewrite() { null, List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), "foo", - 10, + DEFAULT_RANK_WINDOW_SIZE, RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT ); assertSimplifiedParamsRewrite( @@ -112,12 +113,28 @@ public void testSimplifiedParamsRewrite() { "foo" ); + // Non-default rank window size and rank constant + rrfRetrieverBuilder = new RRFRetrieverBuilder( + null, + List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), + "foo2", + DEFAULT_RANK_WINDOW_SIZE * 2, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT / 2 + ); + assertSimplifiedParamsRewrite( + rrfRetrieverBuilder, + queryRewriteContext, + Map.of("field_1", 1.0f, "field_2", 1.0f), + Map.of("semantic_field_1", 1.0f, "semantic_field_2", 1.0f), + "foo2" + ); + // Glob matching on inference and non-inference fields rrfRetrieverBuilder = new RRFRetrieverBuilder( null, List.of("field_*", "*_field_1"), "bar", - 10, + DEFAULT_RANK_WINDOW_SIZE, RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT ); assertSimplifiedParamsRewrite( @@ -129,7 +146,13 @@ public void testSimplifiedParamsRewrite() { ); // All-fields wildcard - rrfRetrieverBuilder = new RRFRetrieverBuilder(null, List.of("*"), "baz", 10, RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT); + rrfRetrieverBuilder = new RRFRetrieverBuilder( + null, + List.of("*"), + "baz", + DEFAULT_RANK_WINDOW_SIZE, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT + ); assertSimplifiedParamsRewrite( rrfRetrieverBuilder, queryRewriteContext, @@ -208,6 +231,8 @@ private static void assertSimplifiedParamsRewrite( RRFRetrieverBuilder rewritten = retriever.doRewrite(ctx); assertNotSame(retriever, rewritten); + assertEquals(retriever.rankWindowSize(), rewritten.rankWindowSize()); + assertEquals(retriever.rankConstant(), rewritten.rankConstant()); assertEquals(expectedInnerRetrievers, 
getInnerRetrieversAsSet(rewritten)); } @@ -215,6 +240,8 @@ private static Set getInnerRetrieversAsSet(RRFRetrieverBuilder retriever Set innerRetrieversSet = new HashSet<>(); for (CompoundRetrieverBuilder.RetrieverSource innerRetriever : retriever.innerRetrievers()) { if (innerRetriever.retriever() instanceof RRFRetrieverBuilder innerRrfRetriever) { + assertEquals(retriever.rankWindowSize(), innerRrfRetriever.rankWindowSize()); + assertEquals(retriever.rankConstant(), innerRrfRetriever.rankConstant()); innerRetrieversSet.add(getInnerRetrieversAsSet(innerRrfRetriever)); } else { innerRetrieversSet.add(innerRetriever); From 2dc0b44f22dbf4e4accd12914e9b40e9b6a3b39c Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 6 Jun 2025 16:06:51 -0400 Subject: [PATCH 40/47] Improve edge case handling: - Do not allow querying remote indices - Handle when index wildcard pattern does not match any indices --- .../retriever/CompoundRetrieverBuilder.java | 9 +++---- .../rank/linear/LinearRetrieverBuilder.java | 25 +++++++++++++------ .../xpack/rank/rrf/RRFRetrieverBuilder.java | 17 ++++++++++--- .../linear/LinearRetrieverBuilderTests.java | 9 ++++--- .../rank/rrf/RRFRetrieverBuilderTests.java | 11 +++++--- 5 files changed, 49 insertions(+), 22 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java index 1daed5dc72185..aa28c79f2d1ca 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java @@ -100,7 +100,7 @@ public final RetrieverBuilder rewrite(QueryRewriteContext ctx) throws IOExceptio throw new IllegalStateException("PIT is required"); } - T rewritten = doRewrite(ctx); + RetrieverBuilder rewritten = doRewrite(ctx); if (rewritten != this) { return rewritten; } @@ -334,11 +334,10 @@ protected 
SearchSourceBuilder finalizeSourceBuilder(SearchSourceBuilder sourceBu * Perform any custom rewrite logic necessary * * @param ctx The query rewrite context - * @return T the rewritten retriever + * @return RetrieverBuilder the rewritten retriever */ - @SuppressWarnings("unchecked") - protected T doRewrite(QueryRewriteContext ctx) { - return (T) this; + protected RetrieverBuilder doRewrite(QueryRewriteContext ctx) { + return this; } private RankDoc[] getRankDocs(SearchResponse searchResponse) { diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 2d9b8396d7b0e..4358e09d417f2 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -12,6 +12,7 @@ import org.elasticsearch.action.ResolvedIndices; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.util.Maps; +import org.elasticsearch.index.query.MatchNoneQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.license.LicenseUtils; @@ -21,6 +22,7 @@ import org.elasticsearch.search.retriever.CompoundRetrieverBuilder; import org.elasticsearch.search.retriever.RetrieverBuilder; import org.elasticsearch.search.retriever.RetrieverParserContext; +import org.elasticsearch.search.retriever.StandardRetrieverBuilder; import org.elasticsearch.xcontent.ConstructingObjectParser; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.XContentBuilder; @@ -280,8 +282,8 @@ protected RankDoc[] combineInnerRetrieverResults(List rankResults, b } @Override - protected LinearRetrieverBuilder doRewrite(QueryRewriteContext ctx) { - LinearRetrieverBuilder rewritten = 
this; + protected RetrieverBuilder doRewrite(QueryRewriteContext ctx) { + RetrieverBuilder rewritten = this; ResolvedIndices resolvedIndices = ctx.getResolvedIndices(); if (resolvedIndices != null && query != null) { @@ -291,6 +293,10 @@ protected LinearRetrieverBuilder doRewrite(QueryRewriteContext ctx) { throw new IllegalArgumentException( "[" + NAME + "] does not support the simplified query format when querying multiple indices" ); + } else if (resolvedIndices.getRemoteClusterIndices().isEmpty() == false) { + throw new IllegalArgumentException( + "[" + NAME + "] does not support the simplified query format when querying remote indices" + ); } List fieldsInnerRetrievers = SimplifiedInnerRetrieverUtils.generateInnerRetrievers( @@ -319,13 +325,18 @@ protected LinearRetrieverBuilder doRewrite(QueryRewriteContext ctx) { } ).stream().map(CompoundRetrieverBuilder::convertToRetrieverSource).toList(); - float[] weights = new float[fieldsInnerRetrievers.size()]; - Arrays.fill(weights, DEFAULT_WEIGHT); + if (fieldsInnerRetrievers.isEmpty() == false) { + float[] weights = new float[fieldsInnerRetrievers.size()]; + Arrays.fill(weights, DEFAULT_WEIGHT); - ScoreNormalizer[] normalizers = new ScoreNormalizer[fieldsInnerRetrievers.size()]; - Arrays.fill(normalizers, normalizer); + ScoreNormalizer[] normalizers = new ScoreNormalizer[fieldsInnerRetrievers.size()]; + Arrays.fill(normalizers, normalizer); - rewritten = new LinearRetrieverBuilder(fieldsInnerRetrievers, null, null, normalizer, rankWindowSize, weights, normalizers); + rewritten = new LinearRetrieverBuilder(fieldsInnerRetrievers, null, null, normalizer, rankWindowSize, weights, normalizers); + } else { + // Inner retriever list can be empty when using an index wildcard pattern that doesn't match any indices + rewritten = new StandardRetrieverBuilder(new MatchNoneQueryBuilder()); + } } return rewritten; diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java 
b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java index 58574667da13f..a84151e1aca05 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java @@ -11,6 +11,7 @@ import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.action.ResolvedIndices; import org.elasticsearch.common.util.Maps; +import org.elasticsearch.index.query.MatchNoneQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.license.LicenseUtils; @@ -20,6 +21,7 @@ import org.elasticsearch.search.retriever.CompoundRetrieverBuilder; import org.elasticsearch.search.retriever.RetrieverBuilder; import org.elasticsearch.search.retriever.RetrieverParserContext; +import org.elasticsearch.search.retriever.StandardRetrieverBuilder; import org.elasticsearch.xcontent.ConstructingObjectParser; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.XContentBuilder; @@ -214,8 +216,8 @@ protected RRFRankDoc[] combineInnerRetrieverResults(List rankResults } @Override - protected RRFRetrieverBuilder doRewrite(QueryRewriteContext ctx) { - RRFRetrieverBuilder rewritten = this; + protected RetrieverBuilder doRewrite(QueryRewriteContext ctx) { + RetrieverBuilder rewritten = this; ResolvedIndices resolvedIndices = ctx.getResolvedIndices(); if (resolvedIndices != null && query != null) { @@ -225,6 +227,10 @@ protected RRFRetrieverBuilder doRewrite(QueryRewriteContext ctx) { throw new IllegalArgumentException( "[" + NAME + "] does not support the simplified query format when querying multiple indices" ); + } else if (resolvedIndices.getRemoteClusterIndices().isEmpty() == false) { + throw new IllegalArgumentException( + "[" + NAME + "] does not support the simplified query format when 
querying remote indices" + ); } List fieldsInnerRetrievers = SimplifiedInnerRetrieverUtils.generateInnerRetrievers( @@ -246,7 +252,12 @@ protected RRFRetrieverBuilder doRewrite(QueryRewriteContext ctx) { } ).stream().map(CompoundRetrieverBuilder::convertToRetrieverSource).toList(); - rewritten = new RRFRetrieverBuilder(fieldsInnerRetrievers, rankWindowSize, rankConstant); + if (fieldsInnerRetrievers.isEmpty() == false) { + rewritten = new RRFRetrieverBuilder(fieldsInnerRetrievers, rankWindowSize, rankConstant); + } else { + // Inner retriever list can be empty when using an index wildcard pattern that doesn't match any indices + rewritten = new StandardRetrieverBuilder(new MatchNoneQueryBuilder()); + } } return rewritten; diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java index ff0576c43c4b5..f3767e744fdd8 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java @@ -202,10 +202,13 @@ private static void assertSimplifiedParamsRewrite( ) ); - LinearRetrieverBuilder rewritten = retriever.doRewrite(ctx); + RetrieverBuilder rewritten = retriever.doRewrite(ctx); assertNotSame(retriever, rewritten); - assertEquals(retriever.rankWindowSize(), rewritten.rankWindowSize()); - assertEquals(expectedInnerRetrievers, getInnerRetrieversAsSet(rewritten)); + assertTrue(rewritten instanceof LinearRetrieverBuilder); + + LinearRetrieverBuilder rewrittenLinear = (LinearRetrieverBuilder) rewritten; + assertEquals(retriever.rankWindowSize(), rewrittenLinear.rankWindowSize()); + assertEquals(expectedInnerRetrievers, getInnerRetrieversAsSet(rewrittenLinear)); } private static Set getInnerRetrieversAsSet(LinearRetrieverBuilder retriever) { diff 
--git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java index 8b7f5cb1cfaf8..f0dca8affcd29 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java @@ -229,11 +229,14 @@ private static void assertSimplifiedParamsRewrite( }).toArray()) ); - RRFRetrieverBuilder rewritten = retriever.doRewrite(ctx); + RetrieverBuilder rewritten = retriever.doRewrite(ctx); assertNotSame(retriever, rewritten); - assertEquals(retriever.rankWindowSize(), rewritten.rankWindowSize()); - assertEquals(retriever.rankConstant(), rewritten.rankConstant()); - assertEquals(expectedInnerRetrievers, getInnerRetrieversAsSet(rewritten)); + assertTrue(rewritten instanceof RRFRetrieverBuilder); + + RRFRetrieverBuilder rewrittenRrf = (RRFRetrieverBuilder) rewritten; + assertEquals(retriever.rankWindowSize(), rewrittenRrf.rankWindowSize()); + assertEquals(retriever.rankConstant(), rewrittenRrf.rankConstant()); + assertEquals(expectedInnerRetrievers, getInnerRetrieversAsSet(rewrittenRrf)); } private static Set getInnerRetrieversAsSet(RRFRetrieverBuilder retriever) { From 6d8eb0b8817251bf00590ff12caf2e701d89a926 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 6 Jun 2025 16:36:07 -0400 Subject: [PATCH 41/47] Added more linear YAML tests --- .../linear/20_linear_retriever_simplified.yml | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml index 32a9418d2466a..ffbb3e9476971 100644 --- 
a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml @@ -213,3 +213,91 @@ setup: - lt: { hits.hits.1._score: 1.0 } - match: { hits.hits.2._id: "1" } - lt: { hits.hits.2._score: 1.0 } + +--- +"Can query keyword fields": + - do: + headers: + Content-Type: application/json + search: + index: test-index + body: + retriever: + linear: + fields: [ "keyword" ] + query: "keyword match 1" + normalizer: "minmax" + + # Lexical-only match, so max score is 1 + - match: { hits.total.value: 1 } + - length: { hits.hits: 1 } + - match: { hits.hits.0._id: "1" } + - close_to: { hits.hits.0._score: { value: 1.0, error: 0.0001 } } + +--- +"Can query date fields": + - do: + headers: + Content-Type: application/json + search: + index: test-index + body: + retriever: + linear: + fields: [ "timestamp" ] + query: "2010-02-08" + normalizer: "minmax" + + # Lexical-only match, so max score is 1 + - match: { hits.total.value: 1 } + - length: { hits.hits: 1 } + - match: { hits.hits.0._id: "2" } + - close_to: { hits.hits.0._score: { value: 1.0, error: 0.0001 } } + +--- +"Wildcard index patterns that do not resolve to any index are handled gracefully": + - do: + search: + index: wildcard-* + body: + retriever: + linear: + query: "match" + normalizer: "minmax" + + - match: { hits.total.value: 0 } + - length: { hits.hits: 0 } + +--- +"Multi-index searches are not allowed": + - do: + indices.create: + index: test-index-2 + + - do: + catch: bad_request + search: + index: [ test-index, test-index-2 ] + body: + retriever: + linear: + query: "match" + normalizer: "minmax" + + - match: { error.root_cause.0.reason: "[linear] does not support the simplified query format when querying multiple indices" } + +--- +"Wildcard field patterns that do not resolve to any field are handled gracefully": + - do: + search: + index: test-index + 
body: + retriever: + linear: + fields: [ "wildcard-*" ] + query: "match" + normalizer: "minmax" + + - match: { hits.total.value: 0 } + - length: { hits.hits: 0 } + From ffe7374aa230edeba0b9eaedc1912966614462ff Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Fri, 6 Jun 2025 17:02:35 -0400 Subject: [PATCH 42/47] Added raw vector field query test --- .../linear/20_linear_retriever_simplified.yml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml index ffbb3e9476971..4083770d1895e 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml @@ -2,7 +2,7 @@ setup: - requires: cluster_features: [ "simplified_retriever_format" ] reason: "Simplified retriever format" - test_runner_features: [ "close_to", "headers" ] + test_runner_features: [ "close_to", "headers", "contains" ] - do: inference.put: @@ -254,6 +254,22 @@ setup: - match: { hits.hits.0._id: "2" } - close_to: { hits.hits.0._score: { value: 1.0, error: 0.0001 } } +--- +"Cannot query raw vector fields": + - do: + catch: bad_request + search: + index: test-index + body: + retriever: + linear: + fields: [ "vector" ] + query: "foo" + normalizer: "minmax" + + - contains: { error.root_cause.0.reason: "[linear] search failed - retrievers '[standard]' returned errors" } + - contains: { error.root_cause.0.suppressed.0.failed_shards.0.reason.reason: "Field [vector] of type [dense_vector] does not support match queries" } + --- "Wildcard index patterns that do not resolve to any index are handled gracefully": - do: From 966b6415dcecaad6e067ac151233650ff2d1787f Mon Sep 17 00:00:00 2001 
From: Mike Pellegrini Date: Mon, 9 Jun 2025 08:47:44 -0400 Subject: [PATCH 43/47] Add more linear retriever YAML tests --- .../linear/20_linear_retriever_simplified.yml | 84 +++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml index 4083770d1895e..25f22fadb154c 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml @@ -317,3 +317,87 @@ setup: - match: { hits.total.value: 0 } - length: { hits.hits: 0 } +--- +"Cannot mix simplified query format with custom sub-retrievers": + - do: + catch: bad_request + search: + index: test-index + body: + retriever: + linear: + query: "foo" + normalizer: "minmax" + retrievers: + - retriever: + standard: + query: + match: + keyword: "bar" + + - contains: { error.root_cause.0.reason: "[linear] cannot combine [retrievers] and [query]" } + +--- +"Cannot set top-level normalizer when using custom sub-retrievers": + - do: + catch: bad_request + search: + index: test-index + body: + retriever: + linear: + normalizer: "minmax" + retrievers: + - retriever: + standard: + query: + match: + keyword: "bar" + + - contains: { error.root_cause.0.reason: "[linear] [normalizer] cannot be provided when [retrievers] is specified" } + +--- +"Missing required params": + - do: + catch: bad_request + search: + index: test-index + body: + retriever: + linear: + query: "foo" + + - contains: { error.root_cause.0.reason: "[linear] [normalizer] must be provided when [query] is specified" } + + - do: + catch: bad_request + search: + index: test-index + body: + retriever: + linear: + fields: ["text_1", "text_2"] + + - contains: { 
error.root_cause.0.reason: "[linear] [query] must be provided when [fields] is specified" } + + - do: + catch: bad_request + search: + index: test-index + body: + retriever: + linear: + fields: [ "text_1", "text_2" ] + query: "" + + - contains: { error.root_cause.0.reason: "[linear] [query] cannot be empty" } + + - do: + catch: bad_request + search: + index: test-index + body: + retriever: + linear: {} + + - contains: { error.root_cause.0.reason: "[linear] must provide [retrievers] or [query]" } From a84eac49c6d7a6f6108b63bf909cf8faf7705088 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Mon, 9 Jun 2025 09:11:13 -0400 Subject: [PATCH 44/47] Added remote index search test for linear retriever --- .../linear/LinearRetrieverBuilderTests.java | 51 +++++++++++++++++-- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java index f3767e744fdd8..c6f75e63454b1 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java @@ -26,6 +26,7 @@ import org.elasticsearch.search.retriever.StandardRetrieverBuilder; import org.elasticsearch.test.ESTestCase; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -39,7 +40,7 @@ public class LinearRetrieverBuilderTests extends ESTestCase { public void testSimplifiedParamsRewrite() { final String indexName = "test-index"; final List testInferenceFields = List.of("semantic_field_1", "semantic_field_2"); - final ResolvedIndices resolvedIndices = createMockResolvedIndices(indexName, testInferenceFields); + final ResolvedIndices resolvedIndices = createMockResolvedIndices(indexName, testInferenceFields, null); 
final QueryRewriteContext queryRewriteContext = new QueryRewriteContext( parserConfig(), null, @@ -145,8 +146,41 @@ public void testSimplifiedParamsRewrite() { ); } - private static ResolvedIndices createMockResolvedIndices(String indexName, List inferenceFields) { - Index index = new Index(indexName, randomAlphaOfLength(10)); + public void testSearchRemoteIndex() { + final ResolvedIndices resolvedIndices = createMockResolvedIndices( + "local-index", + List.of(), + Map.of("remote-cluster", "remote-index") + ); + final QueryRewriteContext queryRewriteContext = new QueryRewriteContext( + parserConfig(), + null, + null, + resolvedIndices, + new PointInTimeBuilder(new BytesArray("pitid")), + null + ); + + LinearRetrieverBuilder retriever = new LinearRetrieverBuilder( + null, + null, + "foo", + MinMaxScoreNormalizer.INSTANCE, + DEFAULT_RANK_WINDOW_SIZE, + new float[0], + new ScoreNormalizer[0] + ); + + IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> retriever.doRewrite(queryRewriteContext)); + assertEquals("[linear] does not support the simplified query format when querying remote indices", iae.getMessage()); + } + + private static ResolvedIndices createMockResolvedIndices( + String localIndexName, + List inferenceFields, + Map remoteIndexNames + ) { + Index index = new Index(localIndexName, randomAlphaOfLength(10)); IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(index.getName()) .settings( Settings.builder() @@ -162,9 +196,16 @@ private static ResolvedIndices createMockResolvedIndices(String indexName, List< ); } + Map remoteIndices = new HashMap<>(); + if (remoteIndexNames != null) { + for (Map.Entry entry : remoteIndexNames.entrySet()) { + remoteIndices.put(entry.getKey(), new OriginalIndices(new String[] { entry.getValue() }, IndicesOptions.DEFAULT)); + } + } + return new MockResolvedIndices( - Map.of(), - new OriginalIndices(new String[] { indexName }, IndicesOptions.DEFAULT), + remoteIndices, + new 
OriginalIndices(new String[] { localIndexName }, IndicesOptions.DEFAULT), Map.of(index, indexMetadataBuilder.build()) ); } From 5632821d6ab3baf7cc844e103edcc8a64f335e20 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Mon, 9 Jun 2025 09:15:37 -0400 Subject: [PATCH 45/47] Added remote index search test for rrf retriever --- .../rank/rrf/RRFRetrieverBuilderTests.java | 52 +++++++++++++++++-- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java index f0dca8affcd29..32ad4f7587a82 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java @@ -34,6 +34,7 @@ import org.elasticsearch.xcontent.json.JsonXContent; import java.io.IOException; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -87,7 +88,7 @@ public void testRetrieverExtractionErrors() throws IOException { public void testSimplifiedParamsRewrite() { final String indexName = "test-index"; final List testInferenceFields = List.of("semantic_field_1", "semantic_field_2"); - final ResolvedIndices resolvedIndices = createMockResolvedIndices(indexName, testInferenceFields); + final ResolvedIndices resolvedIndices = createMockResolvedIndices(indexName, testInferenceFields, null); final QueryRewriteContext queryRewriteContext = new QueryRewriteContext( parserConfig(), null, @@ -162,6 +163,36 @@ public void testSimplifiedParamsRewrite() { ); } + public void testSearchRemoteIndex() { + final ResolvedIndices resolvedIndices = createMockResolvedIndices( + "local-index", + List.of(), + Map.of("remote-cluster", "remote-index") + ); + final QueryRewriteContext queryRewriteContext = new QueryRewriteContext( + 
parserConfig(), + null, + null, + resolvedIndices, + new PointInTimeBuilder(new BytesArray("pitid")), + null + ); + + RRFRetrieverBuilder rrfRetrieverBuilder = new RRFRetrieverBuilder( + null, + null, + "foo", + DEFAULT_RANK_WINDOW_SIZE, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT + ); + + IllegalArgumentException iae = expectThrows( + IllegalArgumentException.class, + () -> rrfRetrieverBuilder.doRewrite(queryRewriteContext) + ); + assertEquals("[rrf] does not support the simplified query format when querying remote indices", iae.getMessage()); + } + @Override protected NamedXContentRegistry xContentRegistry() { List entries = new SearchModule(Settings.EMPTY, List.of()).getNamedXContents(); @@ -183,8 +214,12 @@ protected NamedXContentRegistry xContentRegistry() { return new NamedXContentRegistry(entries); } - private static ResolvedIndices createMockResolvedIndices(String indexName, List inferenceFields) { - Index index = new Index(indexName, randomAlphaOfLength(10)); + private static ResolvedIndices createMockResolvedIndices( + String localIndexName, + List inferenceFields, + Map remoteIndexNames + ) { + Index index = new Index(localIndexName, randomAlphaOfLength(10)); IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(index.getName()) .settings( Settings.builder() @@ -200,9 +235,16 @@ private static ResolvedIndices createMockResolvedIndices(String indexName, List< ); } + Map remoteIndices = new HashMap<>(); + if (remoteIndexNames != null) { + for (Map.Entry entry : remoteIndexNames.entrySet()) { + remoteIndices.put(entry.getKey(), new OriginalIndices(new String[] { entry.getValue() }, IndicesOptions.DEFAULT)); + } + } + return new MockResolvedIndices( - Map.of(), - new OriginalIndices(new String[] { indexName }, IndicesOptions.DEFAULT), + remoteIndices, + new OriginalIndices(new String[] { localIndexName }, IndicesOptions.DEFAULT), Map.of(index, indexMetadataBuilder.build()) ); } From f906d413978b888e09925a1efc91b3b44ddb3ee8 Mon Sep 17 00:00:00 
2001 From: Mike Pellegrini Date: Mon, 9 Jun 2025 09:22:21 -0400 Subject: [PATCH 46/47] Added field boost combination test --- .../linear/LinearRetrieverBuilderTests.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java index c6f75e63454b1..e6ff00503f5e2 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java @@ -126,6 +126,25 @@ public void testSimplifiedParamsRewrite() { MinMaxScoreNormalizer.INSTANCE ); + // Multiple boosts defined on the same field + retriever = new LinearRetrieverBuilder( + null, + List.of("field_*^1.5", "field_1^3.0", "*_field_1^2.5", "semantic_*^1.5"), + "baz2", + MinMaxScoreNormalizer.INSTANCE, + DEFAULT_RANK_WINDOW_SIZE, + new float[0], + new ScoreNormalizer[0] + ); + assertSimplifiedParamsRewrite( + retriever, + queryRewriteContext, + Map.of("field_*", 1.5f, "field_1", 3.0f, "*_field_1", 2.5f, "semantic_*", 1.5f), + Map.of("semantic_field_1", 3.75f, "semantic_field_2", 1.5f), + "baz2", + MinMaxScoreNormalizer.INSTANCE + ); + // All-fields wildcard retriever = new LinearRetrieverBuilder( null, From 7c2dc4deda0d6d5ff86e9b94bfa9d95fbcc9cd3e Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Mon, 9 Jun 2025 17:15:20 -0400 Subject: [PATCH 47/47] Added semantic text multi-match query test --- .../SemanticMultiMatchQueryBuilderTests.java | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryBuilderTests.java diff --git 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryBuilderTests.java new file mode 100644 index 0000000000000..b54ca946e6179 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryBuilderTests.java @@ -0,0 +1,125 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.queries; + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DisjunctionMaxQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.elasticsearch.cluster.ClusterChangedEvent; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.IOUtils; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.MapperServiceTestCase; +import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.query.MultiMatchQueryBuilder; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ClusterServiceUtils; +import org.elasticsearch.test.client.NoOpClient; +import org.elasticsearch.threadpool.TestThreadPool; +import org.elasticsearch.xpack.inference.InferencePlugin; +import org.elasticsearch.xpack.inference.registry.ModelRegistry; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +import java.util.Collection; +import java.util.List; +import java.util.function.Supplier; + +public class SemanticMultiMatchQueryBuilderTests extends MapperServiceTestCase { + private static TestThreadPool threadPool; + 
private static ModelRegistry modelRegistry; + + private static class InferencePluginWithModelRegistry extends InferencePlugin { + InferencePluginWithModelRegistry(Settings settings) { + super(settings); + } + + @Override + protected Supplier getModelRegistry() { + return () -> modelRegistry; + } + } + + @BeforeClass + public static void startModelRegistry() { + threadPool = new TestThreadPool(SemanticMultiMatchQueryBuilderTests.class.getName()); + var clusterService = ClusterServiceUtils.createClusterService(threadPool); + modelRegistry = new ModelRegistry(clusterService, new NoOpClient(threadPool)); + modelRegistry.clusterChanged(new ClusterChangedEvent("init", clusterService.state(), clusterService.state()) { + @Override + public boolean localNodeMaster() { + return false; + } + }); + } + + @AfterClass + public static void stopModelRegistry() { + IOUtils.closeWhileHandlingException(threadPool); + } + + @Override + protected Collection getPlugins() { + return List.of(new InferencePluginWithModelRegistry(Settings.EMPTY)); + } + + public void testResolveSemanticTextFieldFromWildcard() throws Exception { + MapperService mapperService = createMapperService(""" + { + "_doc" : { + "properties": { + "text_field": { "type": "text" }, + "keyword_field": { "type": "keyword" }, + "inference_field": { "type": "semantic_text", "inference_id": "test_service" } + } + } + } + """); + + ParsedDocument doc = mapperService.documentMapper().parse(source(""" + { + "text_field" : "foo", + "keyword_field" : "foo", + "inference_field" : "foo", + "_inference_fields": { + "inference_field": { + "inference": { + "inference_id": "test_service", + "model_settings": { + "task_type": "sparse_embedding" + }, + "chunks": { + "inference_field": [ + { + "start_offset": 0, + "end_offset": 3, + "embeddings": { + "foo": 1.0 + } + } + ] + } + } + } + } + } + """)); + + withLuceneIndex(mapperService, iw -> iw.addDocument(doc.rootDoc()), ir -> { + SearchExecutionContext context = 
createSearchExecutionContext(mapperService, newSearcher(ir)); + Query query = new MultiMatchQueryBuilder("foo", "*_field").toQuery(context); + Query expected = new DisjunctionMaxQuery( + List.of(new TermQuery(new Term("text_field", "foo")), new TermQuery(new Term("keyword_field", "foo"))), + 0f + ); + assertEquals(expected, query); + }); + } +}