Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
a10e92d
Add simplified inner retriever parser
Mikep86 May 20, 2025
07e368f
Integrate simplified handling into RRF
Mikep86 May 20, 2025
3fc2567
Refactoring
Mikep86 May 20, 2025
822edac
Added doRewrite method
Mikep86 May 21, 2025
4b06636
Add fields and query instance vars
Mikep86 May 22, 2025
2212167
RRF retriever parsing updates
Mikep86 May 22, 2025
4059894
Added custom rewrite logic for RRF retriever
Mikep86 May 22, 2025
1c8dab3
Remove unused method
Mikep86 May 22, 2025
a217e52
Use a most_fields multi-match query
Mikep86 May 23, 2025
11d1445
Added TODO
Mikep86 May 23, 2025
7d3e78e
Added support for wildcards
Mikep86 May 23, 2025
c6bf013
Handle when only query is supplied by the user
Mikep86 May 23, 2025
510fe54
Added fields, query, and normalizer params to linear retriever
Mikep86 May 27, 2025
187758f
Added custom rewrite logic for linear retriever
Mikep86 May 28, 2025
51a3dbe
Updated doRewrite method to return rewritten retriever
Mikep86 May 28, 2025
e33cebf
Fix NPE
Mikep86 May 28, 2025
a777601
Temporary fix for minmax bug
Mikep86 May 29, 2025
34250dc
Validation improvements
Mikep86 May 29, 2025
551e7f5
Update docs/changelog/128633.yaml
Mikep86 May 29, 2025
7267b59
Fix changelog
Mikep86 May 29, 2025
22594a5
Merge branch 'main' into simplified-linear-and-rrf-retrievers
Mikep86 Jun 2, 2025
397a1b8
Updated RRFRetrieverBuilderParsingTests
Mikep86 Jun 2, 2025
1703f1f
_almost_ working version of rewrite tests
Mikep86 Jun 3, 2025
e7b5476
[CI] Auto commit changes from spotless
Jun 3, 2025
d43ac88
Fixed rewrite tests
Mikep86 Jun 4, 2025
55f8f6a
Merge branch 'main' into simplified-linear-and-rrf-retrievers
Mikep86 Jun 4, 2025
e8715b2
Add wildcards to rewrite test
Mikep86 Jun 4, 2025
a014e02
Updated LinearRetrieverBuilderParsingTests
Mikep86 Jun 4, 2025
0050ad5
Clean up RRFRetrieverBuilder constructors
Mikep86 Jun 4, 2025
6b4c04b
Added equals and hash code implementations to LinearRetrieverBuilder
Mikep86 Jun 4, 2025
1ce5a9a
Added linear retriever rewrite tests
Mikep86 Jun 4, 2025
2915332
Fix inappropriate reference to RRFRankPlugin.NAME
Mikep86 Jun 4, 2025
dc54673
Resolve TODO
Mikep86 Jun 6, 2025
e5db2e4
Merge branch 'main' into simplified-linear-and-rrf-retrievers
Mikep86 Jun 6, 2025
7adb091
Added linear retriever YAML tests
Mikep86 Jun 6, 2025
0655c1c
Adjust test
Mikep86 Jun 6, 2025
54b67c2
Fix inference API calls in YAML tests
Mikep86 Jun 6, 2025
c9f7a84
Update comment
Mikep86 Jun 6, 2025
e3f7a54
Linear boosting test development
Mikep86 Jun 6, 2025
1240aa1
Formatting
Mikep86 Jun 6, 2025
1cd8797
Check linear retriever rank window size propagation
Mikep86 Jun 6, 2025
f8923c3
Check RRF retriever rank window size and rank constant propagation
Mikep86 Jun 6, 2025
2dc0b44
Improve edge case handling:
Mikep86 Jun 6, 2025
6d8eb0b
Added more linear YAML tests
Mikep86 Jun 6, 2025
ffe7374
Added raw vector field query test
Mikep86 Jun 6, 2025
966b641
Add more linear retriever YAML tests
Mikep86 Jun 9, 2025
a84eac4
Added remote index search test for linear retriever
Mikep86 Jun 9, 2025
5632821
Added remote index search test for rrf retriever
Mikep86 Jun 9, 2025
f906d41
Added field boost combination test
Mikep86 Jun 9, 2025
42c10c6
Merge branch 'main' into simplified-linear-and-rrf-retrievers
Mikep86 Jun 9, 2025
7c2dc4d
Added semantic text multi-match query test
Mikep86 Jun 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/128633.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 128633
summary: Simplified linear and RRF retrievers
area: Search
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
Expand Down Expand Up @@ -65,7 +66,7 @@ protected CompoundRetrieverBuilder(List<RetrieverSource> innerRetrievers, int ra

@SuppressWarnings("unchecked")
public T addChild(RetrieverBuilder retrieverBuilder) {
innerRetrievers.add(new RetrieverSource(retrieverBuilder, null));
innerRetrievers.add(convertToRetrieverSource(retrieverBuilder));
return (T) this;
}

Expand Down Expand Up @@ -99,6 +100,11 @@ public final RetrieverBuilder rewrite(QueryRewriteContext ctx) throws IOExceptio
throw new IllegalStateException("PIT is required");
}

RetrieverBuilder rewritten = doRewrite(ctx);
if (rewritten != this) {
return rewritten;
}

// Rewrite prefilters
// We eagerly rewrite prefilters, because some of the innerRetrievers
// could be compound too, so we want to propagate all the necessary filter information to them
Expand Down Expand Up @@ -290,6 +296,14 @@ public int rankWindowSize() {
return rankWindowSize;
}

/**
 * Exposes the inner retrievers of this compound retriever without allowing callers to modify them.
 *
 * @return an unmodifiable view of the inner retriever sources
 */
public List<RetrieverSource> innerRetrievers() {
    final List<RetrieverSource> readOnlyView = Collections.unmodifiableList(innerRetrievers);
    return readOnlyView;
}

/**
 * Wraps a retriever builder in a {@link RetrieverSource}.
 *
 * @param retrieverBuilder the retriever builder to wrap
 * @return a new {@link RetrieverSource} built from {@code retrieverBuilder}, with {@code null} passed as the
 *         second constructor argument
 */
public static RetrieverSource convertToRetrieverSource(RetrieverBuilder retrieverBuilder) {
    final RetrieverSource wrapped = new RetrieverSource(retrieverBuilder, null);
    return wrapped;
}

protected final SearchSourceBuilder createSearchSourceBuilder(PointInTimeBuilder pit, RetrieverBuilder retrieverBuilder) {
var sourceBuilder = new SearchSourceBuilder().pointInTimeBuilder(pit)
.trackTotalHits(false)
Expand All @@ -316,6 +330,16 @@ protected SearchSourceBuilder finalizeSourceBuilder(SearchSourceBuilder sourceBu
return sourceBuilder;
}

/**
 * Hook for subclasses to perform any custom rewrite logic.
 * <p>
 * {@code rewrite} calls this method before it rewrites the prefilters. If the returned builder is not this
 * instance, {@code rewrite} returns it immediately and skips the rest of its own rewrite steps.
 * The default implementation performs no custom rewrite and returns {@code this}.
 *
 * @param ctx The query rewrite context
 * @return the rewritten retriever, or {@code this} when no custom rewrite is needed
 */
protected RetrieverBuilder doRewrite(QueryRewriteContext ctx) {
return this;
}

private RankDoc[] getRankDocs(SearchResponse searchResponse) {
int size = searchResponse.getHits().getHits().length;
RankDoc[] docs = new RankDoc[size];
Expand Down
2 changes: 1 addition & 1 deletion x-pack/plugin/inference/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -403,5 +403,5 @@ tasks.named("thirdPartyAudit").configure {
}

tasks.named('yamlRestTest') {
usesDefaultDistribution("to be triaged")
usesDefaultDistribution("Uses the inference API")
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.inference.queries;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.elasticsearch.cluster.ClusterChangedEvent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.MapperServiceTestCase;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ClusterServiceUtils;
import org.elasticsearch.test.client.NoOpClient;
import org.elasticsearch.threadpool.TestThreadPool;
import org.elasticsearch.xpack.inference.InferencePlugin;
import org.elasticsearch.xpack.inference.registry.ModelRegistry;
import org.junit.AfterClass;
import org.junit.BeforeClass;

import java.util.Collection;
import java.util.List;
import java.util.function.Supplier;

/**
 * Verifies how a {@link MultiMatchQueryBuilder} with a wildcard field pattern is turned into a Lucene query
 * when the mapping holds a {@code semantic_text} field alongside {@code text} and {@code keyword} fields.
 */
public class SemanticMultiMatchQueryBuilderTests extends MapperServiceTestCase {
    private static TestThreadPool threadPool;
    private static ModelRegistry modelRegistry;

    /**
     * Inference plugin variant whose model registry supplier hands out the registry owned by this test class.
     */
    private static class InferencePluginWithModelRegistry extends InferencePlugin {
        InferencePluginWithModelRegistry(Settings settings) {
            super(settings);
        }

        @Override
        protected Supplier<ModelRegistry> getModelRegistry() {
            // The static field is read each time the supplier is invoked, not when the supplier is created.
            return () -> SemanticMultiMatchQueryBuilderTests.modelRegistry;
        }
    }

    /**
     * Creates the shared thread pool and model registry, then feeds the registry an initial cluster-changed
     * event that reports the local node as not being the master.
     */
    @BeforeClass
    public static void startModelRegistry() {
        final String poolName = SemanticMultiMatchQueryBuilderTests.class.getName();
        threadPool = new TestThreadPool(poolName);

        var clusterService = ClusterServiceUtils.createClusterService(threadPool);
        var noOpClient = new NoOpClient(threadPool);
        modelRegistry = new ModelRegistry(clusterService, noOpClient);

        ClusterChangedEvent initEvent = new ClusterChangedEvent("init", clusterService.state(), clusterService.state()) {
            @Override
            public boolean localNodeMaster() {
                return false;
            }
        };
        modelRegistry.clusterChanged(initEvent);
    }

    /**
     * Closes the shared thread pool, suppressing any exception raised while closing.
     */
    @AfterClass
    public static void stopModelRegistry() {
        IOUtils.closeWhileHandlingException(threadPool);
    }

    @Override
    protected Collection<? extends Plugin> getPlugins() {
        final Plugin inferencePlugin = new InferencePluginWithModelRegistry(Settings.EMPTY);
        return List.of(inferencePlugin);
    }

    /**
     * Indexes one document into a mapping with {@code text_field}, {@code keyword_field} and the
     * {@code semantic_text} field {@code inference_field}, then builds a multi-match query for {@code "foo"}
     * over the pattern {@code "*_field"}. The expected query is a disjunction-max over term queries on
     * {@code text_field} and {@code keyword_field} only.
     */
    public void testResolveSemanticTextFieldFromWildcard() throws Exception {
        final String mapping = """
            {
            "_doc" : {
            "properties": {
            "text_field": { "type": "text" },
            "keyword_field": { "type": "keyword" },
            "inference_field": { "type": "semantic_text", "inference_id": "test_service" }
            }
            }
            }
            """;
        MapperService mapperService = createMapperService(mapping);

        final String documentJson = """
            {
            "text_field" : "foo",
            "keyword_field" : "foo",
            "inference_field" : "foo",
            "_inference_fields": {
            "inference_field": {
            "inference": {
            "inference_id": "test_service",
            "model_settings": {
            "task_type": "sparse_embedding"
            },
            "chunks": {
            "inference_field": [
            {
            "start_offset": 0,
            "end_offset": 3,
            "embeddings": {
            "foo": 1.0
            }
            }
            ]
            }
            }
            }
            }
            }
            """;
        ParsedDocument parsedDoc = mapperService.documentMapper().parse(source(documentJson));

        withLuceneIndex(mapperService, writer -> writer.addDocument(parsedDoc.rootDoc()), reader -> {
            SearchExecutionContext searchContext = createSearchExecutionContext(mapperService, newSearcher(reader));
            Query actual = new MultiMatchQueryBuilder("foo", "*_field").toQuery(searchContext);

            // Only the text and keyword fields are expected to contribute a clause.
            Query textMatch = new TermQuery(new Term("text_field", "foo"));
            Query keywordMatch = new TermQuery(new Term("keyword_field", "foo"));
            Query expected = new DisjunctionMaxQuery(List.of(textMatch, keywordMatch), 0f);

            assertEquals(expected, actual);
        });
    }
}
4 changes: 4 additions & 0 deletions x-pack/plugin/rank-rrf/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,7 @@ dependencies {

clusterPlugins project(':x-pack:plugin:inference:qa:test-service-plugin')
}

// Configure the yamlRestTest task to use the default distribution; the string argument states the reason.
// NOTE(review): presumably needed because these YAML tests call the inference API — confirm.
tasks.named('yamlRestTest') {
usesDefaultDistribution("Uses the inference API")
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
public class RankRRFFeatures implements FeatureSpecification {

public static final NodeFeature LINEAR_RETRIEVER_SUPPORTED = new NodeFeature("linear_retriever_supported");
public static final NodeFeature SIMPLIFIED_RETRIEVER_FORMAT = new NodeFeature("simplified_retriever_format");

@Override
public Set<NodeFeature> getFeatures() {
Expand All @@ -26,6 +27,6 @@ public Set<NodeFeature> getFeatures() {

@Override
public Set<NodeFeature> getTestFeatures() {
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX);
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX, SIMPLIFIED_RETRIEVER_FORMAT);
}
}
Loading
Loading