Skip to content

Commit bf15d15

Browse files
authored
Support querying multiple indices with the simplified RRF retriever (#134822)
1 parent 8933c15 commit bf15d15

File tree

5 files changed

+412
-44
lines changed

5 files changed

+412
-44
lines changed

docs/changelog/134822.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 134822
2+
summary: Support querying multiple indices with the simplified RRF retriever
3+
area: Relevance
4+
type: enhancement
5+
issues: []

x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ public Set<NodeFeature> getTestFeatures() {
4040
RRFRetrieverBuilder.MULTI_FIELDS_QUERY_FORMAT_SUPPORT,
4141
RRFRetrieverBuilder.WEIGHTED_SUPPORT,
4242
LINEAR_RETRIEVER_TOP_LEVEL_NORMALIZER,
43-
LinearRetrieverBuilder.MULTI_INDEX_SIMPLIFIED_FORMAT_SUPPORT
43+
LinearRetrieverBuilder.MULTI_INDEX_SIMPLIFIED_FORMAT_SUPPORT,
44+
RRFRetrieverBuilder.MULTI_INDEX_SIMPLIFIED_FORMAT_SUPPORT
4445
);
4546
}
4647
}

x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,9 @@
5050
public final class RRFRetrieverBuilder extends CompoundRetrieverBuilder<RRFRetrieverBuilder> {
5151
public static final NodeFeature MULTI_FIELDS_QUERY_FORMAT_SUPPORT = new NodeFeature("rrf_retriever.multi_fields_query_format_support");
5252
public static final NodeFeature WEIGHTED_SUPPORT = new NodeFeature("rrf_retriever.weighted_support");
53-
53+
public static final NodeFeature MULTI_INDEX_SIMPLIFIED_FORMAT_SUPPORT = new NodeFeature(
54+
"rrf_retriever.multi_index_simplified_format_support"
55+
);
5456
public static final String NAME = "rrf";
5557

5658
public static final ParseField RETRIEVERS_FIELD = new ParseField("retrievers");
@@ -253,11 +255,7 @@ protected RetrieverBuilder doRewrite(QueryRewriteContext ctx) {
253255
// TODO: Refactor duplicate code
254256
// Using the multi-fields query format
255257
var localIndicesMetadata = resolvedIndices.getConcreteLocalIndicesMetadata();
256-
if (localIndicesMetadata.size() > 1) {
257-
throw new IllegalArgumentException(
258-
"[" + NAME + "] cannot specify [" + QUERY_FIELD.getPreferredName() + "] when querying multiple indices"
259-
);
260-
} else if (resolvedIndices.getRemoteClusterIndices().isEmpty() == false) {
258+
if (resolvedIndices.getRemoteClusterIndices().isEmpty() == false) {
261259
throw new IllegalArgumentException(
262260
"[" + NAME + "] cannot specify [" + QUERY_FIELD.getPreferredName() + "] when querying remote indices"
263261
);

x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.elasticsearch.action.ResolvedIndices;
1212
import org.elasticsearch.common.bytes.BytesArray;
1313
import org.elasticsearch.common.settings.Settings;
14+
import org.elasticsearch.core.Tuple;
1415
import org.elasticsearch.index.query.QueryRewriteContext;
1516
import org.elasticsearch.search.SearchModule;
1617
import org.elasticsearch.search.builder.PointInTimeBuilder;
@@ -235,6 +236,270 @@ public void testMultiFieldsParamsRewrite() {
235236
);
236237
}
237238

239+
public void testMultiIndexMultiFieldsParamsRewrite() {
240+
String indexName = "test-index";
241+
String anotherIndexName = "test-another-index";
242+
final ResolvedIndices resolvedIndices = createMockResolvedIndices(
243+
Map.of(
244+
indexName,
245+
List.of("semantic_field_1", "semantic_field_2"),
246+
anotherIndexName,
247+
List.of("semantic_field_2", "semantic_field_3")
248+
),
249+
null,
250+
Map.of() // use random and different inference IDs for semantic_text fields
251+
);
252+
253+
final QueryRewriteContext queryRewriteContext = new QueryRewriteContext(
254+
parserConfig(),
255+
null,
256+
null,
257+
TransportVersion.current(),
258+
RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY,
259+
resolvedIndices,
260+
new PointInTimeBuilder(new BytesArray("pitid")),
261+
null,
262+
null
263+
);
264+
265+
// No wildcards, no per-field boosting
266+
RRFRetrieverBuilder retriever = new RRFRetrieverBuilder(
267+
null,
268+
List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"),
269+
"foo",
270+
DEFAULT_RANK_WINDOW_SIZE,
271+
RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT,
272+
new float[0]
273+
);
274+
assertMultiIndexMultiFieldsParamsRewrite(
275+
retriever,
276+
queryRewriteContext,
277+
Map.of(
278+
Map.of("field_1", 1.0f, "field_2", 1.0f),
279+
List.of(indexName),
280+
Map.of("field_1", 1.0f, "field_2", 1.0f, "semantic_field_1", 1.0f),
281+
List.of(anotherIndexName)
282+
),
283+
Map.of(
284+
new Tuple<>("semantic_field_1", List.of(indexName)),
285+
1.0f,
286+
new Tuple<>("semantic_field_2", List.of(indexName)), // field with different inference IDs, we filter on index name
287+
1.0f,
288+
new Tuple<>("semantic_field_2", List.of(anotherIndexName)),
289+
1.0f
290+
),
291+
"foo",
292+
null
293+
);
294+
295+
// Glob matching on inference and non-inference fields
296+
retriever = new RRFRetrieverBuilder(
297+
null,
298+
List.of("field_*", "field_1", "*_field_1", "semantic_*"),
299+
"baz2",
300+
DEFAULT_RANK_WINDOW_SIZE,
301+
RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT,
302+
new float[0]
303+
);
304+
assertMultiIndexMultiFieldsParamsRewrite(
305+
retriever,
306+
queryRewriteContext,
307+
Map.of(Map.of("field_*", 1.0f, "field_1", 1.0f, "*_field_1", 1.0f, "semantic_*", 1.0f), List.of()),
308+
Map.of(
309+
new Tuple<>("semantic_field_1", List.of(indexName)),
310+
1.0f,
311+
new Tuple<>("semantic_field_2", List.of(indexName)),
312+
1.0f,
313+
new Tuple<>("semantic_field_2", List.of(anotherIndexName)),
314+
1.0f,
315+
new Tuple<>("semantic_field_3", List.of(anotherIndexName)),
316+
1.0f
317+
),
318+
"baz2",
319+
null
320+
);
321+
322+
// Non-default rank window size
323+
retriever = new RRFRetrieverBuilder(
324+
null,
325+
List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"),
326+
"foo2",
327+
DEFAULT_RANK_WINDOW_SIZE * 2,
328+
RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT,
329+
new float[0]
330+
);
331+
assertMultiIndexMultiFieldsParamsRewrite(
332+
retriever,
333+
queryRewriteContext,
334+
Map.of(
335+
Map.of("field_1", 1.0f, "field_2", 1.0f),
336+
List.of(indexName),
337+
Map.of("field_1", 1.0f, "field_2", 1.0f, "semantic_field_1", 1.0f),
338+
List.of(anotherIndexName)
339+
),
340+
Map.of(
341+
new Tuple<>("semantic_field_1", List.of(indexName)),
342+
1.0f,
343+
new Tuple<>("semantic_field_2", List.of(indexName)),
344+
1.0f,
345+
new Tuple<>("semantic_field_2", List.of(anotherIndexName)),
346+
1.0f
347+
),
348+
"foo2",
349+
null
350+
);
351+
352+
// All-fields wildcard
353+
retriever = new RRFRetrieverBuilder(
354+
null,
355+
List.of("*"),
356+
"qux",
357+
DEFAULT_RANK_WINDOW_SIZE,
358+
RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT,
359+
new float[0]
360+
);
361+
assertMultiIndexMultiFieldsParamsRewrite(
362+
retriever,
363+
queryRewriteContext,
364+
Map.of(Map.of("*", 1.0f), List.of()), // no index filter for the lexical retriever
365+
Map.of(
366+
new Tuple<>("semantic_field_1", List.of(indexName)),
367+
1.0f,
368+
new Tuple<>("semantic_field_2", List.of(indexName)),
369+
1.0f,
370+
new Tuple<>("semantic_field_2", List.of(anotherIndexName)),
371+
1.0f,
372+
new Tuple<>("semantic_field_3", List.of(anotherIndexName)),
373+
1.0f
374+
),
375+
"qux",
376+
null
377+
);
378+
}
379+
380+
public void testMultiIndexMultiFieldsParamsRewriteWithSameInferenceIds() {
381+
String indexName = "test-index";
382+
String anotherIndexName = "test-another-index";
383+
final ResolvedIndices resolvedIndices = createMockResolvedIndices(
384+
Map.of(
385+
indexName,
386+
List.of("semantic_field_1", "semantic_field_2"),
387+
anotherIndexName,
388+
List.of("semantic_field_2", "semantic_field_3")
389+
),
390+
null,
391+
Map.of("semantic_field_2", "common_inference_id") // use the same inference ID for semantic_field_2
392+
);
393+
394+
final QueryRewriteContext queryRewriteContext = new QueryRewriteContext(
395+
parserConfig(),
396+
null,
397+
null,
398+
TransportVersion.current(),
399+
RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY,
400+
resolvedIndices,
401+
new PointInTimeBuilder(new BytesArray("pitid")),
402+
null,
403+
null
404+
);
405+
406+
// No wildcards, no per-field boosting
407+
RRFRetrieverBuilder retriever = new RRFRetrieverBuilder(
408+
null,
409+
List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"),
410+
"foo",
411+
DEFAULT_RANK_WINDOW_SIZE,
412+
RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT,
413+
new float[0]
414+
);
415+
assertMultiIndexMultiFieldsParamsRewrite(
416+
retriever,
417+
queryRewriteContext,
418+
Map.of(
419+
Map.of("field_1", 1.0f, "field_2", 1.0f),
420+
List.of(indexName),
421+
Map.of("field_1", 1.0f, "field_2", 1.0f, "semantic_field_1", 1.0f),
422+
List.of(anotherIndexName)
423+
),
424+
Map.of(new Tuple<>("semantic_field_1", List.of(indexName)), 1.0f, new Tuple<>("semantic_field_2", List.of()), 1.0f),
425+
"foo",
426+
null
427+
);
428+
429+
// Non-default rank window size
430+
retriever = new RRFRetrieverBuilder(
431+
null,
432+
List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"),
433+
"foo2",
434+
DEFAULT_RANK_WINDOW_SIZE * 2,
435+
RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT,
436+
new float[0]
437+
);
438+
assertMultiIndexMultiFieldsParamsRewrite(
439+
retriever,
440+
queryRewriteContext,
441+
Map.of(
442+
Map.of("field_1", 1.0f, "field_2", 1.0f),
443+
List.of(indexName),
444+
Map.of("field_1", 1.0f, "field_2", 1.0f, "semantic_field_1", 1.0f),
445+
List.of(anotherIndexName)
446+
),
447+
Map.of(new Tuple<>("semantic_field_1", List.of(indexName)), 1.0f, new Tuple<>("semantic_field_2", List.of()), 1.0f),
448+
"foo2",
449+
null
450+
);
451+
452+
// Glob matching on inference and non-inference fields
453+
retriever = new RRFRetrieverBuilder(
454+
null,
455+
List.of("field_*", "field_1", "*_field_1", "semantic_*"),
456+
"baz2",
457+
DEFAULT_RANK_WINDOW_SIZE,
458+
RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT,
459+
new float[0]
460+
);
461+
assertMultiIndexMultiFieldsParamsRewrite(
462+
retriever,
463+
queryRewriteContext,
464+
Map.of(Map.of("field_*", 1.0f, "field_1", 1.0f, "*_field_1", 1.0f, "semantic_*", 1.0f), List.of()),
465+
Map.of(
466+
new Tuple<>("semantic_field_1", List.of(indexName)),
467+
1.0f,
468+
new Tuple<>("semantic_field_2", List.of()),
469+
1.0f,
470+
new Tuple<>("semantic_field_3", List.of(anotherIndexName)),
471+
1.0f
472+
),
473+
"baz2",
474+
null
475+
);
476+
477+
// All-fields wildcard
478+
retriever = new RRFRetrieverBuilder(
479+
null,
480+
List.of("*"),
481+
"qux",
482+
DEFAULT_RANK_WINDOW_SIZE,
483+
RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT,
484+
new float[0]
485+
);
486+
assertMultiIndexMultiFieldsParamsRewrite(
487+
retriever,
488+
queryRewriteContext,
489+
Map.of(Map.of("*", 1.0f), List.of()), // no index filter on the lexical query
490+
Map.of(
491+
new Tuple<>("semantic_field_1", List.of(indexName)),
492+
1.0f,
493+
new Tuple<>("semantic_field_2", List.of()), // no index filter since both indices have this field
494+
1.0f,
495+
new Tuple<>("semantic_field_3", List.of(anotherIndexName)),
496+
1.0f
497+
),
498+
"qux",
499+
null
500+
);
501+
}
502+
238503
public void testSearchRemoteIndex() {
239504
final ResolvedIndices resolvedIndices = createMockResolvedIndices(
240505
Map.of("local-index", List.of()),

0 commit comments

Comments
 (0)