Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
23f5caa
Internal MMR based result diversification method
markjhoy Oct 2, 2025
23494e4
cleanup and complete implementation
markjhoy Oct 3, 2025
1cba1ff
Merge branch 'main' into markjhoy/add_mmr_internal_framework
markjhoy Oct 3, 2025
70407f2
Merge branch 'main' into markjhoy/add_mmr_internal_framework
markjhoy Oct 6, 2025
a7c7070
try force GC for search hits array
markjhoy Oct 6, 2025
d760242
use TopDocs and not ScoreDocs
markjhoy Oct 7, 2025
5be922f
Merge branch 'main' into markjhoy/add_mmr_internal_framework
markjhoy Oct 7, 2025
1c4e3a3
Merge branch 'main' into markjhoy/add_mmr_internal_framework
markjhoy Oct 7, 2025
bac22dd
use RankDoc[] to prepare for use by retriever
markjhoy Oct 8, 2025
297fdfa
Merge branch 'main' into markjhoy/add_mmr_internal_framework
markjhoy Oct 8, 2025
7bde1f9
Initial add of the retriever - needs tests
markjhoy Oct 15, 2025
5d0683c
Merge branch 'main' into markjhoy/add_mmr_internal_framework
markjhoy Oct 15, 2025
5d32183
[CI] Auto commit changes from spotless
Oct 15, 2025
19d4699
cleanups before adding tests
markjhoy Oct 15, 2025
dab4d45
added parsing tests
markjhoy Oct 15, 2025
a36a828
[CI] Auto commit changes from spotless
Oct 15, 2025
eaf4a56
Merge branch 'main' into markjhoy/add_mmr_internal_framework
markjhoy Oct 15, 2025
a2528e8
initial retriever tests
markjhoy Oct 16, 2025
60abbfe
fix missing locale for format
markjhoy Oct 16, 2025
561b0a6
Merge branch 'main' into markjhoy/add_mmr_internal_framework
markjhoy Oct 16, 2025
f7f0321
complete initial tests
markjhoy Oct 16, 2025
992d81b
Merge branch 'main' into markjhoy/add_mmr_internal_framework
markjhoy Oct 16, 2025
1c1c4ab
register the retriever
markjhoy Oct 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@
import org.elasticsearch.search.aggregations.pipeline.StatsBucketPipelineAggregationBuilder;
import org.elasticsearch.search.aggregations.pipeline.SumBucketPipelineAggregationBuilder;
import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry;
import org.elasticsearch.search.diversification.ResultDiversificationRetrieverBuilder;
import org.elasticsearch.search.fetch.FetchPhase;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.subphase.ExplainPhase;
Expand Down Expand Up @@ -1087,6 +1088,9 @@ private void registerRetrieverParsers(List<SearchPlugin> plugins) {
registerRetriever(new RetrieverSpec<>(StandardRetrieverBuilder.NAME, StandardRetrieverBuilder::fromXContent));
registerRetriever(new RetrieverSpec<>(KnnRetrieverBuilder.NAME, KnnRetrieverBuilder::fromXContent));
registerRetriever(new RetrieverSpec<>(RescorerRetrieverBuilder.NAME, RescorerRetrieverBuilder::fromXContent));
registerRetriever(
new RetrieverSpec<>(ResultDiversificationRetrieverBuilder.NAME, ResultDiversificationRetrieverBuilder::fromXContent)
);

registerFromPlugin(plugins, SearchPlugin::getRetrievers, this::registerRetriever);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.search.diversification;

import org.apache.lucene.index.VectorSimilarityFunction;
import org.elasticsearch.search.rank.RankDoc;
import org.elasticsearch.search.vectors.VectorData;

import java.io.IOException;

/**
 * Base class for result diversification strategies.
 *
 * <p>Subclasses supply the actual algorithm through {@link #diversify}; this class only
 * contributes a shared helper for scoring one vector against another.
 */
public abstract class ResultDiversification {

    /**
     * Applies the diversification strategy to the supplied ranked documents.
     *
     * @param docs the ranked documents handed to the strategy
     * @param diversificationContext the settings and vector data available to the strategy
     * @return the ranked documents produced by the strategy
     * @throws IOException if the implementation fails while performing I/O
     */
    public abstract RankDoc[] diversify(RankDoc[] docs, ResultDiversificationContext diversificationContext) throws IOException;

    /**
     * Scores {@code thisDocVector} against {@code comparisonVector} with the given similarity function.
     *
     * @param similarityFunction the function used to compare the two vectors
     * @param useFloat {@code true} to compare the float representations of both vectors,
     *                 {@code false} to compare their byte representations
     * @param thisDocVector the first operand of the comparison
     * @param comparisonVector the second operand of the comparison
     * @return the value returned by {@code similarityFunction.compare} for the chosen representation
     */
    protected float getVectorComparisonScore(
        VectorSimilarityFunction similarityFunction,
        boolean useFloat,
        VectorData thisDocVector,
        VectorData comparisonVector
    ) {
        if (useFloat == false) {
            // Byte path: both operands are read through their byte representation.
            return similarityFunction.compare(thisDocVector.byteVector(), comparisonVector.byteVector());
        }
        return similarityFunction.compare(thisDocVector.floatVector(), comparisonVector.floatVector());
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.search.diversification;

import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
import org.elasticsearch.search.vectors.VectorData;

import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
 * Holds the inputs a result diversification run works from: the target field, the candidate
 * count, the field's mapper, the index version, an optional query vector and the per-document
 * field vectors.
 */
public abstract class ResultDiversificationContext {
    private final String field;
    private final int numCandidates;
    private final DenseVectorFieldMapper fieldMapper;
    private final IndexVersion indexVersion;
    private final VectorData queryVector;
    // Never null: both the constructor and setFieldVectors replace a null map with an empty one.
    private Map<Integer, VectorData> fieldVectors;

    /**
     * Creates a context. Field _must_ be a dense_vector type.
     *
     * @param field the name of the field
     * @param numCandidates the number of candidates
     * @param fieldMapper the mapper of the field; also used to resolve the element type
     * @param indexVersion the index version
     * @param queryVector the query vector, may be {@code null}
     * @param fieldVectors vectors keyed by document id; {@code null} is treated as an empty map
     */
    protected ResultDiversificationContext(
        String field,
        int numCandidates,
        DenseVectorFieldMapper fieldMapper,
        IndexVersion indexVersion,
        @Nullable VectorData queryVector,
        @Nullable Map<Integer, VectorData> fieldVectors
    ) {
        this.field = field;
        this.numCandidates = numCandidates;
        this.fieldMapper = fieldMapper;
        this.indexVersion = indexVersion;
        this.queryVector = queryVector;
        this.fieldVectors = emptyIfNull(fieldVectors);
    }

    /** Returns the given map, or a fresh empty mutable map when it is {@code null}. */
    private static Map<Integer, VectorData> emptyIfNull(@Nullable Map<Integer, VectorData> vectors) {
        return vectors == null ? new HashMap<>() : vectors;
    }

    /** @return the field name given at construction */
    public String getField() {
        return field;
    }

    /** @return the candidate count given at construction */
    public int getNumCandidates() {
        return numCandidates;
    }

    /** @return the field mapper given at construction */
    public DenseVectorFieldMapper getFieldMapper() {
        return fieldMapper;
    }

    /** @return the element type reported by the field mapper's field type */
    public DenseVectorFieldMapper.ElementType getElementType() {
        return fieldMapper.fieldType().getElementType();
    }

    /** @return the index version given at construction */
    public IndexVersion getIndexVersion() {
        return indexVersion;
    }

    /**
     * Replaces the stored per-document vectors.
     *
     * @param fieldVectors vectors keyed by document id; {@code null} is treated as an empty map,
     *                     matching the constructor, so later lookups cannot fail on a null map
     */
    public void setFieldVectors(@Nullable Map<Integer, VectorData> fieldVectors) {
        this.fieldVectors = emptyIfNull(fieldVectors);
    }

    /** @return the query vector given at construction, possibly {@code null} */
    public VectorData getQueryVector() {
        return queryVector;
    }

    /**
     * Looks up the stored vector for a document.
     *
     * @param docId the document id used as the map key
     * @return the stored vector, or {@code null} when none is stored for {@code docId}
     */
    public VectorData getFieldVector(int docId) {
        return fieldVectors.get(docId);
    }

    /** @return the entry set of the stored document-id-to-vector map */
    public Set<Map.Entry<Integer, VectorData>> getFieldVectorsEntrySet() {
        return fieldVectors.entrySet();
    }
}
Loading