@@ -25,8 +25,7 @@ public enum FeatureFlag {
         "es.index_dimensions_tsid_optimization_feature_flag_enabled=true",
         Version.fromString("9.2.0"),
         null
-    ),
-    ELASTIC_RERANKER_CHUNKING("es.elastic_reranker_chunking_long_documents=true", Version.fromString("9.2.0"), null);
+    );
 
     public final String systemProperty;
     public final Version from;
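
Removing the ELASTIC_RERANKER_CHUNKING entry here (together with the FeatureFlag constant deleted further down) means reranker chunking of long documents is no longer gated behind the es.elastic_reranker_chunking_long_documents system property. As a rough, generic illustration only — this is not the real org.elasticsearch.common.util.FeatureFlag implementation — a gate of this kind reduces to a system-property check:

    // Illustrative sketch of a system-property feature gate; the property name is taken
    // from the removed enum entry, everything else is simplified.
    boolean chunkingEnabled = Boolean.parseBoolean(
        System.getProperty("es.elastic_reranker_chunking_long_documents", "false")
    );
    if (chunkingEnabled == false) {
        // previously: skip the chunking code path entirely
    }

With the flag gone, the chunking paths in the hunks below are always active.
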
@@ -53,10 +53,13 @@ public List<String> getChunkedInputs() {
         return chunkedInputs;
     }
 
-    public ActionListener<InferenceServiceResults> parseChunkedRerankResultsListener(ActionListener<InferenceServiceResults> listener) {
+    public ActionListener<InferenceServiceResults> parseChunkedRerankResultsListener(
+        ActionListener<InferenceServiceResults> listener,
+        boolean returnDocuments
+    ) {
         return ActionListener.wrap(results -> {
             if (results instanceof RankedDocsResults rankedDocsResults) {
-                listener.onResponse(parseRankedDocResultsForChunks(rankedDocsResults));
+                listener.onResponse(parseRankedDocResultsForChunks(rankedDocsResults, returnDocuments));
 
             } else {
                 listener.onFailure(new IllegalArgumentException("Expected RankedDocsResults but got: " + results.getClass()));
@@ -65,7 +68,7 @@ public ActionListener<InferenceServiceResults> parseChunkedRerankResultsListener
         }, listener::onFailure);
     }
 
-    private RankedDocsResults parseRankedDocResultsForChunks(RankedDocsResults rankedDocsResults) {
+    private RankedDocsResults parseRankedDocResultsForChunks(RankedDocsResults rankedDocsResults, boolean returnDocuments) {
         List<RankedDocsResults.RankedDoc> topRankedDocs = new ArrayList<>();
         Set<Integer> docIndicesSeen = new HashSet<>();

@@ -80,7 +83,7 @@ private RankedDocsResults parseRankedDocResultsForChunks(RankedDocsResults ranke
                 RankedDocsResults.RankedDoc updatedRankedDoc = new RankedDocsResults.RankedDoc(
                     docIndex,
                     rankedDoc.relevanceScore(),
-                    inputs.get(docIndex)
+                    returnDocuments ? inputs.get(docIndex) : null
                 );
                 topRankedDocs.add(updatedRankedDoc);
                 docIndicesSeen.add(docIndex);
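
The new returnDocuments flag only controls whether the original input text is copied onto each RankedDoc when the chunked results are collapsed back to per-document scores; indices and relevance scores are untouched. A minimal usage sketch, assuming the surrounding scaffolding (query, inputs, maxChunksPerDoc) and the getRankedDocs()/index()/text() accessor names, which are not shown in this diff:

    // Sketch only: collapse chunked rerank results without echoing document text back.
    RerankRequestChunker chunker = new RerankRequestChunker(query, inputs, maxChunksPerDoc);
    List<String> chunkedInputs = chunker.getChunkedInputs();
    ActionListener<InferenceServiceResults> listener = chunker.parseChunkedRerankResultsListener(
        ActionListener.wrap(results -> {
            for (RankedDocsResults.RankedDoc doc : ((RankedDocsResults) results).getRankedDocs()) {
                // with returnDocuments == false, doc.text() is expected to be null
                System.out.println(doc.index() + " -> " + doc.relevanceScore() + " / " + doc.text());
            }
        }, e -> { /* handle failure */ }),
        false  // returnDocuments
    );
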
@@ -22,7 +22,6 @@
 
 import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalEnum;
 import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalPositiveInteger;
-import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.ELASTIC_RERANKER_CHUNKING;
 import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.RERANKER_ID;
 
 public class ElasticRerankerServiceSettings extends ElasticsearchInternalServiceSettings {
@@ -102,30 +101,26 @@ public static ElasticRerankerServiceSettings fromMap(Map<String, Object> map) {
         ValidationException validationException = new ValidationException();
         var baseSettings = ElasticsearchInternalServiceSettings.fromMap(map, validationException);
 
-        LongDocumentStrategy longDocumentStrategy = null;
-        Integer maxChunksPerDoc = null;
-        if (ELASTIC_RERANKER_CHUNKING.isEnabled()) {
-            longDocumentStrategy = extractOptionalEnum(
-                map,
-                LONG_DOCUMENT_STRATEGY,
-                ModelConfigurations.SERVICE_SETTINGS,
-                LongDocumentStrategy::fromString,
-                EnumSet.allOf(LongDocumentStrategy.class),
-                validationException
-            );
-
-            maxChunksPerDoc = extractOptionalPositiveInteger(
-                map,
-                MAX_CHUNKS_PER_DOC,
-                ModelConfigurations.SERVICE_SETTINGS,
-                validationException
-            );
-
-            if (maxChunksPerDoc != null && (longDocumentStrategy == null || longDocumentStrategy == LongDocumentStrategy.TRUNCATE)) {
-                validationException.addValidationError(
-                    "The [" + MAX_CHUNKS_PER_DOC + "] setting requires [" + LONG_DOCUMENT_STRATEGY + "] to be set to [chunk]"
-                );
-            }
-        }
+        LongDocumentStrategy longDocumentStrategy = extractOptionalEnum(
+            map,
+            LONG_DOCUMENT_STRATEGY,
+            ModelConfigurations.SERVICE_SETTINGS,
+            LongDocumentStrategy::fromString,
+            EnumSet.allOf(LongDocumentStrategy.class),
+            validationException
+        );
+
+        Integer maxChunksPerDoc = extractOptionalPositiveInteger(
+            map,
+            MAX_CHUNKS_PER_DOC,
+            ModelConfigurations.SERVICE_SETTINGS,
+            validationException
+        );
+
+        if (maxChunksPerDoc != null && (longDocumentStrategy == null || longDocumentStrategy == LongDocumentStrategy.TRUNCATE)) {
+            validationException.addValidationError(
+                "The [" + MAX_CHUNKS_PER_DOC + "] setting requires [" + LONG_DOCUMENT_STRATEGY + "] to be set to [chunk]"
+            );
+        }
 
         if (validationException.validationErrors().isEmpty() == false) {
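
With the feature-flag guard gone, long_document_strategy and max_chunks_per_doc are always read from service_settings, and the rule that max_chunks_per_doc requires the chunk strategy now applies unconditionally. A hedged sketch of both cases (key names assumed to match the LONG_DOCUMENT_STRATEGY and MAX_CHUNKS_PER_DOC constants; the base settings shown are placeholders, not the exact required fields):

    // Sketch only: fromMap consumes entries from a mutable map.
    Map<String, Object> valid = new HashMap<>();
    valid.put("model_id", "my-reranker-model");        // placeholder model id
    valid.put("num_allocations", 1);
    valid.put("num_threads", 1);
    valid.put("long_document_strategy", "chunk");
    valid.put("max_chunks_per_doc", 5);
    ElasticRerankerServiceSettings ok = ElasticRerankerServiceSettings.fromMap(valid);

    Map<String, Object> invalid = new HashMap<>();
    invalid.put("model_id", "my-reranker-model");
    invalid.put("num_allocations", 1);
    invalid.put("num_threads", 1);
    invalid.put("long_document_strategy", "truncate");
    invalid.put("max_chunks_per_doc", 5);
    // now always fails validation with:
    // "The [max_chunks_per_doc] setting requires [long_document_strategy] to be set to [chunk]"
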
@@ -16,7 +16,6 @@
 import org.elasticsearch.common.logging.DeprecationCategory;
 import org.elasticsearch.common.logging.DeprecationLogger;
 import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.common.util.FeatureFlag;
 import org.elasticsearch.common.util.LazyInitializable;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.core.Strings;
@@ -116,8 +115,6 @@ public class ElasticsearchInternalService extends BaseElasticsearchInternalServi
     private static final Logger logger = LogManager.getLogger(ElasticsearchInternalService.class);
     private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(ElasticsearchInternalService.class);
 
-    public static final FeatureFlag ELASTIC_RERANKER_CHUNKING = new FeatureFlag("elastic_reranker_chunking_long_documents");
-
     /**
      * Fix for https://github.com/elastic/elasticsearch/issues/124675
      * In 8.13.0 we transitioned from model_version to model_id. Any elser inference endpoints created prior to 8.13.0 will still use
@@ -698,26 +695,26 @@ public void inferRerank(
             }
         });
 
-        if (model instanceof ElasticRerankerModel elasticRerankerModel && ELASTIC_RERANKER_CHUNKING.isEnabled()) {
+        var returnDocs = Boolean.TRUE;
+        if (returnDocuments != null) {
+            returnDocs = returnDocuments;
+        } else if (model.getTaskSettings() instanceof RerankTaskSettings modelSettings) {
+            var requestSettings = RerankTaskSettings.fromMap(requestTaskSettings);
+            returnDocs = RerankTaskSettings.of(modelSettings, requestSettings).returnDocuments();
+        }
+
+        if (model instanceof ElasticRerankerModel elasticRerankerModel) {
             var serviceSettings = elasticRerankerModel.getServiceSettings();
             var longDocumentStrategy = serviceSettings.getLongDocumentStrategy();
             if (longDocumentStrategy == ElasticRerankerServiceSettings.LongDocumentStrategy.CHUNK) {
                 var rerankChunker = new RerankRequestChunker(query, inputs, serviceSettings.getMaxChunksPerDoc());
                 inputs = rerankChunker.getChunkedInputs();
-                resultsListener = rerankChunker.parseChunkedRerankResultsListener(resultsListener);
+                resultsListener = rerankChunker.parseChunkedRerankResultsListener(resultsListener, returnDocs);
             }
 
         }
         var request = buildInferenceRequest(model.mlNodeDeploymentId(), new TextSimilarityConfigUpdate(query), inputs, inputType, timeout);
 
-        var returnDocs = Boolean.TRUE;
-        if (returnDocuments != null) {
-            returnDocs = returnDocuments;
-        } else if (model.getTaskSettings() instanceof RerankTaskSettings modelSettings) {
-            var requestSettings = RerankTaskSettings.fromMap(requestTaskSettings);
-            returnDocs = RerankTaskSettings.of(modelSettings, requestSettings).returnDocuments();
-        }
-
         Function<Integer, String> inputSupplier = returnDocs == Boolean.TRUE ? inputs::get : i -> null;
 
         ActionListener<InferModelAction.Response> mlResultsListener = resultsListener.delegateFailureAndWrap(
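
Moving the returnDocs resolution ahead of the chunking branch lets the chunked path honour return_documents the same way the unchunked path already did via inputSupplier. The precedence, restated as a small hedged helper (the helper name and flattened signature are hypothetical; the logic mirrors inferRerank above):

    // Hypothetical helper mirroring the precedence in inferRerank.
    static boolean resolveReturnDocs(Boolean requestReturnDocuments, Boolean taskSettingsReturnDocuments) {
        if (requestReturnDocuments != null) {
            return requestReturnDocuments;       // explicit request parameter wins
        }
        if (taskSettingsReturnDocuments != null) {
            return taskSettingsReturnDocuments;  // otherwise the resolved rerank task setting
        }
        return true;                             // default: include the document text
    }

    // resolveReturnDocs(false, true) -> false  (request overrides task settings)
    // resolveReturnDocs(null, false) -> false  (task settings apply)
    // resolveReturnDocs(null, null)  -> true   (default)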