Skip to content

Commit 2b89009

Browse files
committed
Remove ElasticInferenceServiceRerankRequestManager
1 parent 603b91b commit 2b89009

File tree

9 files changed: 42 additions and 115 deletions.

x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/inference/results/RankedDocsResultsTests.java

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -66,27 +66,6 @@ protected RankedDocsResults mutateInstanceForVersion(RankedDocsResults instance,
6666
}
6767
}
6868

69-
public record RerankExpectation(int index, float relevanceScore) {}
70-
71-
public static Map<String, Object> buildExpectationRankedDocResults(List<RerankExpectation> rerankExpectations) {
72-
return Map.of(
73-
RankedDocsResults.RERANK,
74-
rerankExpectations.stream()
75-
.map(
76-
rerankExpectation -> Map.of(
77-
RankedDocsResults.RankedDoc.NAME,
78-
Map.of(
79-
RankedDocsResults.RankedDoc.INDEX,
80-
rerankExpectation.index,
81-
RankedDocsResults.RankedDoc.RELEVANCE_SCORE,
82-
rerankExpectation.relevanceScore
83-
)
84-
)
85-
)
86-
.toList()
87-
);
88-
}
89-
9069
private List<RankedDocsResults.RankedDoc> rankedDocsNullStringToEmpty(List<RankedDocsResults.RankedDoc> rankedDocs) {
9170
var result = new ArrayList<RankedDocsResults.RankedDoc>(rankedDocs.size());
9271
for (var doc : rankedDocs) {

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/elastic/rerank/ElasticInferenceServiceRerankRequest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
import org.elasticsearch.common.Strings;
1616
import org.elasticsearch.xcontent.XContentType;
1717
import org.elasticsearch.xpack.inference.external.request.Request;
18-
import org.elasticsearch.xpack.inference.external.request.elastic.ElasticInferenceServiceRequest;
19-
import org.elasticsearch.xpack.inference.external.request.elastic.ElasticInferenceServiceRequestMetadata;
18+
import org.elasticsearch.xpack.inference.services.elastic.request.ElasticInferenceServiceRequest;
19+
import org.elasticsearch.xpack.inference.services.elastic.request.ElasticInferenceServiceRequestMetadata;
2020
import org.elasticsearch.xpack.inference.services.elastic.rerank.ElasticInferenceServiceRerankModel;
2121
import org.elasticsearch.xpack.inference.telemetry.TraceContext;
2222
import org.elasticsearch.xpack.inference.telemetry.TraceContextHandler;

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceModel.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,15 @@
77

88
package org.elasticsearch.xpack.inference.services.elastic;
99

10-
import org.elasticsearch.inference.Model;
1110
import org.elasticsearch.inference.ModelConfigurations;
1211
import org.elasticsearch.inference.ModelSecrets;
1312
import org.elasticsearch.inference.ServiceSettings;
13+
import org.elasticsearch.xpack.inference.services.RateLimitGroupingModel;
14+
import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
1415

1516
import java.util.Objects;
1617

17-
public abstract class ElasticInferenceServiceModel extends Model {
18+
public abstract class ElasticInferenceServiceModel extends RateLimitGroupingModel {
1819

1920
private final ElasticInferenceServiceRateLimitServiceSettings rateLimitServiceSettings;
2021

@@ -35,12 +36,18 @@ public ElasticInferenceServiceModel(
3536
public ElasticInferenceServiceModel(ElasticInferenceServiceModel model, ServiceSettings serviceSettings) {
3637
super(model, serviceSettings);
3738

38-
this.rateLimitServiceSettings = model.rateLimitServiceSettings();
39+
this.rateLimitServiceSettings = model.rateLimitServiceSettings;
3940
this.elasticInferenceServiceComponents = model.elasticInferenceServiceComponents();
4041
}
4142

42-
public ElasticInferenceServiceRateLimitServiceSettings rateLimitServiceSettings() {
43-
return rateLimitServiceSettings;
43+
@Override
44+
public int rateLimitGroupingHash() {
45+
// We only have one model for rerank
46+
return Objects.hash(this.getServiceSettings().modelId());
47+
}
48+
49+
public RateLimitSettings rateLimitSettings() {
50+
return rateLimitServiceSettings.rateLimitSettings();
4451
}
4552

4653
public ElasticInferenceServiceComponents elasticInferenceServiceComponents() {

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceRequestManager.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ public abstract class ElasticInferenceServiceRequestManager extends BaseRequestM
2020
private final ElasticInferenceServiceRequestMetadata requestMetadata;
2121

2222
protected ElasticInferenceServiceRequestManager(ThreadPool threadPool, ElasticInferenceServiceModel model) {
23-
super(threadPool, model.getInferenceEntityId(), RateLimitGrouping.of(model), model.rateLimitServiceSettings().rateLimitSettings());
23+
super(threadPool, model.getInferenceEntityId(), RateLimitGrouping.of(model), model.rateLimitSettings());
2424
this.requestMetadata = extractRequestMetadataFromThreadContext(threadPool.getThreadContext());
2525
}
2626

@@ -32,7 +32,7 @@ record RateLimitGrouping(int modelIdHash) {
3232
public static RateLimitGrouping of(ElasticInferenceServiceModel model) {
3333
Objects.requireNonNull(model);
3434

35-
return new RateLimitGrouping(model.rateLimitServiceSettings().modelId().hashCode());
35+
return new RateLimitGrouping(model.rateLimitGroupingHash());
3636
}
3737
}
3838
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceRerankRequestManager.java

Lines changed: 0 additions & 79 deletions
This file was deleted.

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsRequestManager.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import static org.elasticsearch.xpack.inference.common.Truncator.truncate;
3232
import static org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceService.ELASTIC_INFERENCE_SERVICE_IDENTIFIER;
3333

34+
// TODO: remove and use GenericRequestManager in ElasticInferenceServiceActionCreator
3435
public class ElasticInferenceServiceSparseEmbeddingsRequestManager extends ElasticInferenceServiceRequestManager {
3536

3637
private static final Logger logger = LogManager.getLogger(ElasticInferenceServiceSparseEmbeddingsRequestManager.class);

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/action/ElasticInferenceServiceActionCreator.java

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,14 @@
99

1010
import org.elasticsearch.xpack.inference.external.action.ExecutableAction;
1111
import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction;
12+
import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;
13+
import org.elasticsearch.xpack.inference.external.http.sender.GenericRequestManager;
14+
import org.elasticsearch.xpack.inference.external.http.sender.QueryAndDocsInputs;
1215
import org.elasticsearch.xpack.inference.external.http.sender.Sender;
16+
import org.elasticsearch.xpack.inference.external.request.elastic.rerank.ElasticInferenceServiceRerankRequest;
17+
import org.elasticsearch.xpack.inference.external.response.elastic.ElasticInferenceServiceRerankResponseEntity;
1318
import org.elasticsearch.xpack.inference.services.ServiceComponents;
14-
import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceRerankRequestManager;
19+
import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceResponseHandler;
1520
import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceSparseEmbeddingsRequestManager;
1621
import org.elasticsearch.xpack.inference.services.elastic.rerank.ElasticInferenceServiceRerankModel;
1722
import org.elasticsearch.xpack.inference.services.elastic.sparseembeddings.ElasticInferenceServiceSparseEmbeddingsModel;
@@ -22,6 +27,7 @@
2227

2328
import static org.elasticsearch.xpack.inference.external.action.ActionUtils.constructFailedToSendRequestMessage;
2429
import static org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceService.ELASTIC_INFERENCE_SERVICE_IDENTIFIER;
30+
import static org.elasticsearch.xpack.inference.services.elastic.request.ElasticInferenceServiceRequest.extractRequestMetadataFromThreadContext;
2531

2632
public class ElasticInferenceServiceActionCreator implements ElasticInferenceServiceActionVisitor {
2733

@@ -31,6 +37,11 @@ public class ElasticInferenceServiceActionCreator implements ElasticInferenceSer
3137

3238
private final TraceContext traceContext;
3339

40+
static final ResponseHandler RERANK_HANDLER = new ElasticInferenceServiceResponseHandler(
41+
"elastic rerank",
42+
(request, response) -> ElasticInferenceServiceRerankResponseEntity.fromResponse(response)
43+
);
44+
3445
public ElasticInferenceServiceActionCreator(Sender sender, ServiceComponents serviceComponents, TraceContext traceContext) {
3546
this.sender = Objects.requireNonNull(sender);
3647
this.serviceComponents = Objects.requireNonNull(serviceComponents);
@@ -48,7 +59,15 @@ public ExecutableAction create(ElasticInferenceServiceSparseEmbeddingsModel mode
4859

4960
@Override
5061
public ExecutableAction create(ElasticInferenceServiceRerankModel model) {
51-
var requestManager = new ElasticInferenceServiceRerankRequestManager(model, serviceComponents, traceContext);
62+
var threadPool = serviceComponents.threadPool();
63+
var requestManager = new GenericRequestManager<>(
64+
threadPool,
65+
model,
66+
RERANK_HANDLER,
67+
(rerankInput) -> new ElasticInferenceServiceRerankRequest(rerankInput.getQuery(), rerankInput.getChunks(), model,
68+
traceContext, extractRequestMetadataFromThreadContext(threadPool.getThreadContext())),
69+
QueryAndDocsInputs.class
70+
);
5271
var errorMessage = constructFailedToSendRequestMessage(
5372
String.format(Locale.ROOT, "%s rerank", ELASTIC_INFERENCE_SERVICE_IDENTIFIER)
5473
);

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/elastic/ElasticInferenceServiceRerankRequestTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import java.util.List;
1919

2020
import static org.elasticsearch.xpack.inference.external.http.Utils.entityAsMap;
21-
import static org.elasticsearch.xpack.inference.external.request.elastic.ElasticInferenceServiceRequestTests.randomElasticInferenceServiceRequestMetadata;
21+
import static org.elasticsearch.xpack.inference.services.elastic.request.ElasticInferenceServiceRequestTests.randomElasticInferenceServiceRequestMetadata;
2222
import static org.hamcrest.Matchers.aMapWithSize;
2323
import static org.hamcrest.Matchers.instanceOf;
2424
import static org.hamcrest.Matchers.is;

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/action/ElasticInferenceServiceActionCreatorTests.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -225,10 +225,10 @@ public void testExecute_ReturnsSuccessfulResponse_ForRerankAction() throws IOExc
225225
assertThat(
226226
result.asMap(),
227227
equalTo(
228-
RankedDocsResultsTests.buildExpectationRankedDocResults(
228+
RankedDocsResultsTests.buildExpectationRerank(
229229
List.of(
230-
new RankedDocsResultsTests.RerankExpectation(0, 0.94f),
231-
new RankedDocsResultsTests.RerankExpectation(1, 0.21f)
230+
new RankedDocsResultsTests.RerankExpectation(Map.of("index", 0, "relevance_score", 0.94f)),
231+
new RankedDocsResultsTests.RerankExpectation(Map.of("index", 1, "relevance_score", 0.21f))
232232
)
233233
)
234234
)

0 commit comments

Comments (0)