Skip to content

Commit 5bc83f2

Browse files
Adding acl call
1 parent bbf6693 commit 5bc83f2

File tree

13 files changed

+522
-72
lines changed

13 files changed

+522
-72
lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import org.apache.logging.log4j.LogManager;
1111
import org.apache.logging.log4j.Logger;
1212
import org.apache.lucene.util.SetOnce;
13+
import org.elasticsearch.action.ActionListener;
1314
import org.elasticsearch.action.ActionRequest;
1415
import org.elasticsearch.action.ActionResponse;
1516
import org.elasticsearch.action.support.MappedActionFilter;
@@ -46,6 +47,7 @@
4647
import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
4748
import org.elasticsearch.search.rank.RankBuilder;
4849
import org.elasticsearch.search.rank.RankDoc;
50+
import org.elasticsearch.tasks.Task;
4951
import org.elasticsearch.threadpool.ExecutorBuilder;
5052
import org.elasticsearch.threadpool.ScalingExecutorBuilder;
5153
import org.elasticsearch.threadpool.ThreadPool;
@@ -74,11 +76,15 @@
7476
import org.elasticsearch.xpack.inference.action.filter.ShardBulkInferenceActionFilter;
7577
import org.elasticsearch.xpack.inference.common.Truncator;
7678
import org.elasticsearch.xpack.inference.external.amazonbedrock.AmazonBedrockRequestSender;
79+
import org.elasticsearch.xpack.inference.external.elastic.ElasticInferenceServiceResponseHandler;
7780
import org.elasticsearch.xpack.inference.external.http.HttpClientManager;
7881
import org.elasticsearch.xpack.inference.external.http.HttpSettings;
82+
import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;
7983
import org.elasticsearch.xpack.inference.external.http.retry.RetrySettings;
8084
import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender;
8185
import org.elasticsearch.xpack.inference.external.http.sender.RequestExecutorServiceSettings;
86+
import org.elasticsearch.xpack.inference.external.request.elastic.ElasticInferenceServiceAclRequest;
87+
import org.elasticsearch.xpack.inference.external.response.elastic.ElasticInferenceServiceAclResponseEntity;
8288
import org.elasticsearch.xpack.inference.highlight.SemanticTextHighlighter;
8389
import org.elasticsearch.xpack.inference.logging.ThrottlerManager;
8490
import org.elasticsearch.xpack.inference.mapper.OffsetSourceFieldMapper;
@@ -124,17 +130,20 @@
124130
import org.elasticsearch.xpack.inference.services.mistral.MistralService;
125131
import org.elasticsearch.xpack.inference.services.openai.OpenAiService;
126132
import org.elasticsearch.xpack.inference.telemetry.InferenceStats;
133+
import org.elasticsearch.xpack.inference.telemetry.TraceContext;
127134

128135
import java.util.ArrayList;
129136
import java.util.Collection;
130137
import java.util.EnumSet;
131138
import java.util.List;
139+
import java.util.Locale;
132140
import java.util.Map;
133141
import java.util.function.Predicate;
134142
import java.util.function.Supplier;
135143
import java.util.stream.Stream;
136144

137145
import static java.util.Collections.singletonList;
146+
import static org.elasticsearch.xpack.core.inference.action.InferenceAction.Request.DEFAULT_TIMEOUT;
138147
import static org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceService.ELASTIC_INFERENCE_SERVICE_IDENTIFIER;
139148
import static org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceFeature.DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG;
140149
import static org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceFeature.ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG;
@@ -279,8 +288,8 @@ public Collection<?> createComponents(PluginServices services) {
279288
String elasticInferenceUrl = this.getElasticInferenceServiceUrl(inferenceServiceSettings);
280289
elasticInferenceServiceComponents.set(
281290
new ElasticInferenceServiceComponents(
282-
elasticInferenceUrl,
283-
new ElasticInferenceServiceACL(Map.of("model-abc", EnumSet.of(TaskType.SPARSE_EMBEDDING, TaskType.CHAT_COMPLETION)))
291+
elasticInferenceUrl
292+
// new ElasticInferenceServiceACL(Map.of("model-abc", EnumSet.of(TaskType.SPARSE_EMBEDDING, TaskType.CHAT_COMPLETION)))
284293
)
285294
);
286295

@@ -289,7 +298,8 @@ public Collection<?> createComponents(PluginServices services) {
289298
context -> new ElasticInferenceService(
290299
elasicInferenceServiceFactory.get(),
291300
serviceComponents.get(),
292-
elasticInferenceServiceComponents.get()
301+
elasticInferenceServiceComponents.get(),
302+
modelRegistry
293303
)
294304
)
295305
);
@@ -320,6 +330,13 @@ public Collection<?> createComponents(PluginServices services) {
320330
return List.of(modelRegistry, registry, httpClientManager, stats);
321331
}
322332

333+
/**
 * Builds a {@link TraceContext} from the {@code Task.TRACE_PARENT} and {@code Task.TRACE_STATE}
 * headers found in the thread context of the thread pool held by {@code threadPoolSetOnce}.
 *
 * @return a new {@link TraceContext} wrapping the two header values exactly as returned by {@code getHeader}
 */
private TraceContext getCurrentTraceInfo() {
334+
// NOTE(review): the header values are passed through unchecked; presumably getHeader yields null
// when the header is absent - confirm that TraceContext tolerates that.
var traceParent = threadPoolSetOnce.get().getThreadContext().getHeader(Task.TRACE_PARENT);
335+
var traceState = threadPoolSetOnce.get().getThreadContext().getHeader(Task.TRACE_STATE);
336+
337+
return new TraceContext(traceParent, traceState);
338+
}
339+
323340
@Override
324341
public void loadExtensions(ExtensionLoader loader) {
325342
inferenceServiceExtensions = loader.loadExtensions(InferenceServiceExtension.class);

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockRequestSender.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,22 @@
77

88
package org.elasticsearch.xpack.inference.external.amazonbedrock;
99

10+
import org.apache.logging.log4j.Logger;
1011
import org.elasticsearch.ElasticsearchException;
1112
import org.elasticsearch.action.ActionListener;
1213
import org.elasticsearch.cluster.service.ClusterService;
1314
import org.elasticsearch.common.settings.Settings;
1415
import org.elasticsearch.core.TimeValue;
1516
import org.elasticsearch.inference.InferenceServiceResults;
1617
import org.elasticsearch.threadpool.ThreadPool;
18+
import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;
1719
import org.elasticsearch.xpack.inference.external.http.sender.AmazonBedrockRequestExecutorService;
1820
import org.elasticsearch.xpack.inference.external.http.sender.AmazonBedrockRequestManager;
1921
import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs;
2022
import org.elasticsearch.xpack.inference.external.http.sender.RequestExecutorServiceSettings;
2123
import org.elasticsearch.xpack.inference.external.http.sender.RequestManager;
2224
import org.elasticsearch.xpack.inference.external.http.sender.Sender;
25+
import org.elasticsearch.xpack.inference.external.request.Request;
2326
import org.elasticsearch.xpack.inference.services.ServiceComponents;
2427

2528
import java.io.IOException;
@@ -123,6 +126,17 @@ public void send(
123126
listener.onFailure(new ElasticsearchException("Amazon Bedrock request sender did not receive a valid request request manager"));
124127
}
125128

129+
/**
 * Not supported by the Amazon Bedrock sender. This implementation ignores all of its arguments
 * and always throws; {@code listener} is never notified.
 *
 * @throws UnsupportedOperationException always
 */
@Override
130+
public void sendWithoutQueuing(
131+
Logger logger,
132+
Request request,
133+
ResponseHandler responseHandler,
134+
TimeValue timeout,
135+
ActionListener<InferenceServiceResults> listener
136+
) {
137+
// The failure is thrown synchronously to the caller rather than reported through listener.onFailure.
throw new UnsupportedOperationException("not implemented");
138+
}
139+
126140
@Override
127141
public void close() throws IOException {
128142
executorService.shutdown();

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/HttpRequestSender.java

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77

88
package org.elasticsearch.xpack.inference.external.http.sender;
99

10+
import org.apache.logging.log4j.Logger;
1011
import org.elasticsearch.action.ActionListener;
12+
import org.elasticsearch.action.support.ContextPreservingActionListener;
1113
import org.elasticsearch.cluster.service.ClusterService;
1214
import org.elasticsearch.common.settings.Settings;
1315
import org.elasticsearch.core.Nullable;
@@ -17,8 +19,10 @@
1719
import org.elasticsearch.xpack.inference.external.http.HttpClientManager;
1820
import org.elasticsearch.xpack.inference.external.http.RequestExecutor;
1921
import org.elasticsearch.xpack.inference.external.http.retry.RequestSender;
22+
import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;
2023
import org.elasticsearch.xpack.inference.external.http.retry.RetrySettings;
2124
import org.elasticsearch.xpack.inference.external.http.retry.RetryingHttpSender;
25+
import org.elasticsearch.xpack.inference.external.request.Request;
2226
import org.elasticsearch.xpack.inference.services.ServiceComponents;
2327

2428
import java.io.IOException;
@@ -74,6 +78,7 @@ public Sender createSender() {
7478
private final RequestExecutor service;
7579
private final AtomicBoolean started = new AtomicBoolean(false);
7680
private final CountDownLatch startCompleted = new CountDownLatch(1);
81+
private final RequestSender requestSender;
7782

7883
private HttpRequestSender(
7984
ThreadPool threadPool,
@@ -84,6 +89,7 @@ private HttpRequestSender(
8489
) {
8590
this.threadPool = Objects.requireNonNull(threadPool);
8691
this.manager = Objects.requireNonNull(httpClientManager);
92+
this.requestSender = Objects.requireNonNull(requestSender);
8793
service = new RequestExecutorService(
8894
threadPool,
8995
startCompleted,
@@ -141,4 +147,32 @@ public void send(
141147
waitForStartToComplete();
142148
service.execute(requestCreator, inferenceInputs, timeout, listener);
143149
}
150+
151+
/**
152+
* This method sends a request and parses the response. It does not leverage any queuing or
153+
* rate limiting logic. This method should only be used for requests that are not sent often.
154+
*
155+
* @param logger A logger to use for messages
156+
* @param request A request to be sent
157+
* @param responseHandler A handler for parsing the response
158+
* @param timeout the maximum time the request should wait for a response before timing out. If null, the timeout is ignored.
159+
*                If the timeout expires first, the listener is failed with a request-timeout error (see {@link TimedListener}).
160+
* @param listener a listener to handle the response
161+
*/
162+
public void sendWithoutQueuing(
163+
Logger logger,
164+
Request request,
165+
ResponseHandler responseHandler,
166+
@Nullable TimeValue timeout,
167+
ActionListener<InferenceServiceResults> listener
168+
) {
169+
assert started.get() : "call start() before sending a request";
170+
waitForStartToComplete();
171+
172+
// Wrap the listener together with the current thread context before handing off to another thread.
// NOTE(review): presumably the wrapper restores this context when the listener is invoked - confirm.
var preservedListener = ContextPreservingActionListener.wrapPreservingContext(listener, threadPool.getThreadContext());
173+
// TimedListener is created before the request is submitted; with a null timeout it only tracks completion.
var timedListener = new TimedListener<>(timeout, preservedListener, threadPool);
174+
175+
// Submit the send to the utility thread pool. The sender is also given timedListener::hasCompleted;
// NOTE(review): presumably so it can stop working once the listener has already completed - confirm.
threadPool.executor(UTILITY_THREAD_POOL_NAME)
176+
.execute(() -> requestSender.send(logger, request, timedListener::hasCompleted, responseHandler, timedListener.getListener()));
177+
}
144178
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/RequestTask.java

Lines changed: 5 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -7,28 +7,20 @@
77

88
package org.elasticsearch.xpack.inference.external.http.sender;
99

10-
import org.elasticsearch.ElasticsearchStatusException;
1110
import org.elasticsearch.action.ActionListener;
12-
import org.elasticsearch.action.support.ListenerTimeouts;
13-
import org.elasticsearch.common.Strings;
1411
import org.elasticsearch.core.Nullable;
1512
import org.elasticsearch.core.TimeValue;
1613
import org.elasticsearch.inference.InferenceServiceResults;
17-
import org.elasticsearch.rest.RestStatus;
1814
import org.elasticsearch.threadpool.ThreadPool;
1915

2016
import java.util.Objects;
21-
import java.util.concurrent.atomic.AtomicBoolean;
2217
import java.util.function.Supplier;
2318

24-
import static org.elasticsearch.xpack.inference.InferencePlugin.UTILITY_THREAD_POOL_NAME;
25-
2619
class RequestTask implements RejectableTask {
2720

28-
private final AtomicBoolean finished = new AtomicBoolean();
2921
private final RequestManager requestCreator;
3022
private final InferenceInputs inferenceInputs;
31-
private final ActionListener<InferenceServiceResults> listener;
23+
private final TimedListener<InferenceServiceResults> timedListener;
3224

3325
RequestTask(
3426
RequestManager requestCreator,
@@ -38,44 +30,13 @@ class RequestTask implements RejectableTask {
3830
ActionListener<InferenceServiceResults> listener
3931
) {
4032
this.requestCreator = Objects.requireNonNull(requestCreator);
41-
this.listener = getListener(Objects.requireNonNull(listener), timeout, Objects.requireNonNull(threadPool));
33+
this.timedListener = new TimedListener<>(timeout, listener, threadPool);
4234
this.inferenceInputs = Objects.requireNonNull(inferenceInputs);
4335
}
4436

45-
private ActionListener<InferenceServiceResults> getListener(
46-
ActionListener<InferenceServiceResults> origListener,
47-
@Nullable TimeValue timeout,
48-
ThreadPool threadPool
49-
) {
50-
ActionListener<InferenceServiceResults> notificationListener = ActionListener.wrap(result -> {
51-
finished.set(true);
52-
origListener.onResponse(result);
53-
}, e -> {
54-
finished.set(true);
55-
origListener.onFailure(e);
56-
});
57-
58-
if (timeout == null) {
59-
return notificationListener;
60-
}
61-
62-
return ListenerTimeouts.wrapWithTimeout(
63-
threadPool,
64-
timeout,
65-
threadPool.executor(UTILITY_THREAD_POOL_NAME),
66-
notificationListener,
67-
(ignored) -> notificationListener.onFailure(
68-
new ElasticsearchStatusException(
69-
Strings.format("Request timed out waiting to be sent after [%s]", timeout),
70-
RestStatus.REQUEST_TIMEOUT
71-
)
72-
)
73-
);
74-
}
75-
7637
@Override
7738
public boolean hasCompleted() {
78-
return finished.get();
39+
return timedListener.hasCompleted();
7940
}
8041

8142
@Override
@@ -90,12 +51,12 @@ public InferenceInputs getInferenceInputs() {
9051

9152
@Override
9253
public ActionListener<InferenceServiceResults> getListener() {
93-
return listener;
54+
return timedListener.getListener();
9455
}
9556

9657
@Override
9758
public void onRejection(Exception e) {
98-
listener.onFailure(e);
59+
timedListener.getListener().onFailure(e);
9960
}
10061

10162
@Override

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/Sender.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@
77

88
package org.elasticsearch.xpack.inference.external.http.sender;
99

10+
import org.apache.logging.log4j.Logger;
1011
import org.elasticsearch.action.ActionListener;
1112
import org.elasticsearch.core.Nullable;
1213
import org.elasticsearch.core.TimeValue;
1314
import org.elasticsearch.inference.InferenceServiceResults;
15+
import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;
16+
import org.elasticsearch.xpack.inference.external.request.Request;
1417

1518
import java.io.Closeable;
1619

@@ -23,4 +26,12 @@ void send(
2326
@Nullable TimeValue timeout,
2427
ActionListener<InferenceServiceResults> listener
2528
);
29+
30+
/**
 * Sends a single request and reports the outcome to {@code listener}, as an alternative to
 * {@code send} for callers that already hold a fully built {@link Request}.
 * NOTE(review): the name indicates the request bypasses the sender's queue - confirm per implementation.
 *
 * @param logger a logger handed to the implementation
 * @param request the request to send
 * @param responseHandler the handler used for the response
 * @param timeout how long to wait before giving up; may be null
 * @param listener receives the result or the failure
 */
void sendWithoutQueuing(
31+
Logger logger,
32+
Request request,
33+
ResponseHandler responseHandler,
34+
@Nullable TimeValue timeout,
35+
ActionListener<InferenceServiceResults> listener
36+
);
2637
}
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.inference.external.http.sender;
9+
10+
import org.elasticsearch.ElasticsearchStatusException;
11+
import org.elasticsearch.action.ActionListener;
12+
import org.elasticsearch.action.support.ListenerTimeouts;
13+
import org.elasticsearch.common.Strings;
14+
import org.elasticsearch.core.Nullable;
15+
import org.elasticsearch.core.TimeValue;
16+
import org.elasticsearch.rest.RestStatus;
17+
import org.elasticsearch.threadpool.ThreadPool;
18+
19+
import java.util.Objects;
20+
import java.util.concurrent.atomic.AtomicBoolean;
21+
22+
import static org.elasticsearch.xpack.inference.InferencePlugin.UTILITY_THREAD_POOL_NAME;
23+
24+
/**
25+
* Provides a way to set a timeout on the listener. If the time expires, the original listener's
26+
* {@link ActionListener#onFailure(Exception)} is called with an error indicating there was a timeout.
27+
*
28+
* @param <Response> the type of the value that is passed in {@link ActionListener#onResponse(Object)}
29+
*/
30+
public class TimedListener<Response> {
31+
32+
// The listener handed out by getListener(): the caller's listener wrapped with completion tracking
// and, when a timeout was supplied, with the timeout wrapper as well.
private final ActionListener<Response> listenerWithTimeout;
33+
// Set to true immediately before a response or failure is forwarded to the caller's listener.
private final AtomicBoolean finished = new AtomicBoolean();
34+
35+
/**
 * @param timeout how long to wait before failing the listener; when null no timeout wrapper is applied
 * @param listener the listener to notify; must not be null
 * @param threadPool passed to the timeout wrapper and used to look up the utility executor; must not be null
 */
public TimedListener(@Nullable TimeValue timeout, ActionListener<Response> listener, ThreadPool threadPool) {
36+
listenerWithTimeout = getListener(Objects.requireNonNull(listener), timeout, Objects.requireNonNull(threadPool));
37+
}
38+
39+
/**
 * Wraps {@code origListener} so that {@link #hasCompleted()} reflects its completion and,
 * if {@code timeout} is non-null, so that it is failed with a
 * {@link RestStatus#REQUEST_TIMEOUT} error once the timeout handler fires.
 */
private ActionListener<Response> getListener(
40+
ActionListener<Response> origListener,
41+
@Nullable TimeValue timeout,
42+
ThreadPool threadPool
43+
) {
44+
// Both callbacks flip the finished flag first, then delegate to the caller's listener.
ActionListener<Response> notificationListener = ActionListener.wrap(result -> {
45+
finished.set(true);
46+
origListener.onResponse(result);
47+
}, e -> {
48+
finished.set(true);
49+
origListener.onFailure(e);
50+
});
51+
52+
// Without a timeout only the completion tracking is needed.
if (timeout == null) {
53+
return notificationListener;
54+
}
55+
56+
// The timeout handler fails through notificationListener, so a timeout also marks this as finished.
// NOTE(review): presumably the utility executor is where the timeout handler runs - confirm against ListenerTimeouts.
return ListenerTimeouts.wrapWithTimeout(
57+
threadPool,
58+
timeout,
59+
threadPool.executor(UTILITY_THREAD_POOL_NAME),
60+
notificationListener,
61+
(ignored) -> notificationListener.onFailure(
62+
new ElasticsearchStatusException(Strings.format("Request timed out after [%s]", timeout), RestStatus.REQUEST_TIMEOUT)
63+
)
64+
);
65+
}
66+
67+
/**
 * @return true once a response or a failure (including the timeout failure) has been passed
 *         towards the original listener
 */
public boolean hasCompleted() {
68+
return finished.get();
69+
}
70+
71+
/**
 * @return the wrapped listener that callers should complete instead of the original one
 */
public ActionListener<Response> getListener() {
72+
return listenerWithTimeout;
73+
}
74+
}

0 commit comments

Comments
 (0)