diff --git a/docs/changelog/121396.yaml b/docs/changelog/121396.yaml new file mode 100644 index 0000000000000..1d77a8fbb0079 --- /dev/null +++ b/docs/changelog/121396.yaml @@ -0,0 +1,5 @@ +pr: 121396 +summary: Change format for Unified Chat +area: Machine Learning +type: bug +issues: [] diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/UnifiedChatCompletionException.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/UnifiedChatCompletionException.java new file mode 100644 index 0000000000000..f2844e6534a94 --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/UnifiedChatCompletionException.java @@ -0,0 +1,117 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.core.inference.results; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.common.xcontent.ChunkedToXContentHelper; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.xcontent.ToXContent; + +import java.util.Iterator; +import java.util.Locale; +import java.util.Objects; + +import static java.util.Collections.emptyIterator; +import static org.elasticsearch.ExceptionsHelper.maybeError; +import static org.elasticsearch.common.collect.Iterators.concat; +import static org.elasticsearch.common.xcontent.ChunkedToXContentHelper.endObject; +import static org.elasticsearch.common.xcontent.ChunkedToXContentHelper.startObject; + +public class UnifiedChatCompletionException extends XContentFormattedException { + + private static final Logger log = LogManager.getLogger(UnifiedChatCompletionException.class); + private final String message; + private final String type; + @Nullable + private final String code; + @Nullable + private final String param; + + public UnifiedChatCompletionException(RestStatus status, String message, String type, @Nullable String code) { + this(status, message, type, code, null); + } + + public UnifiedChatCompletionException(RestStatus status, String message, String type, @Nullable String code, @Nullable String param) { + super(message, status); + this.message = Objects.requireNonNull(message); + this.type = Objects.requireNonNull(type); + this.code = code; + this.param = param; + } + + public UnifiedChatCompletionException( + Throwable cause, + RestStatus status, + String message, + String type, + @Nullable String code, + @Nullable String param + ) { + super(message, cause, status); + this.message = Objects.requireNonNull(message); + this.type = Objects.requireNonNull(type); + this.code = code; + this.param = param; + } + + @Override + public Iterator toXContentChunked(Params params) { + return concat( + startObject(), + startObject("error"), + optionalField("code", code), + field("message", message), + optionalField("param", param), + field("type", type), + endObject(), + endObject() + ); + } + + private static Iterator field(String key, String value) { + return ChunkedToXContentHelper.chunk((b, p) -> b.field(key, value)); + } + + private static Iterator optionalField(String key, String value) { + return value != null ? ChunkedToXContentHelper.chunk((b, p) -> b.field(key, value)) : emptyIterator(); + } + + public static UnifiedChatCompletionException fromThrowable(Throwable t) { + if (ExceptionsHelper.unwrapCause(t) instanceof UnifiedChatCompletionException e) { + return e; + } else { + return maybeError(t).map(error -> { + // we should never be throwing Error, but just in case we are, rethrow it on another thread so the JVM can handle it and + // return a vague error to the user so that they at least see something went wrong but don't leak JVM details to users + ExceptionsHelper.maybeDieOnAnotherThread(error); + var e = new RuntimeException("Fatal error while streaming response. Please retry the request."); + log.error(e.getMessage(), t); + return new UnifiedChatCompletionException( + RestStatus.INTERNAL_SERVER_ERROR, + e.getMessage(), + getExceptionName(e), + RestStatus.INTERNAL_SERVER_ERROR.name().toLowerCase(Locale.ROOT) + ); + }).orElseGet(() -> { + log.atDebug().withThrowable(t).log("UnifiedChatCompletionException stack trace for debugging purposes."); + var status = ExceptionsHelper.status(t); + return new UnifiedChatCompletionException( + t, + status, + t.getMessage(), + getExceptionName(t), + status.name().toLowerCase(Locale.ROOT), + null + ); + }); + } + } +} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/XContentFormattedException.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/XContentFormattedException.java new file mode 100644 index 0000000000000..799953d452f0d --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/XContentFormattedException.java @@ -0,0 +1,87 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.core.inference.results; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.collect.Iterators; +import org.elasticsearch.common.xcontent.ChunkedToXContent; +import org.elasticsearch.common.xcontent.ChunkedToXContentHelper; +import org.elasticsearch.core.RestApiVersion; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.xcontent.ToXContent; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.util.Iterator; +import java.util.Objects; + +/** + * Similar to {@link org.elasticsearch.ElasticsearchWrapperException}, this will wrap an Exception to generate an xContent using + * {@link ElasticsearchException#generateFailureXContent(XContentBuilder, Params, Exception, boolean)}. + * Extends {@link ElasticsearchException} to provide REST handlers the {@link #status()} method in order to set the response header. + */ +public class XContentFormattedException extends ElasticsearchException implements ChunkedToXContent { + + public static final String X_CONTENT_PARAM = "detailedErrorsEnabled"; + private final RestStatus status; + private final Throwable cause; + + public XContentFormattedException(String message, RestStatus status) { + super(message); + this.status = Objects.requireNonNull(status); + this.cause = null; + } + + public XContentFormattedException(Throwable cause, RestStatus status) { + super(cause); + this.status = Objects.requireNonNull(status); + this.cause = cause; + } + + public XContentFormattedException(String message, Throwable cause, RestStatus status) { + super(message, cause); + this.status = Objects.requireNonNull(status); + this.cause = cause; + } + + @Override + public RestStatus status() { + return status; + } + + @Override + public Iterator toXContentChunked(Params params) { + return Iterators.concat( + ChunkedToXContentHelper.startObject(), + Iterators.single( + (b, p) -> ElasticsearchException.generateFailureXContent( + b, + p, + cause instanceof Exception e ? e : this, + params.paramAsBoolean(X_CONTENT_PARAM, false) + ) + ), + Iterators.single((b, p) -> b.field("status", status.getStatus())), + ChunkedToXContentHelper.endObject() + ); + } + + @Override + public Iterator toXContentChunked(RestApiVersion restApiVersion, Params params) { + return ChunkedToXContent.super.toXContentChunked(restApiVersion, params); + } + + @Override + public Iterator toXContentChunkedV8(Params params) { + return ChunkedToXContent.super.toXContentChunkedV8(params); + } + + @Override + public boolean isFragment() { + return super.isFragment(); + } +} diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/rest/ServerSentEventsRestActionListenerTests.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/rest/ServerSentEventsRestActionListenerTests.java index a22e179479dec..903961794b337 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/rest/ServerSentEventsRestActionListenerTests.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/rest/ServerSentEventsRestActionListenerTests.java @@ -44,10 +44,12 @@ import org.elasticsearch.rest.RestController; import org.elasticsearch.rest.RestHandler; import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.RestStatus; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xpack.core.inference.action.InferenceAction; +import org.elasticsearch.xpack.core.inference.results.XContentFormattedException; import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEvent; import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventField; import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventParser; @@ -80,6 +82,7 @@ public class ServerSentEventsRestActionListenerTests extends ESIntegTestCase { private static final String REQUEST_COUNT = "request_count"; private static final String WITH_ERROR = "with_error"; private static final String ERROR_ROUTE = "/_inference_error"; + private static final String FORMATTED_ERROR_ROUTE = "/_formatted_inference_error"; private static final String NO_STREAM_ROUTE = "/_inference_no_stream"; private static final Exception expectedException = new IllegalStateException("hello there"); private static final String expectedExceptionAsServerSentEvent = """ @@ -88,6 +91,11 @@ public class ServerSentEventsRestActionListenerTests extends ESIntegTestCase { "type":"illegal_state_exception","reason":"hello there"},"status":500\ }"""; + private static final Exception expectedFormattedException = new XContentFormattedException( + expectedException, + RestStatus.INTERNAL_SERVER_ERROR + ); + @Override protected boolean addMockHttpTransport() { return false; @@ -145,6 +153,16 @@ public List routes() { public void handleRequest(RestRequest request, RestChannel channel, NodeClient client) { new ServerSentEventsRestActionListener(channel, threadPool).onFailure(expectedException); } + }, new RestHandler() { + @Override + public List routes() { + return List.of(new Route(RestRequest.Method.POST, FORMATTED_ERROR_ROUTE)); + } + + @Override + public void handleRequest(RestRequest request, RestChannel channel, NodeClient client) { + new ServerSentEventsRestActionListener(channel, threadPool).onFailure(expectedFormattedException); + } }, new RestHandler() { @Override public List routes() { @@ -424,6 +442,21 @@ public void testErrorMidStream() { assertThat(collector.stringsVerified.getLast(), equalTo(expectedExceptionAsServerSentEvent)); } + public void testFormattedError() throws IOException { + var request = new Request(RestRequest.Method.POST.name(), FORMATTED_ERROR_ROUTE); + + try { + getRestClient().performRequest(request); + fail("Expected an exception to be thrown from the error route"); + } catch (ResponseException e) { + var response = e.getResponse(); + assertThat(response.getStatusLine().getStatusCode(), is(HttpStatus.SC_INTERNAL_SERVER_ERROR)); + assertThat(EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8), equalTo(""" + \uFEFFevent: error + data:\s""" + expectedExceptionAsServerSentEvent + "\n\n")); + } + } + public void testNoStream() { var collector = new RandomStringCollector(); var expectedTestCount = randomIntBetween(2, 30); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/BaseTransportInferenceAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/BaseTransportInferenceAction.java index 23117d0daa35a..b390a51f6d3e2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/BaseTransportInferenceAction.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/BaseTransportInferenceAction.java @@ -50,9 +50,11 @@ import java.io.IOException; import java.util.Random; import java.util.concurrent.Executor; +import java.util.concurrent.Flow; import java.util.function.Supplier; import java.util.stream.Collectors; +import static org.elasticsearch.ExceptionsHelper.unwrapCause; import static org.elasticsearch.core.Strings.format; import static org.elasticsearch.xpack.inference.InferencePlugin.INFERENCE_API_FEATURE; import static org.elasticsearch.xpack.inference.telemetry.InferenceStats.modelAttributes; @@ -280,7 +282,9 @@ private void inferOnServiceWithMetrics( var instrumentedStream = new PublisherWithMetrics(timer, model); taskProcessor.subscribe(instrumentedStream); - listener.onResponse(new InferenceAction.Response(inferenceResults, instrumentedStream)); + var streamErrorHandler = streamErrorHandler(instrumentedStream); + + listener.onResponse(new InferenceAction.Response(inferenceResults, streamErrorHandler)); } else { recordMetrics(model, timer, null); listener.onResponse(new InferenceAction.Response(inferenceResults)); @@ -291,9 +295,13 @@ private void inferOnServiceWithMetrics( })); } + protected Flow.Publisher streamErrorHandler(Flow.Processor upstream) { + return upstream; + } + private void recordMetrics(Model model, InferenceTimer timer, @Nullable Throwable t) { try { - inferenceStats.inferenceDuration().record(timer.elapsedMillis(), responseAttributes(model, t)); + inferenceStats.inferenceDuration().record(timer.elapsedMillis(), responseAttributes(model, unwrapCause(t))); } catch (Exception e) { log.atDebug().withThrowable(e).log("Failed to record metrics with a parsed model, dropping metrics"); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportUnifiedCompletionInferenceAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportUnifiedCompletionInferenceAction.java index 2e3090f2afd59..1144a11d86cc9 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportUnifiedCompletionInferenceAction.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportUnifiedCompletionInferenceAction.java @@ -11,6 +11,7 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.common.xcontent.ChunkedToXContent; import org.elasticsearch.inference.InferenceService; import org.elasticsearch.inference.InferenceServiceRegistry; import org.elasticsearch.inference.InferenceServiceResults; @@ -20,14 +21,19 @@ import org.elasticsearch.injection.guice.Inject; import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.tasks.Task; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; +import org.elasticsearch.xpack.core.inference.action.InferenceAction; import org.elasticsearch.xpack.core.inference.action.UnifiedCompletionAction; +import org.elasticsearch.xpack.core.inference.results.UnifiedChatCompletionException; import org.elasticsearch.xpack.inference.action.task.StreamingTaskManager; import org.elasticsearch.xpack.inference.common.InferenceServiceRateLimitCalculator; import org.elasticsearch.xpack.inference.registry.ModelRegistry; import org.elasticsearch.xpack.inference.telemetry.InferenceStats; +import java.util.concurrent.Flow; + public class TransportUnifiedCompletionInferenceAction extends BaseTransportInferenceAction { @Inject @@ -86,4 +92,40 @@ protected void doInference( ) { service.unifiedCompletionInfer(model, request.getUnifiedCompletionRequest(), null, listener); } + + @Override + protected void doExecute(Task task, UnifiedCompletionAction.Request request, ActionListener listener) { + super.doExecute(task, request, listener.delegateResponse((l, e) -> l.onFailure(UnifiedChatCompletionException.fromThrowable(e)))); + } + + /** + * If we get any errors, either in {@link #doExecute} via the listener.onFailure or while streaming, make sure that they are formatted + * as {@link UnifiedChatCompletionException}. + */ + @Override + protected Flow.Publisher streamErrorHandler(Flow.Processor upstream) { + return downstream -> { + upstream.subscribe(new Flow.Subscriber<>() { + @Override + public void onSubscribe(Flow.Subscription subscription) { + downstream.onSubscribe(subscription); + } + + @Override + public void onNext(ChunkedToXContent item) { + downstream.onNext(item); + } + + @Override + public void onError(Throwable throwable) { + downstream.onError(UnifiedChatCompletionException.fromThrowable(throwable)); + } + + @Override + public void onComplete() { + downstream.onComplete(); + } + }); + }; + } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/elastic/ElasticInferenceServiceUnifiedChatCompletionResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/elastic/ElasticInferenceServiceUnifiedChatCompletionResponseHandler.java index c0bccb9b2cd49..a9d69ee7d04b9 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/elastic/ElasticInferenceServiceUnifiedChatCompletionResponseHandler.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/elastic/ElasticInferenceServiceUnifiedChatCompletionResponseHandler.java @@ -9,13 +9,16 @@ import org.elasticsearch.inference.InferenceServiceResults; import org.elasticsearch.xpack.core.inference.results.StreamingUnifiedChatCompletionResults; +import org.elasticsearch.xpack.core.inference.results.UnifiedChatCompletionException; import org.elasticsearch.xpack.inference.external.http.HttpResult; +import org.elasticsearch.xpack.inference.external.http.retry.ErrorResponse; import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser; import org.elasticsearch.xpack.inference.external.openai.OpenAiUnifiedStreamingProcessor; import org.elasticsearch.xpack.inference.external.request.Request; import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventParser; import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventProcessor; +import java.util.Locale; import java.util.concurrent.Flow; public class ElasticInferenceServiceUnifiedChatCompletionResponseHandler extends ElasticInferenceServiceResponseHandler { @@ -37,4 +40,21 @@ public InferenceServiceResults parseResult(Request request, Flow.Publisher errorParseFunction + ) { + super(requestType, parseFunction, errorParseFunction, true); + } + @Override protected RetryException buildExceptionHandling429(Request request, HttpResult result) { // We don't retry, if the chat completion input is too large diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiResponseHandler.java index cf867fb1a0ab0..07dfd8300ae5b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiResponseHandler.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiResponseHandler.java @@ -13,6 +13,7 @@ import org.elasticsearch.xpack.inference.external.http.HttpResult; import org.elasticsearch.xpack.inference.external.http.retry.BaseResponseHandler; import org.elasticsearch.xpack.inference.external.http.retry.ContentTooLargeException; +import org.elasticsearch.xpack.inference.external.http.retry.ErrorResponse; import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser; import org.elasticsearch.xpack.inference.external.http.retry.RetryException; import org.elasticsearch.xpack.inference.external.request.Request; @@ -21,6 +22,7 @@ import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventProcessor; import java.util.concurrent.Flow; +import java.util.function.Function; import static org.elasticsearch.xpack.inference.external.http.retry.ResponseHandlerUtils.getFirstHeaderOrUnknown; @@ -44,7 +46,16 @@ public class OpenAiResponseHandler extends BaseResponseHandler { private final boolean canHandleStreamingResponses; public OpenAiResponseHandler(String requestType, ResponseParser parseFunction, boolean canHandleStreamingResponses) { - super(requestType, parseFunction, ErrorMessageResponseEntity::fromResponse); + this(requestType, parseFunction, ErrorMessageResponseEntity::fromResponse, canHandleStreamingResponses); + } + + protected OpenAiResponseHandler( + String requestType, + ResponseParser parseFunction, + Function errorParseFunction, + boolean canHandleStreamingResponses + ) { + super(requestType, parseFunction, errorParseFunction); this.canHandleStreamingResponses = canHandleStreamingResponses; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiUnifiedChatCompletionResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiUnifiedChatCompletionResponseHandler.java index fce2556efc5e0..2901b449f8a6e 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiUnifiedChatCompletionResponseHandler.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiUnifiedChatCompletionResponseHandler.java @@ -7,19 +7,31 @@ package org.elasticsearch.xpack.inference.external.openai; +import org.elasticsearch.core.Nullable; import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.XContentParserConfiguration; +import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.core.inference.results.StreamingUnifiedChatCompletionResults; +import org.elasticsearch.xpack.core.inference.results.UnifiedChatCompletionException; import org.elasticsearch.xpack.inference.external.http.HttpResult; +import org.elasticsearch.xpack.inference.external.http.retry.ErrorResponse; import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser; import org.elasticsearch.xpack.inference.external.request.Request; import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventParser; import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventProcessor; +import java.util.Locale; +import java.util.Objects; +import java.util.Optional; import java.util.concurrent.Flow; public class OpenAiUnifiedChatCompletionResponseHandler extends OpenAiChatCompletionResponseHandler { public OpenAiUnifiedChatCompletionResponseHandler(String requestType, ResponseParser parseFunction) { - super(requestType, parseFunction); + super(requestType, parseFunction, OpenAiErrorResponse::fromResponse); } @Override @@ -31,4 +43,92 @@ public InferenceServiceResults parseResult(Request request, Flow.Publisher, Void> ERROR_PARSER = new ConstructingObjectParser<>( + "open_ai_error", + true, + args -> Optional.ofNullable((OpenAiErrorResponse) args[0]) + ); + private static final ConstructingObjectParser ERROR_BODY_PARSER = new ConstructingObjectParser<>( + "open_ai_error", + true, + args -> new OpenAiErrorResponse((String) args[0], (String) args[1], (String) args[2], (String) args[3]) + ); + + static { + ERROR_BODY_PARSER.declareString(ConstructingObjectParser.constructorArg(), new ParseField("message")); + ERROR_BODY_PARSER.declareStringOrNull(ConstructingObjectParser.optionalConstructorArg(), new ParseField("code")); + ERROR_BODY_PARSER.declareStringOrNull(ConstructingObjectParser.optionalConstructorArg(), new ParseField("param")); + ERROR_BODY_PARSER.declareString(ConstructingObjectParser.constructorArg(), new ParseField("type")); + + ERROR_PARSER.declareObjectOrNull( + ConstructingObjectParser.optionalConstructorArg(), + ERROR_BODY_PARSER, + null, + new ParseField("error") + ); + } + + private static ErrorResponse fromResponse(HttpResult response) { + try ( + XContentParser parser = XContentFactory.xContent(XContentType.JSON) + .createParser(XContentParserConfiguration.EMPTY, response.body()) + ) { + return ERROR_PARSER.apply(parser, null).orElse(ErrorResponse.UNDEFINED_ERROR); + } catch (Exception e) { + // swallow the error + } + + return ErrorResponse.UNDEFINED_ERROR; + } + + @Nullable + private final String code; + @Nullable + private final String param; + private final String type; + + OpenAiErrorResponse(String errorMessage, @Nullable String code, @Nullable String param, String type) { + super(errorMessage); + this.code = code; + this.param = param; + this.type = Objects.requireNonNull(type); + } + + @Nullable + public String code() { + return code; + } + + @Nullable + public String param() { + return param; + } + + public String type() { + return type; + } + } + } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestUnifiedCompletionInferenceAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestUnifiedCompletionInferenceAction.java index 51f1bc48c8306..0efd31a6832cc 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestUnifiedCompletionInferenceAction.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestUnifiedCompletionInferenceAction.java @@ -15,6 +15,7 @@ import org.elasticsearch.rest.ServerlessScope; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xpack.core.inference.action.UnifiedCompletionAction; +import org.elasticsearch.xpack.core.inference.results.UnifiedChatCompletionException; import java.io.IOException; import java.util.List; @@ -57,7 +58,10 @@ protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient return channel -> client.execute( UnifiedCompletionAction.INSTANCE, request, - new ServerSentEventsRestActionListener(channel, threadPool) + new ServerSentEventsRestActionListener(channel, threadPool).delegateResponse((l, e) -> { + // format any validation exceptions from the rest -> transport path as UnifiedChatCompletionException + l.onFailure(UnifiedChatCompletionException.fromThrowable(e)); + }) ); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/ServerSentEventsRestActionListener.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/ServerSentEventsRestActionListener.java index 6991e1325f3bc..cadf3e5f1806b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/ServerSentEventsRestActionListener.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/ServerSentEventsRestActionListener.java @@ -35,15 +35,19 @@ import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xpack.core.inference.action.InferenceAction; +import org.elasticsearch.xpack.core.inference.results.XContentFormattedException; import java.io.IOException; import java.io.OutputStream; import java.nio.charset.StandardCharsets; import java.util.Iterator; +import java.util.Map; import java.util.Objects; import java.util.concurrent.Flow; import java.util.concurrent.atomic.AtomicBoolean; +import static org.elasticsearch.xpack.core.inference.results.XContentFormattedException.X_CONTENT_PARAM; + /** * A version of {@link org.elasticsearch.rest.action.RestChunkedToXContentListener} that reads from a {@link Flow.Publisher} and encodes * the response in Server-Sent Events. @@ -72,7 +76,7 @@ public ServerSentEventsRestActionListener(RestChannel channel, SetOnce threadPool) { this.channel = channel; - this.params = params; + this.params = new ToXContent.DelegatingMapParams(Map.of(X_CONTENT_PARAM, String.valueOf(channel.detailedErrorsEnabled())), params); this.threadPool = Objects.requireNonNull(threadPool); } @@ -150,6 +154,12 @@ public void onFailure(Exception e) { } private ChunkedToXContent errorChunk(Throwable t) { + // if we've already formatted it, just return that format + if (ExceptionsHelper.unwrapCause(t) instanceof XContentFormattedException xContentFormattedException) { + return xContentFormattedException; + } + + // else, try to parse the format and return something that the ES client knows how to interpret var status = ExceptionsHelper.status(t); Exception e; @@ -158,7 +168,8 @@ private ChunkedToXContent errorChunk(Throwable t) { } else { // if not exception, then error, and we should not let it escape. rethrow on another thread, and inform the user we're stopping. ExceptionsHelper.maybeDieOnAnotherThread(t); - e = new RuntimeException("Fatal error while streaming response", t); + e = new RuntimeException("Fatal error while streaming response. Please retry the request."); + logger.error(e.getMessage(), t); } return params -> Iterators.concat( ChunkedToXContentHelper.startObject(), diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/BaseTransportInferenceActionTestCase.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/BaseTransportInferenceActionTestCase.java index 562c99c0887b5..56966ca40c478 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/BaseTransportInferenceActionTestCase.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/BaseTransportInferenceActionTestCase.java @@ -7,7 +7,7 @@ package org.elasticsearch.xpack.inference.action; -import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.client.internal.node.NodeClient; @@ -47,9 +47,9 @@ import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.ArgumentMatchers.assertArg; -import static org.mockito.ArgumentMatchers.same; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -127,8 +127,7 @@ public void testMetricsAfterModelRegistryError() { return null; }).when(modelRegistry).getModelWithSecrets(any(), any()); - var listener = doExecute(taskType); - verify(listener).onFailure(same(expectedException)); + doExecute(taskType); verify(inferenceStats.inferenceDuration()).record(anyLong(), assertArg(attributes -> { assertThat(attributes.get("service"), nullValue()); @@ -148,7 +147,13 @@ protected ActionListener doExecute(TaskType taskType, when(request.getInferenceEntityId()).thenReturn(inferenceId); when(request.getTaskType()).thenReturn(taskType); when(request.isStreaming()).thenReturn(stream); - ActionListener listener = mock(); + ActionListener listener = spy(new ActionListener<>() { + @Override + public void onResponse(InferenceAction.Response o) {} + + @Override + public void onFailure(Exception e) {} + }); action.doExecute(mock(), request, listener); return listener; } @@ -161,9 +166,9 @@ public void testMetricsAfterMissingService() { var listener = doExecute(taskType); verify(listener).onFailure(assertArg(e -> { - assertThat(e, isA(ElasticsearchStatusException.class)); + assertThat(e, isA(ElasticsearchException.class)); assertThat(e.getMessage(), is("Unknown service [" + serviceId + "] for model [" + inferenceId + "]. ")); - assertThat(((ElasticsearchStatusException) e).status(), is(RestStatus.BAD_REQUEST)); + assertThat(((ElasticsearchException) e).status(), is(RestStatus.BAD_REQUEST)); })); verify(inferenceStats.inferenceDuration()).record(anyLong(), assertArg(attributes -> { assertThat(attributes.get("service"), is(serviceId)); @@ -192,7 +197,7 @@ public void testMetricsAfterUnknownTaskType() { var listener = doExecute(requestTaskType); verify(listener).onFailure(assertArg(e -> { - assertThat(e, isA(ElasticsearchStatusException.class)); + assertThat(e, isA(ElasticsearchException.class)); assertThat( e.getMessage(), is( @@ -203,7 +208,7 @@ public void testMetricsAfterUnknownTaskType() { + "]" ) ); - assertThat(((ElasticsearchStatusException) e).status(), is(RestStatus.BAD_REQUEST)); + assertThat(((ElasticsearchException) e).status(), is(RestStatus.BAD_REQUEST)); })); verify(inferenceStats.inferenceDuration()).record(anyLong(), assertArg(attributes -> { assertThat(attributes.get("service"), is(serviceId)); @@ -221,7 +226,6 @@ public void testMetricsAfterInferError() { var listener = doExecute(taskType); - verify(listener).onFailure(same(expectedException)); verify(inferenceStats.inferenceDuration()).record(anyLong(), assertArg(attributes -> { assertThat(attributes.get("service"), is(serviceId)); assertThat(attributes.get("task_type"), is(taskType.toString())); @@ -239,8 +243,8 @@ public void testMetricsAfterStreamUnsupported() { var listener = doExecute(taskType, true); verify(listener).onFailure(assertArg(e -> { - assertThat(e, isA(ElasticsearchStatusException.class)); - var ese = (ElasticsearchStatusException) e; + assertThat(e, isA(ElasticsearchException.class)); + var ese = (ElasticsearchException) e; assertThat(ese.getMessage(), is("Streaming is not allowed for service [" + serviceId + "].")); assertThat(ese.status(), is(expectedStatus)); })); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/TransportUnifiedCompletionActionTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/TransportUnifiedCompletionActionTests.java index e6b5c6d336134..7dac6a1015aae 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/TransportUnifiedCompletionActionTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/TransportUnifiedCompletionActionTests.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.inference.action; -import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.client.internal.node.NodeClient; import org.elasticsearch.inference.InferenceServiceRegistry; @@ -17,6 +16,7 @@ import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; import org.elasticsearch.xpack.core.inference.action.UnifiedCompletionAction; +import org.elasticsearch.xpack.core.inference.results.UnifiedChatCompletionException; import org.elasticsearch.xpack.inference.action.task.StreamingTaskManager; import org.elasticsearch.xpack.inference.common.InferenceServiceRateLimitCalculator; import org.elasticsearch.xpack.inference.registry.ModelRegistry; @@ -81,12 +81,12 @@ public void testThrows_IncompatibleTaskTypeException_WhenUsingATextEmbeddingInfe var listener = doExecute(requestTaskType); verify(listener).onFailure(assertArg(e -> { - assertThat(e, isA(ElasticsearchStatusException.class)); + assertThat(e, isA(UnifiedChatCompletionException.class)); assertThat( e.getMessage(), is("Incompatible task_type for unified API, the requested type [" + requestTaskType + "] must be one of [chat_completion]") ); - assertThat(((ElasticsearchStatusException) e).status(), is(RestStatus.BAD_REQUEST)); + assertThat(((UnifiedChatCompletionException) e).status(), is(RestStatus.BAD_REQUEST)); })); verify(inferenceStats.inferenceDuration()).record(anyLong(), assertArg(attributes -> { assertThat(attributes.get("service"), is(serviceId)); @@ -106,12 +106,12 @@ public void testThrows_IncompatibleTaskTypeException_WhenUsingRequestIsAny_Model var listener = doExecute(requestTaskType); verify(listener).onFailure(assertArg(e -> { - assertThat(e, isA(ElasticsearchStatusException.class)); + assertThat(e, isA(UnifiedChatCompletionException.class)); assertThat( e.getMessage(), is("Incompatible task_type for unified API, the requested type [" + requestTaskType + "] must be one of [chat_completion]") ); - assertThat(((ElasticsearchStatusException) e).status(), is(RestStatus.BAD_REQUEST)); + assertThat(((UnifiedChatCompletionException) e).status(), is(RestStatus.BAD_REQUEST)); })); verify(inferenceStats.inferenceDuration()).record(anyLong(), assertArg(attributes -> { assertThat(attributes.get("service"), is(serviceId)); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/openai/OpenAiUnifiedChatCompletionResponseHandlerTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/openai/OpenAiUnifiedChatCompletionResponseHandlerTests.java new file mode 100644 index 0000000000000..4853aa8d2c563 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/openai/OpenAiUnifiedChatCompletionResponseHandlerTests.java @@ -0,0 +1,134 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.openai; + +import org.apache.http.HttpResponse; +import org.apache.http.StatusLine; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xpack.core.inference.results.UnifiedChatCompletionException; +import org.elasticsearch.xpack.inference.external.http.HttpResult; +import org.elasticsearch.xpack.inference.external.http.retry.RetryException; +import org.elasticsearch.xpack.inference.external.request.Request; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import static org.elasticsearch.ExceptionsHelper.unwrapCause; +import static org.elasticsearch.xcontent.ToXContent.EMPTY_PARAMS; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.isA; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class OpenAiUnifiedChatCompletionResponseHandlerTests extends ESTestCase { + private final OpenAiUnifiedChatCompletionResponseHandler responseHandler = new OpenAiUnifiedChatCompletionResponseHandler( + "chat completions", + (a, b) -> mock() + ); + + public void testFailValidationWithAllFields() throws IOException { + var responseJson = """ + { + "error": { + "type": "not_found_error", + "message": "a message", + "code": "ahh", + "param": "model" + } + } + """; + + var errorJson = invalidResponseJson(responseJson); + + assertThat(errorJson, is(""" + {"error":{"code":"ahh","message":"Received a server error status code for request from inference entity id [abc] status [500]. \ + Error message: [a message]","param":"model","type":"not_found_error"}}""")); + } + + public void testFailValidationWithoutOptionalFields() throws IOException { + var responseJson = """ + { + "error": { + "type": "not_found_error", + "message": "a message" + } + } + """; + + var errorJson = invalidResponseJson(responseJson); + + assertThat(errorJson, is(""" + {"error":{"message":"Received a server error status code for request from inference entity id [abc] status [500]. \ + Error message: [a message]","type":"not_found_error"}}""")); + } + + public void testFailValidationWithInvalidJson() throws IOException { + var responseJson = """ + what? this isn't a json + """; + + var errorJson = invalidResponseJson(responseJson); + + assertThat(errorJson, is(""" + {"error":{"code":"bad_request","message":"Received a server error status code for request from inference entity id [abc] status\ + [500]","type":"ErrorResponse"}}""")); + } + + private String invalidResponseJson(String responseJson) throws IOException { + var exception = invalidResponse(responseJson); + assertThat(exception, isA(RetryException.class)); + assertThat(unwrapCause(exception), isA(UnifiedChatCompletionException.class)); + return toJson((UnifiedChatCompletionException) unwrapCause(exception)); + } + + private Exception invalidResponse(String responseJson) { + return expectThrows( + RetryException.class, + () -> responseHandler.validateResponse( + mock(), + mock(), + mockRequest(), + new HttpResult(mock500Response(), responseJson.getBytes(StandardCharsets.UTF_8)) + ) + ); + } + + private static Request mockRequest() { + var request = mock(Request.class); + when(request.getInferenceEntityId()).thenReturn("abc"); + when(request.isStreaming()).thenReturn(true); + return request; + } + + private static HttpResponse mock500Response() { + int statusCode = 500; + var statusLine = mock(StatusLine.class); + when(statusLine.getStatusCode()).thenReturn(statusCode); + + var response = mock(HttpResponse.class); + when(response.getStatusLine()).thenReturn(statusLine); + + return response; + } + + private String toJson(UnifiedChatCompletionException e) throws IOException { + try (var builder = XContentFactory.jsonBuilder()) { + e.toXContentChunked(EMPTY_PARAMS).forEachRemaining(xContent -> { + try { + xContent.toXContent(builder, EMPTY_PARAMS); + } catch (IOException ex) { + throw new RuntimeException(ex); + } + }); + return XContentHelper.convertToJson(BytesReference.bytes(builder), false, builder.contentType()); + } + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/InferenceEventsAssertion.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/InferenceEventsAssertion.java index 7cfd231be39f3..637ae726572a4 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/InferenceEventsAssertion.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/InferenceEventsAssertion.java @@ -11,6 +11,7 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.xcontent.ChunkedToXContent; import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.inference.InferenceServiceResults; import org.elasticsearch.xcontent.XContentFactory; import org.hamcrest.MatcherAssert; @@ -85,6 +86,16 @@ public InferenceEventsAssertion hasErrorContaining(String message) { return this; } + public InferenceEventsAssertion hasErrorMatching(CheckedConsumer matcher) { + hasError(); + try { + matcher.accept(error); + } catch (Exception e) { + fail(e); + } + return this; + } + public InferenceEventsAssertion hasEvents(String... events) { Arrays.stream(events).forEach(this::hasEvent); return this; diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java index b46fd4941e6f6..971a5f46b8d9b 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java @@ -27,14 +27,17 @@ import org.elasticsearch.inference.MinimalServiceSettings; import org.elasticsearch.inference.Model; import org.elasticsearch.inference.TaskType; +import org.elasticsearch.inference.UnifiedCompletionRequest; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.http.MockResponse; import org.elasticsearch.test.http.MockWebServer; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xcontent.ToXContent; +import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.core.inference.action.InferenceAction; import org.elasticsearch.xpack.core.inference.results.ChunkedInferenceEmbeddingSparse; +import org.elasticsearch.xpack.core.inference.results.UnifiedChatCompletionException; import org.elasticsearch.xpack.core.ml.search.WeightedToken; import org.elasticsearch.xpack.inference.external.http.HttpClientManager; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; @@ -44,11 +47,15 @@ import org.elasticsearch.xpack.inference.logging.ThrottlerManager; import org.elasticsearch.xpack.inference.registry.ModelRegistry; import org.elasticsearch.xpack.inference.results.SparseEmbeddingResultsTests; +import org.elasticsearch.xpack.inference.services.InferenceEventsAssertion; import org.elasticsearch.xpack.inference.services.ServiceFields; import org.elasticsearch.xpack.inference.services.elastic.authorization.ElasticInferenceServiceAuthorization; import org.elasticsearch.xpack.inference.services.elastic.authorization.ElasticInferenceServiceAuthorizationHandler; import org.elasticsearch.xpack.inference.services.elastic.authorization.ElasticInferenceServiceAuthorizationTests; +import org.elasticsearch.xpack.inference.services.elastic.completion.ElasticInferenceServiceCompletionModel; +import org.elasticsearch.xpack.inference.services.elastic.completion.ElasticInferenceServiceCompletionServiceSettings; import org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; import org.hamcrest.MatcherAssert; import org.hamcrest.Matchers; import org.junit.After; @@ -61,8 +68,10 @@ import java.util.Map; import java.util.concurrent.TimeUnit; +import static org.elasticsearch.ExceptionsHelper.unwrapCause; import static org.elasticsearch.common.xcontent.XContentHelper.toXContent; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertToXContentEquivalent; +import static org.elasticsearch.xcontent.ToXContent.EMPTY_PARAMS; import static org.elasticsearch.xpack.inference.Utils.getInvalidModel; import static org.elasticsearch.xpack.inference.Utils.getModelListenerForException; import static org.elasticsearch.xpack.inference.Utils.getPersistedConfigMap; @@ -76,6 +85,7 @@ import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.isA; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; @@ -949,6 +959,62 @@ public void testDefaultConfigs_Returns_DefaultChatCompletion_V1_WhenTaskTypeIsCo } } + public void testUnifiedCompletionError() throws Exception { + var eisGatewayUrl = getUrl(webServer); + var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); + try (var service = createService(senderFactory, eisGatewayUrl)) { + var responseJson = """ + { + "error": "The model `rainbow-sprinkles` does not exist or you do not have access to it." + }"""; + webServer.enqueue(new MockResponse().setResponseCode(404).setBody(responseJson)); + var model = new ElasticInferenceServiceCompletionModel( + "id", + TaskType.COMPLETION, + "elastic", + new ElasticInferenceServiceCompletionServiceSettings("model_id", new RateLimitSettings(100)), + EmptyTaskSettings.INSTANCE, + EmptySecretSettings.INSTANCE, + new ElasticInferenceServiceComponents(eisGatewayUrl) + ); + PlainActionFuture listener = new PlainActionFuture<>(); + service.unifiedCompletionInfer( + model, + UnifiedCompletionRequest.of( + List.of(new UnifiedCompletionRequest.Message(new UnifiedCompletionRequest.ContentString("hello"), "user", null, null)) + ), + InferenceAction.Request.DEFAULT_TIMEOUT, + listener + ); + + var result = listener.actionGet(TIMEOUT); + + InferenceEventsAssertion.assertThat(result).hasFinishedStream().hasNoEvents().hasErrorMatching(e -> { + e = unwrapCause(e); + assertThat(e, isA(UnifiedChatCompletionException.class)); + try (var builder = XContentFactory.jsonBuilder()) { + ((UnifiedChatCompletionException) e).toXContentChunked(EMPTY_PARAMS).forEachRemaining(xContent -> { + try { + xContent.toXContent(builder, EMPTY_PARAMS); + } catch (IOException ex) { + throw new RuntimeException(ex); + } + }); + var json = XContentHelper.convertToJson(BytesReference.bytes(builder), false, builder.contentType()); + + assertThat(json, is(""" + {\ + "error":{\ + "code":"not_found",\ + "message":"Received an unsuccessful status code for request from inference entity id [id] status \ + [404]. Error message: [The model `rainbow-sprinkles` does not exist or you do not have access to it.]",\ + "type":"error"\ + }}""")); + } + }); + } + } + private ElasticInferenceService createServiceWithMockSender() { return createServiceWithMockSender(ElasticInferenceServiceAuthorizationTests.createEnabledAuth()); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java index 50c028fab28dd..57ae66477ef8c 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java @@ -33,9 +33,11 @@ import org.elasticsearch.test.http.MockWebServer; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xcontent.ToXContent; +import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.core.inference.action.InferenceAction; import org.elasticsearch.xpack.core.inference.results.ChunkedInferenceEmbeddingFloat; +import org.elasticsearch.xpack.core.inference.results.UnifiedChatCompletionException; import org.elasticsearch.xpack.inference.chunking.ChunkingSettingsTests; import org.elasticsearch.xpack.inference.external.http.HttpClientManager; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; @@ -61,8 +63,10 @@ import java.util.Map; import java.util.concurrent.TimeUnit; +import static org.elasticsearch.ExceptionsHelper.unwrapCause; import static org.elasticsearch.common.xcontent.XContentHelper.toXContent; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertToXContentEquivalent; +import static org.elasticsearch.xcontent.ToXContent.EMPTY_PARAMS; import static org.elasticsearch.xpack.inference.Utils.getInvalidModel; import static org.elasticsearch.xpack.inference.Utils.getPersistedConfigMap; import static org.elasticsearch.xpack.inference.Utils.getRequestConfigMap; @@ -84,6 +88,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.isA; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -1061,6 +1066,59 @@ public void testUnifiedCompletionInfer() throws Exception { } } + public void testUnifiedCompletionError() throws Exception { + String responseJson = """ + { + "error": { + "message": "The model `gpt-4awero` does not exist or you do not have access to it.", + "type": "invalid_request_error", + "param": null, + "code": "model_not_found" + } + }"""; + webServer.enqueue(new MockResponse().setResponseCode(404).setBody(responseJson)); + + var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); + try (var service = new OpenAiService(senderFactory, createWithEmptySettings(threadPool))) { + var model = OpenAiChatCompletionModelTests.createChatCompletionModel(getUrl(webServer), "org", "secret", "model", "user"); + PlainActionFuture listener = new PlainActionFuture<>(); + service.unifiedCompletionInfer( + model, + UnifiedCompletionRequest.of( + List.of(new UnifiedCompletionRequest.Message(new UnifiedCompletionRequest.ContentString("hello"), "user", null, null)) + ), + InferenceAction.Request.DEFAULT_TIMEOUT, + listener + ); + + var result = listener.actionGet(TIMEOUT); + + InferenceEventsAssertion.assertThat(result).hasFinishedStream().hasNoEvents().hasErrorMatching(e -> { + e = unwrapCause(e); + assertThat(e, isA(UnifiedChatCompletionException.class)); + try (var builder = XContentFactory.jsonBuilder()) { + ((UnifiedChatCompletionException) e).toXContentChunked(EMPTY_PARAMS).forEachRemaining(xContent -> { + try { + xContent.toXContent(builder, EMPTY_PARAMS); + } catch (IOException ex) { + throw new RuntimeException(ex); + } + }); + var json = XContentHelper.convertToJson(BytesReference.bytes(builder), false, builder.contentType()); + + assertThat(json, is(""" + {\ + "error":{\ + "code":"model_not_found",\ + "message":"Received an unsuccessful status code for request from inference entity id [id] status \ + [404]. Error message: [The model `gpt-4awero` does not exist or you do not have access to it.]",\ + "type":"invalid_request_error"\ + }}""")); + } + }); + } + } + public void testInfer_StreamRequest() throws Exception { String responseJson = """ data: {\