elastic
diff --git a/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/BaseResponseHandler.java‎
Lines changed: 3 additions & 3 deletions b/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/BaseResponseHandler.java‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/ChatCompletionErrorResponseHandler.java‎
Lines changed: 162 additions & 0 deletions b/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/ChatCompletionErrorResponseHandler.java‎
Lines changed: 162 additions & 0 deletions
diff --git a/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/ErrorResponse.java‎
Lines changed: 1 addition & 1 deletion b/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/ErrorResponse.java‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/UnifiedChatCompletionErrorParser.java‎
Lines changed: 16 additions & 0 deletions b/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/UnifiedChatCompletionErrorParser.java‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/UnifiedChatCompletionErrorResponse.java‎
Lines changed: 63 additions & 0 deletions b/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/UnifiedChatCompletionErrorResponse.java‎
Lines changed: 63 additions & 0 deletions
diff --git a/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceUnifiedChatCompletionResponseHandler.java‎
Lines changed: 1 addition & 1 deletion b/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceUnifiedChatCompletionResponseHandler.java‎
Lines changed: 1 addition & 1 deletion
@@ -95,7 +95,7 @@ public void validateResponse(
 
     protected abstract void checkForFailureStatusCode(Request request, HttpResult result);
 
-    private void checkForErrorObject(Request request, HttpResult result) {
+    protected void checkForErrorObject(Request request, HttpResult result) {
         var errorEntity = errorParseFunction.apply(result);
 
         if (errorEntity.errorStructureFound()) {
@@ -116,12 +116,12 @@ protected Exception buildError(String message, Request request, HttpResult resul
     protected Exception buildError(String message, Request request, HttpResult result, ErrorResponse errorResponse) {
         var responseStatusCode = result.response().getStatusLine().getStatusCode();
         return new ElasticsearchStatusException(
-            errorMessage(message, request, result, errorResponse, responseStatusCode),
+            constructErrorMessage(message, request, errorResponse, responseStatusCode),
             toRestStatus(responseStatusCode)
         );
     }
 
-    protected String errorMessage(String message, Request request, HttpResult result, ErrorResponse errorResponse, int statusCode) {
+    public static String constructErrorMessage(String message, Request request, ErrorResponse errorResponse, int statusCode) {
         return (errorResponse == null
             || errorResponse.errorStructureFound() == false
             || Strings.isNullOrEmpty(errorResponse.getErrorMessage()))
 
@@ -0,0 +1,162 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.external.http.retry;
+
+import org.elasticsearch.rest.RestStatus;
+import org.elasticsearch.xpack.core.inference.results.UnifiedChatCompletionException;
+import org.elasticsearch.xpack.inference.external.http.HttpResult;
+import org.elasticsearch.xpack.inference.external.request.Request;
+
+import java.util.Locale;
+import java.util.Objects;
+
+import static org.elasticsearch.core.Strings.format;
+import static org.elasticsearch.xpack.inference.external.http.retry.BaseResponseHandler.SERVER_ERROR_OBJECT;
+import static org.elasticsearch.xpack.inference.external.http.retry.BaseResponseHandler.toRestStatus;
+
+public class ChatCompletionErrorResponseHandler {
+    private static final String STREAM_ERROR = "stream_error";
+
+    private final UnifiedChatCompletionErrorParser unifiedChatCompletionErrorParser;
+
+    public ChatCompletionErrorResponseHandler(UnifiedChatCompletionErrorParser errorParser) {
+        this.unifiedChatCompletionErrorParser = Objects.requireNonNull(errorParser);
+    }
+
+    public void checkForErrorObject(Request request, HttpResult result) {
+        var errorEntity = unifiedChatCompletionErrorParser.parse(result);
+
+        if (errorEntity.errorStructureFound()) {
+            // We don't really know what happened because the status code was 200 so we'll return a failure and let the
+            // client retry if necessary
+            // If we did want to retry here, we'll need to determine if this was a streaming request, if it was
+            // we shouldn't retry because that would replay the entire streaming request and the client would get
+            // duplicate chunks back
+            throw new RetryException(false, buildChatCompletionErrorInternal(SERVER_ERROR_OBJECT, request, result, errorEntity));
+        }
+    }
+
+    public UnifiedChatCompletionException buildChatCompletionError(String message, Request request, HttpResult result) {
+        var errorResponse = unifiedChatCompletionErrorParser.parse(result);
+        return buildChatCompletionErrorInternal(message, request, result, errorResponse);
+    }
+
+    private UnifiedChatCompletionException buildChatCompletionErrorInternal(
+        String message,
+        Request request,
+        HttpResult result,
+        UnifiedChatCompletionErrorResponse errorResponse
+    ) {
+        assert request.isStreaming() : "Only streaming requests support this format";
+        var statusCode = result.response().getStatusLine().getStatusCode();
+        var errorMessage = BaseResponseHandler.constructErrorMessage(message, request, errorResponse, statusCode);
+        var restStatus = toRestStatus(statusCode);
+
+        if (errorResponse.errorStructureFound()) {
+            return new UnifiedChatCompletionException(
+                restStatus,
+                errorMessage,
+                errorResponse.type(),
+                errorResponse.code(),
+                errorResponse.param()
+            );
+        } else {
+            return buildDefaultChatCompletionError(errorResponse, errorMessage, restStatus);
+        }
+    }
+
+    /**
+     * Builds a default {@link UnifiedChatCompletionException} for a streaming request.
+     * This method is used when an error response is received we were unable to parse it in the format we were expecting.
+     * Only streaming requests should use this method.
+     *
+     * @param errorResponse the error response extracted from the HTTP result
+     * @param errorMessage the error message to include in the exception
+     * @param restStatus the REST status code of the response
+     * @return an instance of {@link UnifiedChatCompletionException} with details from the error response
+     */
+    private static UnifiedChatCompletionException buildDefaultChatCompletionError(
+        ErrorResponse errorResponse,
+        String errorMessage,
+        RestStatus restStatus
+    ) {
+        return new UnifiedChatCompletionException(
+            restStatus,
+            errorMessage,
+            createErrorType(errorResponse),
+            restStatus.name().toLowerCase(Locale.ROOT)
+        );
+    }
+
+    /**
+     * Builds a mid-stream error for a streaming request.
+     * This method is used when an error occurs while processing a streaming response.
+     * Only streaming requests should use this method.
+     *
+     * @param inferenceEntityId the ID of the inference entity
+     * @param message the error message
+     * @param e the exception that caused the error, can be null
+     * @return a {@link UnifiedChatCompletionException} representing the mid-stream error
+     */
+    public UnifiedChatCompletionException buildMidStreamChatCompletionError(String inferenceEntityId, String message, Exception e) {
+        var error = unifiedChatCompletionErrorParser.parse(message);
+
+        if (error.errorStructureFound()) {
+            return new UnifiedChatCompletionException(
+                RestStatus.INTERNAL_SERVER_ERROR,
+                format(
+                    "%s for request from inference entity id [%s]. Error message: [%s]",
+                    SERVER_ERROR_OBJECT,
+                    inferenceEntityId,
+                    error.getErrorMessage()
+                ),
+                error.type(),
+                error.code(),
+                error.param()
+            );
+        } else if (e != null) {
+            // If the error response does not match, we can still return an exception based on the original throwable
+            return UnifiedChatCompletionException.fromThrowable(e);
+        } else {
+            // If no specific error response is found, we return a default mid-stream error
+            return buildDefaultMidStreamChatCompletionError(inferenceEntityId, error);
+        }
+    }
+
+    /**
+     * Builds a default mid-stream error for a streaming request.
+     * This method is used when no specific error response is found in the message.
+     * Only streaming requests should use this method.
+     *
+     * @param inferenceEntityId the ID of the inference entity
+     * @param errorResponse     the error response extracted from the message
+     * @return a {@link UnifiedChatCompletionException} representing the default mid-stream error
+     */
+    private static UnifiedChatCompletionException buildDefaultMidStreamChatCompletionError(
+        String inferenceEntityId,
+        ErrorResponse errorResponse
+    ) {
+        return new UnifiedChatCompletionException(
+            RestStatus.INTERNAL_SERVER_ERROR,
+            format("%s for request from inference entity id [%s]", SERVER_ERROR_OBJECT, inferenceEntityId),
+            createErrorType(errorResponse),
+            STREAM_ERROR
+        );
+    }
+
+    /**
+     * Creates a string representation of the error type based on the provided ErrorResponse.
+     * This method is used to generate a human-readable error type for logging or exception messages.
+     *
+     * @param errorResponse the ErrorResponse object
+     * @return a string representing the error type
+     */
+    private static String createErrorType(ErrorResponse errorResponse) {
+        return errorResponse != null ? errorResponse.getClass().getSimpleName() : "unknown";
+    }
+}
@@ -22,7 +22,7 @@ public ErrorResponse(String errorMessage) {
         this.errorStructureFound = true;
     }
 
-    private ErrorResponse(boolean errorStructureFound) {
+    protected ErrorResponse(boolean errorStructureFound) {
         this.errorMessage = "";
         this.errorStructureFound = errorStructureFound;
     }
 
@@ -0,0 +1,16 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.external.http.retry;
+
+import org.elasticsearch.xpack.inference.external.http.HttpResult;
+
+/**
+ * Extracts a {@link UnifiedChatCompletionErrorResponse} from a response, either from a full HTTP result or from raw text.
+ * Callers check {@link UnifiedChatCompletionErrorResponse#errorStructureFound()} on the returned value to decide whether
+ * an error was recognised.
+ * NOTE(review): implementations presumably return {@link UnifiedChatCompletionErrorResponse#UNDEFINED_ERROR} rather than
+ * {@code null} when nothing is recognised - confirm against the implementations.
+ */
+public interface UnifiedChatCompletionErrorParser {
+    /**
+     * Parses an error response from an HTTP result.
+     *
+     * @param result the HTTP result to inspect
+     * @return the parsed error response
+     */
+    UnifiedChatCompletionErrorResponse parse(HttpResult result);
+
+    /**
+     * Parses an error response from a string, such as a message received while processing a stream.
+     *
+     * @param result the text to inspect
+     * @return the parsed error response
+     */
+    UnifiedChatCompletionErrorResponse parse(String result);
+}
@@ -0,0 +1,63 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.external.http.retry;
+
+import org.elasticsearch.core.Nullable;
+
+import java.util.Objects;
+
+public class UnifiedChatCompletionErrorResponse extends ErrorResponse {
+    public static final UnifiedChatCompletionErrorResponse UNDEFINED_ERROR = new UnifiedChatCompletionErrorResponse();
+
+    @Nullable
+    private final String code;
+    @Nullable
+    private final String param;
+    private final String type;
+
+    public UnifiedChatCompletionErrorResponse(String errorMessage, String type, @Nullable String code, @Nullable String param) {
+        super(errorMessage);
+        this.code = code;
+        this.param = param;
+        this.type = Objects.requireNonNull(type);
+    }
+
+    private UnifiedChatCompletionErrorResponse() {
+        super(false);
+        this.code = null;
+        this.param = null;
+        this.type = "unknown";
+    }
+
+    @Nullable
+    public String code() {
+        return code;
+    }
+
+    @Nullable
+    public String param() {
+        return param;
+    }
+
+    public String type() {
+        return type;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (o == null || getClass() != o.getClass()) return false;
+        if (super.equals(o) == false) return false;
+        UnifiedChatCompletionErrorResponse that = (UnifiedChatCompletionErrorResponse) o;
+        return Objects.equals(code, that.code) && Objects.equals(param, that.param) && Objects.equals(type, that.type);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(super.hashCode(), code, param, type);
+    }
+}
@@ -49,7 +49,7 @@ protected Exception buildError(String message, Request request, HttpResult resul
             var restStatus = toRestStatus(responseStatusCode);
             return new UnifiedChatCompletionException(
                 restStatus,
-                errorMessage(message, request, result, errorResponse, responseStatusCode),
+                constructErrorMessage(message, request, errorResponse, responseStatusCode),
                 "error",
                 restStatus.name().toLowerCase(Locale.ROOT)
             );
Original file line number	Diff line number	Diff line change
`@@ -22,7 +22,7 @@ public ErrorResponse(String errorMessage) {`
`22`	`22`	`this.errorStructureFound = true;`
`23`	`23`	`}`
`24`	`24`
`25`		`- private ErrorResponse(boolean errorStructureFound) {`
	`25`	`+ protected ErrorResponse(boolean errorStructureFound) {`
`26`	`26`	`this.errorMessage = "";`
`27`	`27`	`this.errorStructureFound = errorStructureFound;`
`28`	`28`	`}`