Skip to content

Commit 53668f7

Browse files
Improve exception for trained model deployment scale up timeout (#128218)
* Improve exception for trained model deployment scale up timeout
* Update docs/changelog/128218.yaml
* Rename exception and update exception message

---------

Co-authored-by: Elastic Machine <[email protected]>
1 parent ac08e9c commit 53668f7

File tree

5 files changed

+57
-8
lines changed

5 files changed

+57
-8
lines changed

docs/changelog/128218.yaml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
pr: 128218
2+
summary: Improve exception for trained model deployment scale up timeout
3+
area: Machine Learning
4+
type: enhancement
5+
issues: []

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartTrainedModelDeploymentAction.java

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,7 @@
6464
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
6565
import org.elasticsearch.xpack.core.ml.utils.TransportVersionUtils;
6666
import org.elasticsearch.xpack.ml.MachineLearning;
67+
import org.elasticsearch.xpack.ml.inference.assignment.ModelDeploymentTimeoutException;
6768
import org.elasticsearch.xpack.ml.inference.assignment.TrainedModelAssignmentService;
6869
import org.elasticsearch.xpack.ml.inference.persistence.TrainedModelDefinitionDoc;
6970
import org.elasticsearch.xpack.ml.notifications.InferenceAuditor;
@@ -350,11 +351,14 @@ public void onFailure(Exception e) {
350351
@Override
351352
public void onTimeout(TimeValue timeout) {
352353
onFailure(
353-
new ElasticsearchStatusException(
354-
"Timed out after [{}] waiting for model deployment to start. "
355-
+ "Use the trained model stats API to track the state of the deployment.",
356-
RestStatus.REQUEST_TIMEOUT,
357-
request.getTimeout() // use the full request timeout in the error message
354+
new ModelDeploymentTimeoutException(
355+
format(
356+
"Timed out after [%s] waiting for trained model deployment [%s] to start. "
357+
+ "Use the trained model stats API to track the state of the deployment "
358+
+ "and try again once it has started.",
359+
request.getTimeout(),
360+
request.getDeploymentId()
361+
)
358362
)
359363
);
360364
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/InferenceWaitForAllocation.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.elasticsearch.ElasticsearchStatusException;
1313
import org.elasticsearch.action.ActionListener;
1414
import org.elasticsearch.cluster.ClusterState;
15+
import org.elasticsearch.core.TimeValue;
1516
import org.elasticsearch.rest.RestStatus;
1617
import org.elasticsearch.tasks.TaskId;
1718
import org.elasticsearch.xpack.core.ml.action.InferModelAction;
@@ -20,6 +21,7 @@
2021
import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignment;
2122
import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignmentMetadata;
2223
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
24+
import org.elasticsearch.xpack.ml.inference.assignment.ModelDeploymentTimeoutException;
2325
import org.elasticsearch.xpack.ml.inference.assignment.TrainedModelAssignmentService;
2426

2527
import java.util.HashMap;
@@ -188,5 +190,19 @@ public void onFailure(Exception e) {
188190
pendingRequestCount.decrementAndGet();
189191
request.listener().onFailure(e);
190192
}
193+
194+
@Override
195+
public void onTimeout(TimeValue timeout) {
196+
onFailure(
197+
new ModelDeploymentTimeoutException(
198+
format(
199+
"Timed out after [%s] waiting for trained model deployment [%s] to start. "
200+
+ "Use the trained model stats API to track the state of the deployment and try again once it has started.",
201+
timeout,
202+
request.deploymentId()
203+
)
204+
)
205+
);
206+
}
191207
}
192208
}
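x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/ModelDeploymentTimeoutException.java

Lines changed: 17 additions & 0 deletions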
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,17 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.ml.inference.assignment;
9+
10+
import org.elasticsearch.ElasticsearchStatusException;
11+
import org.elasticsearch.rest.RestStatus;
12+
13+
public class ModelDeploymentTimeoutException extends ElasticsearchStatusException {
14+
public ModelDeploymentTimeoutException(String message) {
15+
super(message, RestStatus.REQUEST_TIMEOUT);
16+
}
17+
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentService.java

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99

1010
import org.apache.logging.log4j.LogManager;
1111
import org.apache.logging.log4j.Logger;
12-
import org.elasticsearch.ElasticsearchStatusException;
1312
import org.elasticsearch.action.ActionListener;
1413
import org.elasticsearch.action.ActionRequest;
1514
import org.elasticsearch.action.ActionType;
@@ -25,7 +24,6 @@
2524
import org.elasticsearch.core.Nullable;
2625
import org.elasticsearch.core.TimeValue;
2726
import org.elasticsearch.node.NodeClosedException;
28-
import org.elasticsearch.rest.RestStatus;
2927
import org.elasticsearch.threadpool.ThreadPool;
3028
import org.elasticsearch.transport.ConnectTransportException;
3129
import org.elasticsearch.xpack.core.ml.action.CreateTrainedModelAssignmentAction;
@@ -37,6 +35,7 @@
3735
import java.util.Objects;
3836
import java.util.function.Predicate;
3937

38+
import static org.elasticsearch.core.Strings.format;
4039
import static org.elasticsearch.xpack.core.ClientHelper.ML_ORIGIN;
4140

4241
public class TrainedModelAssignmentService {
@@ -120,7 +119,15 @@ public void onTimeout(TimeValue timeout) {
120119

121120
public interface WaitForAssignmentListener extends ActionListener<TrainedModelAssignment> {
122121
default void onTimeout(TimeValue timeout) {
123-
onFailure(new ElasticsearchStatusException("Starting deployment timed out after [{}]", RestStatus.REQUEST_TIMEOUT, timeout));
122+
onFailure(
123+
new ModelDeploymentTimeoutException(
124+
format(
125+
"Timed out after [%s] waiting for trained model deployment to start. "
126+
+ "Use the trained model stats API to track the state of the deployment and try again once it has started.",
127+
timeout
128+
)
129+
)
130+
);
124131
}
125132
}
126133

0 commit comments

Comments
 (0)