Skip to content

Commit ce9a97c

Browse files
Improve exception for trained model deployment scale up timeout
1 parent d10ef76 commit ce9a97c

File tree

4 files changed

+54
-7
lines changed

4 files changed

+54
-7
lines changed

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartTrainedModelDeploymentAction.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
6565
import org.elasticsearch.xpack.core.ml.utils.TransportVersionUtils;
6666
import org.elasticsearch.xpack.ml.MachineLearning;
67+
import org.elasticsearch.xpack.ml.inference.assignment.ModelDeploymentScaleUpException;
6768
import org.elasticsearch.xpack.ml.inference.assignment.TrainedModelAssignmentService;
6869
import org.elasticsearch.xpack.ml.inference.persistence.TrainedModelDefinitionDoc;
6970
import org.elasticsearch.xpack.ml.notifications.InferenceAuditor;
@@ -350,11 +351,14 @@ public void onFailure(Exception e) {
350351
@Override
351352
public void onTimeout(TimeValue timeout) {
352353
onFailure(
353-
new ElasticsearchStatusException(
354-
"Timed out after [{}] waiting for model deployment to start. "
355-
+ "Use the trained model stats API to track the state of the deployment.",
356-
RestStatus.REQUEST_TIMEOUT,
357-
request.getTimeout() // use the full request timeout in the error message
354+
new ModelDeploymentScaleUpException(
355+
format(
356+
"Timed out after [%s] waiting for trained model deployment [%s] to start. "
357+
+ "Please ensure the trained model deployment has started and try again.",
358+
request.getTimeout(),
359+
request.getDeploymentId()
360+
),
361+
RestStatus.REQUEST_TIMEOUT
358362
)
359363
);
360364
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/InferenceWaitForAllocation.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.elasticsearch.ElasticsearchStatusException;
1313
import org.elasticsearch.action.ActionListener;
1414
import org.elasticsearch.cluster.ClusterState;
15+
import org.elasticsearch.core.TimeValue;
1516
import org.elasticsearch.rest.RestStatus;
1617
import org.elasticsearch.tasks.TaskId;
1718
import org.elasticsearch.xpack.core.ml.action.InferModelAction;
@@ -20,6 +21,7 @@
2021
import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignment;
2122
import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignmentMetadata;
2223
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
24+
import org.elasticsearch.xpack.ml.inference.assignment.ModelDeploymentScaleUpException;
2325
import org.elasticsearch.xpack.ml.inference.assignment.TrainedModelAssignmentService;
2426

2527
import java.util.HashMap;
@@ -188,5 +190,20 @@ public void onFailure(Exception e) {
188190
pendingRequestCount.decrementAndGet();
189191
request.listener().onFailure(e);
190192
}
193+
194+
@Override
195+
public void onTimeout(TimeValue timeout) {
196+
onFailure(
197+
new ModelDeploymentScaleUpException(
198+
format(
199+
"Timed out after [%s] waiting for trained model deployment [%s] to start. "
200+
+ "Please ensure the trained model deployment has started and try again.",
201+
timeout,
202+
request.deploymentId()
203+
),
204+
RestStatus.REQUEST_TIMEOUT
205+
)
206+
);
207+
}
191208
}
192209
}
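
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/ModelDeploymentScaleUpException.java

Lines changed: 17 additions & 0 deletions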
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.ml.inference.assignment;
9+
10+
import org.elasticsearch.ElasticsearchStatusException;
11+
import org.elasticsearch.rest.RestStatus;
12+
13+
public class ModelDeploymentScaleUpException extends ElasticsearchStatusException {
14+
public ModelDeploymentScaleUpException(String message, RestStatus status) {
15+
super(message, status);
16+
}
17+
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentService.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
import org.apache.logging.log4j.LogManager;
1111
import org.apache.logging.log4j.Logger;
12-
import org.elasticsearch.ElasticsearchStatusException;
1312
import org.elasticsearch.action.ActionListener;
1413
import org.elasticsearch.action.ActionRequest;
1514
import org.elasticsearch.action.ActionType;
@@ -37,6 +36,7 @@
3736
import java.util.Objects;
3837
import java.util.function.Predicate;
3938

39+
import static org.elasticsearch.core.Strings.format;
4040
import static org.elasticsearch.xpack.core.ClientHelper.ML_ORIGIN;
4141

4242
public class TrainedModelAssignmentService {
@@ -120,7 +120,16 @@ public void onTimeout(TimeValue timeout) {
120120

121121
public interface WaitForAssignmentListener extends ActionListener<TrainedModelAssignment> {
122122
default void onTimeout(TimeValue timeout) {
123-
onFailure(new ElasticsearchStatusException("Starting deployment timed out after [{}]", RestStatus.REQUEST_TIMEOUT, timeout));
123+
onFailure(
124+
new ModelDeploymentScaleUpException(
125+
format(
126+
"Timed out after [%s] waiting for trained model deployment to start. "
127+
+ "Please ensure the trained model deployment has started and try again.",
128+
timeout
129+
),
130+
RestStatus.REQUEST_TIMEOUT
131+
)
132+
);
124133
}
125134
}
126135

0 commit comments

Comments
 (0)