From 89713a9b153e205a28d548226b9bad543421ed0a Mon Sep 17 00:00:00 2001 From: Max Hniebergall <137079448+maxhniebergall@users.noreply.github.com> Date: Mon, 23 Sep 2024 09:03:36 -0400 Subject: [PATCH] [ML] Warn for model load failures if they have a status code <500 (#113280) * On model load failure, warn if the failure status code was less than 500 * Update docs/changelog/113280.yaml --- docs/changelog/113280.yaml | 5 +++++ .../assignment/TrainedModelAssignmentNodeService.java | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/113280.yaml diff --git a/docs/changelog/113280.yaml b/docs/changelog/113280.yaml new file mode 100644 index 0000000000000..1d8de0d87dd0d --- /dev/null +++ b/docs/changelog/113280.yaml @@ -0,0 +1,5 @@ +pr: 113280 +summary: Warn for model load failures if they have a status code <500 +area: Machine Learning +type: bug +issues: [] diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java index afd17b803cdcb..c86b3e710a736 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java @@ -9,6 +9,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.search.SearchPhaseExecutionException; @@ -775,7 +776,11 @@ private void updateStoredState(String deploymentId, RoutingInfoUpdate update, Ac } private void handleLoadFailure(TrainedModelDeploymentTask task, Exception ex, ActionListener retryListener) { - logger.error(() 
-> "[" + task.getDeploymentId() + "] model [" + task.getParams().getModelId() + "] failed to load", ex); + if (ex instanceof ElasticsearchException esEx && esEx.status().getStatus() < 500) { + logger.warn(() -> "[" + task.getDeploymentId() + "] model [" + task.getParams().getModelId() + "] failed to load", ex); + } else { + logger.error(() -> "[" + task.getDeploymentId() + "] model [" + task.getParams().getModelId() + "] failed to load", ex); + } if (task.isStopped()) { logger.debug( () -> format(