From 116a44a8ffb490c496dc41258062fa9ac2ac4dcb Mon Sep 17 00:00:00 2001 From: Max Hniebergall Date: Mon, 23 Sep 2024 13:06:23 -0400 Subject: [PATCH 1/3] [ML][backport] Warn for model load failures if they have a status code <500 * On model load failure, warn if the failure status code was less than 500 * Update docs/changelog/113280.yaml # Conflicts: # x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java --- .../assignment/TrainedModelAssignmentNodeService.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java index 7052e6f147b36..6c59874c76284 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java @@ -9,6 +9,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.search.SearchPhaseExecutionException; @@ -754,6 +755,12 @@ private void updateStoredState(String deploymentId, RoutingInfoUpdate update, Ac private void handleLoadFailure(TrainedModelDeploymentTask task, Exception ex) { logger.error(() -> "[" + task.getDeploymentId() + "] model [" + task.getParams().getModelId() + "] failed to load", ex); + private void handleLoadFailure(TrainedModelDeploymentTask task, Exception ex, ActionListener retryListener) { + if (ex instanceof ElasticsearchException esEx && esEx.status().getStatus() < 500) { + logger.warn(() -> "[" + task.getDeploymentId() + "] model [" 
+ task.getParams().getModelId() + "] failed to load", ex); + } else { + logger.error(() -> "[" + task.getDeploymentId() + "] model [" + task.getParams().getModelId() + "] failed to load", ex); + } if (task.isStopped()) { logger.debug( () -> format( From db2d061497c048d43cf297d2a12caa68edeea7ae Mon Sep 17 00:00:00 2001 From: Max Hniebergall <137079448+maxhniebergall@users.noreply.github.com> Date: Mon, 23 Sep 2024 13:10:55 -0400 Subject: [PATCH 2/3] Update docs/changelog/113410.yaml --- docs/changelog/113410.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/113410.yaml diff --git a/docs/changelog/113410.yaml b/docs/changelog/113410.yaml new file mode 100644 index 0000000000000..03dec6624b771 --- /dev/null +++ b/docs/changelog/113410.yaml @@ -0,0 +1,5 @@ +pr: 113410 +summary: "[ML][backport] Warn for model load failures if they have a status code <500" +area: Machine Learning +type: bug +issues: [] From 9cca95af2611e2a75e096a6fef89d255926e9f29 Mon Sep 17 00:00:00 2001 From: Max Hniebergall Date: Mon, 23 Sep 2024 13:21:01 -0400 Subject: [PATCH 3/3] fix merge --- .../inference/assignment/TrainedModelAssignmentNodeService.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java index 6c59874c76284..f1c9842e2d5c4 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java @@ -754,8 +754,6 @@ private void updateStoredState(String deploymentId, RoutingInfoUpdate update, Ac } private void handleLoadFailure(TrainedModelDeploymentTask task, Exception ex) { - logger.error(() -> "[" + task.getDeploymentId() + "] model [" + 
task.getParams().getModelId() + "] failed to load", ex); - private void handleLoadFailure(TrainedModelDeploymentTask task, Exception ex, ActionListener retryListener) { if (ex instanceof ElasticsearchException esEx && esEx.status().getStatus() < 500) { logger.warn(() -> "[" + task.getDeploymentId() + "] model [" + task.getParams().getModelId() + "] failed to load", ex); } else {