From af1f7d8746f015775e4a7809f7724f4be6d9ed38 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 21 Mar 2025 09:31:36 +0000 Subject: [PATCH 1/2] Prevent default inference models from updating the cluster state when deleted The Elastic inference service removes the default models at startup if the node cannot access EIS. Since #125242 we no longer store default models in the cluster state, but we still try to delete them from it. This change ensures that we don't try to update the cluster state when a default model is deleted, since the delete is not performed on the master node and default models are never stored in the cluster state. --- .../inference/registry/ModelRegistry.java | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java index c93bebe6f2ce8..caa233ab46da6 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java @@ -46,6 +46,7 @@ import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.TimeValue; import org.elasticsearch.core.Tuple; +import org.elasticsearch.gateway.GatewayService; import org.elasticsearch.index.engine.VersionConflictEngineException; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; @@ -627,8 +628,8 @@ public void storeModel(Model model, ActionListener listener, TimeValue storeModel(model, true, listener, timeout); } - private void storeModel(Model model, boolean addToClusterState, ActionListener listener, TimeValue timeout) { - ActionListener bulkResponseActionListener = getStoreIndexListener(model, addToClusterState, listener, timeout); + private void storeModel(Model model, 
boolean updateClusterState, ActionListener listener, TimeValue timeout) { + ActionListener bulkResponseActionListener = getStoreIndexListener(model, updateClusterState, listener, timeout); IndexRequest configRequest = createIndexRequest( Model.documentId(model.getConfigurations().getInferenceEntityId()), @@ -653,7 +654,7 @@ private void storeModel(Model model, boolean addToClusterState, ActionListener getStoreIndexListener( Model model, - boolean addToClusterState, + boolean updateClusterState, ActionListener listener, TimeValue timeout ) { @@ -680,7 +681,7 @@ private ActionListener getStoreIndexListener( BulkItemResponse.Failure failure = getFirstBulkFailure(bulkItemResponses); if (failure == null) { - if (addToClusterState) { + if (updateClusterState) { var storeListener = getStoreMetadataListener(inferenceEntityId, listener); try { var projectId = clusterService.state().projectState().projectId(); @@ -777,7 +778,8 @@ public synchronized void removeDefaultConfigs(Set inferenceEntityIds, Ac } defaultConfigIds.keySet().removeAll(inferenceEntityIds); - deleteModels(inferenceEntityIds, listener); + // default models are not stored in the cluster state. 
+ deleteModels(inferenceEntityIds, false, listener); } public void deleteModel(String inferenceEntityId, ActionListener listener) { @@ -785,6 +787,10 @@ public void deleteModel(String inferenceEntityId, ActionListener listen } public void deleteModels(Set inferenceEntityIds, ActionListener listener) { + deleteModels(inferenceEntityIds, true, listener); + } + + private void deleteModels(Set inferenceEntityIds, boolean updateClusterState, ActionListener listener) { var lockedInferenceIds = new HashSet<>(inferenceEntityIds); lockedInferenceIds.retainAll(preventDeletionLock); @@ -803,16 +809,21 @@ public void deleteModels(Set inferenceEntityIds, ActionListener } var request = createDeleteRequest(inferenceEntityIds); - client.execute(DeleteByQueryAction.INSTANCE, request, getDeleteModelClusterStateListener(inferenceEntityIds, listener)); + client.execute(DeleteByQueryAction.INSTANCE, request, getDeleteModelClusterStateListener(inferenceEntityIds, updateClusterState, listener)); } private ActionListener getDeleteModelClusterStateListener( Set inferenceEntityIds, + boolean updateClusterState, ActionListener listener ) { return new ActionListener<>() { @Override public void onResponse(BulkByScrollResponse bulkByScrollResponse) { + if (updateClusterState == false) { + listener.onResponse(Boolean.TRUE); + return; + } var clusterStateListener = new ActionListener() { @Override public void onResponse(AcknowledgedResponse acknowledgedResponse) { @@ -920,6 +931,11 @@ public void clusterChanged(ClusterChangedEvent event) { return; } + // wait for the cluster state to be recovered + if (event.state().blocks().hasGlobalBlock(GatewayService.STATE_NOT_RECOVERED_BLOCK)) { + return; + } + if (event.state().metadata().projects().size() > 1) { // TODO: Add support to handle multi-projects return; From fc9a2ab2cc1b2f65fc7df9913cb552cb5fa113d3 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 21 Mar 2025 09:41:40 +0000 Subject: [PATCH 2/2] [CI] Auto commit changes from 
spotless --- .../xpack/inference/registry/ModelRegistry.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java index caa233ab46da6..1f5a2cd50c01d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java @@ -809,7 +809,11 @@ private void deleteModels(Set inferenceEntityIds, boolean updateClusterS } var request = createDeleteRequest(inferenceEntityIds); - client.execute(DeleteByQueryAction.INSTANCE, request, getDeleteModelClusterStateListener(inferenceEntityIds, updateClusterState, listener)); + client.execute( + DeleteByQueryAction.INSTANCE, + request, + getDeleteModelClusterStateListener(inferenceEntityIds, updateClusterState, listener) + ); } private ActionListener getDeleteModelClusterStateListener(