From 0a0418dd8e400e391b50281d83d98555536f8eb0 Mon Sep 17 00:00:00 2001 From: Joshua Adams Date: Fri, 26 Sep 2025 16:53:41 +0100 Subject: [PATCH 01/11] Changes FailedToCommitClusterStateException to NotMasterException Changes a FailedToCommitClusterStateException incorrectly thrown prior to cluster state update publication to a NotMasterException --- .../cluster/NotMasterException.java | 4 +++ .../cluster/coordination/Coordinator.java | 7 ++-- .../cluster/service/MasterService.java | 33 ++++++++++++++++--- 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/NotMasterException.java b/server/src/main/java/org/elasticsearch/cluster/NotMasterException.java index 43c43bda9aa8c..98da31604a33d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/NotMasterException.java +++ b/server/src/main/java/org/elasticsearch/cluster/NotMasterException.java @@ -26,6 +26,10 @@ public NotMasterException(StreamInput in) throws IOException { super(in); } + public NotMasterException(String msg, Object... args) { + super(msg, args); + } + @Override public Throwable fillInStackTrace() { return this; diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java index 1976bda6c6aba..f003746331192 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java @@ -24,6 +24,7 @@ import org.elasticsearch.cluster.ClusterStatePublicationEvent; import org.elasticsearch.cluster.ClusterStateUpdateTask; import org.elasticsearch.cluster.LocalMasterServiceTask; +import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.block.ClusterBlocks; import org.elasticsearch.cluster.coordination.ClusterFormationFailureHelper.ClusterFormationState; import org.elasticsearch.cluster.coordination.CoordinationMetadata.VotingConfigExclusion; @@ -1552,7 +1553,7 @@ public void publish( clusterStatePublicationEvent.getNewState().term() ) ); - throw new FailedToCommitClusterStateException( + throw new NotMasterException( "node is no longer master for term " + clusterStatePublicationEvent.getNewState().term() + " while handling publication" @@ -1638,8 +1639,8 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) } } } - } catch (FailedToCommitClusterStateException failedToCommitClusterStateException) { - publishListener.onFailure(failedToCommitClusterStateException); + } catch (FailedToCommitClusterStateException | NotMasterException e) { + publishListener.onFailure(e); } catch (Exception e) { assert false : e; // all exceptions should already be caught and wrapped in a FailedToCommitClusterStateException logger.error(() -> "[" + clusterStatePublicationEvent.getSummary() + "] publishing unexpectedly failed", e); diff --git a/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java b/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java index 596cce87fd151..a917e992d1c6f 100644 --- a/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java +++ b/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java @@ -415,13 +415,30 @@ public void onResponse(Void unused) { @Override public void onFailure(Exception exception) { - if (exception instanceof FailedToCommitClusterStateException failedToCommitClusterStateException) { + if (exception instanceof FailedToCommitClusterStateException || exception instanceof NotMasterException) { final long notificationStartTime = threadPool.rawRelativeTimeInMillis(); final long version = newClusterState.version(); - logger.warn(() -> format("failing [%s]: failed to commit cluster state version [%s]", summary, version), exception); + + if (exception instanceof FailedToCommitClusterStateException) { + logger.warn( + () -> format("failing [%s]: failed to commit cluster state version [%s]", summary, version), + exception + ); + } else { + logger.debug( + () -> format( + "node is no longer the master prior to publication of cluster state version [%s]: [%s]", + version, + summary + ), + exception + ); + } + for (final var executionResult : executionResults) { - executionResult.onPublishFailure(failedToCommitClusterStateException); + executionResult.onPublishFailure(exception); } + final long notificationMillis = threadPool.rawRelativeTimeInMillis() - notificationStartTime; clusterStateUpdateStatsTracker.onPublicationFailure( threadPool.rawRelativeTimeInMillis(), @@ -985,11 +1002,17 @@ void onClusterStateUnchanged(ClusterState clusterState) { } } - void onPublishFailure(FailedToCommitClusterStateException e) { + void onPublishFailure(Exception e) { if (publishedStateConsumer == null && onPublicationSuccess == null) { assert failure != null; var taskFailure = failure; - failure = new FailedToCommitClusterStateException(e.getMessage(), e); + + if (e instanceof FailedToCommitClusterStateException) { + failure = new FailedToCommitClusterStateException(e.getMessage(), e); + } else { + failure = new NotMasterException(e.getMessage(), e); + } + failure.addSuppressed(taskFailure); notifyFailure(); return; From f2c836db014d7b87bf1a1f59a515afa134820c4e Mon Sep 17 00:00:00 2001 From: Joshua Adams Date: Tue, 30 Sep 2025 14:56:35 +0100 Subject: [PATCH 02/11] Add FailedToPublishClusterStateException - Introduces a FailedToPublishClusterStateException. - Changes three FailedToCommitClusterStateExceptions thrown prior to the cluster state update publication to FailedToPublishClusterStateExceptions. Relates to: ES-13061 --- .../elasticsearch/ElasticsearchException.java | 10 ++++ .../cluster/coordination/Coordinator.java | 12 ++--- .../FailedToCommitClusterStateException.java | 16 +++++-- .../FailedToPublishClusterStateException.java | 46 +++++++++++++++++++ .../cluster/service/MasterService.java | 10 +++- .../resources/transport/upper_bounds/9.2.csv | 2 +- .../ExceptionSerializationTests.java | 2 + 7 files changed, 86 insertions(+), 12 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/cluster/coordination/FailedToPublishClusterStateException.java diff --git a/server/src/main/java/org/elasticsearch/ElasticsearchException.java b/server/src/main/java/org/elasticsearch/ElasticsearchException.java index 79d1df6a09be4..1a08155f7c9e9 100644 --- a/server/src/main/java/org/elasticsearch/ElasticsearchException.java +++ b/server/src/main/java/org/elasticsearch/ElasticsearchException.java @@ -87,6 +87,10 @@ public class ElasticsearchException extends RuntimeException implements ToXConte private static final TransportVersion UNKNOWN_VERSION_ADDED = TransportVersion.zero(); + private static final TransportVersion FAILED_TO_PUBLISH_CLUSTER_STATE_EXCEPTION_TRANSPORT_VERSION = TransportVersion.fromName( + "failed_to_publish_cluster_state_exception" + ); + /** * Passed in the {@link Params} of {@link #generateThrowableXContent(XContentBuilder, Params, Throwable)} * to control if the {@code caused_by} element should render. Unlike most parameters to {@code toXContent} methods this parameter is @@ -2022,6 +2026,12 @@ private enum ElasticsearchExceptionHandle { 184, TransportVersions.REMOTE_EXCEPTION, TransportVersions.REMOTE_EXCEPTION_8_19 + ), + FAILED_TO_PUBLISH_CLUSTER_STATE_EXCEPTION( + org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException.class, + org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException::new, + 185, + FAILED_TO_PUBLISH_CLUSTER_STATE_EXCEPTION_TRANSPORT_VERSION ); final Class exceptionClass; diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java index f003746331192..47ffbb85377ec 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java @@ -1568,7 +1568,7 @@ public void publish( clusterStatePublicationEvent.getSummary() ) ); - throw new FailedToCommitClusterStateException("publication " + currentPublication.get() + " already in progress"); + throw new FailedToPublishClusterStateException("publication " + currentPublication.get() + " already in progress"); } assert assertPreviousStateConsistency(clusterStatePublicationEvent); @@ -1587,7 +1587,7 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) } catch (Exception e) { logger.debug(() -> "[" + clusterStatePublicationEvent.getSummary() + "] publishing failed during context creation", e); becomeCandidate("publication context creation"); - throw new FailedToCommitClusterStateException("publishing failed during context creation", e); + throw new FailedToPublishClusterStateException("publishing failed during context creation", e); } try (Releasable ignored = publicationContext::decRef) { @@ -1608,7 +1608,7 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) e ); becomeCandidate("publication creation"); - throw new FailedToCommitClusterStateException("publishing failed while starting", e); + throw new FailedToPublishClusterStateException("publishing failed while starting", e); } try { @@ -1639,12 +1639,12 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) } } } - } catch (FailedToCommitClusterStateException | NotMasterException e) { + } catch (FailedToPublishClusterStateException | FailedToCommitClusterStateException | NotMasterException e) { publishListener.onFailure(e); } catch (Exception e) { - assert false : e; // all exceptions should already be caught and wrapped in a FailedToCommitClusterStateException + assert false : e; // all exceptions should already be caught and wrapped in a FailedToPublishClusterStateException | logger.error(() -> "[" + clusterStatePublicationEvent.getSummary() + "] publishing unexpectedly failed", e); - publishListener.onFailure(new FailedToCommitClusterStateException("publishing unexpectedly failed", e)); + publishListener.onFailure(new FailedToPublishClusterStateException("publishing unexpectedly failed", e)); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/FailedToCommitClusterStateException.java b/server/src/main/java/org/elasticsearch/cluster/coordination/FailedToCommitClusterStateException.java index 2731bc2973321..0e0940569c0f4 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/FailedToCommitClusterStateException.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/FailedToCommitClusterStateException.java @@ -9,15 +9,23 @@ package org.elasticsearch.cluster.coordination; import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.support.master.TransportMasterNodeAction; import org.elasticsearch.common.io.stream.StreamInput; import java.io.IOException; /** - * Thrown when a cluster state publication fails to commit the new cluster state. If publication fails then a new master is elected but the - * update might or might not take effect, depending on whether or not the newly-elected master accepted the published state that failed to - * be committed. - * + * Exception indicating a cluster state update was published but not committed to all nodes. + *

+ * If this exception is thrown, then the cluster state update was published, but is not guaranteed + * to be committed on any nodes, including the next master node. This exception should only be thrown when there is + * ambiguity whether a cluster state update has been committed. + *

+ * For exceptions thrown prior to publication, + * when the cluster update has definitely failed, use a {@link FailedToPublishClusterStateException}. + *

+ * This is a retryable exception inside {@link TransportMasterNodeAction} + *

* See {@link ClusterStatePublisher} for more details. */ public class FailedToCommitClusterStateException extends ElasticsearchException { diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/FailedToPublishClusterStateException.java b/server/src/main/java/org/elasticsearch/cluster/coordination/FailedToPublishClusterStateException.java new file mode 100644 index 0000000000000..2ef3c4b3f3706 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/FailedToPublishClusterStateException.java @@ -0,0 +1,46 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ +package org.elasticsearch.cluster.coordination; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.support.master.TransportMasterNodeAction; +import org.elasticsearch.common.io.stream.StreamInput; + +import java.io.IOException; + +/** + * Exception indicating a cluster state update failed prior to publication. + *

+ * If this exception is thrown, then the cluster state update was not published to any node. + * It is therefore impossible for the new master to have committed this state. + *

+ * For exceptions thrown after publication, when the cluster state update may or may not have been committed, + * use a {@link FailedToCommitClusterStateException}. + *

+ * This is a retryable exception inside {@link TransportMasterNodeAction} + */ +public class FailedToPublishClusterStateException extends ElasticsearchException { + + public FailedToPublishClusterStateException(String msg) { + super(msg); + } + + public FailedToPublishClusterStateException(StreamInput in) throws IOException { + super(in); + } + + public FailedToPublishClusterStateException(String msg, Throwable cause, Object... args) { + super(msg, cause, args); + } + + @Override + public Throwable fillInStackTrace() { + return this; + } +} diff --git a/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java b/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java index a917e992d1c6f..4cb6ebad85af4 100644 --- a/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java +++ b/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java @@ -26,6 +26,7 @@ import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.coordination.ClusterStatePublisher; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; +import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.metadata.ProcessClusterEventTimeoutException; import org.elasticsearch.cluster.metadata.ProjectMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; @@ -415,7 +416,9 @@ public void onResponse(Void unused) { @Override public void onFailure(Exception exception) { - if (exception instanceof FailedToCommitClusterStateException || exception instanceof NotMasterException) { + if (exception instanceof FailedToPublishClusterStateException + || exception instanceof FailedToCommitClusterStateException + || exception instanceof NotMasterException) { final long notificationStartTime = threadPool.rawRelativeTimeInMillis(); final long version = newClusterState.version(); @@ -424,6 +427,11 @@ public void onFailure(Exception exception) { () -> format("failing [%s]: failed to commit cluster state version [%s]", summary, version), exception ); + } else if (exception instanceof FailedToPublishClusterStateException) { + logger.warn( + () -> format("failing [%s]: failed to publish cluster state version [%s]", summary, version), + exception + ); } else { logger.debug( () -> format( diff --git a/server/src/main/resources/transport/upper_bounds/9.2.csv b/server/src/main/resources/transport/upper_bounds/9.2.csv index 78180d915cd67..c114101f8655a 100644 --- a/server/src/main/resources/transport/upper_bounds/9.2.csv +++ b/server/src/main/resources/transport/upper_bounds/9.2.csv @@ -1 +1 @@ -roles_security_stats,9176000 +failed_to_publish_cluster_state_exception,9183000 diff --git a/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java b/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java index 64c0b9a780cfe..8ab7fd9b9c9e1 100644 --- a/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java +++ b/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java @@ -25,6 +25,7 @@ import org.elasticsearch.cluster.action.shard.ShardStateAction; import org.elasticsearch.cluster.block.ClusterBlockException; import org.elasticsearch.cluster.coordination.CoordinationStateRejectedException; +import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.coordination.NoMasterBlockService; import org.elasticsearch.cluster.coordination.NodeHealthCheckFailureException; import org.elasticsearch.cluster.desirednodes.VersionConflictException; @@ -846,6 +847,7 @@ public void testIds() { ids.put(182, IngestPipelineException.class); ids.put(183, IndexDocFailureStoreStatus.ExceptionWithFailureStoreStatus.class); ids.put(184, RemoteException.class); + ids.put(185, FailedToPublishClusterStateException.class); Map, Integer> reverse = new HashMap<>(); for (Map.Entry> entry : ids.entrySet()) { From 350da550a97851fa7878bc700f705e59a7e6c955 Mon Sep 17 00:00:00 2001 From: Joshua Adams Date: Thu, 2 Oct 2025 12:48:04 +0100 Subject: [PATCH 03/11] Extend use of FailedToPublishClusterStateException --- .../action/shard/ShardStateAction.java | 4 +- .../cluster/service/MasterService.java | 8 +++- .../service/FileSettingsService.java | 3 ++ .../snapshots/SnapshotsService.java | 5 +- .../TransportMasterNodeActionTests.java | 5 +- .../action/shard/ShardStateActionTests.java | 7 ++- .../cluster/coordination/JoinHelperTests.java | 10 ++++ .../routing/BatchedRerouteServiceTests.java | 46 +++++++++++++++---- .../cluster/service/MasterServiceTests.java | 23 ++++++++-- .../org/elasticsearch/test/ESTestCase.java | 17 +++++++ .../xpack/ilm/MoveToErrorStepUpdateTask.java | 3 +- .../TrainedModelAssignmentService.java | 2 + 12 files changed, 107 insertions(+), 26 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/action/shard/ShardStateAction.java b/server/src/main/java/org/elasticsearch/cluster/action/shard/ShardStateAction.java index f24d5be356ee5..9d7bdd7f21d14 100644 --- a/server/src/main/java/org/elasticsearch/cluster/action/shard/ShardStateAction.java +++ b/server/src/main/java/org/elasticsearch/cluster/action/shard/ShardStateAction.java @@ -25,6 +25,7 @@ import org.elasticsearch.cluster.ClusterStateTaskListener; import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; +import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.metadata.ProjectId; @@ -164,6 +165,7 @@ private void sendShardAction( private static final Class[] MASTER_CHANNEL_EXCEPTIONS = new Class[] { NotMasterException.class, ConnectTransportException.class, + FailedToPublishClusterStateException.class, FailedToCommitClusterStateException.class }; private static boolean isMasterChannelException(Throwable exp) { @@ -936,7 +938,7 @@ public StartedShardEntry getStartedShardEntry() { public void onFailure(Exception e) { if (e instanceof NotMasterException) { logger.debug(() -> format("%s no longer master while starting shard [%s]", entry.shardId, entry)); - } else if (e instanceof FailedToCommitClusterStateException) { + } else if (e instanceof FailedToPublishClusterStateException || e instanceof FailedToCommitClusterStateException) { logger.debug(() -> format("%s unexpected failure while starting shard [%s]", entry.shardId, entry), e); } else { logger.error(() -> format("%s unexpected failure while starting shard [%s]", entry.shardId, entry), e); diff --git a/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java b/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java index 4cb6ebad85af4..285c29d78e8b1 100644 --- a/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java +++ b/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java @@ -1015,7 +1015,9 @@ void onPublishFailure(Exception e) { assert failure != null; var taskFailure = failure; - if (e instanceof FailedToCommitClusterStateException) { + if (e instanceof FailedToPublishClusterStateException) { + failure = new FailedToPublishClusterStateException(e.getMessage(), e); + } else if (e instanceof FailedToCommitClusterStateException) { failure = new FailedToCommitClusterStateException(e.getMessage(), e); } else { failure = new NotMasterException(e.getMessage(), e); @@ -1281,7 +1283,9 @@ synchronized ClusterStateUpdateStats getStatistics() { } public static boolean isPublishFailureException(Exception e) { - return e instanceof NotMasterException || e instanceof FailedToCommitClusterStateException; + return e instanceof NotMasterException + || e instanceof FailedToCommitClusterStateException + || e instanceof FailedToPublishClusterStateException; } private final Runnable queuesProcessor = new AbstractRunnable() { diff --git a/server/src/main/java/org/elasticsearch/reservedstate/service/FileSettingsService.java b/server/src/main/java/org/elasticsearch/reservedstate/service/FileSettingsService.java index 47182c30dd30b..80bf47a21c47f 100644 --- a/server/src/main/java/org/elasticsearch/reservedstate/service/FileSettingsService.java +++ b/server/src/main/java/org/elasticsearch/reservedstate/service/FileSettingsService.java @@ -20,6 +20,7 @@ import org.elasticsearch.cluster.ClusterStateListener; import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; +import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.ReservedStateMetadata; @@ -254,6 +255,8 @@ protected void onProcessFileChangesException(Path file, Exception e) { } else if (cause instanceof NotMasterException) { logger().error(Strings.format("Node is no longer master while processing file [%s]", file), e); return; + } else if (cause instanceof FailedToPublishClusterStateException) { + logger().error(Strings.format("Unable to publish cluster state while processing file [%s]", file), e); } } diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index f28d4948f657a..fe6414e591771 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -35,6 +35,7 @@ import org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus; import org.elasticsearch.cluster.SnapshotsInProgress.ShardState; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; +import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.cluster.metadata.DataStreamAlias; import org.elasticsearch.cluster.metadata.IndexMetadata; @@ -1310,7 +1311,7 @@ private void handleFinalizationFailure( RepositoryData repositoryData, UpdatedShardGenerations updatedShardGenerations ) { - if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToCommitClusterStateException.class) != null) { + if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToPublishClusterStateException.class, FailedToCommitClusterStateException.class) != null) { // Failure due to not being master any more, don't try to remove snapshot from cluster state the next master // will try ending this snapshot again logger.debug(() -> "[" + snapshot + "] failed to update cluster state during snapshot finalization", e); @@ -1962,7 +1963,7 @@ private void failAllListenersOnMasterFailOver(Exception e) { logger.debug("Failing all snapshot operation listeners because this node is not master any longer", e); final List readyToResolveListeners = new ArrayList<>(); synchronized (currentlyFinalizing) { - if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToCommitClusterStateException.class) != null) { + if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToPublishClusterStateException.class, FailedToCommitClusterStateException.class) != null) { repositoryOperations.clear(); for (final Snapshot snapshot : snapshotCompletionListeners.keySet()) { failSnapshotCompletionListeners( diff --git a/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java b/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java index 823af466cae66..d81388db7378b 100644 --- a/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java @@ -629,10 +629,7 @@ protected void masterOperation(Task task, Request request, ClusterState state, A clusterService, ClusterState.builder(ClusterStateCreationUtils.state(localNode, remoteNode, allNodes)).incrementVersion().build() ); - Exception failure = randomBoolean() - ? new FailedToCommitClusterStateException("Fake error") - : new NotMasterException("Fake error"); - listener.onFailure(failure); + listener.onFailure(randomClusterStateUpdateException()); } }, null, request, listener); diff --git a/server/src/test/java/org/elasticsearch/cluster/action/shard/ShardStateActionTests.java b/server/src/test/java/org/elasticsearch/cluster/action/shard/ShardStateActionTests.java index b189d7796b51a..6faaca4ad329b 100644 --- a/server/src/test/java/org/elasticsearch/cluster/action/shard/ShardStateActionTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/action/shard/ShardStateActionTests.java @@ -22,6 +22,7 @@ import org.elasticsearch.cluster.action.shard.ShardStateAction.FailedShardEntry; import org.elasticsearch.cluster.action.shard.ShardStateAction.StartedShardEntry; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; +import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.routing.IndexRoutingTable; import org.elasticsearch.cluster.routing.RerouteService; import org.elasticsearch.cluster.routing.RoutingTable; @@ -235,7 +236,11 @@ public void testMasterChannelException() throws InterruptedException { if (randomBoolean()) { transport.handleRemoteError( requestId, - randomFrom(new NotMasterException("simulated"), new FailedToCommitClusterStateException("simulated")) + randomFrom( + new NotMasterException("simulated"), + new FailedToPublishClusterStateException("simulated"), + new FailedToCommitClusterStateException("simulated") + ) ); } else { if (randomBoolean()) { diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/JoinHelperTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/JoinHelperTests.java index 4179a6d3958ea..4ce67cab09335 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/JoinHelperTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/JoinHelperTests.java @@ -216,6 +216,16 @@ public void testFailedJoinAttemptLogLevel() { is(Level.DEBUG) ); + assertThat( + JoinHelper.FailedJoinAttempt.getLogLevel( + new RemoteTransportException( + "caused by FailedToPublishClusterStateException", + new FailedToPublishClusterStateException("test") + ) + ), + is(Level.DEBUG) + ); + assertThat( JoinHelper.FailedJoinAttempt.getLogLevel( new RemoteTransportException("caused by NotMasterException", new NotMasterException("test")) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/BatchedRerouteServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/BatchedRerouteServiceTests.java index 0a97c903a4965..3636f77f68b32 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/BatchedRerouteServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/BatchedRerouteServiceTests.java @@ -16,6 +16,7 @@ import org.elasticsearch.cluster.ClusterStateUpdateTask; import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; +import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Priority; import org.elasticsearch.common.Randomness; @@ -221,7 +222,7 @@ public void testNotifiesOnFailure() throws InterruptedException { randomBoolean() ? ClusterServiceUtils.createClusterStatePublisher(clusterService.getClusterApplierService()) : (event, publishListener, ackListener) -> publishListener.onFailure( - new FailedToCommitClusterStateException("simulated") + randomClusterStateUpdateException() ) ); } @@ -244,11 +245,6 @@ public void testExceptionFidelity() { try (var mockLog = MockLog.capture(BatchedRerouteService.class)) { - clusterService.getMasterService() - .setClusterStatePublisher( - (event, publishListener, ackListener) -> publishListener.onFailure(new FailedToCommitClusterStateException("simulated")) - ); - // Case 1: an exception thrown from within the reroute itself mockLog.addExpectation( @@ -284,9 +280,14 @@ public void testExceptionFidelity() { new MockLog.UnseenEventExpectation("no info", BatchedRerouteService.class.getCanonicalName(), Level.INFO, "*") ); - // Case 2: a FailedToCommitClusterStateException (see the call to setClusterStatePublisher above) + // Case 2: a FailedToPublishClusterStateException + + clusterService.getMasterService() + .setClusterStatePublisher( + (event, publishListener, ackListener) -> publishListener.onFailure(new FailedToPublishClusterStateException("test")) + ); - final BatchedRerouteService batchedRerouteService = new BatchedRerouteService(clusterService, (s, r, l) -> { + BatchedRerouteService batchedRerouteService = new BatchedRerouteService(clusterService, (s, r, l) -> { l.onResponse(null); return ClusterState.builder(s).build(); }); @@ -304,12 +305,37 @@ public void testExceptionFidelity() { batchedRerouteService.reroute("publish failure", randomFrom(EnumSet.allOf(Priority.class)), publishFailureFuture); expectThrows( ExecutionException.class, - FailedToCommitClusterStateException.class, + FailedToPublishClusterStateException.class, () -> publishFailureFuture.get(10, TimeUnit.SECONDS) ); mockLog.assertAllExpectationsMatched(); - // Case 3: a NotMasterException + // Case 3: a FailedToCommitClusterStateException + + clusterService.getMasterService() + .setClusterStatePublisher( + (event, publishListener, ackListener) -> publishListener.onFailure(new FailedToCommitClusterStateException("test")) + ); + + mockLog.addExpectation( + new MockLog.SeenEventExpectation( + "commit failure", + BatchedRerouteService.class.getCanonicalName(), + Level.DEBUG, + "unexpected failure" + ) + ); + + final var commitFailureFuture = new PlainActionFuture(); + batchedRerouteService.reroute("commit failure", randomFrom(EnumSet.allOf(Priority.class)), commitFailureFuture); + expectThrows( + ExecutionException.class, + FailedToCommitClusterStateException.class, + () -> commitFailureFuture.get(10, TimeUnit.SECONDS) + ); + mockLog.assertAllExpectationsMatched(); + + // Case 4: a NotMasterException safeAwait((ActionListener listener) -> clusterService.getClusterApplierService().onNewClusterState("simulated", () -> { final var state = clusterService.state(); diff --git a/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java index 4c24abbd5fd9a..26bd32c56ba8c 100644 --- a/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java @@ -31,6 +31,7 @@ import org.elasticsearch.cluster.block.ClusterBlocks; import org.elasticsearch.cluster.coordination.ClusterStatePublisher; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; +import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.metadata.ProcessClusterEventTimeoutException; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodeUtils; @@ -278,7 +279,7 @@ public void testThreadContext() { ); } else { randomExecutor(threadPool).execute( - () -> publishListener.onFailure(new FailedToCommitClusterStateException("simulated publish failure")) + () -> publishListener.onFailure(randomClusterStateUpdateException()) ); } } @@ -1070,13 +1071,14 @@ public void onFailure(Exception e) { assertNotNull(stateBeforeFailure); final var publicationFailedExceptionMessage = "simulated publication failure"; + ElasticsearchException clusterStateUpdateException = randomBoolean() ? new FailedToPublishClusterStateException(publicationFailedExceptionMessage) : new FailedToCommitClusterStateException(publicationFailedExceptionMessage); masterService.setClusterStatePublisher((clusterStatePublicationEvent, publishListener, ackListener) -> { assertSame(stateBeforeFailure, clusterStatePublicationEvent.getOldState()); assertNotSame(stateBeforeFailure, clusterStatePublicationEvent.getNewState()); assertTrue(publishedState.compareAndSet(null, clusterStatePublicationEvent.getNewState())); ClusterServiceUtils.setAllElapsedMillis(clusterStatePublicationEvent); - publishListener.onFailure(new FailedToCommitClusterStateException(publicationFailedExceptionMessage)); + publishListener.onFailure(clusterStateUpdateException); }); toSubmit = between(1, 10); @@ -1092,7 +1094,11 @@ public void onFailure(Exception e) { final var task = new Task(expectFailure, testResponseHeaderValue, assertNoSuccessListener(e -> { assertEquals(testContextHeaderValue, threadContext.getHeader(testContextHeaderName)); assertEquals(List.of(testResponseHeaderValue), threadContext.getResponseHeaders().get(testResponseHeaderName)); - assertThat(e, instanceOf(FailedToCommitClusterStateException.class)); + if (clusterStateUpdateException instanceof FailedToPublishClusterStateException) { + assertThat(e, instanceOf(FailedToPublishClusterStateException.class)); + } else { + assertThat(e, instanceOf(FailedToCommitClusterStateException.class)); + } assertThat(e.getMessage(), equalTo(publicationFailedExceptionMessage)); if (expectFailure) { assertThat(e.getSuppressed().length, greaterThan(0)); @@ -1606,9 +1612,11 @@ public void onAckFailure(Exception e) { { final CountDownLatch latch = new CountDownLatch(1); + String publicationFailedExceptionMessage = "mock exception"; + ElasticsearchException clusterStateUpdateException = randomBoolean() ? new FailedToPublishClusterStateException(publicationFailedExceptionMessage) : new FailedToCommitClusterStateException(publicationFailedExceptionMessage); publisherRef.set( (clusterChangedEvent, publishListener, ackListener) -> publishListener.onFailure( - new FailedToCommitClusterStateException("mock exception") + clusterStateUpdateException ) ); @@ -1633,7 +1641,12 @@ protected AcknowledgedResponse newResponse(boolean acknowledged) { @Override public void onFailure(Exception e) { - assertEquals("mock exception", asInstanceOf(FailedToCommitClusterStateException.class, e).getMessage()); + if (clusterStateUpdateException instanceof FailedToPublishClusterStateException) { + assertThat(e, instanceOf(FailedToPublishClusterStateException.class)); + } else { + assertThat(e, instanceOf(FailedToCommitClusterStateException.class)); + } + assertThat(e.getMessage(), equalTo(publicationFailedExceptionMessage)); latch.countDown(); } diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index e173790c285ed..013a77e8ce045 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -59,7 +59,10 @@ import org.elasticsearch.cluster.ClusterModule; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.ProjectState; +import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; +import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.metadata.ProjectId; @@ -1484,6 +1487,20 @@ public static Executor randomExecutor(ThreadPool threadPool, String... otherExec } } + /** + * Randomly chooses between {@link NotMasterException}, {@link FailedToCommitClusterStateException} and {@link FailedToPublishClusterStateException}. + */ + public static Exception randomClusterStateUpdateException() { + int choice = between(0, 2); + if (choice == 0) { + return new NotMasterException("Fake NotMasterException"); + } else if (choice == 1) { + return new FailedToCommitClusterStateException("Fake FailedToCommitClusterStateException"); + } else { + return new FailedToPublishClusterStateException(" Fake FailedToPublishClusterStateException"); + } + } + /** * helper to randomly perform on consumer with value */ diff --git a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/MoveToErrorStepUpdateTask.java b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/MoveToErrorStepUpdateTask.java index 9013cacdd01e7..485136a76ca17 100644 --- a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/MoveToErrorStepUpdateTask.java +++ b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/MoveToErrorStepUpdateTask.java @@ -14,6 +14,7 @@ import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.ProjectState; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; +import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.LifecycleExecutionState; import org.elasticsearch.cluster.metadata.ProjectId; @@ -101,7 +102,7 @@ public int hashCode() { @Override protected void handleFailure(Exception e) { Level level; - if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToCommitClusterStateException.class) != null) { + if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToPublishClusterStateException.class, FailedToCommitClusterStateException.class) != null) { level = Level.DEBUG; } else { level = Level.ERROR; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentService.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentService.java index 12f402ff18f9f..7472c559c4418 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentService.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentService.java @@ -19,6 +19,7 @@ import org.elasticsearch.cluster.ClusterStateObserver; import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; +import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.core.Nullable; @@ -161,6 +162,7 @@ public void onTimeout(TimeValue timeout) { private static final Class[] MASTER_CHANNEL_EXCEPTIONS = new Class[] { NotMasterException.class, ConnectTransportException.class, + FailedToPublishClusterStateException.class, FailedToCommitClusterStateException.class }; private static boolean isMasterChannelException(Exception exp) { From eec4c8f1902116274303192791a9635278bd10bb Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 2 Oct 2025 11:55:43 +0000 Subject: [PATCH 04/11] [CI] Update transport version definitions --- .../referable/failed_to_publish_cluster_state_exception.csv | 1 + server/src/main/resources/transport/upper_bounds/9.2.csv | 2 +- server/src/main/resources/transport/upper_bounds/9.3.csv | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 server/src/main/resources/transport/definitions/referable/failed_to_publish_cluster_state_exception.csv create mode 100644 server/src/main/resources/transport/upper_bounds/9.3.csv diff --git a/server/src/main/resources/transport/definitions/referable/failed_to_publish_cluster_state_exception.csv b/server/src/main/resources/transport/definitions/referable/failed_to_publish_cluster_state_exception.csv new file mode 100644 index 0000000000000..eba277f9d4469 --- /dev/null +++ b/server/src/main/resources/transport/definitions/referable/failed_to_publish_cluster_state_exception.csv @@ -0,0 +1 @@ +9186001,9186000 diff --git a/server/src/main/resources/transport/upper_bounds/9.2.csv b/server/src/main/resources/transport/upper_bounds/9.2.csv index c114101f8655a..01aeef99a0846 100644 --- a/server/src/main/resources/transport/upper_bounds/9.2.csv +++ b/server/src/main/resources/transport/upper_bounds/9.2.csv @@ -1 +1 @@ -failed_to_publish_cluster_state_exception,9183000 +failed_to_publish_cluster_state_exception,9186000 diff --git a/server/src/main/resources/transport/upper_bounds/9.3.csv b/server/src/main/resources/transport/upper_bounds/9.3.csv new file mode 100644 index 0000000000000..849b7448160b3 --- /dev/null +++ b/server/src/main/resources/transport/upper_bounds/9.3.csv @@ -0,0 +1 @@ +failed_to_publish_cluster_state_exception,9186001 From 812ce64d860f3dec770d760e2f0af1e3df6102a5 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 2 Oct 2025 11:55:55 +0000 Subject: [PATCH 05/11] [CI] Auto commit changes from spotless --- .../snapshots/SnapshotsService.java | 14 ++++++++++++-- .../master/TransportMasterNodeActionTests.java | 2 -- .../routing/BatchedRerouteServiceTests.java | 4 +--- .../cluster/service/MasterServiceTests.java | 16 ++++++++-------- .../xpack/ilm/MoveToErrorStepUpdateTask.java | 7 ++++++- 5 files changed, 27 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index fe6414e591771..b540eba528276 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -1311,7 +1311,12 @@ private void handleFinalizationFailure( RepositoryData repositoryData, UpdatedShardGenerations updatedShardGenerations ) { - if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToPublishClusterStateException.class, FailedToCommitClusterStateException.class) != null) { + if (ExceptionsHelper.unwrap( + e, + NotMasterException.class, + FailedToPublishClusterStateException.class, + FailedToCommitClusterStateException.class + ) != null) { // Failure due to not being master any more, don't try to remove snapshot from cluster state the next master // will try ending this snapshot again logger.debug(() -> "[" + snapshot + "] failed to update cluster state during snapshot finalization", e); @@ -1963,7 +1968,12 @@ private void failAllListenersOnMasterFailOver(Exception e) { logger.debug("Failing all snapshot operation listeners because this node is not master any longer", e); final List readyToResolveListeners = new ArrayList<>(); synchronized (currentlyFinalizing) { - if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToPublishClusterStateException.class, FailedToCommitClusterStateException.class) != null) { + if (ExceptionsHelper.unwrap( + e, + NotMasterException.class, + FailedToPublishClusterStateException.class, + FailedToCommitClusterStateException.class + ) != null) { repositoryOperations.clear(); for (final Snapshot snapshot : snapshotCompletionListeners.keySet()) { failSnapshotCompletionListeners( diff --git a/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java b/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java index d81388db7378b..22391f5c5d6f8 100644 --- a/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java @@ -23,12 +23,10 @@ import org.elasticsearch.action.support.replication.ClusterStateCreationUtils; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.block.ClusterBlock; import org.elasticsearch.cluster.block.ClusterBlockException; import org.elasticsearch.cluster.block.ClusterBlockLevel; import org.elasticsearch.cluster.block.ClusterBlocks; -import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.metadata.Metadata; diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/BatchedRerouteServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/BatchedRerouteServiceTests.java index 3636f77f68b32..585742df497d9 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/BatchedRerouteServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/BatchedRerouteServiceTests.java @@ -221,9 +221,7 @@ public void testNotifiesOnFailure() throws InterruptedException { .setClusterStatePublisher( randomBoolean() ? ClusterServiceUtils.createClusterStatePublisher(clusterService.getClusterApplierService()) - : (event, publishListener, ackListener) -> publishListener.onFailure( - randomClusterStateUpdateException() - ) + : (event, publishListener, ackListener) -> publishListener.onFailure(randomClusterStateUpdateException()) ); } diff --git a/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java index 26bd32c56ba8c..262d08fab9aca 100644 --- a/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java @@ -278,9 +278,7 @@ public void testThreadContext() { ) ); } else { - randomExecutor(threadPool).execute( - () -> publishListener.onFailure(randomClusterStateUpdateException()) - ); + randomExecutor(threadPool).execute(() -> publishListener.onFailure(randomClusterStateUpdateException())); } } }); @@ -1071,7 +1069,9 @@ public void onFailure(Exception e) { assertNotNull(stateBeforeFailure); final var publicationFailedExceptionMessage = "simulated publication failure"; - ElasticsearchException clusterStateUpdateException = randomBoolean() ? new FailedToPublishClusterStateException(publicationFailedExceptionMessage) : new FailedToCommitClusterStateException(publicationFailedExceptionMessage); + ElasticsearchException clusterStateUpdateException = randomBoolean() + ? new FailedToPublishClusterStateException(publicationFailedExceptionMessage) + : new FailedToCommitClusterStateException(publicationFailedExceptionMessage); masterService.setClusterStatePublisher((clusterStatePublicationEvent, publishListener, ackListener) -> { assertSame(stateBeforeFailure, clusterStatePublicationEvent.getOldState()); @@ -1613,11 +1613,11 @@ public void onAckFailure(Exception e) { final CountDownLatch latch = new CountDownLatch(1); String publicationFailedExceptionMessage = "mock exception"; - ElasticsearchException clusterStateUpdateException = randomBoolean() ? new FailedToPublishClusterStateException(publicationFailedExceptionMessage) : new FailedToCommitClusterStateException(publicationFailedExceptionMessage); + ElasticsearchException clusterStateUpdateException = randomBoolean() + ? new FailedToPublishClusterStateException(publicationFailedExceptionMessage) + : new FailedToCommitClusterStateException(publicationFailedExceptionMessage); publisherRef.set( - (clusterChangedEvent, publishListener, ackListener) -> publishListener.onFailure( - clusterStateUpdateException - ) + (clusterChangedEvent, publishListener, ackListener) -> publishListener.onFailure(clusterStateUpdateException) ); masterService.submitUnbatchedStateUpdateTask( diff --git a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/MoveToErrorStepUpdateTask.java b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/MoveToErrorStepUpdateTask.java index 485136a76ca17..c15c848cd8ade 100644 --- a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/MoveToErrorStepUpdateTask.java +++ b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/MoveToErrorStepUpdateTask.java @@ -102,7 +102,12 @@ public int hashCode() { @Override protected void handleFailure(Exception e) { Level level; - if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToPublishClusterStateException.class, FailedToCommitClusterStateException.class) != null) { + if (ExceptionsHelper.unwrap( + e, + NotMasterException.class, + FailedToPublishClusterStateException.class, + FailedToCommitClusterStateException.class + ) != null) { level = Level.DEBUG; } else { level = Level.ERROR; From 8df45c6169afb317e6a939e94984087d0117f893 Mon Sep 17 00:00:00 2001 From: Joshua Adams Date: Thu, 2 Oct 2025 13:47:02 +0100 Subject: [PATCH 06/11] Spotless Apply --- .../snapshots/SnapshotsService.java | 14 ++++++++++++-- .../master/TransportMasterNodeActionTests.java | 2 -- .../routing/BatchedRerouteServiceTests.java | 4 +--- .../cluster/service/MasterServiceTests.java | 16 ++++++++-------- .../java/org/elasticsearch/test/ESTestCase.java | 3 ++- .../xpack/ilm/MoveToErrorStepUpdateTask.java | 7 ++++++- 6 files changed, 29 insertions(+), 17 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index fe6414e591771..b540eba528276 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -1311,7 +1311,12 @@ private void handleFinalizationFailure( RepositoryData repositoryData, UpdatedShardGenerations updatedShardGenerations ) { - if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToPublishClusterStateException.class, FailedToCommitClusterStateException.class) != null) { + if (ExceptionsHelper.unwrap( + e, + NotMasterException.class, + FailedToPublishClusterStateException.class, + FailedToCommitClusterStateException.class + ) != null) { // Failure due to not being master any more, don't try to remove snapshot from cluster state the next master // will try ending this snapshot again logger.debug(() -> "[" + snapshot + "] failed to update cluster state during snapshot finalization", e); @@ -1963,7 +1968,12 @@ private void failAllListenersOnMasterFailOver(Exception e) { logger.debug("Failing all snapshot operation listeners because this node is not master any longer", e); final List readyToResolveListeners = new ArrayList<>(); synchronized (currentlyFinalizing) { - if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToPublishClusterStateException.class, FailedToCommitClusterStateException.class) != null) { + if (ExceptionsHelper.unwrap( + e, + NotMasterException.class, + FailedToPublishClusterStateException.class, + FailedToCommitClusterStateException.class + ) != null) { repositoryOperations.clear(); for (final Snapshot snapshot : snapshotCompletionListeners.keySet()) { failSnapshotCompletionListeners( diff --git a/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java b/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java index d81388db7378b..22391f5c5d6f8 100644 --- a/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java @@ -23,12 +23,10 @@ import org.elasticsearch.action.support.replication.ClusterStateCreationUtils; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.block.ClusterBlock; import org.elasticsearch.cluster.block.ClusterBlockException; import org.elasticsearch.cluster.block.ClusterBlockLevel; import org.elasticsearch.cluster.block.ClusterBlocks; -import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.metadata.Metadata; diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/BatchedRerouteServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/BatchedRerouteServiceTests.java index 3636f77f68b32..585742df497d9 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/BatchedRerouteServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/BatchedRerouteServiceTests.java @@ -221,9 +221,7 @@ public void testNotifiesOnFailure() throws InterruptedException { .setClusterStatePublisher( randomBoolean() ? ClusterServiceUtils.createClusterStatePublisher(clusterService.getClusterApplierService()) - : (event, publishListener, ackListener) -> publishListener.onFailure( - randomClusterStateUpdateException() - ) + : (event, publishListener, ackListener) -> publishListener.onFailure(randomClusterStateUpdateException()) ); } diff --git a/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java index 26bd32c56ba8c..262d08fab9aca 100644 --- a/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java @@ -278,9 +278,7 @@ public void testThreadContext() { ) ); } else { - randomExecutor(threadPool).execute( - () -> publishListener.onFailure(randomClusterStateUpdateException()) - ); + randomExecutor(threadPool).execute(() -> publishListener.onFailure(randomClusterStateUpdateException())); } } }); @@ -1071,7 +1069,9 @@ public void onFailure(Exception e) { assertNotNull(stateBeforeFailure); final var publicationFailedExceptionMessage = "simulated publication failure"; - ElasticsearchException clusterStateUpdateException = randomBoolean() ? new FailedToPublishClusterStateException(publicationFailedExceptionMessage) : new FailedToCommitClusterStateException(publicationFailedExceptionMessage); + ElasticsearchException clusterStateUpdateException = randomBoolean() + ? new FailedToPublishClusterStateException(publicationFailedExceptionMessage) + : new FailedToCommitClusterStateException(publicationFailedExceptionMessage); masterService.setClusterStatePublisher((clusterStatePublicationEvent, publishListener, ackListener) -> { assertSame(stateBeforeFailure, clusterStatePublicationEvent.getOldState()); @@ -1613,11 +1613,11 @@ public void onAckFailure(Exception e) { final CountDownLatch latch = new CountDownLatch(1); String publicationFailedExceptionMessage = "mock exception"; - ElasticsearchException clusterStateUpdateException = randomBoolean() ? new FailedToPublishClusterStateException(publicationFailedExceptionMessage) : new FailedToCommitClusterStateException(publicationFailedExceptionMessage); + ElasticsearchException clusterStateUpdateException = randomBoolean() + ? new FailedToPublishClusterStateException(publicationFailedExceptionMessage) + : new FailedToCommitClusterStateException(publicationFailedExceptionMessage); publisherRef.set( - (clusterChangedEvent, publishListener, ackListener) -> publishListener.onFailure( - clusterStateUpdateException - ) + (clusterChangedEvent, publishListener, ackListener) -> publishListener.onFailure(clusterStateUpdateException) ); masterService.submitUnbatchedStateUpdateTask( diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index 013a77e8ce045..fa33246c4f984 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -1488,7 +1488,8 @@ public static Executor randomExecutor(ThreadPool threadPool, String... otherExec } /** - * Randomly chooses between {@link NotMasterException}, {@link FailedToCommitClusterStateException} and {@link FailedToPublishClusterStateException}. + * Randomly chooses between {@link NotMasterException}, {@link FailedToCommitClusterStateException} + * and {@link FailedToPublishClusterStateException}. */ public static Exception randomClusterStateUpdateException() { int choice = between(0, 2); diff --git a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/MoveToErrorStepUpdateTask.java b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/MoveToErrorStepUpdateTask.java index 485136a76ca17..c15c848cd8ade 100644 --- a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/MoveToErrorStepUpdateTask.java +++ b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/MoveToErrorStepUpdateTask.java @@ -102,7 +102,12 @@ public int hashCode() { @Override protected void handleFailure(Exception e) { Level level; - if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToPublishClusterStateException.class, FailedToCommitClusterStateException.class) != null) { + if (ExceptionsHelper.unwrap( + e, + NotMasterException.class, + FailedToPublishClusterStateException.class, + FailedToCommitClusterStateException.class + ) != null) { level = Level.DEBUG; } else { level = Level.ERROR; From 49116065cee8758c19de4cbef5399efe9808cdf7 Mon Sep 17 00:00:00 2001 From: Joshua Adams Date: Thu, 2 Oct 2025 13:58:11 +0100 Subject: [PATCH 07/11] Trying to get transport definitions to work --- .../referable/failed_to_publish_cluster_state_exception.csv | 2 +- server/src/main/resources/transport/upper_bounds/9.3.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/resources/transport/definitions/referable/failed_to_publish_cluster_state_exception.csv b/server/src/main/resources/transport/definitions/referable/failed_to_publish_cluster_state_exception.csv index eba277f9d4469..085d1343edd9e 100644 --- a/server/src/main/resources/transport/definitions/referable/failed_to_publish_cluster_state_exception.csv +++ b/server/src/main/resources/transport/definitions/referable/failed_to_publish_cluster_state_exception.csv @@ -1 +1 @@ -9186001,9186000 +9186000 diff --git a/server/src/main/resources/transport/upper_bounds/9.3.csv b/server/src/main/resources/transport/upper_bounds/9.3.csv index 849b7448160b3..dfb000bd20c3d 100644 --- a/server/src/main/resources/transport/upper_bounds/9.3.csv +++ b/server/src/main/resources/transport/upper_bounds/9.3.csv @@ -1 +1 @@ -failed_to_publish_cluster_state_exception,9186001 +esql_plan_with_no_columns,9186000 From 7376b9dce69759a430866542878c36ac2704bdd8 Mon Sep 17 00:00:00 2001 From: Joshua Adams Date: Thu, 2 Oct 2025 14:07:39 +0100 Subject: [PATCH 08/11] Finally got transport definitions to work --- .../referable/failed_to_publish_cluster_state_exception.csv | 2 +- server/src/main/resources/transport/upper_bounds/9.3.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/resources/transport/definitions/referable/failed_to_publish_cluster_state_exception.csv b/server/src/main/resources/transport/definitions/referable/failed_to_publish_cluster_state_exception.csv index 085d1343edd9e..71ac710f9bfe1 100644 --- a/server/src/main/resources/transport/definitions/referable/failed_to_publish_cluster_state_exception.csv +++ b/server/src/main/resources/transport/definitions/referable/failed_to_publish_cluster_state_exception.csv @@ -1 +1 @@ -9186000 +9187000 diff --git a/server/src/main/resources/transport/upper_bounds/9.3.csv b/server/src/main/resources/transport/upper_bounds/9.3.csv index dfb000bd20c3d..20b4c7dda6cac 100644 --- a/server/src/main/resources/transport/upper_bounds/9.3.csv +++ b/server/src/main/resources/transport/upper_bounds/9.3.csv @@ -1 +1 @@ -esql_plan_with_no_columns,9186000 +failed_to_publish_cluster_state_exception,9187000 From 4152c42a3faa6f5062c37258aa2c97adaa3342e6 Mon Sep 17 00:00:00 2001 From: Joshua Adams Date: Tue, 7 Oct 2025 11:56:17 +0100 Subject: [PATCH 09/11] Remove FailedToPublishClusterStateException and uses NotMasterException instead --- .../elasticsearch/ElasticsearchException.java | 18 ------ .../action/shard/ShardStateAction.java | 4 +- .../cluster/coordination/Coordinator.java | 12 ++-- .../FailedToCommitClusterStateException.java | 2 +- .../FailedToPublishClusterStateException.java | 46 ------------- .../cluster/service/MasterService.java | 64 +++++-------------- .../service/FileSettingsService.java | 3 - .../snapshots/SnapshotsService.java | 15 +---- .../ExceptionSerializationTests.java | 4 -- .../TransportMasterNodeActionTests.java | 7 +- .../action/shard/ShardStateActionTests.java | 7 +- .../cluster/coordination/JoinHelperTests.java | 10 --- .../routing/BatchedRerouteServiceTests.java | 48 ++++---------- .../cluster/service/MasterServiceTests.java | 35 ++++------ .../org/elasticsearch/test/ESTestCase.java | 18 ------ .../xpack/ilm/MoveToErrorStepUpdateTask.java | 8 +-- .../TrainedModelAssignmentService.java | 2 - 17 files changed, 56 insertions(+), 247 deletions(-) delete mode 100644 server/src/main/java/org/elasticsearch/cluster/coordination/FailedToPublishClusterStateException.java diff --git a/server/src/main/java/org/elasticsearch/ElasticsearchException.java b/server/src/main/java/org/elasticsearch/ElasticsearchException.java index e203a31710eaa..79d1df6a09be4 100644 --- a/server/src/main/java/org/elasticsearch/ElasticsearchException.java +++ b/server/src/main/java/org/elasticsearch/ElasticsearchException.java @@ -45,7 +45,6 @@ import org.elasticsearch.search.aggregations.AggregationExecutionException; import org.elasticsearch.search.aggregations.MultiBucketConsumerService; import org.elasticsearch.search.aggregations.UnsupportedAggregationOnDownsampledIndex; -import org.elasticsearch.search.crossproject.NoMatchingProjectException; import org.elasticsearch.search.query.SearchTimeoutException; import org.elasticsearch.transport.TcpTransport; import org.elasticsearch.xcontent.ParseField; @@ -80,7 +79,6 @@ import static org.elasticsearch.cluster.metadata.IndexMetadata.INDEX_UUID_NA_VALUE; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureFieldName; -import static org.elasticsearch.search.crossproject.IndexExpressionsRewriter.NO_MATCHING_PROJECT_EXCEPTION_VERSION; /** * A base class for all elasticsearch exceptions. @@ -89,10 +87,6 @@ public class ElasticsearchException extends RuntimeException implements ToXConte private static final TransportVersion UNKNOWN_VERSION_ADDED = TransportVersion.zero(); - private static final TransportVersion FAILED_TO_PUBLISH_CLUSTER_STATE_EXCEPTION_TRANSPORT_VERSION = TransportVersion.fromName( - "failed_to_publish_cluster_state_exception" - ); - /** * Passed in the {@link Params} of {@link #generateThrowableXContent(XContentBuilder, Params, Throwable)} * to control if the {@code caused_by} element should render. Unlike most parameters to {@code toXContent} methods this parameter is @@ -2028,18 +2022,6 @@ private enum ElasticsearchExceptionHandle { 184, TransportVersions.REMOTE_EXCEPTION, TransportVersions.REMOTE_EXCEPTION_8_19 - ), - NO_MATCHING_PROJECT_EXCEPTION( - NoMatchingProjectException.class, - NoMatchingProjectException::new, - 185, - NO_MATCHING_PROJECT_EXCEPTION_VERSION - ), - FAILED_TO_PUBLISH_CLUSTER_STATE_EXCEPTION( - org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException.class, - org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException::new, - 186, - FAILED_TO_PUBLISH_CLUSTER_STATE_EXCEPTION_TRANSPORT_VERSION ); final Class exceptionClass; diff --git a/server/src/main/java/org/elasticsearch/cluster/action/shard/ShardStateAction.java b/server/src/main/java/org/elasticsearch/cluster/action/shard/ShardStateAction.java index 9d7bdd7f21d14..f24d5be356ee5 100644 --- a/server/src/main/java/org/elasticsearch/cluster/action/shard/ShardStateAction.java +++ b/server/src/main/java/org/elasticsearch/cluster/action/shard/ShardStateAction.java @@ -25,7 +25,6 @@ import org.elasticsearch.cluster.ClusterStateTaskListener; import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; -import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.metadata.ProjectId; @@ -165,7 +164,6 @@ private void sendShardAction( private static final Class[] MASTER_CHANNEL_EXCEPTIONS = new Class[] { NotMasterException.class, ConnectTransportException.class, - FailedToPublishClusterStateException.class, FailedToCommitClusterStateException.class }; private static boolean isMasterChannelException(Throwable exp) { @@ -938,7 +936,7 @@ public StartedShardEntry getStartedShardEntry() { public void onFailure(Exception e) { if (e instanceof NotMasterException) { logger.debug(() -> format("%s no longer master while starting shard [%s]", entry.shardId, entry)); - } else if (e instanceof FailedToPublishClusterStateException || e instanceof FailedToCommitClusterStateException) { + } else if (e instanceof FailedToCommitClusterStateException) { logger.debug(() -> format("%s unexpected failure while starting shard [%s]", entry.shardId, entry), e); } else { logger.error(() -> format("%s unexpected failure while starting shard [%s]", entry.shardId, entry), e); diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java index 47ffbb85377ec..b201cf4cc56fa 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java @@ -1568,7 +1568,7 @@ public void publish( clusterStatePublicationEvent.getSummary() ) ); - throw new FailedToPublishClusterStateException("publication " + currentPublication.get() + " already in progress"); + throw new NotMasterException("publication " + currentPublication.get() + " already in progress"); } assert assertPreviousStateConsistency(clusterStatePublicationEvent); @@ -1587,7 +1587,7 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) } catch (Exception e) { logger.debug(() -> "[" + clusterStatePublicationEvent.getSummary() + "] publishing failed during context creation", e); becomeCandidate("publication context creation"); - throw new FailedToPublishClusterStateException("publishing failed during context creation", e); + throw new NotMasterException("publishing failed during context creation", e); } try (Releasable ignored = publicationContext::decRef) { @@ -1608,7 +1608,7 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) e ); becomeCandidate("publication creation"); - throw new FailedToPublishClusterStateException("publishing failed while starting", e); + throw new NotMasterException("publishing failed while starting", e); } try { @@ -1639,12 +1639,12 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) } } } - } catch (FailedToPublishClusterStateException | FailedToCommitClusterStateException | NotMasterException e) { + } catch (FailedToCommitClusterStateException | NotMasterException e) { publishListener.onFailure(e); } catch (Exception e) { - assert false : e; // all exceptions should already be caught and wrapped in a FailedToPublishClusterStateException | + assert false : e; // all exceptions should already be caught and wrapped in a FailedToCommitClusterStateException | logger.error(() -> "[" + clusterStatePublicationEvent.getSummary() + "] publishing unexpectedly failed", e); - publishListener.onFailure(new FailedToPublishClusterStateException("publishing unexpectedly failed", e)); + publishListener.onFailure(new FailedToCommitClusterStateException("publishing unexpectedly failed", e)); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/FailedToCommitClusterStateException.java b/server/src/main/java/org/elasticsearch/cluster/coordination/FailedToCommitClusterStateException.java index 1b21181060cad..1f5433b7d40f5 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/FailedToCommitClusterStateException.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/FailedToCommitClusterStateException.java @@ -23,7 +23,7 @@ * ambiguity whether a cluster state update has been committed. *

* For exceptions thrown prior to publication, - * when the cluster update has definitely failed, use a {@link FailedToPublishClusterStateException}. + * when the cluster update has definitely failed, use a different exception. *

* If during a cluster state update the node is no longer master, use a {@link NotMasterException} *

diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/FailedToPublishClusterStateException.java b/server/src/main/java/org/elasticsearch/cluster/coordination/FailedToPublishClusterStateException.java deleted file mode 100644 index 2ef3c4b3f3706..0000000000000 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/FailedToPublishClusterStateException.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ -package org.elasticsearch.cluster.coordination; - -import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.action.support.master.TransportMasterNodeAction; -import org.elasticsearch.common.io.stream.StreamInput; - -import java.io.IOException; - -/** - * Exception indicating a cluster state update failed prior to publication. - *

- * If this exception is thrown, then the cluster state update was not published to any node. - * It is therefore impossible for the new master to have committed this state. - *

- * For exceptions thrown after publication, when the cluster state update may or may not have been committed, - * use a {@link FailedToCommitClusterStateException}. - *

- * This is a retryable exception inside {@link TransportMasterNodeAction} - */ -public class FailedToPublishClusterStateException extends ElasticsearchException { - - public FailedToPublishClusterStateException(String msg) { - super(msg); - } - - public FailedToPublishClusterStateException(StreamInput in) throws IOException { - super(in); - } - - public FailedToPublishClusterStateException(String msg, Throwable cause, Object... args) { - super(msg, cause, args); - } - - @Override - public Throwable fillInStackTrace() { - return this; - } -} diff --git a/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java b/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java index 48dca03ae2683..596cce87fd151 100644 --- a/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java +++ b/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java @@ -26,7 +26,6 @@ import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.coordination.ClusterStatePublisher; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; -import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.metadata.ProcessClusterEventTimeoutException; import org.elasticsearch.cluster.metadata.ProjectMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; @@ -416,37 +415,13 @@ public void onResponse(Void unused) { @Override public void onFailure(Exception exception) { - if (exception instanceof FailedToPublishClusterStateException - || exception instanceof FailedToCommitClusterStateException - || exception instanceof NotMasterException) { + if (exception instanceof FailedToCommitClusterStateException failedToCommitClusterStateException) { final long notificationStartTime = threadPool.rawRelativeTimeInMillis(); final long version = newClusterState.version(); - - if (exception instanceof FailedToCommitClusterStateException) { - logger.warn( - () -> format("failing [%s]: failed to commit cluster state version [%s]", summary, version), - exception - ); - } else if (exception instanceof FailedToPublishClusterStateException) { - logger.warn( - () -> format("failing [%s]: failed to publish cluster state version [%s]", summary, version), - exception - ); - } else { - logger.debug( - () -> format( - "node is no longer the master prior to publication of cluster state version [%s]: [%s]", - version, - summary - ), - exception - ); - } - + logger.warn(() -> format("failing [%s]: failed to commit cluster state version [%s]", summary, version), exception); for (final var executionResult : executionResults) { - executionResult.onPublishFailure(exception); + executionResult.onPublishFailure(failedToCommitClusterStateException); } - final long notificationMillis = threadPool.rawRelativeTimeInMillis() - notificationStartTime; clusterStateUpdateStatsTracker.onPublicationFailure( threadPool.rawRelativeTimeInMillis(), @@ -1010,19 +985,11 @@ void onClusterStateUnchanged(ClusterState clusterState) { } } - void onPublishFailure(Exception e) { + void onPublishFailure(FailedToCommitClusterStateException e) { if (publishedStateConsumer == null && onPublicationSuccess == null) { assert failure != null; var taskFailure = failure; - - if (e instanceof FailedToPublishClusterStateException) { - failure = new FailedToPublishClusterStateException(e.getMessage(), e); - } else if (e instanceof FailedToCommitClusterStateException) { - failure = new FailedToCommitClusterStateException(e.getMessage(), e); - } else { - failure = new NotMasterException(e.getMessage(), e); - } - + failure = new FailedToCommitClusterStateException(e.getMessage(), e); failure.addSuppressed(taskFailure); notifyFailure(); return; @@ -1283,9 +1250,7 @@ synchronized ClusterStateUpdateStats getStatistics() { } public static boolean isPublishFailureException(Exception e) { - return e instanceof NotMasterException - || e instanceof FailedToCommitClusterStateException - || e instanceof FailedToPublishClusterStateException; + return e instanceof NotMasterException || e instanceof FailedToCommitClusterStateException; } private final Runnable queuesProcessor = new AbstractRunnable() { @@ -1318,7 +1283,7 @@ public String toString() { if (lifecycle.started()) { nextBatch.run(batchCompletionListener); } else { - nextBatch.onRejection(new NotMasterException("node closed", getRejectionException())); + nextBatch.onRejection(new FailedToCommitClusterStateException("node closed", getRejectionException())); batchCompletionListener.onResponse(null); } }); @@ -1344,7 +1309,7 @@ private void onCompletion() { @Override public void onRejection(Exception e) { assert e instanceof EsRejectedExecutionException esre && esre.isExecutorShutdown() : e; - drainQueueOnRejection(new NotMasterException("node closed", e)); + drainQueueOnRejection(new FailedToCommitClusterStateException("node closed", e)); } @Override @@ -1371,7 +1336,7 @@ private Batch takeNextBatch() { private void forkQueueProcessor() { // single-threaded: started when totalQueueSize transitions from 0 to 1 and keeps calling itself until the queue is drained. if (lifecycle.started() == false) { - drainQueueOnRejection(new NotMasterException("node closed", getRejectionException())); + drainQueueOnRejection(new FailedToCommitClusterStateException("node closed", getRejectionException())); return; } @@ -1388,7 +1353,7 @@ private EsRejectedExecutionException getRejectionException() { return new EsRejectedExecutionException("master service is in state [" + lifecycleState() + "]", true); } - private void drainQueueOnRejection(NotMasterException e) { + private void drainQueueOnRejection(FailedToCommitClusterStateException e) { assert totalQueueSize.get() > 0; do { assert currentlyExecutingBatch == null; @@ -1442,11 +1407,12 @@ private interface Batch { /** * Called when the batch is rejected due to the master service shutting down. * - * @param e is a {@link NotMasterException} to cause things like {@link TransportMasterNodeAction} to retry after + * @param e is a {@link FailedToCommitClusterStateException} to cause things like {@link TransportMasterNodeAction} to retry after * submitting a task to a master which shut down. {@code e.getCause()} is the rejection exception, which should be a * {@link EsRejectedExecutionException} with {@link EsRejectedExecutionException#isExecutorShutdown()} true. */ - void onRejection(NotMasterException e); + // Should really be a NodeClosedException instead, but this exception type doesn't trigger retries today. + void onRejection(FailedToCommitClusterStateException e); /** * @return number of tasks in this batch if the batch is pending, or {@code 0} if the batch is not pending. @@ -1668,7 +1634,7 @@ T acquireForExecution() { return task; } - void onRejection(NotMasterException e) { + void onRejection(FailedToCommitClusterStateException e) { final var task = acquireForExecution(); if (task != null) { try (var ignored = storedContextSupplier.get()) { @@ -1688,7 +1654,7 @@ boolean isPending() { private class Processor implements Batch { @Override - public void onRejection(NotMasterException e) { + public void onRejection(FailedToCommitClusterStateException e) { final var items = queueSize.getAndSet(0); for (int i = 0; i < items; i++) { final var entry = queue.poll(); diff --git a/server/src/main/java/org/elasticsearch/reservedstate/service/FileSettingsService.java b/server/src/main/java/org/elasticsearch/reservedstate/service/FileSettingsService.java index 80bf47a21c47f..47182c30dd30b 100644 --- a/server/src/main/java/org/elasticsearch/reservedstate/service/FileSettingsService.java +++ b/server/src/main/java/org/elasticsearch/reservedstate/service/FileSettingsService.java @@ -20,7 +20,6 @@ import org.elasticsearch.cluster.ClusterStateListener; import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; -import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.ReservedStateMetadata; @@ -255,8 +254,6 @@ protected void onProcessFileChangesException(Path file, Exception e) { } else if (cause instanceof NotMasterException) { logger().error(Strings.format("Node is no longer master while processing file [%s]", file), e); return; - } else if (cause instanceof FailedToPublishClusterStateException) { - logger().error(Strings.format("Unable to publish cluster state while processing file [%s]", file), e); } } diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index b540eba528276..f28d4948f657a 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -35,7 +35,6 @@ import org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus; import org.elasticsearch.cluster.SnapshotsInProgress.ShardState; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; -import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.cluster.metadata.DataStreamAlias; import org.elasticsearch.cluster.metadata.IndexMetadata; @@ -1311,12 +1310,7 @@ private void handleFinalizationFailure( RepositoryData repositoryData, UpdatedShardGenerations updatedShardGenerations ) { - if (ExceptionsHelper.unwrap( - e, - NotMasterException.class, - FailedToPublishClusterStateException.class, - FailedToCommitClusterStateException.class - ) != null) { + if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToCommitClusterStateException.class) != null) { // Failure due to not being master any more, don't try to remove snapshot from cluster state the next master // will try ending this snapshot again logger.debug(() -> "[" + snapshot + "] failed to update cluster state during snapshot finalization", e); @@ -1968,12 +1962,7 @@ private void failAllListenersOnMasterFailOver(Exception e) { logger.debug("Failing all snapshot operation listeners because this node is not master any longer", e); final List readyToResolveListeners = new ArrayList<>(); synchronized (currentlyFinalizing) { - if (ExceptionsHelper.unwrap( - e, - NotMasterException.class, - FailedToPublishClusterStateException.class, - FailedToCommitClusterStateException.class - ) != null) { + if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToCommitClusterStateException.class) != null) { repositoryOperations.clear(); for (final Snapshot snapshot : snapshotCompletionListeners.keySet()) { failSnapshotCompletionListeners( diff --git a/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java b/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java index 87110e6798d43..64c0b9a780cfe 100644 --- a/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java +++ b/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java @@ -25,7 +25,6 @@ import org.elasticsearch.cluster.action.shard.ShardStateAction; import org.elasticsearch.cluster.block.ClusterBlockException; import org.elasticsearch.cluster.coordination.CoordinationStateRejectedException; -import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.coordination.NoMasterBlockService; import org.elasticsearch.cluster.coordination.NodeHealthCheckFailureException; import org.elasticsearch.cluster.desirednodes.VersionConflictException; @@ -87,7 +86,6 @@ import org.elasticsearch.search.aggregations.AggregationExecutionException; import org.elasticsearch.search.aggregations.MultiBucketConsumerService; import org.elasticsearch.search.aggregations.UnsupportedAggregationOnDownsampledIndex; -import org.elasticsearch.search.crossproject.NoMatchingProjectException; import org.elasticsearch.search.internal.ShardSearchContextId; import org.elasticsearch.search.query.SearchTimeoutException; import org.elasticsearch.snapshots.Snapshot; @@ -848,8 +846,6 @@ public void testIds() { ids.put(182, IngestPipelineException.class); ids.put(183, IndexDocFailureStoreStatus.ExceptionWithFailureStoreStatus.class); ids.put(184, RemoteException.class); - ids.put(185, NoMatchingProjectException.class); - ids.put(186, FailedToPublishClusterStateException.class); Map, Integer> reverse = new HashMap<>(); for (Map.Entry> entry : ids.entrySet()) { diff --git a/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java b/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java index 22391f5c5d6f8..823af466cae66 100644 --- a/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java @@ -23,10 +23,12 @@ import org.elasticsearch.action.support.replication.ClusterStateCreationUtils; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.block.ClusterBlock; import org.elasticsearch.cluster.block.ClusterBlockException; import org.elasticsearch.cluster.block.ClusterBlockLevel; import org.elasticsearch.cluster.block.ClusterBlocks; +import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.metadata.Metadata; @@ -627,7 +629,10 @@ protected void masterOperation(Task task, Request request, ClusterState state, A clusterService, ClusterState.builder(ClusterStateCreationUtils.state(localNode, remoteNode, allNodes)).incrementVersion().build() ); - listener.onFailure(randomClusterStateUpdateException()); + Exception failure = randomBoolean() + ? new FailedToCommitClusterStateException("Fake error") + : new NotMasterException("Fake error"); + listener.onFailure(failure); } }, null, request, listener); diff --git a/server/src/test/java/org/elasticsearch/cluster/action/shard/ShardStateActionTests.java b/server/src/test/java/org/elasticsearch/cluster/action/shard/ShardStateActionTests.java index 6faaca4ad329b..b189d7796b51a 100644 --- a/server/src/test/java/org/elasticsearch/cluster/action/shard/ShardStateActionTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/action/shard/ShardStateActionTests.java @@ -22,7 +22,6 @@ import org.elasticsearch.cluster.action.shard.ShardStateAction.FailedShardEntry; import org.elasticsearch.cluster.action.shard.ShardStateAction.StartedShardEntry; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; -import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.routing.IndexRoutingTable; import org.elasticsearch.cluster.routing.RerouteService; import org.elasticsearch.cluster.routing.RoutingTable; @@ -236,11 +235,7 @@ public void testMasterChannelException() throws InterruptedException { if (randomBoolean()) { transport.handleRemoteError( requestId, - randomFrom( - new NotMasterException("simulated"), - new FailedToPublishClusterStateException("simulated"), - new FailedToCommitClusterStateException("simulated") - ) + randomFrom(new NotMasterException("simulated"), new FailedToCommitClusterStateException("simulated")) ); } else { if (randomBoolean()) { diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/JoinHelperTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/JoinHelperTests.java index 4ce67cab09335..4179a6d3958ea 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/JoinHelperTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/JoinHelperTests.java @@ -216,16 +216,6 @@ public void testFailedJoinAttemptLogLevel() { is(Level.DEBUG) ); - assertThat( - JoinHelper.FailedJoinAttempt.getLogLevel( - new RemoteTransportException( - "caused by FailedToPublishClusterStateException", - new FailedToPublishClusterStateException("test") - ) - ), - is(Level.DEBUG) - ); - assertThat( JoinHelper.FailedJoinAttempt.getLogLevel( new RemoteTransportException("caused by NotMasterException", new NotMasterException("test")) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/BatchedRerouteServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/BatchedRerouteServiceTests.java index 585742df497d9..0a97c903a4965 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/BatchedRerouteServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/BatchedRerouteServiceTests.java @@ -16,7 +16,6 @@ import org.elasticsearch.cluster.ClusterStateUpdateTask; import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; -import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Priority; import org.elasticsearch.common.Randomness; @@ -221,7 +220,9 @@ public void testNotifiesOnFailure() throws InterruptedException { .setClusterStatePublisher( randomBoolean() ? ClusterServiceUtils.createClusterStatePublisher(clusterService.getClusterApplierService()) - : (event, publishListener, ackListener) -> publishListener.onFailure(randomClusterStateUpdateException()) + : (event, publishListener, ackListener) -> publishListener.onFailure( + new FailedToCommitClusterStateException("simulated") + ) ); } @@ -243,6 +244,11 @@ public void testExceptionFidelity() { try (var mockLog = MockLog.capture(BatchedRerouteService.class)) { + clusterService.getMasterService() + .setClusterStatePublisher( + (event, publishListener, ackListener) -> publishListener.onFailure(new FailedToCommitClusterStateException("simulated")) + ); + // Case 1: an exception thrown from within the reroute itself mockLog.addExpectation( @@ -278,14 +284,9 @@ public void testExceptionFidelity() { new MockLog.UnseenEventExpectation("no info", BatchedRerouteService.class.getCanonicalName(), Level.INFO, "*") ); - // Case 2: a FailedToPublishClusterStateException - - clusterService.getMasterService() - .setClusterStatePublisher( - (event, publishListener, ackListener) -> publishListener.onFailure(new FailedToPublishClusterStateException("test")) - ); + // Case 2: a FailedToCommitClusterStateException (see the call to setClusterStatePublisher above) - BatchedRerouteService batchedRerouteService = new BatchedRerouteService(clusterService, (s, r, l) -> { + final BatchedRerouteService batchedRerouteService = new BatchedRerouteService(clusterService, (s, r, l) -> { l.onResponse(null); return ClusterState.builder(s).build(); }); @@ -301,39 +302,14 @@ public void testExceptionFidelity() { final var publishFailureFuture = new PlainActionFuture(); batchedRerouteService.reroute("publish failure", randomFrom(EnumSet.allOf(Priority.class)), publishFailureFuture); - expectThrows( - ExecutionException.class, - FailedToPublishClusterStateException.class, - () -> publishFailureFuture.get(10, TimeUnit.SECONDS) - ); - mockLog.assertAllExpectationsMatched(); - - // Case 3: a FailedToCommitClusterStateException - - clusterService.getMasterService() - .setClusterStatePublisher( - (event, publishListener, ackListener) -> publishListener.onFailure(new FailedToCommitClusterStateException("test")) - ); - - mockLog.addExpectation( - new MockLog.SeenEventExpectation( - "commit failure", - BatchedRerouteService.class.getCanonicalName(), - Level.DEBUG, - "unexpected failure" - ) - ); - - final var commitFailureFuture = new PlainActionFuture(); - batchedRerouteService.reroute("commit failure", randomFrom(EnumSet.allOf(Priority.class)), commitFailureFuture); expectThrows( ExecutionException.class, FailedToCommitClusterStateException.class, - () -> commitFailureFuture.get(10, TimeUnit.SECONDS) + () -> publishFailureFuture.get(10, TimeUnit.SECONDS) ); mockLog.assertAllExpectationsMatched(); - // Case 4: a NotMasterException + // Case 3: a NotMasterException safeAwait((ActionListener listener) -> clusterService.getClusterApplierService().onNewClusterState("simulated", () -> { final var state = clusterService.state(); diff --git a/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java index 533cf97568517..4c24abbd5fd9a 100644 --- a/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java @@ -31,7 +31,6 @@ import org.elasticsearch.cluster.block.ClusterBlocks; import org.elasticsearch.cluster.coordination.ClusterStatePublisher; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; -import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.metadata.ProcessClusterEventTimeoutException; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodeUtils; @@ -278,7 +277,9 @@ public void testThreadContext() { ) ); } else { - randomExecutor(threadPool).execute(() -> publishListener.onFailure(randomClusterStateUpdateException())); + randomExecutor(threadPool).execute( + () -> publishListener.onFailure(new FailedToCommitClusterStateException("simulated publish failure")) + ); } } }); @@ -1069,16 +1070,13 @@ public void onFailure(Exception e) { assertNotNull(stateBeforeFailure); final var publicationFailedExceptionMessage = "simulated publication failure"; - ElasticsearchException clusterStateUpdateException = randomBoolean() - ? new FailedToPublishClusterStateException(publicationFailedExceptionMessage) - : new FailedToCommitClusterStateException(publicationFailedExceptionMessage); masterService.setClusterStatePublisher((clusterStatePublicationEvent, publishListener, ackListener) -> { assertSame(stateBeforeFailure, clusterStatePublicationEvent.getOldState()); assertNotSame(stateBeforeFailure, clusterStatePublicationEvent.getNewState()); assertTrue(publishedState.compareAndSet(null, clusterStatePublicationEvent.getNewState())); ClusterServiceUtils.setAllElapsedMillis(clusterStatePublicationEvent); - publishListener.onFailure(clusterStateUpdateException); + publishListener.onFailure(new FailedToCommitClusterStateException(publicationFailedExceptionMessage)); }); toSubmit = between(1, 10); @@ -1094,11 +1092,7 @@ public void onFailure(Exception e) { final var task = new Task(expectFailure, testResponseHeaderValue, assertNoSuccessListener(e -> { assertEquals(testContextHeaderValue, threadContext.getHeader(testContextHeaderName)); assertEquals(List.of(testResponseHeaderValue), threadContext.getResponseHeaders().get(testResponseHeaderName)); - if (clusterStateUpdateException instanceof FailedToPublishClusterStateException) { - assertThat(e, instanceOf(FailedToPublishClusterStateException.class)); - } else { - assertThat(e, instanceOf(FailedToCommitClusterStateException.class)); - } + assertThat(e, instanceOf(FailedToCommitClusterStateException.class)); assertThat(e.getMessage(), equalTo(publicationFailedExceptionMessage)); if (expectFailure) { assertThat(e.getSuppressed().length, greaterThan(0)); @@ -1612,12 +1606,10 @@ public void onAckFailure(Exception e) { { final CountDownLatch latch = new CountDownLatch(1); - String publicationFailedExceptionMessage = "mock exception"; - ElasticsearchException clusterStateUpdateException = randomBoolean() - ? new FailedToPublishClusterStateException(publicationFailedExceptionMessage) - : new FailedToCommitClusterStateException(publicationFailedExceptionMessage); publisherRef.set( - (clusterChangedEvent, publishListener, ackListener) -> publishListener.onFailure(clusterStateUpdateException) + (clusterChangedEvent, publishListener, ackListener) -> publishListener.onFailure( + new FailedToCommitClusterStateException("mock exception") + ) ); masterService.submitUnbatchedStateUpdateTask( @@ -1641,12 +1633,7 @@ protected AcknowledgedResponse newResponse(boolean acknowledged) { @Override public void onFailure(Exception e) { - if (clusterStateUpdateException instanceof FailedToPublishClusterStateException) { - assertThat(e, instanceOf(FailedToPublishClusterStateException.class)); - } else { - assertThat(e, instanceOf(FailedToCommitClusterStateException.class)); - } - assertThat(e.getMessage(), equalTo(publicationFailedExceptionMessage)); + assertEquals("mock exception", asInstanceOf(FailedToCommitClusterStateException.class, e).getMessage()); latch.countDown(); } @@ -2195,7 +2182,7 @@ class TestTask implements ClusterStateTaskListener { @Override public void onFailure(Exception e) { assertEquals(expectedHeader, threadPool.getThreadContext().getHeader(testHeader)); - if ((e instanceof NotMasterException + if ((e instanceof FailedToCommitClusterStateException && e.getCause() instanceof EsRejectedExecutionException esre && esre.isExecutorShutdown()) == false) { throw new AssertionError("unexpected exception", e); @@ -2374,7 +2361,7 @@ class TestTask implements ClusterStateTaskListener { @Override public void onFailure(Exception e) { assertEquals(expectedHeader, threadPool.getThreadContext().getHeader(testHeader)); - if ((e instanceof NotMasterException + if ((e instanceof FailedToCommitClusterStateException && e.getCause() instanceof EsRejectedExecutionException esre && esre.isExecutorShutdown()) == false) { throw new AssertionError("unexpected exception", e); diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index 362dc742a09c7..8150456194b76 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -59,10 +59,7 @@ import org.elasticsearch.cluster.ClusterModule; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.ProjectState; -import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; -import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.metadata.ProjectId; @@ -1491,21 +1488,6 @@ public static Executor randomExecutor(ThreadPool threadPool, String... otherExec } } - /** - * Randomly chooses between {@link NotMasterException}, {@link FailedToCommitClusterStateException} - * and {@link FailedToPublishClusterStateException}. - */ - public static Exception randomClusterStateUpdateException() { - int choice = between(0, 2); - if (choice == 0) { - return new NotMasterException("Fake NotMasterException"); - } else if (choice == 1) { - return new FailedToCommitClusterStateException("Fake FailedToCommitClusterStateException"); - } else { - return new FailedToPublishClusterStateException(" Fake FailedToPublishClusterStateException"); - } - } - /** * helper to randomly perform on consumer with value */ diff --git a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/MoveToErrorStepUpdateTask.java b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/MoveToErrorStepUpdateTask.java index c15c848cd8ade..9013cacdd01e7 100644 --- a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/MoveToErrorStepUpdateTask.java +++ b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/MoveToErrorStepUpdateTask.java @@ -14,7 +14,6 @@ import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.ProjectState; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; -import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.LifecycleExecutionState; import org.elasticsearch.cluster.metadata.ProjectId; @@ -102,12 +101,7 @@ public int hashCode() { @Override protected void handleFailure(Exception e) { Level level; - if (ExceptionsHelper.unwrap( - e, - NotMasterException.class, - FailedToPublishClusterStateException.class, - FailedToCommitClusterStateException.class - ) != null) { + if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToCommitClusterStateException.class) != null) { level = Level.DEBUG; } else { level = Level.ERROR; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentService.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentService.java index 7472c559c4418..12f402ff18f9f 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentService.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentService.java @@ -19,7 +19,6 @@ import org.elasticsearch.cluster.ClusterStateObserver; import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; -import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.core.Nullable; @@ -162,7 +161,6 @@ public void onTimeout(TimeValue timeout) { private static final Class[] MASTER_CHANNEL_EXCEPTIONS = new Class[] { NotMasterException.class, ConnectTransportException.class, - FailedToPublishClusterStateException.class, FailedToCommitClusterStateException.class }; private static boolean isMasterChannelException(Exception exp) { From f886182df71ad436b846147076852371a4bb06fe Mon Sep 17 00:00:00 2001 From: Joshua Adams Date: Tue, 7 Oct 2025 12:01:51 +0100 Subject: [PATCH 10/11] Fix merge issues --- .../org/elasticsearch/cluster/coordination/Coordinator.java | 2 +- .../referable/failed_to_publish_cluster_state_exception.csv | 1 - .../java/org/elasticsearch/ExceptionSerializationTests.java | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) delete mode 100644 server/src/main/resources/transport/definitions/referable/failed_to_publish_cluster_state_exception.csv diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java index b201cf4cc56fa..2fe4408d142d3 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java @@ -1642,7 +1642,7 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) } catch (FailedToCommitClusterStateException | NotMasterException e) { publishListener.onFailure(e); } catch (Exception e) { - assert false : e; // all exceptions should already be caught and wrapped in a FailedToCommitClusterStateException | + assert false : e; // all exceptions should already be caught and wrapped in a FailedToCommitClusterStateException logger.error(() -> "[" + clusterStatePublicationEvent.getSummary() + "] publishing unexpectedly failed", e); publishListener.onFailure(new FailedToCommitClusterStateException("publishing unexpectedly failed", e)); } diff --git a/server/src/main/resources/transport/definitions/referable/failed_to_publish_cluster_state_exception.csv b/server/src/main/resources/transport/definitions/referable/failed_to_publish_cluster_state_exception.csv deleted file mode 100644 index 71ac710f9bfe1..0000000000000 --- a/server/src/main/resources/transport/definitions/referable/failed_to_publish_cluster_state_exception.csv +++ /dev/null @@ -1 +0,0 @@ -9187000 diff --git a/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java b/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java index 31ea6b849306c..64c0b9a780cfe 100644 --- a/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java +++ b/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java @@ -237,7 +237,7 @@ public TestException(StreamInput in) throws IOException { } private T serialize(T exception) throws IOException { - return serialize(exception, TransportVersionUtils.randomCompatibleVersion(random())); + return serialize(exception, TransportVersionUtils.randomVersion(random())); } private T serialize(T exception, TransportVersion version) throws IOException { @@ -365,7 +365,7 @@ public void testSearchContextMissingException() throws IOException { public void testCircuitBreakingException() throws IOException { CircuitBreakingException ex = serialize( new CircuitBreakingException("Too large", 0, 100, CircuitBreaker.Durability.TRANSIENT), - TransportVersionUtils.randomCompatibleVersion(random()) + TransportVersions.V_8_0_0 ); assertEquals("Too large", ex.getMessage()); assertEquals(100, ex.getByteLimit()); From a277c2eb3c9d088e83ec8cf3f3e85f59cfeef385 Mon Sep 17 00:00:00 2001 From: Joshua Adams Date: Tue, 7 Oct 2025 12:12:53 +0100 Subject: [PATCH 11/11] Fix merge issues --- .../java/org/elasticsearch/ExceptionSerializationTests.java | 6 ++++-- .../elasticsearch/cluster/service/MasterServiceTests.java | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java b/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java index 64c0b9a780cfe..1b70407f89639 100644 --- a/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java +++ b/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java @@ -86,6 +86,7 @@ import org.elasticsearch.search.aggregations.AggregationExecutionException; import org.elasticsearch.search.aggregations.MultiBucketConsumerService; import org.elasticsearch.search.aggregations.UnsupportedAggregationOnDownsampledIndex; +import org.elasticsearch.search.crossproject.NoMatchingProjectException; import org.elasticsearch.search.internal.ShardSearchContextId; import org.elasticsearch.search.query.SearchTimeoutException; import org.elasticsearch.snapshots.Snapshot; @@ -237,7 +238,7 @@ public TestException(StreamInput in) throws IOException { } private T serialize(T exception) throws IOException { - return serialize(exception, TransportVersionUtils.randomVersion(random())); + return serialize(exception, TransportVersionUtils.randomCompatibleVersion(random())); } private T serialize(T exception, TransportVersion version) throws IOException { @@ -365,7 +366,7 @@ public void testSearchContextMissingException() throws IOException { public void testCircuitBreakingException() throws IOException { CircuitBreakingException ex = serialize( new CircuitBreakingException("Too large", 0, 100, CircuitBreaker.Durability.TRANSIENT), - TransportVersions.V_8_0_0 + TransportVersionUtils.randomCompatibleVersion(random()) ); assertEquals("Too large", ex.getMessage()); assertEquals(100, ex.getByteLimit()); @@ -846,6 +847,7 @@ public void testIds() { ids.put(182, IngestPipelineException.class); ids.put(183, IndexDocFailureStoreStatus.ExceptionWithFailureStoreStatus.class); ids.put(184, RemoteException.class); + ids.put(185, NoMatchingProjectException.class); Map, Integer> reverse = new HashMap<>(); for (Map.Entry> entry : ids.entrySet()) { diff --git a/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java index 4c24abbd5fd9a..da8c013ef44c3 100644 --- a/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java @@ -2182,7 +2182,7 @@ class TestTask implements ClusterStateTaskListener { @Override public void onFailure(Exception e) { assertEquals(expectedHeader, threadPool.getThreadContext().getHeader(testHeader)); - if ((e instanceof FailedToCommitClusterStateException + if ((e instanceof NotMasterException && e.getCause() instanceof EsRejectedExecutionException esre && esre.isExecutorShutdown()) == false) { throw new AssertionError("unexpected exception", e); @@ -2361,7 +2361,7 @@ class TestTask implements ClusterStateTaskListener { @Override public void onFailure(Exception e) { assertEquals(expectedHeader, threadPool.getThreadContext().getHeader(testHeader)); - if ((e instanceof FailedToCommitClusterStateException + if ((e instanceof NotMasterException && e.getCause() instanceof EsRejectedExecutionException esre && esre.isExecutorShutdown()) == false) { throw new AssertionError("unexpected exception", e);