elastic · joshua-adams-1 · Oct 8, 2025 · Sep 26, 2025 · Sep 30, 2025 · Oct 2, 2025
diff --git a/server/src/main/java/org/elasticsearch/ElasticsearchException.java b/server/src/main/java/org/elasticsearch/ElasticsearchException.java
@@ -87,6 +87,10 @@ public class ElasticsearchException extends RuntimeException implements ToXConte
 
     private static final TransportVersion UNKNOWN_VERSION_ADDED = TransportVersion.zero();
 
+    private static final TransportVersion FAILED_TO_PUBLISH_CLUSTER_STATE_EXCEPTION_TRANSPORT_VERSION = TransportVersion.fromName(
+        "failed_to_publish_cluster_state_exception"
+    );
+
     /**
      * Passed in the {@link Params} of {@link #generateThrowableXContent(XContentBuilder, Params, Throwable)}
      * to control if the {@code caused_by} element should render. Unlike most parameters to {@code toXContent} methods this parameter is
@@ -2022,6 +2026,12 @@ private enum ElasticsearchExceptionHandle {
             184,
             TransportVersions.REMOTE_EXCEPTION,
             TransportVersions.REMOTE_EXCEPTION_8_19
+        ),
+        FAILED_TO_PUBLISH_CLUSTER_STATE_EXCEPTION(
+            org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException.class,
+            org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException::new,
+            185,
+            FAILED_TO_PUBLISH_CLUSTER_STATE_EXCEPTION_TRANSPORT_VERSION
         );
 
         final Class<? extends ElasticsearchException> exceptionClass;

diff --git a/server/src/main/java/org/elasticsearch/cluster/NotMasterException.java b/server/src/main/java/org/elasticsearch/cluster/NotMasterException.java
@@ -26,6 +26,31 @@ public NotMasterException(StreamInput in) throws IOException {
         super(in);
     }
 
+    /**
+     * Creates the exception with a message, forwarded together with {@code args} to the superclass.
+     *
+     * @param msg  the message forwarded to the superclass
+     * @param args arguments for the message; a {@link Throwable} passed here is forwarded as an ordinary argument, not as the
+     *             cause, so use {@link #NotMasterException(String, Throwable, Object...)} to wrap another exception
+     */
+    public NotMasterException(String msg, Object... args) {
+        super(msg, args);
+    }
+
+    /**
+     * Creates the exception with a message and its arguments, keeping {@code cause} as the cause.
+     * <p>
+     * Without this overload a call such as {@code new NotMasterException(e.getMessage(), e)} binds to the {@code Object...}
+     * constructor and the wrapped exception is not passed on as the cause.
+     *
+     * @param msg   the message forwarded to the superclass
+     * @param cause the exception being wrapped
+     * @param args  arguments for the message
+     */
+    public NotMasterException(String msg, Throwable cause, Object... args) {
+        super(msg, cause, args);
+    }
+
     @Override
     public Throwable fillInStackTrace() {
         return this;

diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java
@@ -24,6 +24,7 @@
 import org.elasticsearch.cluster.ClusterStatePublicationEvent;
 import org.elasticsearch.cluster.ClusterStateUpdateTask;
 import org.elasticsearch.cluster.LocalMasterServiceTask;
+import org.elasticsearch.cluster.NotMasterException;
 import org.elasticsearch.cluster.block.ClusterBlocks;
 import org.elasticsearch.cluster.coordination.ClusterFormationFailureHelper.ClusterFormationState;
 import org.elasticsearch.cluster.coordination.CoordinationMetadata.VotingConfigExclusion;
@@ -1552,7 +1553,7 @@ public void publish(
                             clusterStatePublicationEvent.getNewState().term()
                         )
                     );
-                    throw new FailedToCommitClusterStateException(
+                    throw new NotMasterException(
                         "node is no longer master for term "
                             + clusterStatePublicationEvent.getNewState().term()
                             + " while handling publication"
@@ -1567,7 +1568,7 @@ public void publish(
                             clusterStatePublicationEvent.getSummary()
                         )
                     );
-                    throw new FailedToCommitClusterStateException("publication " + currentPublication.get() + " already in progress");
+                    throw new FailedToPublishClusterStateException("publication " + currentPublication.get() + " already in progress");
                 }
 
                 assert assertPreviousStateConsistency(clusterStatePublicationEvent);
@@ -1586,7 +1587,7 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId())
                 } catch (Exception e) {
                     logger.debug(() -> "[" + clusterStatePublicationEvent.getSummary() + "] publishing failed during context creation", e);
                     becomeCandidate("publication context creation");
-                    throw new FailedToCommitClusterStateException("publishing failed during context creation", e);
+                    throw new FailedToPublishClusterStateException("publishing failed during context creation", e);
                 }
 
                 try (Releasable ignored = publicationContext::decRef) {
@@ -1607,7 +1608,7 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId())
                             e
                         );
                         becomeCandidate("publication creation");
-                        throw new FailedToCommitClusterStateException("publishing failed while starting", e);
+                        throw new FailedToPublishClusterStateException("publishing failed while starting", e);
                     }
 
                     try {
@@ -1638,12 +1639,12 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId())
                     }
                 }
             }
-        } catch (FailedToCommitClusterStateException failedToCommitClusterStateException) {
-            publishListener.onFailure(failedToCommitClusterStateException);
+        } catch (FailedToPublishClusterStateException | FailedToCommitClusterStateException | NotMasterException e) {
+            publishListener.onFailure(e);
         } catch (Exception e) {
-            assert false : e; // all exceptions should already be caught and wrapped in a FailedToCommitClusterStateException
+            assert false : e; // all expected failures (failed to publish, failed to commit, not master) are handled by the catch above
             logger.error(() -> "[" + clusterStatePublicationEvent.getSummary() + "] publishing unexpectedly failed", e);
-            publishListener.onFailure(new FailedToCommitClusterStateException("publishing unexpectedly failed", e));
+            publishListener.onFailure(new FailedToPublishClusterStateException("publishing unexpectedly failed", e));
         }
     }
 

diff --git a/...main/java/org/elasticsearch/cluster/coordination/FailedToCommitClusterStateException.java b/...main/java/org/elasticsearch/cluster/coordination/FailedToCommitClusterStateException.java
@@ -9,15 +9,23 @@
 package org.elasticsearch.cluster.coordination;
 
 import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.action.support.master.TransportMasterNodeAction;
 import org.elasticsearch.common.io.stream.StreamInput;
 
 import java.io.IOException;
 
 /**
- * Thrown when a cluster state publication fails to commit the new cluster state. If publication fails then a new master is elected but the
- * update might or might not take effect, depending on whether or not the newly-elected master accepted the published state that failed to
- * be committed.
- *
+ * Exception indicating a cluster state update was published but may not have been committed.
+ * <p>
+ * If this exception is thrown, then the cluster state update was published, but is not guaranteed
+ * to be committed on any nodes, including the next master node. This exception should only be thrown when there is
+ * <i>ambiguity</i> whether a cluster state update has been committed.
+ * <p>
+ * For exceptions thrown prior to publication,
+ * when the cluster update has <i>definitely</i> failed, use a {@link FailedToPublishClusterStateException}.
+ * <p>
+ * This is a retryable exception inside {@link TransportMasterNodeAction}.
+ * <p>
  * See {@link ClusterStatePublisher} for more details.
  */
 public class FailedToCommitClusterStateException extends ElasticsearchException {

diff --git a/...ain/java/org/elasticsearch/cluster/coordination/FailedToPublishClusterStateException.java b/...ain/java/org/elasticsearch/cluster/coordination/FailedToPublishClusterStateException.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+package org.elasticsearch.cluster.coordination;
+
+import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.action.support.master.TransportMasterNodeAction;
+import org.elasticsearch.common.io.stream.StreamInput;
+
+import java.io.IOException;
+
+/**
+ * Exception indicating a cluster state update failed prior to publication.
+ * <p>
+ * If this exception is thrown, then the cluster state update was <i>not</i> published to any node.
+ * It is therefore impossible for the new master to have committed this state.
+ * <p>
+ * For exceptions thrown <i>after</i> publication, when the cluster state update may or may not have been committed,
+ * use a {@link FailedToCommitClusterStateException}. Instances carry no stack trace, see {@link #fillInStackTrace()}.
+ * <p>
+ * This is a retryable exception inside {@link TransportMasterNodeAction}.
+ */
+public class FailedToPublishClusterStateException extends ElasticsearchException {
+
+    public FailedToPublishClusterStateException(String msg) {
+        super(msg);
+    }
+
+    public FailedToPublishClusterStateException(StreamInput in) throws IOException {
+        super(in);
+    }
+
+    public FailedToPublishClusterStateException(String msg, Throwable cause, Object... args) {
+        super(msg, cause, args);
+    }
+
+    @Override
+    public Throwable fillInStackTrace() {
+        return this;
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java b/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java
@@ -26,6 +26,7 @@
 import org.elasticsearch.cluster.NotMasterException;
 import org.elasticsearch.cluster.coordination.ClusterStatePublisher;
 import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException;
+import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException;
 import org.elasticsearch.cluster.metadata.ProcessClusterEventTimeoutException;
 import org.elasticsearch.cluster.metadata.ProjectMetadata;
 import org.elasticsearch.cluster.node.DiscoveryNode;
@@ -415,13 +416,37 @@ public void onResponse(Void unused) {
 
                 @Override
                 public void onFailure(Exception exception) {
-                    if (exception instanceof FailedToCommitClusterStateException failedToCommitClusterStateException) {
+                    if (exception instanceof FailedToPublishClusterStateException
+                        || exception instanceof FailedToCommitClusterStateException
+                        || exception instanceof NotMasterException) {
                         final long notificationStartTime = threadPool.rawRelativeTimeInMillis();
                         final long version = newClusterState.version();
-                        logger.warn(() -> format("failing [%s]: failed to commit cluster state version [%s]", summary, version), exception);
+
+                        if (exception instanceof FailedToCommitClusterStateException) {
+                            logger.warn(
+                                () -> format("failing [%s]: failed to commit cluster state version [%s]", summary, version),
+                                exception
+                            );
+                        } else if (exception instanceof FailedToPublishClusterStateException) {
+                            logger.warn(
+                                () -> format("failing [%s]: failed to publish cluster state version [%s]", summary, version),
+                                exception
+                            );
+                        } else {
+                            logger.debug(
+                                () -> format(
+                                    "node is no longer the master prior to publication of cluster state version [%s]: [%s]",
+                                    version,
+                                    summary
+                                ),
+                                exception
+                            );
+                        }
+
                         for (final var executionResult : executionResults) {
-                            executionResult.onPublishFailure(failedToCommitClusterStateException);
+                            executionResult.onPublishFailure(exception);
                         }
+
                         final long notificationMillis = threadPool.rawRelativeTimeInMillis() - notificationStartTime;
                         clusterStateUpdateStatsTracker.onPublicationFailure(
                             threadPool.rawRelativeTimeInMillis(),
@@ -985,11 +1010,17 @@ void onClusterStateUnchanged(ClusterState clusterState) {
             }
         }
 
-        void onPublishFailure(FailedToCommitClusterStateException e) {
+        void onPublishFailure(Exception e) {
             if (publishedStateConsumer == null && onPublicationSuccess == null) {
                 assert failure != null;
                 var taskFailure = failure;
-                failure = new FailedToCommitClusterStateException(e.getMessage(), e);
+
+                if (e instanceof FailedToCommitClusterStateException) {
+                    failure = new FailedToCommitClusterStateException(e.getMessage(), e);
+                } else {
+                    failure = new NotMasterException(e.getMessage(), e);
+                }
+
                 failure.addSuppressed(taskFailure);
                 notifyFailure();
                 return;

diff --git a/server/src/main/resources/transport/upper_bounds/9.2.csv b/server/src/main/resources/transport/upper_bounds/9.2.csv
@@ -1 +1 @@
-roles_security_stats,9176000
+failed_to_publish_cluster_state_exception,9183000
diff --git a/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java b/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java
@@ -25,6 +25,7 @@
 import org.elasticsearch.cluster.action.shard.ShardStateAction;
 import org.elasticsearch.cluster.block.ClusterBlockException;
 import org.elasticsearch.cluster.coordination.CoordinationStateRejectedException;
+import org.elasticsearch.cluster.coordination.FailedToPublishClusterStateException;
 import org.elasticsearch.cluster.coordination.NoMasterBlockService;
 import org.elasticsearch.cluster.coordination.NodeHealthCheckFailureException;
 import org.elasticsearch.cluster.desirednodes.VersionConflictException;
@@ -846,6 +847,7 @@ public void testIds() {
         ids.put(182, IngestPipelineException.class);
         ids.put(183, IndexDocFailureStoreStatus.ExceptionWithFailureStoreStatus.class);
         ids.put(184, RemoteException.class);
+        ids.put(185, FailedToPublishClusterStateException.class);
 
         Map<Class<? extends ElasticsearchException>, Integer> reverse = new HashMap<>();
         for (Map.Entry<Integer, Class<? extends ElasticsearchException>> entry : ids.entrySet()) {
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		roles_security_stats,9176000
		failed_to_publish_cluster_state_exception,9183000