From 0a0418dd8e400e391b50281d83d98555536f8eb0 Mon Sep 17 00:00:00 2001 From: Joshua Adams Date: Fri, 26 Sep 2025 16:53:41 +0100 Subject: [PATCH 1/4] Changes FailedToCommitClusterStateException to NotMasterException Changes a FailedToCommitClusterStateException incorrectly thrown prior to cluster state update publication to a NotMasterException --- .../cluster/NotMasterException.java | 4 +++ .../cluster/coordination/Coordinator.java | 7 ++-- .../cluster/service/MasterService.java | 33 ++++++++++++++++--- 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/NotMasterException.java b/server/src/main/java/org/elasticsearch/cluster/NotMasterException.java index 43c43bda9aa8c..98da31604a33d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/NotMasterException.java +++ b/server/src/main/java/org/elasticsearch/cluster/NotMasterException.java @@ -26,6 +26,10 @@ public NotMasterException(StreamInput in) throws IOException { super(in); } + public NotMasterException(String msg, Object... args) { + super(msg, args); + } + @Override public Throwable fillInStackTrace() { return this; diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java index 1976bda6c6aba..f003746331192 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java @@ -24,6 +24,7 @@ import org.elasticsearch.cluster.ClusterStatePublicationEvent; import org.elasticsearch.cluster.ClusterStateUpdateTask; import org.elasticsearch.cluster.LocalMasterServiceTask; +import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.block.ClusterBlocks; import org.elasticsearch.cluster.coordination.ClusterFormationFailureHelper.ClusterFormationState; import org.elasticsearch.cluster.coordination.CoordinationMetadata.VotingConfigExclusion; @@ -1552,7 +1553,7 @@ public void publish( clusterStatePublicationEvent.getNewState().term() ) ); - throw new FailedToCommitClusterStateException( + throw new NotMasterException( "node is no longer master for term " + clusterStatePublicationEvent.getNewState().term() + " while handling publication" @@ -1638,8 +1639,8 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) } } } - } catch (FailedToCommitClusterStateException failedToCommitClusterStateException) { - publishListener.onFailure(failedToCommitClusterStateException); + } catch (FailedToCommitClusterStateException | NotMasterException e) { + publishListener.onFailure(e); } catch (Exception e) { assert false : e; // all exceptions should already be caught and wrapped in a FailedToCommitClusterStateException logger.error(() -> "[" + clusterStatePublicationEvent.getSummary() + "] publishing unexpectedly failed", e); diff --git a/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java b/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java index 596cce87fd151..a917e992d1c6f 100644 --- a/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java +++ b/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java @@ -415,13 +415,30 @@ public void onResponse(Void unused) { @Override public void onFailure(Exception exception) { - if (exception instanceof FailedToCommitClusterStateException failedToCommitClusterStateException) { + if (exception instanceof FailedToCommitClusterStateException || exception instanceof NotMasterException) { final long notificationStartTime = threadPool.rawRelativeTimeInMillis(); final long version = newClusterState.version(); - logger.warn(() -> format("failing [%s]: failed to commit cluster state version [%s]", summary, version), exception); + + if (exception instanceof FailedToCommitClusterStateException) { + logger.warn( + () -> format("failing [%s]: failed to commit cluster state version [%s]", summary, version), + exception + ); + } else { + logger.debug( + () -> format( + "node is no longer the master prior to publication of cluster state version [%s]: [%s]", + version, + summary + ), + exception + ); + } + for (final var executionResult : executionResults) { - executionResult.onPublishFailure(failedToCommitClusterStateException); + executionResult.onPublishFailure(exception); } + final long notificationMillis = threadPool.rawRelativeTimeInMillis() - notificationStartTime; clusterStateUpdateStatsTracker.onPublicationFailure( threadPool.rawRelativeTimeInMillis(), @@ -985,11 +1002,17 @@ void onClusterStateUnchanged(ClusterState clusterState) { } } - void onPublishFailure(FailedToCommitClusterStateException e) { + void onPublishFailure(Exception e) { if (publishedStateConsumer == null && onPublicationSuccess == null) { assert failure != null; var taskFailure = failure; - failure = new FailedToCommitClusterStateException(e.getMessage(), e); + + if (e instanceof FailedToCommitClusterStateException) { + failure = new FailedToCommitClusterStateException(e.getMessage(), e); + } else { + failure = new NotMasterException(e.getMessage(), e); + } + failure.addSuppressed(taskFailure); notifyFailure(); return; From b65be1a3bbf30396b113105e4daa206e3f87e624 Mon Sep 17 00:00:00 2001 From: Joshua Adams Date: Thu, 2 Oct 2025 14:48:02 +0100 Subject: [PATCH 2/4] Jeremy Updates --- .../org/elasticsearch/cluster/service/MasterService.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java b/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java index a917e992d1c6f..a27600a79263a 100644 --- a/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java +++ b/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java @@ -421,15 +421,15 @@ public void onFailure(Exception exception) { if (exception instanceof FailedToCommitClusterStateException) { logger.warn( - () -> format("failing [%s]: failed to commit cluster state version [%s]", summary, version), + () -> format("Failing [%s]: failed to commit cluster state version [%s]", summary, version), exception ); } else { logger.debug( () -> format( - "node is no longer the master prior to publication of cluster state version [%s]: [%s]", - version, - summary + "Failing [%s]: node is no longer the master. The cluster state update has not been published [%s]", + summary, + version ), exception ); @@ -1003,6 +1003,7 @@ void onClusterStateUnchanged(ClusterState clusterState) { } void onPublishFailure(Exception e) { + assert e instanceof FailedToCommitClusterStateException || e instanceof NotMasterException; if (publishedStateConsumer == null && onPublicationSuccess == null) { assert failure != null; var taskFailure = failure; From e296628a155b467a36eb5c84c603531f8c33dc0c Mon Sep 17 00:00:00 2001 From: Joshua Adams Date: Thu, 2 Oct 2025 14:51:16 +0100 Subject: [PATCH 3/4] Update log message --- .../java/org/elasticsearch/cluster/service/MasterService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java b/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java index 0f6f7d6fb8428..06fb85cf377c5 100644 --- a/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java +++ b/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java @@ -427,7 +427,7 @@ public void onFailure(Exception exception) { } else { logger.debug( () -> format( - "Failing [%s]: node is no longer the master. The cluster state update has not been published [%s]", + "Failing [%s]: node is no longer the master. Failed to publish cluster state version [%s]", summary, version ), From 956990207f67483561caa4cbe14173168fb2a33c Mon Sep 17 00:00:00 2001 From: Joshua Adams Date: Mon, 6 Oct 2025 11:23:54 +0100 Subject: [PATCH 4/4] Update assertion Co-authored-by: David Turner --- .../java/org/elasticsearch/cluster/service/MasterService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java b/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java index 06fb85cf377c5..10cd9e9c26f7b 100644 --- a/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java +++ b/server/src/main/java/org/elasticsearch/cluster/service/MasterService.java @@ -1003,7 +1003,7 @@ void onClusterStateUnchanged(ClusterState clusterState) { } void onPublishFailure(Exception e) { - assert e instanceof FailedToCommitClusterStateException || e instanceof NotMasterException; + assert e instanceof FailedToCommitClusterStateException || e instanceof NotMasterException : e; if (publishedStateConsumer == null && onPublicationSuccess == null) { assert failure != null; var taskFailure = failure;