Use applied state after DiskThresholdMonitor reroute (#94916)

DaveCTurner · web-flow · commit c2ce4f8cd037 · 2023-03-31T03:28:54.000-04:00
Today, when the `DiskThresholdMonitor` triggers a reroute, it supplies a listener which accepts the exact cluster state that the reroute produced. However, there may have been some other cluster state updates in between the reroute computation and the completion of this listener, so the cluster state it receives is potentially stale. Moreover, by accepting the exact cluster state that the reroute produced, we require the `BatchedRerouteService` to retain every resulting state for this and all other batched reroute listeners. There's no need for this: we can instead retrieve the last-applied cluster state afresh, and avoid this unnecessary retention of cluster states. Relates #94914
diff --git a/docs/changelog/94916.yaml b/docs/changelog/94916.yaml
@@ -0,0 +1,5 @@
+pr: 94916
+summary: Use applied state after `DiskThresholdMonitor` reroute
+area: Allocation
+type: bug
+issues: []
diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/desirednodes/TransportUpdateDesiredNodesAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/desirednodes/TransportUpdateDesiredNodesAction.java
@@ -161,7 +161,7 @@ public void onFailure(Exception e) {
     }
 
     private static class UpdateDesiredNodesExecutor implements ClusterStateTaskExecutor<UpdateDesiredNodesTask> {
-        private static final ActionListener<ClusterState> REROUTE_LISTENER = ActionListener.wrap(
+        private static final ActionListener<Void> REROUTE_LISTENER = ActionListener.wrap(
             r -> logger.trace("reroute after desired nodes update completed"),
             e -> logger.debug("reroute after desired nodes update failed", e)
         );
diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java
@@ -202,7 +202,7 @@ private void reroute(final boolean updateSettingsAcked) {
                 // completed.
                 clusterService.getRerouteService().reroute(REROUTE_TASK_SOURCE, Priority.URGENT, new ActionListener<>() {
                     @Override
-                    public void onResponse(ClusterState clusterState) {
+                    public void onResponse(Void ignored) {
                         listener.onResponse(
                             new ClusterUpdateSettingsResponse(
                                 updateSettingsAcked,
diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/BatchedRerouteService.java b/server/src/main/java/org/elasticsearch/cluster/routing/BatchedRerouteService.java
@@ -43,7 +43,7 @@ public class BatchedRerouteService implements RerouteService {
 
     private final Object mutex = new Object();
     @Nullable // null if no reroute is currently pending
-    private List<ActionListener<ClusterState>> pendingRerouteListeners;
+    private List<ActionListener<Void>> pendingRerouteListeners;
     private Priority pendingTaskPriority = Priority.LANGUID;
 
     public interface RerouteAction {
@@ -62,12 +62,12 @@ public BatchedRerouteService(ClusterService clusterService, RerouteAction rerout
      * Initiates a reroute.
      */
     @Override
-    public final void reroute(String reason, Priority priority, ActionListener<ClusterState> listener) {
-        final ActionListener<ClusterState> wrappedListener = ContextPreservingActionListener.wrapPreservingContext(
+    public final void reroute(String reason, Priority priority, ActionListener<Void> listener) {
+        final ActionListener<Void> wrappedListener = ContextPreservingActionListener.wrapPreservingContext(
             listener,
             clusterService.getClusterApplierService().threadPool().getThreadContext()
         );
-        final List<ActionListener<ClusterState>> currentListeners;
+        final List<ActionListener<Void>> currentListeners;
         synchronized (mutex) {
             if (pendingRerouteListeners != null) {
                 if (priority.sameOrAfter(pendingTaskPriority)) {
@@ -152,7 +152,7 @@ public void onFailure(Exception e) {
 
                 @Override
                 public void clusterStateProcessed(ClusterState oldState, ClusterState newState) {
-                    future.addListener(ActionListener.running(() -> ActionListener.onResponse(currentListeners, newState)));
+                    future.addListener(ActionListener.running(() -> ActionListener.onResponse(currentListeners, null)));
                 }
             });
         } catch (Exception e) {
diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/RerouteService.java b/server/src/main/java/org/elasticsearch/cluster/routing/RerouteService.java
@@ -8,7 +8,6 @@
 package org.elasticsearch.cluster.routing;
 
 import org.elasticsearch.action.ActionListener;
-import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.common.Priority;
 
 /**
@@ -23,5 +22,5 @@ public interface RerouteService {
      *                 this reroute is batched with the pending one; if there is already a pending reroute at a lower priority then
      *                 the priority of the pending batch is raised to the given priority.
      */
-    void reroute(String reason, Priority priority, ActionListener<ClusterState> listener);
+    void reroute(String reason, Priority priority, ActionListener<Void> listener);
 }
diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitor.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitor.java
@@ -317,8 +317,8 @@ public void onNewInfo(ClusterInfo info) {
                 rerouteService.reroute(
                     "disk threshold monitor",
                     Priority.HIGH,
-                    ActionListener.releaseAfter(ActionListener.runAfter(ActionListener.wrap(reroutedClusterState -> {
-
+                    ActionListener.releaseAfter(ActionListener.runAfter(ActionListener.wrap(ignored -> {
+                        final var reroutedClusterState = clusterStateSupplier.get();
                         for (DiskUsage diskUsage : usagesOverHighThreshold) {
                             final RoutingNode routingNode = reroutedClusterState.getRoutingNodes().node(diskUsage.getNodeId());
                             final DiskUsage usageIncludingRelocations;
diff --git a/server/src/main/java/org/elasticsearch/snapshots/InternalSnapshotsInfoService.java b/server/src/main/java/org/elasticsearch/snapshots/InternalSnapshotsInfoService.java
@@ -53,7 +53,7 @@ public class InternalSnapshotsInfoService implements ClusterStateListener, Snaps
 
     private static final Logger logger = LogManager.getLogger(InternalSnapshotsInfoService.class);
 
-    private static final ActionListener<ClusterState> REROUTE_LISTENER = ActionListener.wrap(
+    private static final ActionListener<Void> REROUTE_LISTENER = ActionListener.wrap(
         r -> logger.trace("reroute after snapshot shard size update completed"),
         e -> logger.debug("reroute after snapshot shard size update failed", e)
     );
diff --git a/server/src/test/java/org/elasticsearch/cluster/action/shard/ShardStartedClusterStateTaskExecutorTests.java b/server/src/test/java/org/elasticsearch/cluster/action/shard/ShardStartedClusterStateTaskExecutorTests.java
@@ -44,7 +44,7 @@ public class ShardStartedClusterStateTaskExecutorTests extends ESAllocationTestC
     private ShardStateAction.ShardStartedClusterStateTaskExecutor executor;
 
     @SuppressWarnings("unused")
-    private static void neverReroutes(String reason, Priority priority, ActionListener<ClusterState> listener) {
+    private static void neverReroutes(String reason, Priority priority, ActionListener<Void> listener) {
         fail("unexpectedly ran a deferred reroute");
     }
 
diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitorTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitorTests.java
@@ -266,7 +266,7 @@ private void doTestDoesNotSubmitRerouteTaskTooFrequently(boolean testMaxHeadroom
             .nodes(DiscoveryNodes.builder().add(newNormalNode("node1")).add(newNormalNode("node2")))
             .build();
         AtomicLong currentTime = new AtomicLong();
-        AtomicReference<ActionListener<ClusterState>> listenerReference = new AtomicReference<>();
+        AtomicReference<ActionListener<Void>> listenerReference = new AtomicReference<>();
         DiskThresholdMonitor monitor = new DiskThresholdMonitor(
             Settings.EMPTY,
             () -> clusterState,
@@ -316,7 +316,7 @@ protected void updateIndicesReadOnly(Set<String> indicesToMarkReadOnly, Releasab
         currentTime.addAndGet(randomLongBetween(0, 120000));
         monitor.onNewInfo(clusterInfo(allDisksOk));
         assertNotNull(listenerReference.get());
-        listenerReference.getAndSet(null).onResponse(clusterState);
+        listenerReference.getAndSet(null).onResponse(null);
 
         // should not reroute when all disks are ok and no new info received
         currentTime.addAndGet(randomLongBetween(0, 120000));
@@ -327,7 +327,7 @@ protected void updateIndicesReadOnly(Set<String> indicesToMarkReadOnly, Releasab
         if (randomBoolean()) {
             currentTime.addAndGet(randomLongBetween(0, 120000));
             monitor.onNewInfo(clusterInfo(oneDiskAboveWatermark));
-            Optional.ofNullable(listenerReference.getAndSet(null)).ifPresent(l -> l.onResponse(clusterState));
+            Optional.ofNullable(listenerReference.getAndSet(null)).ifPresent(l -> l.onResponse(null));
         }
 
         // however once the reroute interval has elapsed then we must reroute again
@@ -339,7 +339,7 @@ protected void updateIndicesReadOnly(Set<String> indicesToMarkReadOnly, Releasab
         );
         monitor.onNewInfo(clusterInfo(oneDiskAboveWatermark));
         assertNotNull(listenerReference.get());
-        listenerReference.getAndSet(null).onResponse(clusterState);
+        listenerReference.getAndSet(null).onResponse(null);
 
         if (randomBoolean()) {
             // should not re-route again within the reroute interval
@@ -362,15 +362,15 @@ protected void updateIndicesReadOnly(Set<String> indicesToMarkReadOnly, Releasab
         );
         monitor.onNewInfo(clusterInfo(oneDiskAboveWatermark));
         assertNotNull(listenerReference.get());
-        final ActionListener<ClusterState> rerouteListener1 = listenerReference.getAndSet(null);
+        final ActionListener<Void> rerouteListener1 = listenerReference.getAndSet(null);
 
         // should not re-route again before reroute has completed
         currentTime.addAndGet(randomLongBetween(0, 120000));
         monitor.onNewInfo(clusterInfo(allDisksOk));
         assertNull(listenerReference.get());
 
         // complete reroute
-        rerouteListener1.onResponse(clusterState);
+        rerouteListener1.onResponse(null);
 
         if (randomBoolean()) {
             // should not re-route again within the reroute interval
@@ -475,7 +475,7 @@ private void doTestAutoReleaseIndices(boolean testMaxHeadroom) {
             (reason, priority, listener) -> {
                 assertNotNull(listener);
                 assertThat(priority, equalTo(Priority.HIGH));
-                listener.onResponse(clusterState);
+                listener.onResponse(null);
             }
         ) {
             @Override
@@ -567,7 +567,7 @@ protected void updateIndicesReadOnly(Set<String> indicesToUpdate, Releasable onC
             (reason, priority, listener) -> {
                 assertNotNull(listener);
                 assertThat(priority, equalTo(Priority.HIGH));
-                listener.onResponse(clusterStateWithBlocks);
+                listener.onResponse(null);
             }
         ) {
             @Override
@@ -813,7 +813,7 @@ private void doTestNoAutoReleaseOfIndicesOnReplacementNodes(boolean testMaxHeadr
             (reason, priority, listener) -> {
                 assertNotNull(listener);
                 assertThat(priority, equalTo(Priority.HIGH));
-                listener.onResponse(currentClusterState.get());
+                listener.onResponse(null);
             }
         ) {
             @Override
@@ -1058,7 +1058,7 @@ public long getAsLong() {
             new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS),
             null,
             timeSupplier,
-            (reason, priority, listener) -> listener.onResponse(clusterStateRef.get())
+            (reason, priority, listener) -> listener.onResponse(null)
         ) {
             @Override
             protected void updateIndicesReadOnly(Set<String> indicesToMarkReadOnly, Releasable onCompletion, boolean readOnly) {
diff --git a/server/src/test/java/org/elasticsearch/snapshots/InternalSnapshotsInfoServiceTests.java b/server/src/test/java/org/elasticsearch/snapshots/InternalSnapshotsInfoServiceTests.java
@@ -91,7 +91,7 @@ public void setUp() throws Exception {
         threadPool = new TestThreadPool(getTestName());
         clusterService = ClusterServiceUtils.createClusterService(threadPool);
         repositoriesService = mock(RepositoriesService.class);
-        rerouteService = (reason, priority, listener) -> listener.onResponse(clusterService.state());
+        rerouteService = (reason, priority, listener) -> listener.onResponse(null);
     }
 
     @After
@@ -109,7 +109,7 @@ public void testSnapshotShardSizes() throws Exception {
         final int numberOfShards = randomIntBetween(1, 50);
         final CountDownLatch rerouteLatch = new CountDownLatch(numberOfShards);
         final RerouteService rerouteService = (reason, priority, listener) -> {
-            listener.onResponse(clusterService.state());
+            listener.onResponse(null);
             assertThat(rerouteLatch.getCount(), greaterThanOrEqualTo(0L));
             rerouteLatch.countDown();
         };
@@ -181,7 +181,7 @@ public void testErroneousSnapshotShardSizes() throws Exception {
         final CountDown reroutes = new CountDown(maxShardsToCreate);
         final RerouteService rerouteService = (reason, priority, listener) -> {
             try {
-                listener.onResponse(clusterService.state());
+                listener.onResponse(null);
             } finally {
                 if (reroutes.countDown()) {
                     waitForAllReroutesProcessed.onResponse(null);
diff --git a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/action/TransportMigrateToDataTiersAction.java b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/action/TransportMigrateToDataTiersAction.java
@@ -141,9 +141,9 @@ public void onFailure(Exception e) {
             @Override
             public void clusterStateProcessed(ClusterState oldState, ClusterState newState) {
                 clusterService.getRerouteService()
-                    .reroute("cluster migrated to data tiers routing", Priority.NORMAL, new ActionListener<ClusterState>() {
+                    .reroute("cluster migrated to data tiers routing", Priority.NORMAL, new ActionListener<Void>() {
                         @Override
-                        public void onResponse(ClusterState clusterState) {}
+                        public void onResponse(Void ignored) {}
 
                         @Override
                         public void onFailure(Exception e) {
diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/FailShardsOnInvalidLicenseClusterListener.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/FailShardsOnInvalidLicenseClusterListener.java
@@ -11,7 +11,6 @@
 import org.apache.logging.log4j.Logger;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.elasticsearch.action.ActionListener;
-import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.routing.RerouteService;
 import org.elasticsearch.common.Priority;
 import org.elasticsearch.common.settings.Settings;
@@ -63,9 +62,9 @@ public synchronized void beforeIndexShardClosed(ShardId shardId, @Nullable Index
     public synchronized void licenseStateChanged() {
         final boolean allowed = ARCHIVE_FEATURE.checkWithoutTracking(xPackLicenseState);
         if (allowed && this.allowed == false) {
-            rerouteService.reroute("reroute after license activation", Priority.NORMAL, new ActionListener<ClusterState>() {
+            rerouteService.reroute("reroute after license activation", Priority.NORMAL, new ActionListener<>() {
                 @Override
-                public void onResponse(ClusterState clusterState) {
+                public void onResponse(Void ignored) {
                     logger.trace("successful reroute after license activation");
                 }
 
diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/allocation/FailShardsOnInvalidLicenseClusterListener.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/allocation/FailShardsOnInvalidLicenseClusterListener.java
@@ -11,7 +11,6 @@
 import org.apache.logging.log4j.Logger;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.elasticsearch.action.ActionListener;
-import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.routing.RerouteService;
 import org.elasticsearch.common.Priority;
 import org.elasticsearch.common.settings.Settings;
@@ -63,9 +62,9 @@ public synchronized void beforeIndexShardClosed(ShardId shardId, @Nullable Index
     public synchronized void licenseStateChanged() {
         final boolean isAllowed = SEARCHABLE_SNAPSHOT_FEATURE.checkWithoutTracking(xPackLicenseState);
         if (isAllowed && this.allowed == false) {
-            rerouteService.reroute("reroute after license activation", Priority.NORMAL, new ActionListener<ClusterState>() {
+            rerouteService.reroute("reroute after license activation", Priority.NORMAL, new ActionListener<Void>() {
                 @Override
-                public void onResponse(ClusterState clusterState) {
+                public void onResponse(Void ignored) {
                     logger.trace("successful reroute after license activation");
                 }
 
diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotAllocator.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotAllocator.java
@@ -12,7 +12,6 @@
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.action.FailedNodeException;
 import org.elasticsearch.client.internal.Client;
-import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.RestoreInProgress;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.cluster.metadata.RepositoriesMetadata;
@@ -76,9 +75,9 @@ public class SearchableSnapshotAllocator implements ExistingShardsAllocator {
 
     private static final Logger logger = LogManager.getLogger(SearchableSnapshotAllocator.class);
 
-    private static final ActionListener<ClusterState> REROUTE_LISTENER = new ActionListener<>() {
+    private static final ActionListener<Void> REROUTE_LISTENER = new ActionListener<>() {
         @Override
-        public void onResponse(ClusterState clusterRerouteResponse) {
+        public void onResponse(Void ignored) {
             logger.trace("reroute succeeded after loading snapshot cache information");
         }
 
diff --git a/x-pack/plugin/shutdown/src/main/java/org/elasticsearch/xpack/shutdown/TransportDeleteShutdownNodeAction.java b/x-pack/plugin/shutdown/src/main/java/org/elasticsearch/xpack/shutdown/TransportDeleteShutdownNodeAction.java
@@ -57,7 +57,7 @@ private static boolean deleteShutdownNodeState(Map<String, SingleNodeShutdownMet
     private static void ackAndReroute(Request request, ActionListener<AcknowledgedResponse> listener, RerouteService rerouteService) {
         rerouteService.reroute("node registered for removal from cluster", Priority.URGENT, new ActionListener<>() {
             @Override
-            public void onResponse(ClusterState clusterState) {}
+            public void onResponse(Void ignored) {}
 
             @Override
             public void onFailure(Exception e) {
diff --git a/x-pack/plugin/shutdown/src/main/java/org/elasticsearch/xpack/shutdown/TransportPutShutdownNodeAction.java b/x-pack/plugin/shutdown/src/main/java/org/elasticsearch/xpack/shutdown/TransportPutShutdownNodeAction.java
@@ -87,7 +87,7 @@ private static void ackAndMaybeReroute(Request request, ActionListener<Acknowled
         if (shouldReroute) {
             rerouteService.reroute("node registered for removal from cluster", Priority.URGENT, new ActionListener<>() {
                 @Override
-                public void onResponse(ClusterState clusterState) {}
+                public void onResponse(Void ignored) {}
 
                 @Override
                 public void onFailure(Exception e) {

Original file line number	Diff line number	Diff line change
`@@ -161,7 +161,7 @@ public void onFailure(Exception e) {`
`161`	`161`	`}`
`162`	`162`
`163`	`163`	`private static class UpdateDesiredNodesExecutor implements ClusterStateTaskExecutor<UpdateDesiredNodesTask> {`
`164`		`- private static final ActionListener<ClusterState> REROUTE_LISTENER = ActionListener.wrap(`
	`164`	`+ private static final ActionListener<Void> REROUTE_LISTENER = ActionListener.wrap(`
`165`	`165`	`r -> logger.trace("reroute after desired nodes update completed"),`
`166`	`166`	`e -> logger.debug("reroute after desired nodes update failed", e)`
`167`	`167`	`);`
Original file line number	Diff line number	Diff line change
`@@ -44,7 +44,7 @@ public class ShardStartedClusterStateTaskExecutorTests extends ESAllocationTestC`
`44`	`44`	`private ShardStateAction.ShardStartedClusterStateTaskExecutor executor;`
`45`	`45`
`46`	`46`	`@SuppressWarnings("unused")`
`47`		`- private static void neverReroutes(String reason, Priority priority, ActionListener<ClusterState> listener) {`
	`47`	`+ private static void neverReroutes(String reason, Priority priority, ActionListener<Void> listener) {`
`48`	`48`	`fail("unexpectedly ran a deferred reroute");`
`49`	`49`	`}`
`50`	`50`