Merged

33 commits
2dd9eac
First pass on WriteLoadConstraintMonitor
nicktindall Aug 14, 2025
471bc19
Fix disabled condition
nicktindall Aug 15, 2025
0e4db55
Merge remote-tracking branch 'origin/main' into ES-119922_implement_W…
nicktindall Aug 15, 2025
7ab66a7
Add tests
nicktindall Aug 18, 2025
018e100
Merge remote-tracking branch 'origin/main' into ES-119922_implement_W…
nicktindall Aug 18, 2025
6bd054f
equal to threshold is OK
nicktindall Aug 18, 2025
2484a77
More random
nicktindall Aug 18, 2025
899ba00
Merge remote-tracking branch 'origin/main' into ES-119922_implement_W…
nicktindall Aug 19, 2025
7049a5e
Store high utilization threshold as ratio
nicktindall Aug 19, 2025
c57935e
Change default queue latency threshold to 5s
nicktindall Aug 19, 2025
6971468
Update server/src/main/java/org/elasticsearch/cluster/routing/allocat…
nicktindall Aug 19, 2025
d3308d2
Use notFullyEnabled
nicktindall Aug 19, 2025
b816a15
Include hot-spotted and under-threshold node IDs in reason, in-line c…
nicktindall Aug 19, 2025
8ad414b
Fix constraints
nicktindall Aug 19, 2025
62bdc83
Assert write thread pool stats are present
nicktindall Aug 19, 2025
a326745
Change debug message when no nodes exceeding threshold
nicktindall Aug 19, 2025
ee54418
Make over-latency-threshold and under-utilization threshold mutually …
nicktindall Aug 19, 2025
d8ff9e5
Tidy
nicktindall Aug 21, 2025
996ac06
Simplify reason, write detailed log message when debug enabled
nicktindall Aug 21, 2025
2f7123f
Merge remote-tracking branch 'origin/main' into ES-119922_implement_W…
nicktindall Aug 21, 2025
1789a51
Don't ignore hot-spotted nodes with shard movement in progress
nicktindall Aug 27, 2025
6e5d393
latencyThresholdMillis -> queueLatencyThresholdMillis
nicktindall Aug 27, 2025
8a66257
Use system time by default
nicktindall Aug 27, 2025
8a2b43b
Remove dead code
nicktindall Aug 27, 2025
8b23b03
rerouteService -> mockRerouteService
nicktindall Aug 27, 2025
1cd4154
createClusterInfoWithHotSpots, add better javadoc
nicktindall Aug 27, 2025
1a4979a
Fix test names
nicktindall Aug 27, 2025
98ba24d
Improve comment
nicktindall Aug 27, 2025
1d63962
Leave default as 30 for this PR
nicktindall Aug 27, 2025
3510567
Merge branch 'main' into ES-119922_implement_WriteLoadConstraintMontitor
nicktindall Aug 27, 2025
2abd946
Explicitly configure a non-zero reroute interval
nicktindall Aug 28, 2025
06beb49
Merge remote-tracking branch 'origin/main' into ES-119922_implement_W…
nicktindall Aug 28, 2025
81eb6e5
Call reroute even if there are no nodes under utilisation/latency thr…
nicktindall Aug 29, 2025

@@ -89,33 +89,5 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeFloat(this.averageThreadPoolUtilization);
out.writeVLong(this.maxThreadPoolQueueLatencyMillis);
}

@Override
public int hashCode() {
return Objects.hash(totalThreadPoolThreads, averageThreadPoolUtilization, maxThreadPoolQueueLatencyMillis);
}

@Override
public String toString() {
return "[totalThreadPoolThreads="
+ totalThreadPoolThreads
+ ", averageThreadPoolUtilization="
+ averageThreadPoolUtilization
+ ", maxThreadPoolQueueLatencyMillis="
+ maxThreadPoolQueueLatencyMillis
+ "]";
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
ThreadPoolUsageStats other = (ThreadPoolUsageStats) o;
return totalThreadPoolThreads == other.totalThreadPoolThreads
&& averageThreadPoolUtilization == other.averageThreadPoolUtilization
&& maxThreadPoolQueueLatencyMillis == other.maxThreadPoolQueueLatencyMillis;
}

} // ThreadPoolUsageStats
Contributor Author

This is a record and these looked like the default equals/hashCode/toString
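
For illustration only, a minimal standalone sketch of that point (the real record is nested in NodeUsageStatsForThreadPools and its component types are assumed here): a record derives equals, hashCode, and toString from its components, so hand-written versions that merely mirror the components add nothing.

// Hypothetical standalone version of the record; component types assumed.
record ThreadPoolUsageStats(
    int totalThreadPoolThreads,
    float averageThreadPoolUtilization,
    long maxThreadPoolQueueLatencyMillis
) {}

// The compiler-generated members compare and print component-wise, e.g.:
// new ThreadPoolUsageStats(8, 0.75f, 120).equals(new ThreadPoolUsageStats(8, 0.75f, 120)) -> true
// toString() -> "ThreadPoolUsageStats[totalThreadPoolThreads=8, averageThreadPoolUtilization=0.75, maxThreadPoolQueueLatencyMillis=120]"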


}
}

@@ -15,26 +15,33 @@
import org.elasticsearch.cluster.ClusterInfo;
import org.elasticsearch.cluster.ClusterInfoService;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.NodeUsageStatsForThreadPools;
import org.elasticsearch.cluster.routing.RerouteService;
import org.elasticsearch.cluster.routing.RoutingNodes;
import org.elasticsearch.cluster.routing.ShardRoutingState;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.gateway.GatewayService;
import org.elasticsearch.threadpool.ThreadPool;

import java.util.Set;
import java.util.function.LongSupplier;
import java.util.function.Supplier;

/**
* Monitors the node-level write thread pool usage across the cluster and initiates (coming soon) a rebalancing round (via
* {@link RerouteService#reroute}) whenever a node crosses the node-level write load thresholds.
*
* TODO (ES-11992): implement
*/
public class WriteLoadConstraintMonitor {
private static final Logger logger = LogManager.getLogger(WriteLoadConstraintMonitor.class);
private final WriteLoadConstraintSettings writeLoadConstraintSettings;
private final Supplier<ClusterState> clusterStateSupplier;
private final LongSupplier currentTimeMillisSupplier;
private final RerouteService rerouteService;
private volatile long lastRerouteTimeMillis = 0;
private volatile Set<String> lastSetOfHotSpottedNodes = Set.of();

public WriteLoadConstraintMonitor(
ClusterSettings clusterSettings,
@@ -60,29 +67,76 @@ public void onNewInfo(ClusterInfo clusterInfo) {
return;
}

if (writeLoadConstraintSettings.getWriteLoadConstraintEnabled() == WriteLoadConstraintSettings.WriteLoadDeciderStatus.DISABLED) {
logger.trace("skipping monitor because the write load decider is disabled");
if (writeLoadConstraintSettings.getWriteLoadConstraintEnabled() != WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED) {
logger.debug("skipping monitor because the write load decider is disabled");
return;
}

logger.trace("processing new cluster info");

boolean reroute = false;
String explanation = "";
final long currentTimeMillis = currentTimeMillisSupplier.getAsLong();
final int numberOfNodes = clusterInfo.getNodeUsageStatsForThreadPools().size();
final Set<String> nodeIdsExceedingLatencyThreshold = Sets.newHashSetWithExpectedSize(numberOfNodes);
final Set<String> nodeIdsBelowUtilizationThreshold = Sets.newHashSetWithExpectedSize(numberOfNodes);
clusterInfo.getNodeUsageStatsForThreadPools().forEach((nodeId, usageStats) -> {
final NodeUsageStatsForThreadPools.ThreadPoolUsageStats writeThreadPoolStats = usageStats.threadPoolUsageStatsMap()
.get(ThreadPool.Names.WRITE);
if (writeThreadPoolStats.maxThreadPoolQueueLatencyMillis() > writeLoadConstraintSettings.getQueueLatencyThreshold().millis()) {
nodeIdsExceedingLatencyThreshold.add(nodeId);
}
if (writeThreadPoolStats.averageThreadPoolUtilization() <= writeLoadConstraintSettings.getHighUtilizationThreshold()
.getAsRatio()) {
nodeIdsBelowUtilizationThreshold.add(nodeId);
}
});

if (nodeIdsExceedingLatencyThreshold.isEmpty()) {
logger.debug("No nodes exceeding latency threshold");
return;
}

// Remove any over-threshold nodes that already have shards relocating away
Contributor

This seems okay to me because the shard started/failed cluster state update provokes a reroute() call. Not sure if that's what you were aiming at? Otherwise, I'd be worried that a cluster that is doing a lot of rebalancing for a significant amount of time may not reconsider shard allocation decisions when there's a hot-spot. This and this bit of code are responsible for the reroute on cluster state update post shard state change, IIUC. Could you add that argument to the comment, if you agree? I think there should be an explanation of why here.

Contributor

Additionally, I was originally thinking that the monitor would maintain both that a node is hot-spotting and the timestamp when the node's hot-spot began (and then update the timestamp if/when reroute is called again for the same node). That way, after a reasonable amount of time in which we'd expect the hot-spot to have been addressed, the monitor can instigate reroute again for the same hot-spotting node.

final RoutingNodes routingNodes = state.getRoutingNodes();
nodeIdsExceedingLatencyThreshold.removeIf(
nodeId -> routingNodes.node(nodeId).numberOfShardsWithState(ShardRoutingState.RELOCATING) > 0
Contributor

Is this reliable? I haven't followed closely how RELOCATING is used, but my understanding is that reconciliation will select a small subset of shard moves from the DesiredBalance and update the cluster state to start those moves. So there could be 100 shard moves queued for nodeA in the DesiredBalance, but maybe reconciliation fulfilled the shard move quota with other nodes. Or nodeA can't move shards until some other target node moves some shards off first. Etc.

Contributor

If I do understand correctly, I'd be inclined to move away from this solution, since it'd be difficult to get right, and instead track the timestamp start of a node hot-spot, or the last reroute call because of that node hot-spot, and call reroute again if enough time passes for a particular node hot-spot.

Probably still obey haveCalledRerouteRecently first, but if we haven't called recently, allow reroute for the same hot-spot.

Contributor Author
@nicktindall Aug 19, 2025

This was just to address the following from the ticket

Wait to call reroute if shards are already scheduled to move away from the hot node, until fresh node write load data is received after those moves have completed. The balancer may already be resolving the hot-spot.

As long as the hot-spotted node has some shards RELOCATING (this is the status for a shard that's moving on the source side, the target side will be INITIALIZING), we won't call reroute for that node/shard. If there are other hot-spotted nodes with no relocations ongoing this won't prevent reroute being called.

You're right this won't take into account undesired allocations. Perhaps a better solution would expand the condition to node has shards with state = RELOCATING || node-has-undesired-allocations?

Contributor Author

I don't think we currently have information about undesired allocations. So we'd need to do some additional work to get that into the cluster info if we wanted to implement the above.

Contributor
@DiannaHohensee Aug 19, 2025

Wait to call reroute if shards are already scheduled to move away from the hot node, until fresh node write load data is received after those moves have completed. The balancer may already be resolving the hot-spot.

Ah yes, my mistake, I didn't consider all the interpretations.

Right, I don't think we have an undesired count saved any place. Even the DesiredBalance is a list of final assignments, and reconciliation looks for nodes missing a shard assignment. There's no running count. Thus the timestamp idea, keeping track of when reroute was last called for a hot-spot, and recalling reroute if X (5 mins?) time has passed and the hot-spot hasn't been resolved. I don't know of much harm in re-calling reroute, as opposed to the risk of not calling it.

Contributor Author

Thus the timestamp idea, keeping track of when reroute was last called for a hot-spot, and recalling reroute if X (5 mins?) time has passed and the hot-spot hasn't been resolved. I don't know of much harm in re-calling reroute, as opposed to the risk of not calling it.

I think that's how this will already work. If the hot-spot is not resolved and haveCalledRerouteRecently == false then we'll call reroute again.

I think it's probably worthwhile to wait until we see no movement away from a hot-spotted node before we decide to intervene. As you pointed out there will potentially be queued moves in the desired balancer that we're not privy to, but this condition seems better than nothing.

Contributor

I'd rather we removed this check. We don't know how long ago the DesiredBalance being executed was calculated, and on what data it made allocation choices, which could delay addressing a new hot-spot. We'll wait 30 seconds before calling reroute again, and there is no harm in calling reroute again: if nothing has changed, then the new DesiredBalance will be the same.

I think that's how this will already work. If the hot-spot is not resolved and haveCalledRerouteRecently == false then we'll call reroute again.

I meant a timestamp per node hot-spot, as opposed to a single timestamp for all nodes (current implementation). But the timestamp handling works as is.
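
A hedged sketch of that per-node alternative (purely illustrative, not what this PR implements; the class and method names are invented): track the last reroute time for each hot-spotting node instead of a single shared lastRerouteTimeMillis, so a hot-spot that persists on one node can trigger another reroute once the minimum interval elapses, independently of activity on other nodes.

import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

class PerNodeRerouteThrottle {
    private final Map<String, Long> lastRerouteTimeByNodeId = new ConcurrentHashMap<>();

    // True if this node's hot-spot has not triggered a reroute within the minimum interval.
    boolean shouldReroute(String nodeId, long nowMillis, long minIntervalMillis) {
        Long last = lastRerouteTimeByNodeId.get(nodeId);
        return last == null || nowMillis - last >= minIntervalMillis;
    }

    // Record that a reroute was just triggered on behalf of these hot-spotting nodes.
    void onReroute(Set<String> hotSpottedNodeIds, long nowMillis) {
        hotSpottedNodeIds.forEach(nodeId -> lastRerouteTimeByNodeId.put(nodeId, nowMillis));
    }
}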

Contributor

I think I'd also prefer removing this check and leaving it to the deciders/simulator. If simulation says that a shard leaves the node then that will already handle it.

I would prefer not to add further delays though. I think the ClusterInfo poll is enough. We can keep the reroute_interval, but as an operational tool (defaulting to 0s).

Contributor Author

Removed this check in 1789a51

);

// TODO (ES-11992): implement
if (nodeIdsExceedingLatencyThreshold.isEmpty()) {
logger.debug("All nodes over threshold have relocation in progress");
return;
}

if (reroute) {
logger.debug("rerouting shards: [{}]", explanation);
rerouteService.reroute("disk threshold monitor", Priority.NORMAL, ActionListener.wrap(ignored -> {
final var reroutedClusterState = clusterStateSupplier.get();
if (Sets.difference(nodeIdsBelowUtilizationThreshold, nodeIdsExceedingLatencyThreshold).isEmpty()) {
Contributor

Could we make this have no overlap? I'd assume, in the filtering code above that populates these two sets, that a hot-spotting node is at 100% utilization. It doesn't make a lot of sense to have a hot spot while the node is below 90% utilization (or whatever we set the default to).

Then we'd have a check such as

if (nodeIdsBelowUtilizationThreshold.isEmpty() || nodeIdsExceedingLatencyThreshold.isEmpty()) {
    // Do nothing, because either there aren't any target nodes or there aren't any source hot-spotting nodes.
}

I think we'll have to do some magic in ES-12623 and ES-12634 to always supply 100% node utilization in the ClusterInfo when a node is hot spotted (in case of strange stat number reports), but that's different work.

Contributor Author

Done in ee54418

I'd assume in the filtering code above to populate these two sets that a hot-spotting node is at 100% utilization.

That's not a safe assumption. It'll probably be very high, but because it's an average there's a good chance it won't be 100%. I don't think we should necessarily fudge the numbers either, especially if we are going to use those numbers for simulation, we'd just be throwing information away.

Contributor

Ah I see. I was implicitly thinking any node without the queue latency -- so nodes between the low and high thresholds -- would be eligible to receive more shards. But the current implementation is not to do shard movements unless there are nodes below the low threshold (90% cpu usage).

Would we want that behavior? Suppose one node is queueing, and 5 other nodes are at 92% CPU utilization. It seems like it would still be better to initiate rebalancing.

Contributor Author

I think the above falls outside our definition of a hot-spot. If we find ourselves in that situation for a long period then I would argue autoscaling is broken.

logger.debug("No nodes below utilization threshold that are not exceeding latency threshold");
return;
}

// TODO (ES-11992): implement
final long currentTimeMillis = currentTimeMillisSupplier.getAsLong();
final long timeSinceLastRerouteMillis = currentTimeMillis - lastRerouteTimeMillis;
final boolean haveCalledRerouteRecently = timeSinceLastRerouteMillis < writeLoadConstraintSettings.getMinimumRerouteInterval()
.millis();

}, e -> logger.debug("reroute failed", e)));
if (haveCalledRerouteRecently == false
|| Sets.difference(nodeIdsExceedingLatencyThreshold, lastSetOfHotSpottedNodes).isEmpty() == false) {
callReroute(nodeIdsExceedingLatencyThreshold);
} else {
logger.trace("no reroute required");
logger.debug("Not calling reroute because we called reroute recently and there are no new hot spots");
}
}

private void callReroute(Set<String> hotSpottedNodes) {
final String reason = Strings.format(
"write load constraint monitor: Found %d node(s) exceeding the write thread pool queue latency threshold",
Contributor

nit: can you add total and below threshold nodes, please? maybe inline callReroute.

Contributor

Would it be reasonable to list all of the node IDs for actively hot-spotting nodes? That'd make it quite clear which nodes caused the rebalancing work, giving a lead where to investigate further.

The only risk I can think of is that a very large cluster could end up listing a lot of nodes. That'd be in a very unhappy large cluster, but we could put an upper limit on how many nodes we'll list.
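
One possible shape for such a cap, as a hypothetical sketch only (the helper name, the limit handling, and the output format are invented here, not taken from the PR): list at most a fixed number of node IDs and note how many were omitted.

import java.util.Set;
import java.util.stream.Collectors;

// Hypothetical helper: list at most maxListed node IDs and say how many were omitted.
final class NodeIdSummary {
    static String summarize(Set<String> nodeIds, int maxListed) {
        String listed = nodeIds.stream().sorted().limit(maxListed).collect(Collectors.joining(", "));
        int omitted = Math.max(0, nodeIds.size() - maxListed);
        return omitted > 0 ? listed + " ... (" + omitted + " more)" : listed;
    }
}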

Contributor Author

Updated to include a limited number of hot-spotting and under-threshold node IDs, and added the total number of nodes. Also in-lined callReroute.

See b816a15

Contributor

Now I'm thinking maybe the reroute message does not need this, but a debug log could include all of the above.

Contributor Author

Fixed that method to handle no nodes and the limit properly in 8ad414b

Contributor Author

I made the reason string simpler and added the more detailed string as a debug log in 996ac06

hotSpottedNodes.size()
);
rerouteService.reroute(
reason,
Priority.NORMAL,
ActionListener.wrap(
ignored -> logger.trace("{} reroute successful", reason),
e -> logger.debug(() -> Strings.format("reroute failed, reason: %s", reason), e)
)
);
lastRerouteTimeMillis = currentTimeMillisSupplier.getAsLong();
lastSetOfHotSpottedNodes = hotSpottedNodes;
}
}

@@ -117,41 +117,37 @@ public boolean disabled() {
Setting.Property.NodeScope
);

WriteLoadDeciderStatus writeLoadDeciderStatus;
TimeValue writeLoadDeciderRerouteIntervalSetting;
double writeThreadPoolHighUtilizationThresholdSetting;
private volatile WriteLoadDeciderStatus writeLoadDeciderStatus;
private volatile TimeValue minimumRerouteInterval;
private volatile RatioValue highUtilizationThreshold;
private volatile TimeValue queueLatencyThreshold;

public WriteLoadConstraintSettings(ClusterSettings clusterSettings) {
clusterSettings.initializeAndWatch(WRITE_LOAD_DECIDER_ENABLED_SETTING, this::setWriteLoadConstraintEnabled);
clusterSettings.initializeAndWatch(WRITE_LOAD_DECIDER_REROUTE_INTERVAL_SETTING, this::setWriteLoadDeciderRerouteIntervalSetting);
clusterSettings.initializeAndWatch(WRITE_LOAD_DECIDER_ENABLED_SETTING, status -> this.writeLoadDeciderStatus = status);
clusterSettings.initializeAndWatch(
WRITE_LOAD_DECIDER_REROUTE_INTERVAL_SETTING,
timeValue -> this.minimumRerouteInterval = timeValue
);
clusterSettings.initializeAndWatch(
WRITE_LOAD_DECIDER_HIGH_UTILIZATION_THRESHOLD_SETTING,
this::setWriteThreadPoolHighUtilizationThresholdSetting
value -> highUtilizationThreshold = value
);

};

private void setWriteLoadConstraintEnabled(WriteLoadDeciderStatus status) {
this.writeLoadDeciderStatus = status;
clusterSettings.initializeAndWatch(WRITE_LOAD_DECIDER_QUEUE_LATENCY_THRESHOLD_SETTING, value -> queueLatencyThreshold = value);
}

public WriteLoadDeciderStatus getWriteLoadConstraintEnabled() {
return this.writeLoadDeciderStatus;
}

public TimeValue getWriteLoadDeciderRerouteIntervalSetting() {
return this.writeLoadDeciderRerouteIntervalSetting;
}

public double getWriteThreadPoolHighUtilizationThresholdSetting() {
return this.writeThreadPoolHighUtilizationThresholdSetting;
public TimeValue getMinimumRerouteInterval() {
return this.minimumRerouteInterval;
}

private void setWriteLoadDeciderRerouteIntervalSetting(TimeValue timeValue) {
this.writeLoadDeciderRerouteIntervalSetting = timeValue;
public TimeValue getQueueLatencyThreshold() {
return this.queueLatencyThreshold;
}

private void setWriteThreadPoolHighUtilizationThresholdSetting(RatioValue percent) {
this.writeThreadPoolHighUtilizationThresholdSetting = percent.getAsRatio();
public RatioValue getHighUtilizationThreshold() {
return this.highUtilizationThreshold;
}
}

@@ -61,7 +61,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing
assert nodeUsageStatsForThreadPools.threadPoolUsageStatsMap().isEmpty() == false;
assert nodeUsageStatsForThreadPools.threadPoolUsageStatsMap().get(ThreadPool.Names.WRITE) != null;
var nodeWriteThreadPoolStats = nodeUsageStatsForThreadPools.threadPoolUsageStatsMap().get(ThreadPool.Names.WRITE);
var nodeWriteThreadPoolLoadThreshold = writeLoadConstraintSettings.getWriteThreadPoolHighUtilizationThresholdSetting();
var nodeWriteThreadPoolLoadThreshold = writeLoadConstraintSettings.getHighUtilizationThreshold().getAsRatio();
if (nodeWriteThreadPoolStats.averageThreadPoolUtilization() >= nodeWriteThreadPoolLoadThreshold) {
// The node's write thread pool usage stats already show high utilization above the threshold for accepting new shards.
String explain = Strings.format(