IndexShardIT.java
@@ -8,6 +8,8 @@
*/
package org.elasticsearch.index.shard;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.SetOnce;
@@ -22,13 +24,16 @@
import org.elasticsearch.cluster.EstimatedHeapUsage;
import org.elasticsearch.cluster.EstimatedHeapUsageCollector;
import org.elasticsearch.cluster.InternalClusterInfoService;
import org.elasticsearch.cluster.NodeExecutionLoad;
import org.elasticsearch.cluster.NodeUsageLoadCollector;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodeUtils;
import org.elasticsearch.cluster.routing.RecoverySource;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.ShardRoutingState;
import org.elasticsearch.cluster.routing.UnassignedInfo;
import org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintSettings;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.UUIDs;
@@ -73,6 +78,7 @@
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.test.IndexSettingsModule;
import org.elasticsearch.test.InternalSettingsPlugin;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.xcontent.XContentType;
import org.junit.Assert;

@@ -85,6 +91,7 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@@ -117,14 +124,16 @@
import static org.hamcrest.Matchers.either;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.lessThanOrEqualTo;

public class IndexShardIT extends ESSingleNodeTestCase {
private static final Logger logger = LogManager.getLogger(IndexShardIT.class);

@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
return pluginList(InternalSettingsPlugin.class, BogusEstimatedHeapUsagePlugin.class);
return pluginList(InternalSettingsPlugin.class, BogusEstimatedHeapUsagePlugin.class, BogusNodeUsageLoadCollectorPlugin.class);
}

public void testLockTryingToDelete() throws Exception {
@@ -295,6 +304,50 @@ public void testHeapUsageEstimateIsPresent() {
}
}

public void testNodeWriteLoadsArePresent() {
InternalClusterInfoService clusterInfoService = (InternalClusterInfoService) getInstanceFromNode(ClusterInfoService.class);
ClusterInfoServiceUtils.refresh(clusterInfoService);
Map<String, NodeExecutionLoad> nodeThreadPoolStats = clusterInfoService.getClusterInfo().getNodeExecutionStats();
assertNotNull(nodeThreadPoolStats);
// Not collecting stats yet because allocation write load stats collection is disabled by default;
// see WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.
assertTrue(nodeThreadPoolStats.isEmpty());

// Enable collection for node write loads.
updateClusterSettings(
Settings.builder()
.put(
WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey(),
WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED
)
.build()
);
try {
// Force a ClusterInfo refresh to run collection of the node write loads.
ClusterInfoServiceUtils.refresh(clusterInfoService);
nodeThreadPoolStats = clusterInfoService.getClusterInfo().getNodeExecutionStats();

// Verify that each node has a write load reported. The test's BogusNodeUsageLoadCollector generates random load values.
ClusterState state = getInstanceFromNode(ClusterService.class).state();
assertEquals(state.nodes().size(), nodeThreadPoolStats.size());
for (DiscoveryNode node : state.nodes()) {
assertTrue(nodeThreadPoolStats.containsKey(node.getId()));
NodeExecutionLoad nodeExecutionLoad = nodeThreadPoolStats.get(node.getId());
assertThat(nodeExecutionLoad.nodeId(), equalTo(node.getId()));
NodeExecutionLoad.ThreadPoolUsageStats writeThreadPoolStats = nodeExecutionLoad.threadPoolUsageStatsMap()
.get(ThreadPool.Names.WRITE);
assertNotNull(writeThreadPoolStats);
assertThat(writeThreadPoolStats.totalThreadPoolThreads(), greaterThanOrEqualTo(0));
assertThat(writeThreadPoolStats.averageThreadPoolUtilization(), greaterThanOrEqualTo(0.0f));
assertThat(writeThreadPoolStats.averageThreadPoolQueueLatencyMillis(), greaterThanOrEqualTo(0L));
}
} finally {
updateClusterSettings(
Settings.builder().putNull(WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey()).build()
);
}
}

public void testIndexCanChangeCustomDataPath() throws Exception {
final String index = "test-custom-data-path";
final Path sharedDataPath = getInstanceFromNode(Environment.class).sharedDataDir().resolve(randomAsciiLettersOfLength(10));
@@ -875,4 +928,61 @@ public ClusterService getClusterService() {
return clusterService.get();
}
}

/**
* A simple {@link NodeUsageLoadCollector} implementation that creates and returns random {@link NodeExecutionLoad} for each node in the
* cluster.
* <p>
Note: there's an 'org.elasticsearch.cluster.NodeUsageLoadCollector' file that declares this implementation so that the plugin system can
* pick it up and use it for the test set-up.
*/
public static class BogusNodeUsageLoadCollector implements NodeUsageLoadCollector {

private final BogusNodeUsageLoadCollectorPlugin plugin;

public BogusNodeUsageLoadCollector(BogusNodeUsageLoadCollectorPlugin plugin) {
this.plugin = plugin;
}

@Override
public void collectUsageStats(ActionListener<Map<String, NodeExecutionLoad>> listener) {
ActionListener.completeWith(
listener,
() -> plugin.getClusterService()
.state()
.nodes()
.stream()
.collect(Collectors.toUnmodifiableMap(DiscoveryNode::getId, node -> makeRandomNodeLoad(node.getId())))
);
}

private NodeExecutionLoad makeRandomNodeLoad(String nodeId) {
NodeExecutionLoad.ThreadPoolUsageStats writeThreadPoolStats = new NodeExecutionLoad.ThreadPoolUsageStats(
randomNonNegativeInt(),
randomFloat(),
randomNonNegativeLong()
);
Map<String, NodeExecutionLoad.ThreadPoolUsageStats> statsForThreadPools = new HashMap<>();
statsForThreadPools.put(ThreadPool.Names.WRITE, writeThreadPoolStats);
return new NodeExecutionLoad(nodeId, statsForThreadPools);
}
}

/**
* Make a plugin to gain access to the {@link ClusterService} instance.
*/
public static class BogusNodeUsageLoadCollectorPlugin extends Plugin implements ClusterPlugin {

private final SetOnce<ClusterService> clusterService = new SetOnce<>();

@Override
public Collection<?> createComponents(PluginServices services) {
clusterService.set(services.clusterService());
return List.of();
}

public ClusterService getClusterService() {
return clusterService.get();
}
}
}
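A minimal consumer sketch may help show how the new stats are read once a ClusterInfo snapshot is in hand. The helper class below is hypothetical and not part of this change; the accessors it calls are the ones added in this diff.

import java.util.Map;

import org.elasticsearch.cluster.ClusterInfo;
import org.elasticsearch.cluster.NodeExecutionLoad;
import org.elasticsearch.threadpool.ThreadPool;

// Hypothetical helper: summarizes the write thread pool usage reported for
// each node in a ClusterInfo snapshot.
class NodeWriteLoadSummary {
    static void print(ClusterInfo clusterInfo) {
        Map<String, NodeExecutionLoad> stats = clusterInfo.getNodeExecutionStats();
        for (Map.Entry<String, NodeExecutionLoad> entry : stats.entrySet()) {
            NodeExecutionLoad.ThreadPoolUsageStats write = entry.getValue()
                .threadPoolUsageStatsMap()
                .get(ThreadPool.Names.WRITE);
            if (write == null) {
                continue; // the collector reported no write pool stats for this node
            }
            System.out.printf(
                "node=%s threads=%d utilization=%.2f queueLatencyMs=%d%n",
                entry.getKey(),
                write.totalThreadPoolThreads(),
                write.averageThreadPoolUtilization(),
                write.averageThreadPoolQueueLatencyMillis()
            );
        }
    }
}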
org.elasticsearch.cluster.NodeUsageLoadCollector (new service registration file)
@@ -0,0 +1,10 @@
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the "Elastic License
# 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
# Public License v 1"; you may not use this file except in compliance with, at
# your election, the "Elastic License 2.0", the "GNU Affero General Public
# License v3.0 only", or the "Server Side Public License, v 1".
#

org.elasticsearch.index.shard.IndexShardIT$BogusNodeUsageLoadCollector
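This registration file follows the standard Java service-provider convention: it is named after the service interface's fully qualified name and lists one implementation class per line. The sketch below approximates the discovery it enables using plain java.util.ServiceLoader; note that plain ServiceLoader requires a no-arg constructor, whereas Elasticsearch's plugin infrastructure can hand the owning plugin to the implementation's constructor, which is how BogusNodeUsageLoadCollector receives its plugin.

import java.util.ServiceLoader;

import org.elasticsearch.cluster.NodeUsageLoadCollector;

// Approximation only: discover implementations declared in registration files
// like the one above. The Bogus collector itself would not instantiate via
// plain ServiceLoader because its constructor takes a plugin argument.
class CollectorDiscoverySketch {
    public static void main(String[] args) {
        for (NodeUsageLoadCollector collector : ServiceLoader.load(NodeUsageLoadCollector.class)) {
            System.out.println("found collector: " + collector.getClass().getName());
        }
    }
}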
TransportVersions.java
@@ -329,6 +329,7 @@ static TransportVersion def(int id) {
public static final TransportVersion PROJECT_STATE_REGISTRY_RECORDS_DELETIONS = def(9_113_0_00);
public static final TransportVersion ESQL_SERIALIZE_TIMESERIES_FIELD_TYPE = def(9_114_0_00);
public static final TransportVersion ML_INFERENCE_IBM_WATSONX_COMPLETION_ADDED = def(9_115_0_00);
public static final TransportVersion NODE_WRITE_LOAD_IN_CLUSTER_INFO = def(9_116_0_00);
/*
* STOP! READ THIS FIRST! No, really,
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _
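The new constant gates the wire format: the ClusterInfo changes below write and read the new map only when the peer is on or after NODE_WRITE_LOAD_IN_CLUSTER_INFO. A hedged sketch of a round-trip check for that gate follows; TransportVersionUtils and the stream classes are assumed from the test framework and server, and the method is hypothetical.

// Hypothetical BWC-style check: serializing to a version older than
// NODE_WRITE_LOAD_IN_CLUSTER_INFO drops the new map, and deserializing on
// that version defaults it to empty.
static void assertGateDropsNewMap(ClusterInfo clusterInfo) throws IOException {
    TransportVersion oldVersion = TransportVersionUtils.getPreviousVersion(TransportVersions.NODE_WRITE_LOAD_IN_CLUSTER_INFO);
    try (BytesStreamOutput out = new BytesStreamOutput()) {
        out.setTransportVersion(oldVersion);
        clusterInfo.writeTo(out);
        StreamInput in = out.bytes().streamInput();
        in.setTransportVersion(oldVersion);
        assert new ClusterInfo(in).getNodeExecutionStats().isEmpty();
    }
}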
40 changes: 34 additions & 6 deletions server/src/main/java/org/elasticsearch/cluster/ClusterInfo.java
@@ -58,9 +58,10 @@ public class ClusterInfo implements ChunkedToXContent, Writeable {
final Map<NodeAndShard, String> dataPath;
final Map<NodeAndPath, ReservedSpace> reservedSpace;
final Map<String, EstimatedHeapUsage> estimatedHeapUsages;
final Map<String, NodeExecutionLoad> nodeExecutionStats;

protected ClusterInfo() {
this(Map.of(), Map.of(), Map.of(), Map.of(), Map.of(), Map.of(), Map.of());
this(Map.of(), Map.of(), Map.of(), Map.of(), Map.of(), Map.of(), Map.of(), Map.of());
}

/**
@@ -73,6 +74,7 @@ protected ClusterInfo() {
* @param dataPath the shard routing to datapath mapping
* @param reservedSpace reserved space per shard broken down by node and data path
* @param estimatedHeapUsages estimated heap usage broken down by node
* @param nodeExecutionStats estimated node-level execution load broken down by node
* @see #shardIdentifierFromRouting
*/
public ClusterInfo(
@@ -82,7 +84,8 @@ public ClusterInfo(
Map<ShardId, Long> shardDataSetSizes,
Map<NodeAndShard, String> dataPath,
Map<NodeAndPath, ReservedSpace> reservedSpace,
Map<String, EstimatedHeapUsage> estimatedHeapUsages
Map<String, EstimatedHeapUsage> estimatedHeapUsages,
Map<String, NodeExecutionLoad> nodeExecutionStats
) {
this.leastAvailableSpaceUsage = Map.copyOf(leastAvailableSpaceUsage);
this.mostAvailableSpaceUsage = Map.copyOf(mostAvailableSpaceUsage);
@@ -91,6 +94,7 @@ public ClusterInfo(
this.dataPath = Map.copyOf(dataPath);
this.reservedSpace = Map.copyOf(reservedSpace);
this.estimatedHeapUsages = Map.copyOf(estimatedHeapUsages);
this.nodeExecutionStats = Map.copyOf(nodeExecutionStats);
}

public ClusterInfo(StreamInput in) throws IOException {
@@ -107,6 +111,11 @@ public ClusterInfo(StreamInput in) throws IOException {
} else {
this.estimatedHeapUsages = Map.of();
}
if (in.getTransportVersion().onOrAfter(TransportVersions.NODE_WRITE_LOAD_IN_CLUSTER_INFO)) {
this.nodeExecutionStats = in.readImmutableMap(NodeExecutionLoad::new);
} else {
this.nodeExecutionStats = Map.of();
}
}

@Override
@@ -124,6 +133,9 @@ public void writeTo(StreamOutput out) throws IOException {
if (out.getTransportVersion().onOrAfter(TransportVersions.HEAP_USAGE_IN_CLUSTER_INFO)) {
out.writeMap(this.estimatedHeapUsages, StreamOutput::writeWriteable);
}
if (out.getTransportVersion().onOrAfter(TransportVersions.NODE_WRITE_LOAD_IN_CLUSTER_INFO)) {
out.writeMap(this.nodeExecutionStats, StreamOutput::writeWriteable);
}
}

/**
@@ -204,8 +216,8 @@ public Iterator<? extends ToXContent> toXContentChunked(ToXContent.Params params
return builder.endObject(); // NodeAndPath
}),
endArray() // end "reserved_sizes"
// NOTE: We don't serialize estimatedHeapUsages at this stage, to avoid
// committing to API payloads until the feature is settled
// NOTE: We don't serialize estimatedHeapUsages/nodeExecutionStats at this stage, to avoid
// committing to API payloads until the features are settled
Contributor Author: I'm not entirely sure what the toXContentChunked is used for, but figured I should follow suit.

);
}

Expand All @@ -220,6 +232,13 @@ public Map<String, EstimatedHeapUsage> getEstimatedHeapUsages() {
return estimatedHeapUsages;
}

/**
* Returns the node-level write load estimates, keyed by node ID.
*/
public Map<String, NodeExecutionLoad> getNodeExecutionStats() {
return nodeExecutionStats;
}

/**
* Returns a node id to disk usage mapping for the path that has the least available space on the node.
* Note that this does not take account of reserved space: there may be another path with less available _and unreserved_ space.
@@ -311,12 +330,21 @@ public boolean equals(Object o) {
&& shardSizes.equals(that.shardSizes)
&& shardDataSetSizes.equals(that.shardDataSetSizes)
&& dataPath.equals(that.dataPath)
&& reservedSpace.equals(that.reservedSpace);
&& reservedSpace.equals(that.reservedSpace)
&& nodeExecutionStats.equals(that.nodeExecutionStats);
}

@Override
public int hashCode() {
return Objects.hash(leastAvailableSpaceUsage, mostAvailableSpaceUsage, shardSizes, shardDataSetSizes, dataPath, reservedSpace);
return Objects.hash(
leastAvailableSpaceUsage,
mostAvailableSpaceUsage,
shardSizes,
shardDataSetSizes,
dataPath,
reservedSpace,
nodeExecutionStats
);
}

@Override
ClusterInfoSimulator.java
@@ -34,6 +34,7 @@ public class ClusterInfoSimulator {
private final Map<ShardId, Long> shardDataSetSizes;
private final Map<NodeAndShard, String> dataPath;
private final Map<String, EstimatedHeapUsage> estimatedHeapUsages;
private final Map<String, NodeExecutionLoad> nodeExecutionStats;

public ClusterInfoSimulator(RoutingAllocation allocation) {
this.allocation = allocation;
@@ -43,6 +44,7 @@ public ClusterInfoSimulator(RoutingAllocation allocation) {
this.shardDataSetSizes = Map.copyOf(allocation.clusterInfo().shardDataSetSizes);
this.dataPath = Map.copyOf(allocation.clusterInfo().dataPath);
this.estimatedHeapUsages = allocation.clusterInfo().getEstimatedHeapUsages();
this.nodeExecutionStats = allocation.clusterInfo().getNodeExecutionStats();
}

/**
@@ -156,7 +158,8 @@ public ClusterInfo getClusterInfo() {
shardDataSetSizes,
dataPath,
Map.of(),
estimatedHeapUsages
estimatedHeapUsages,
nodeExecutionStats
);
}
}
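The simulator threads the stats through unchanged into the ClusterInfo it rebuilds. For reference, constructing a ClusterInfo with the expanded eight-argument signature looks like the sketch below; the stat values are invented for illustration.

// Invented values for illustration; argument order per the expanded constructor.
Map<String, NodeExecutionLoad> nodeExecutionStats = Map.of(
    "node-1",
    new NodeExecutionLoad(
        "node-1",
        Map.of(ThreadPool.Names.WRITE, new NodeExecutionLoad.ThreadPoolUsageStats(8, 0.42f, 15L))
    )
);
ClusterInfo info = new ClusterInfo(
    Map.of(),   // leastAvailableSpaceUsage
    Map.of(),   // mostAvailableSpaceUsage
    Map.of(),   // shardSizes
    Map.of(),   // shardDataSetSizes
    Map.of(),   // dataPath
    Map.of(),   // reservedSpace
    Map.of(),   // estimatedHeapUsages
    nodeExecutionStats
);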