elastic · schase-es · Sep 23, 2025 · Sep 23, 2025 · Sep 23, 2025 · Sep 23, 2025
diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/RoutingNode.java b/server/src/main/java/org/elasticsearch/cluster/routing/RoutingNode.java
@@ -245,6 +245,15 @@ public Stream<ShardRouting> shardsWithState(ShardRoutingState state) {
         return internalGetShardsWithState(state).stream();
     }
 
+    /**
+     * Determine the number of shards tracked by this RoutingNode with a specific state
+     * @param state ShardRoutingState whose shards should be counted
+     * @return size of the collection returned by {@code internalGetShardsWithState(state)}
+     */
+    public int shardCountsWithState(ShardRoutingState state) {
+        return internalGetShardsWithState(state).size();
+    }
+
     /**
      * Determine the shards of an index with a specific state
      * @param index id of the index

diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java b/server/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java
@@ -88,6 +88,8 @@ public class RoutingNodes implements Iterable<RoutingNode> {
 
     private int relocatingShards = 0;
 
+    private int relocatingFrozenShards = 0;
+
     private final Map<String, Set<String>> attributeValuesByAttribute;
     private final Map<String, Recoveries> recoveriesPerNode;
 
@@ -152,6 +154,9 @@ private RoutingNodes(GlobalRoutingTable routingTable, DiscoveryNodes discoveryNo
                             assignedShardsAdd(shard);
                             if (shard.relocating()) {
                                 relocatingShards++;
+                                if (isFrozenNode(shard.currentNodeId())) {
+                                    relocatingFrozenShards++;
+                                }
                                 ShardRouting targetShardRouting = shard.getTargetRelocatingShard();
                                 addInitialRecovery(targetShardRouting, indexShard.primary);
                                 // LinkedHashMap to preserve order.
@@ -192,6 +197,7 @@ private RoutingNodes(RoutingNodes routingNodes) {
         this.inactivePrimaryCount = routingNodes.inactivePrimaryCount;
         this.inactiveShardCount = routingNodes.inactiveShardCount;
         this.relocatingShards = routingNodes.relocatingShards;
+        this.relocatingFrozenShards = routingNodes.relocatingFrozenShards;
         this.attributeValuesByAttribute = Collections.synchronizedMap(Maps.copyOf(routingNodes.attributeValuesByAttribute, HashSet::new));
         this.recoveriesPerNode = Maps.copyOf(routingNodes.recoveriesPerNode, Recoveries::copy);
     }
@@ -343,6 +349,18 @@ public int getRelocatingShardCount() {
         return relocatingShards;
     }
 
+    /**
+     * Tells whether {@code nodeId} resolves to a routing node whose {@code node()} is non-null and is a dedicated frozen node.
+     */
+    private boolean isFrozenNode(String nodeId) {
+        RoutingNode routingNode = nodesToShards.get(nodeId);
+        return routingNode != null && routingNode.node() != null && routingNode.node().isDedicatedFrozenNode();
+    }
+
+    public int getRelocatingFrozenShardCount() {
+        return relocatingFrozenShards; // subset of relocatingShards for which the isFrozenNode(...) check held
+    }
+
     /**
      * Returns all shards that are not in the state UNASSIGNED with the same shard
      * ID as the given shard.
@@ -478,6 +496,9 @@ public Tuple<ShardRouting, ShardRouting> relocateShard(
     ) {
         ensureMutable();
         relocatingShards++;
+        if (isFrozenNode(nodeId)) {
+            relocatingFrozenShards++;
+        }
         ShardRouting source = startedShard.relocate(nodeId, expectedShardSize);
         ShardRouting target = source.getTargetRelocatingShard();
         updateAssigned(startedShard, source);
@@ -726,6 +747,9 @@ private ShardRouting started(ShardRouting shard, long expectedShardSize) {
      */
     private ShardRouting cancelRelocation(ShardRouting shard) {
         relocatingShards--;
+        if (isFrozenNode(shard.currentNodeId())) {
+            relocatingFrozenShards--;
+        }
         ShardRouting cancelledShard = shard.cancelRelocation();
         updateAssigned(shard, cancelledShard);
         return cancelledShard;
@@ -881,6 +905,7 @@ public boolean equals(Object o) {
             && inactivePrimaryCount == that.inactivePrimaryCount
             && inactiveShardCount == that.inactiveShardCount
             && relocatingShards == that.relocatingShards
+            && relocatingFrozenShards == that.relocatingFrozenShards
             && nodesToShards.equals(that.nodesToShards)
             && unassignedShards.equals(that.unassignedShards)
             && assignedShards.equals(that.assignedShards)
@@ -898,6 +923,7 @@ public int hashCode() {
             inactivePrimaryCount,
             inactiveShardCount,
             relocatingShards,
+            relocatingFrozenShards,
             attributeValuesByAttribute,
             recoveriesPerNode
         );

diff --git a/...lasticsearch/cluster/routing/allocation/decider/ConcurrentRebalanceAllocationDecider.java b/...lasticsearch/cluster/routing/allocation/decider/ConcurrentRebalanceAllocationDecider.java
@@ -20,13 +20,15 @@
 /**
  * Similar to the {@link ClusterRebalanceAllocationDecider} this
  * {@link AllocationDecider} controls the number of currently in-progress
- * re-balance (relocation) operations and restricts node allocations if the
- * configured threshold is reached. The default number of concurrent rebalance
- * operations is set to {@code 2}
+ * re-balance (shard relocation) operations and restricts node allocations
+ * if the configured threshold is reached. Frozen and non-frozen shards are
+ * considered separately. The default number of concurrent rebalance operations
+ * is set to {@code 2} for non-frozen shards, and {@code 10} for frozen shards.
  * <p>
  * Re-balance operations can be controlled in real-time via the cluster update API using
- * {@code cluster.routing.allocation.cluster_concurrent_rebalance}. Iff this
- * setting is set to {@code -1} the number of concurrent re-balance operations
+ * {@code cluster.routing.allocation.cluster_concurrent_rebalance} and
+ * {@code cluster.routing.allocation.cluster_concurrent_frozen_rebalance}.
+ * If either setting is set to {@code -1}, concurrent re-balance operations for that group of shards
  * are unlimited.
  */
 public class ConcurrentRebalanceAllocationDecider extends AllocationDecider {
@@ -44,21 +46,89 @@ public class ConcurrentRebalanceAllocationDecider extends AllocationDecider {
     );
     private volatile int clusterConcurrentRebalance;
 
+    /**
+     * Counterpart of cluster_concurrent_rebalance for shards on dedicated frozen nodes, tracked separately; -1 disables the limit
+     */
+    public static final Setting<Integer> CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_FROZEN_REBALANCE_SETTING = Setting.intSetting(
+        "cluster.routing.allocation.cluster_concurrent_frozen_rebalance",
+        10, // default limit for frozen shards
+        -1, // presumably the lowest accepted value; canRebalance treats -1 as "unlimited"
+        Property.Dynamic,
+        Property.NodeScope
+    );
+    private volatile int clusterConcurrentFrozenRebalance;
+
     public ConcurrentRebalanceAllocationDecider(ClusterSettings clusterSettings) {
         clusterSettings.initializeAndWatch(
             CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_REBALANCE_SETTING,
             this::setClusterConcurrentRebalance
         );
-        logger.debug("using [cluster_concurrent_rebalance] with [{}]", clusterConcurrentRebalance);
+        clusterSettings.initializeAndWatch( // wires the frozen limit to its setter, same as the non-frozen limit above
+            CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_FROZEN_REBALANCE_SETTING,
+            this::setClusterConcurrentFrozenRebalance
+        );
+        logger.debug(
+            "using [cluster_concurrent_rebalance] with [concurrent_rebalance={}, concurrent_frozen_rebalance={}]",
+            clusterConcurrentRebalance,
+            clusterConcurrentFrozenRebalance
+        );
     }
 
     private void setClusterConcurrentRebalance(int concurrentRebalance) {
         clusterConcurrentRebalance = concurrentRebalance;
     }
 
+    private void setClusterConcurrentFrozenRebalance(int concurrentFrozenRebalance) {
+        clusterConcurrentFrozenRebalance = concurrentFrozenRebalance; // volatile field, read by the canRebalance checks
+    }
+
     @Override
     public Decision canRebalance(ShardRouting shardRouting, RoutingAllocation allocation) {
-        return canRebalance(allocation);
+        final int frozenRelocations = allocation.routingNodes().getRelocatingFrozenShardCount();
+        if (isFrozenShard(allocation, shardRouting) == false) {
+            final int regularRelocations = allocation.routingNodes().getRelocatingShardCount() - frozenRelocations;
+            if (clusterConcurrentRebalance == -1) {
+                return allocation.decision(Decision.YES, NAME, "unlimited concurrent rebalances are allowed");
+            }
+            if (regularRelocations < clusterConcurrentRebalance) {
+                return allocation.decision(
+                    Decision.YES,
+                    NAME,
+                    "below threshold [%d] for concurrent rebalances, current rebalance shard count [%d]",
+                    clusterConcurrentRebalance,
+                    regularRelocations
+                );
+            }
+            return allocation.decision(
+                Decision.THROTTLE,
+                NAME,
+                "reached the limit of concurrently rebalancing shards [%d], cluster setting [%s=%d]",
+                regularRelocations,
+                CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_REBALANCE_SETTING.getKey(),
+                clusterConcurrentRebalance
+            );
+        }
+        // from here on the shard sits on a dedicated frozen node and is held to the frozen limit only
+        if (clusterConcurrentFrozenRebalance == -1) {
+            return allocation.decision(Decision.YES, NAME, "unlimited concurrent frozen rebalances are allowed");
+        }
+        if (frozenRelocations < clusterConcurrentFrozenRebalance) {
+            return allocation.decision(
+                Decision.YES,
+                NAME,
+                "below threshold [%d] for concurrent frozen rebalances, current frozen rebalance shard count [%d]",
+                clusterConcurrentFrozenRebalance,
+                frozenRelocations
+            );
+        }
+        return allocation.decision(
+            Decision.THROTTLE,
+            NAME,
+            "reached the limit of concurrently rebalancing frozen shards [%d], cluster setting [%s=%d]",
+            frozenRelocations,
+            CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_FROZEN_REBALANCE_SETTING.getKey(),
+            clusterConcurrentFrozenRebalance
+        );
     }
 
     /**
@@ -68,33 +138,58 @@ public Decision canRebalance(ShardRouting shardRouting, RoutingAllocation alloca
      */
     @Override
     public Decision canRebalance(RoutingAllocation allocation) {
+        int relocatingFrozenShards = allocation.routingNodes().getRelocatingFrozenShardCount();
         int relocatingShards = allocation.routingNodes().getRelocatingShardCount();
         if (allocation.isSimulating() && relocatingShards >= 2) {
             // BalancedShardAllocator is prone to perform unnecessary moves when cluster_concurrent_rebalance is set to high values (>2).
             // (See https://github.com/elastic/elasticsearch/issues/87279)
             // Above allocator is used in DesiredBalanceComputer. Since we do not move actual shard data during calculation
             // it is possible to artificially set above setting to 2 to avoid unnecessary moves in desired balance.
+            // Separately: keep overall limit in simulation to two including frozen shards
             return allocation.decision(Decision.THROTTLE, NAME, "allocation should move one shard at the time when simulating");
         }
-        if (clusterConcurrentRebalance == -1) {
-            return allocation.decision(Decision.YES, NAME, "unlimited concurrent rebalances are allowed");
-        }
-        if (relocatingShards >= clusterConcurrentRebalance) {
+
+        // separate into frozen/non-frozen counts: relocatingShards holds only the non-frozen relocations from here on
+        relocatingShards = relocatingShards - relocatingFrozenShards;
+
+        // room under either limit (or a limit of -1) is enough for a cluster-wide YES; the per-shard check applies the matching limit
+        if (clusterConcurrentRebalance == -1 || relocatingShards < clusterConcurrentRebalance) {
             return allocation.decision(
-                Decision.THROTTLE,
+                Decision.YES,
                 NAME,
-                "reached the limit of concurrently rebalancing shards [%d], cluster setting [%s=%d]",
+                "current rebalance shard count [%d] is within the limit, cluster setting [%s=%d]",
                 relocatingShards,
                 CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_REBALANCE_SETTING.getKey(),
                 clusterConcurrentRebalance
             );
         }
+        if (clusterConcurrentFrozenRebalance == -1 || relocatingFrozenShards < clusterConcurrentFrozenRebalance) {
+            return allocation.decision(
+                Decision.YES,
+                NAME,
+                "current frozen rebalance shard count [%d] is within the limit, cluster setting [%s=%d]",
+                relocatingFrozenShards,
+                CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_FROZEN_REBALANCE_SETTING.getKey(),
+                clusterConcurrentFrozenRebalance
+            );
+        }
         return allocation.decision(
-            Decision.YES,
+            Decision.THROTTLE,
             NAME,
-            "below threshold [%d] for concurrent rebalances, current rebalance shard count [%d]",
+            "above threshold [%d] for concurrent rebalances, current rebalance shard count [%d], "
+                + "and threshold [%d] for concurrent frozen rebalances, current frozen rebalance shard count [%d]",
             clusterConcurrentRebalance,
-            relocatingShards
+            relocatingShards,
+            clusterConcurrentFrozenRebalance,
+            relocatingFrozenShards
         );
     }
+
+    private boolean isFrozenShard(RoutingAllocation allocation, ShardRouting shard) {
+        // Null-safe like RoutingNodes#isFrozenNode: a shard without a current node id, an unknown node id, or a routing
+        // node with no node() behind it is treated as non-frozen instead of failing with a NullPointerException.
+        String nodeId = shard.currentNodeId();
+        var routingNode = nodeId == null ? null : allocation.routingNodes().node(nodeId);
+        return routingNode != null && routingNode.node() != null && routingNode.node().isDedicatedFrozenNode();
+    }
 }
@@ -238,6 +238,7 @@ public void apply(Settings value, Settings current, Settings previous) {
         BreakerSettings.CIRCUIT_BREAKER_TYPE,
         ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING,
         ConcurrentRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_REBALANCE_SETTING,
+        ConcurrentRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_FROZEN_REBALANCE_SETTING,
         EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING,
         EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING,
         FilterAllocationDecider.CLUSTER_ROUTING_INCLUDE_GROUP_SETTING,