1111
1212import org .elasticsearch .cluster .node .DiscoveryNode ;
1313import org .elasticsearch .cluster .routing .allocation .NodeAllocationStatsAndWeightsCalculator .NodeAllocationStatsAndWeight ;
14+ import org .elasticsearch .cluster .routing .allocation .decider .AllocationDeciders ;
1415import org .elasticsearch .telemetry .metric .DoubleWithAttributes ;
1516import org .elasticsearch .telemetry .metric .LongWithAttributes ;
1617import org .elasticsearch .telemetry .metric .MeterRegistry ;
2829 */
2930public class DesiredBalanceMetrics {
3031
32+ /**
 * Shard counts handed to {@code updateMetrics}, which copies each component into the matching reconciliation
 * counter unless the argument is the {@code EMPTY_ALLOCATION_STATS} instance itself.
 *
33+ * @param unassignedShards Shards that are not assigned to any node.
34+ * @param totalAllocations Shards that are assigned to a node.
35+ * @param undesiredAllocationsExcludingShuttingDownNodes Shards that are assigned to a node but must move to alleviate a resource
36+ * constraint per the {@link AllocationDeciders}. Excludes shards that must move
37+ * because of a node shutting down.
38+ */
3139 public record AllocationStats (long unassignedShards , long totalAllocations , long undesiredAllocationsExcludingShuttingDownNodes ) {}
3240
/**
 * Per-node statistics; {@code weightStatsPerNodeRef} holds one instance per {@link DiscoveryNode}.
 * NOTE(review): the meaning and units of each component are inferred from the component names only
 * (e.g. bytes for {@code diskUsageInBytes}) -- confirm against the code that produces these values.
 */
3341 public record NodeWeightStats (long shardCount , double diskUsageInBytes , double writeLoad , double nodeWeight ) {}
3442
35- public static final DesiredBalanceMetrics NOOP = new DesiredBalanceMetrics ( MeterRegistry . NOOP );
36-
43+ // Reconciliation metrics.
44+ /** See {@link #unassignedShards} */
3745 public static final String UNASSIGNED_SHARDS_METRIC_NAME = "es.allocator.desired_balance.shards.unassigned.current" ;
46+ /** See {@link #totalAllocations} */
3847 public static final String TOTAL_SHARDS_METRIC_NAME = "es.allocator.desired_balance.shards.current" ;
48+ /** See {@link #undesiredAllocationsExcludingShuttingDownNodes} */
3949 public static final String UNDESIRED_ALLOCATION_COUNT_METRIC_NAME = "es.allocator.desired_balance.allocations.undesired.current" ;
50+ /** Ratio of {@link #UNDESIRED_ALLOCATION_COUNT_METRIC_NAME} to {@link #TOTAL_SHARDS_METRIC_NAME}; reported as 0.0 when the total is zero. */
4051 public static final String UNDESIRED_ALLOCATION_RATIO_METRIC_NAME = "es.allocator.desired_balance.allocations.undesired.ratio" ;
4152
53+ // Desired balance node metrics.
4254 public static final String DESIRED_BALANCE_NODE_WEIGHT_METRIC_NAME = "es.allocator.desired_balance.allocations.node_weight.current" ;
4355 public static final String DESIRED_BALANCE_NODE_SHARD_COUNT_METRIC_NAME =
4456 "es.allocator.desired_balance.allocations.node_shard_count.current" ;
@@ -47,6 +59,7 @@ public record NodeWeightStats(long shardCount, double diskUsageInBytes, double w
4759 public static final String DESIRED_BALANCE_NODE_DISK_USAGE_METRIC_NAME =
4860 "es.allocator.desired_balance.allocations.node_disk_usage_bytes.current" ;
4961
62+ // Node metrics for the current allocation state (weight, shard count, write load, ...).
5063 public static final String CURRENT_NODE_WEIGHT_METRIC_NAME = "es.allocator.allocations.node.weight.current" ;
5164 public static final String CURRENT_NODE_SHARD_COUNT_METRIC_NAME = "es.allocator.allocations.node.shard_count.current" ;
5265 public static final String CURRENT_NODE_WRITE_LOAD_METRIC_NAME = "es.allocator.allocations.node.write_load.current" ;
@@ -59,6 +72,7 @@ public record NodeWeightStats(long shardCount, double diskUsageInBytes, double w
// Sentinel instance: updateMetrics compares its argument against this constant by reference (!=) and,
// when this exact instance is passed, leaves the three reconciliation counters unchanged.
5972 public static final AllocationStats EMPTY_ALLOCATION_STATS = new AllocationStats (-1 , -1 , -1 );
6073
// Publication gate: getUndesiredAllocationsRatioMetrics returns an empty list while this is false.
// NOTE(review): the other gauge callbacks presumably honor the same flag via getIfPublishing -- its body
// is not visible in this view, so confirm.
6174 private volatile boolean nodeIsMaster = false ;
75+
6276 /**
6377 * Number of unassigned shards during last reconciliation
6478 */
@@ -70,9 +84,10 @@ public record NodeWeightStats(long shardCount, double diskUsageInBytes, double w
7084 private volatile long totalAllocations ;
7185
7286 /**
73- * Number of assigned shards during last reconciliation that are not allocated on desired node and need to be moved
87+ * Number of assigned shards during last reconciliation that are not allocated on a desired node and need to be moved.
88+ * This excludes shards that must be reassigned due to a shutting down node.
 * Written by updateMetrics and zeroAllMetrics; read by the undesired-allocation count and ratio gauge callbacks.
7489 */
75- private volatile long undesiredAllocations ;
90+ private volatile long undesiredAllocationsExcludingShuttingDownNodes ;
7691
// Latest per-node weight stats. updateMetrics replaces the whole map in one set() call and
// zeroAllMetrics resets it to an empty map; it starts out empty.
7792 private final AtomicReference <Map <DiscoveryNode , NodeWeightStats >> weightStatsPerNodeRef = new AtomicReference <>(Map .of ());
7893 private final AtomicReference <Map <DiscoveryNode , NodeAllocationStatsAndWeight >> allocationStatsPerNodeRef = new AtomicReference <>(
@@ -89,7 +104,7 @@ public void updateMetrics(
89104 if (allocationStats != EMPTY_ALLOCATION_STATS ) {
90105 this .unassignedShards = allocationStats .unassignedShards ;
91106 this .totalAllocations = allocationStats .totalAllocations ;
92- this .undesiredAllocations = allocationStats .undesiredAllocationsExcludingShuttingDownNodes ;
107+ this .undesiredAllocationsExcludingShuttingDownNodes = allocationStats .undesiredAllocationsExcludingShuttingDownNodes ;
93108 }
94109 weightStatsPerNodeRef .set (weightStatsPerNode );
95110 allocationStatsPerNodeRef .set (nodeAllocationStats );
@@ -107,14 +122,15 @@ public DesiredBalanceMetrics(MeterRegistry meterRegistry) {
107122 UNDESIRED_ALLOCATION_COUNT_METRIC_NAME ,
108123 "Total number of shards allocated on undesired nodes excluding shutting down nodes" ,
109124 "{shard}" ,
110- this ::getUndesiredAllocationsMetrics
125+ this ::getUndesiredAllocationsExcludingShuttingDownNodesMetrics
111126 );
112127 meterRegistry .registerDoublesGauge (
113128 UNDESIRED_ALLOCATION_RATIO_METRIC_NAME ,
114129 "Ratio of undesired allocations to shard count excluding shutting down nodes" ,
115130 "1" ,
116131 this ::getUndesiredAllocationsRatioMetrics
117132 );
133+
118134 meterRegistry .registerDoublesGauge (
119135 DESIRED_BALANCE_NODE_WEIGHT_METRIC_NAME ,
120136 "Weight of nodes in the computed desired balance" ,
@@ -133,18 +149,19 @@ public DesiredBalanceMetrics(MeterRegistry meterRegistry) {
133149 "bytes" ,
134150 this ::getDesiredBalanceNodeDiskUsageMetrics
135151 );
136- meterRegistry .registerDoublesGauge (
137- CURRENT_NODE_WEIGHT_METRIC_NAME ,
138- "The weight of nodes based on the current allocation state" ,
139- "unit" ,
140- this ::getCurrentNodeWeightMetrics
141- );
142152 meterRegistry .registerLongsGauge (
143153 DESIRED_BALANCE_NODE_SHARD_COUNT_METRIC_NAME ,
144154 "Shard count of nodes in the computed desired balance" ,
145155 "unit" ,
146156 this ::getDesiredBalanceNodeShardCountMetrics
147157 );
158+
159+ meterRegistry .registerDoublesGauge (
160+ CURRENT_NODE_WEIGHT_METRIC_NAME ,
161+ "The weight of nodes based on the current allocation state" ,
162+ "unit" ,
163+ this ::getCurrentNodeWeightMetrics
164+ );
148165 meterRegistry .registerDoublesGauge (
149166 CURRENT_NODE_WRITE_LOAD_METRIC_NAME ,
150167 "The current write load of nodes" ,
@@ -194,7 +211,7 @@ public long totalAllocations() {
194211 }
195212
/**
 * Returns the number of assigned shards that, as of the last reconciliation, are not on a desired node.
 * Shards that must be reassigned because of a shutting-down node are not counted. The accessor keeps its
 * shorter name even though the backing field is {@code undesiredAllocationsExcludingShuttingDownNodes}.
 */
196213 public long undesiredAllocations () {
197- return undesiredAllocations ;
214+ return undesiredAllocationsExcludingShuttingDownNodes ;
198215 }
199216
200217 private List <LongWithAttributes > getUnassignedShardsMetrics () {
@@ -330,8 +347,8 @@ private List<LongWithAttributes> getTotalAllocationsMetrics() {
330347 return getIfPublishing (totalAllocations );
331348 }
332349
// Gauge callback registered in the constructor under UNDESIRED_ALLOCATION_COUNT_METRIC_NAME.
// It passes the current undesired-allocation counter through getIfPublishing.
333- private List <LongWithAttributes > getUndesiredAllocationsMetrics () {
334- return getIfPublishing (undesiredAllocations );
350+ private List <LongWithAttributes > getUndesiredAllocationsExcludingShuttingDownNodesMetrics () {
351+ return getIfPublishing (undesiredAllocationsExcludingShuttingDownNodes );
335352 }
336353
337354 private List <LongWithAttributes > getIfPublishing (long value ) {
@@ -344,7 +361,7 @@ private List<LongWithAttributes> getIfPublishing(long value) {
344361 private List <DoubleWithAttributes > getUndesiredAllocationsRatioMetrics () {
345362 if (nodeIsMaster ) {
346363 var total = totalAllocations ;
347- var undesired = undesiredAllocations ;
364+ var undesired = undesiredAllocationsExcludingShuttingDownNodes ;
348365 return List .of (new DoubleWithAttributes (total != 0 ? (double ) undesired / total : 0.0 ));
349366 }
350367 return List .of ();
@@ -357,7 +374,7 @@ private List<DoubleWithAttributes> getUndesiredAllocationsRatioMetrics() {
357374 public void zeroAllMetrics () {
    // Reset the three reconciliation counters. With totalAllocations at 0 the ratio gauge
    // takes its zero-total branch and reports 0.0 rather than dividing by zero.
358375 unassignedShards = 0 ;
359376 totalAllocations = 0 ;
360- undesiredAllocations = 0 ;
377+ undesiredAllocationsExcludingShuttingDownNodes = 0 ;
    // Swap both per-node maps for empty ones.
361378 weightStatsPerNodeRef .set (Map .of ());
362379 allocationStatsPerNodeRef .set (Map .of ());
363380 }
0 commit comments