1010package org .elasticsearch .cluster .routing .allocation .allocator ;
1111
1212import org .elasticsearch .cluster .node .DiscoveryNode ;
13+ import org .elasticsearch .cluster .routing .ShardRouting ;
1314import org .elasticsearch .cluster .routing .allocation .NodeAllocationStatsAndWeightsCalculator .NodeAllocationStatsAndWeight ;
1415import org .elasticsearch .cluster .routing .allocation .decider .AllocationDeciders ;
1516import org .elasticsearch .telemetry .metric .DoubleWithAttributes ;
2021import java .util .List ;
2122import java .util .Map ;
2223import java .util .concurrent .atomic .AtomicReference ;
24+ import java .util .function .ToLongFunction ;
2325
2426/**
2527 * Maintains balancer metrics and makes them accessible to the {@link MeterRegistry} and APM reporting. Metrics are updated
@@ -31,12 +33,63 @@ public class DesiredBalanceMetrics {
3133
3234 /**
3335 * @param unassignedShards Shards that are not assigned to any node.
36+ * @param allocationStatsByRole A breakdown of the allocations stats by {@link ShardRouting.Role}
37+ */
38+ public record AllocationStats (long unassignedShards , Map <ShardRouting .Role , RoleAllocationStats > allocationStatsByRole ) {
39+
40+ public AllocationStats (long unassignedShards , long totalAllocations , long undesiredAllocationsExcludingShuttingDownNodes ) {
41+ this (
42+ unassignedShards ,
43+ Map .of (ShardRouting .Role .DEFAULT , new RoleAllocationStats (totalAllocations , undesiredAllocationsExcludingShuttingDownNodes ))
44+ );
45+ }
46+
47+ public long totalAllocations () {
48+ return allocationStatsByRole .values ().stream ().mapToLong (RoleAllocationStats ::totalAllocations ).sum ();
49+ }
50+
51+ public long undesiredAllocationsExcludingShuttingDownNodes () {
52+ return allocationStatsByRole .values ()
53+ .stream ()
54+ .mapToLong (RoleAllocationStats ::undesiredAllocationsExcludingShuttingDownNodes )
55+ .sum ();
56+ }
57+
58+ /**
59+ * Return the ratio of undesired allocations to the total number of allocations.
60+ *
61+ * @return a value in [0.0, 1.0]
62+ */
63+ public double undesiredAllocationsRatio () {
64+ final long totalAllocations = totalAllocations ();
65+ if (totalAllocations == 0 ) {
66+ return 0 ;
67+ }
68+ return undesiredAllocationsExcludingShuttingDownNodes () / (double ) totalAllocations ;
69+ }
70+ }
71+
72+ /**
3473 * @param totalAllocations Shards that are assigned to a node.
3574 * @param undesiredAllocationsExcludingShuttingDownNodes Shards that are assigned to a node but must move to alleviate a resource
3675 * constraint per the {@link AllocationDeciders}. Excludes shards that must move
3776 * because of a node shutting down.
3877 */
39- public record AllocationStats (long unassignedShards , long totalAllocations , long undesiredAllocationsExcludingShuttingDownNodes ) {}
public record RoleAllocationStats(long totalAllocations, long undesiredAllocationsExcludingShuttingDownNodes) {
    /** Shared instance with both counters set to zero. */
    public static final RoleAllocationStats EMPTY = new RoleAllocationStats(0L, 0L);

    /**
     * Return the ratio of undesired allocations to the total number of allocations.
     *
     * @return {@code 0.0} when there are no allocations, otherwise undesired / total; this is within [0.0, 1.0]
     *         as long as the undesired count does not exceed the total
     */
    public double undesiredAllocationsRatio() {
        // Guard the zero-total case so the division never yields NaN.
        return totalAllocations == 0 ? 0.0 : (double) undesiredAllocationsExcludingShuttingDownNodes / totalAllocations;
    }
}
4093
4194 public record NodeWeightStats (long shardCount , double diskUsageInBytes , double writeLoad , double nodeWeight ) {
4295 public static final NodeWeightStats ZERO = new NodeWeightStats (0 , 0 , 0 , 0 );
@@ -47,7 +100,7 @@ public record NodeWeightStats(long shardCount, double diskUsageInBytes, double w
47100 public static final String UNASSIGNED_SHARDS_METRIC_NAME = "es.allocator.desired_balance.shards.unassigned.current" ;
48101 /** See {@link #totalAllocations} */
49102 public static final String TOTAL_SHARDS_METRIC_NAME = "es.allocator.desired_balance.shards.current" ;
50- /** See {@link #undesiredAllocationsExcludingShuttingDownNodes } */
103+ /** See {@link #undesiredAllocations } */
51104 public static final String UNDESIRED_ALLOCATION_COUNT_METRIC_NAME = "es.allocator.desired_balance.allocations.undesired.current" ;
52105 /** {@link #UNDESIRED_ALLOCATION_COUNT_METRIC_NAME} / {@link #TOTAL_SHARDS_METRIC_NAME} */
53106 public static final String UNDESIRED_ALLOCATION_RATIO_METRIC_NAME = "es.allocator.desired_balance.allocations.undesired.ratio" ;
@@ -71,25 +124,14 @@ public record NodeWeightStats(long shardCount, double diskUsageInBytes, double w
71124 public static final String CURRENT_NODE_FORECASTED_DISK_USAGE_METRIC_NAME =
72125 "es.allocator.allocations.node.forecasted_disk_usage_bytes.current" ;
73126
74- public static final AllocationStats EMPTY_ALLOCATION_STATS = new AllocationStats (- 1 , - 1 , - 1 );
127+ public static final AllocationStats EMPTY_ALLOCATION_STATS = new AllocationStats (0 , Map . of () );
75128
76129 private volatile boolean nodeIsMaster = false ;
77130
78131 /**
79- * Number of unassigned shards during last reconciliation
80- */
81- private volatile long unassignedShards ;
82-
83- /**
84- * Total number of assigned shards during last reconciliation
132+ * The stats from the most recent reconciliation
85133 */
86- private volatile long totalAllocations ;
87-
88- /**
89- * Number of assigned shards during last reconciliation that are not allocated on a desired node and need to be moved.
90- * This excludes shards that must be reassigned due to a shutting down node.
91- */
92- private volatile long undesiredAllocationsExcludingShuttingDownNodes ;
134+ private volatile AllocationStats lastReconciliationAllocationStats = EMPTY_ALLOCATION_STATS ;
93135
94136 private final AtomicReference <Map <DiscoveryNode , NodeWeightStats >> weightStatsPerNodeRef = new AtomicReference <>(Map .of ());
95137 private final AtomicReference <Map <DiscoveryNode , NodeAllocationStatsAndWeight >> allocationStatsPerNodeRef = new AtomicReference <>(
@@ -104,9 +146,7 @@ public void updateMetrics(
104146 assert allocationStats != null : "allocation stats cannot be null" ;
105147 assert weightStatsPerNode != null : "node balance weight stats cannot be null" ;
106148 if (allocationStats != EMPTY_ALLOCATION_STATS ) {
107- this .unassignedShards = allocationStats .unassignedShards ;
108- this .totalAllocations = allocationStats .totalAllocations ;
109- this .undesiredAllocationsExcludingShuttingDownNodes = allocationStats .undesiredAllocationsExcludingShuttingDownNodes ;
149+ this .lastReconciliationAllocationStats = allocationStats ;
110150 }
111151 weightStatsPerNodeRef .set (weightStatsPerNode );
112152 allocationStatsPerNodeRef .set (nodeAllocationStats );
@@ -205,19 +245,23 @@ public void setNodeIsMaster(boolean nodeIsMaster) {
205245 }
206246
207247 public long unassignedShards () {
208- return unassignedShards ;
248+ return lastReconciliationAllocationStats . unassignedShards () ;
209249 }
210250
211251 public long totalAllocations () {
212- return totalAllocations ;
252+ return lastReconciliationAllocationStats . totalAllocations () ;
213253 }
214254
215255 public long undesiredAllocations () {
216- return undesiredAllocationsExcludingShuttingDownNodes ;
256+ return lastReconciliationAllocationStats .undesiredAllocationsExcludingShuttingDownNodes ();
257+ }
258+
259+ public AllocationStats allocationStats () {
260+ return lastReconciliationAllocationStats ;
217261 }
218262
219263 private List <LongWithAttributes > getUnassignedShardsMetrics () {
220- return getIfPublishing (unassignedShards );
264+ return getIfPublishing (AllocationStats :: unassignedShards );
221265 }
222266
223267 private List <DoubleWithAttributes > getDesiredBalanceNodeWeightMetrics () {
@@ -346,25 +390,25 @@ private Map<String, Object> getNodeAttributes(DiscoveryNode node) {
346390 }
347391
348392 private List <LongWithAttributes > getTotalAllocationsMetrics () {
349- return getIfPublishing (totalAllocations );
393+ return getIfPublishing (AllocationStats :: totalAllocations );
350394 }
351395
352396 private List <LongWithAttributes > getUndesiredAllocationsExcludingShuttingDownNodesMetrics () {
353- return getIfPublishing (undesiredAllocationsExcludingShuttingDownNodes );
397+ return getIfPublishing (AllocationStats :: undesiredAllocationsExcludingShuttingDownNodes );
354398 }
355399
356- private List <LongWithAttributes > getIfPublishing (long value ) {
357- if (nodeIsMaster ) {
358- return List .of (new LongWithAttributes (value ));
400+ private List <LongWithAttributes > getIfPublishing (ToLongFunction <AllocationStats > value ) {
401+ var currentStats = lastReconciliationAllocationStats ;
402+ if (nodeIsMaster && currentStats != EMPTY_ALLOCATION_STATS ) {
403+ return List .of (new LongWithAttributes (value .applyAsLong (currentStats )));
359404 }
360405 return List .of ();
361406 }
362407
363408 private List <DoubleWithAttributes > getUndesiredAllocationsRatioMetrics () {
364- if (nodeIsMaster ) {
365- var total = totalAllocations ;
366- var undesired = undesiredAllocationsExcludingShuttingDownNodes ;
367- return List .of (new DoubleWithAttributes (total != 0 ? (double ) undesired / total : 0.0 ));
409+ var currentStats = lastReconciliationAllocationStats ;
410+ if (nodeIsMaster && currentStats != EMPTY_ALLOCATION_STATS ) {
411+ return List .of (new DoubleWithAttributes (currentStats .undesiredAllocationsRatio ()));
368412 }
369413 return List .of ();
370414 }
@@ -374,9 +418,7 @@ private List<DoubleWithAttributes> getUndesiredAllocationsRatioMetrics() {
374418 * This is best-effort because it is possible for {@link #updateMetrics} to race with this method.
375419 */
376420 public void zeroAllMetrics () {
377- unassignedShards = 0 ;
378- totalAllocations = 0 ;
379- undesiredAllocationsExcludingShuttingDownNodes = 0 ;
421+ lastReconciliationAllocationStats = EMPTY_ALLOCATION_STATS ;
380422 weightStatsPerNodeRef .set (Map .of ());
381423 allocationStatsPerNodeRef .set (Map .of ());
382424 }
0 commit comments