Skip to content

Commit ebd4448

Browse files
Create balancer summary scaffolding
Lays out the balancer summary information that we want to collect and eventually report. Only the class scaffolding, still needs implementation.
1 parent ebb5528 commit ebd4448

File tree

3 files changed

+209
-0
lines changed

3 files changed

+209
-0
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.cluster.routing.allocation.allocator;
11+
12+
import java.util.HashMap;
13+
import java.util.LinkedList;
14+
import java.util.concurrent.ConcurrentLinkedQueue;
15+
16+
/**
17+
* Manages the lifecycle of {@link BalancingSummary} data structures tracking allocation balancing round results. There are many balancing
18+
* rounds and this class manages their reporting.
19+
*
20+
* Summarizing balancer rounds and reporting the results will provide information with which to do a cost-benefit analysis of the work that
21+
* the allocation rebalancing performs.
22+
*/
23+
public class AllocationBalancingRoundSummaryService {
24+
25+
/** Value to return if no balancing rounds have occurred in the requested time period. */
26+
private final BalancingSummary.CombinedClusterBalancingRoundSummary EMPTY_RESULTS =
27+
new BalancingSummary.CombinedClusterBalancingRoundSummary(
28+
0,
29+
0,
30+
new LinkedList<>(),
31+
new BalancingSummary.ClusterShardMovements(0, 0, 0, 0, 0),
32+
new HashMap<>()
33+
);
34+
35+
/**
36+
* A concurrency-safe list of balancing round summaries. Balancer rounds are run and added here serially, so the queue will naturally
37+
* progress from newer to older results.
38+
*/
39+
private ConcurrentLinkedQueue<BalancingSummary.BalancingRoundSummary> summaries = new ConcurrentLinkedQueue<>();
40+
41+
/**
42+
* Returns a combined summary of all unreported allocation round summaries: may summarize a single balancer round, multiple, or none.
43+
*
44+
* @return returns {@link #EMPTY_RESULTS} if there are no unreported balancing rounds.
45+
*/
46+
public BalancingSummary.CombinedClusterBalancingRoundSummary combineSummaries() {
47+
// TODO: implement
48+
return EMPTY_RESULTS;
49+
}
50+
51+
public void addBalancerRoundSummary(BalancingSummary.BalancingRoundSummary summary) {
52+
summaries.add(summary);
53+
}
54+
55+
}
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.cluster.routing.allocation.allocator;
11+
12+
import java.util.List;
13+
import java.util.Map;
14+
15+
/**
16+
* Data structures defining the results of allocation balancing rounds.
17+
*/
18+
public class BalancingSummary {
19+
20+
/**
21+
* Holds combined {@link BalancingRoundSummary} results. Essentially holds a list of the balancing events and the summed up changes
22+
* across all those events: what allocation work was done across some period of time.
23+
*
24+
* @param eventsStartTime The earliest start time of all the combined balancing rounds.
25+
* @param eventsEndTime The latest end time of all the combined balancing rounds.
26+
* @param events A list of all the cluster events that started the balancing rounds.
27+
* @param shardMovements The sum of all shard movements across all combined balancing rounds.
28+
* @param nodeChanges The total change stats per node in the cluster from the earliest balancing round to the latest one.
29+
*/
30+
record CombinedClusterBalancingRoundSummary(
31+
long eventsStartTime,
32+
long eventsEndTime,
33+
List<ClusterRebalancingEvent> events,
34+
ClusterShardMovements shardMovements,
35+
Map<String, IndividualNodeRebalancingChangeStats> nodeChanges
36+
) {};
37+
38+
/**
39+
* Summarizes the impact to the cluster as a result of a rebalancing round.
40+
*
41+
* @param eventStartTime Time at which the desired balance calculation began due to a cluster event.
42+
* @param computationEndTime Time at which the new desired balance calculation was finished.
43+
* @param event Reports what provoked the rebalancing round. The rebalancer only runs when requested, not on a periodic basis.
44+
* @param computationFinishReason Whether the balancing round converged to a final allocation, or exiting early for some reason.
45+
* @param shardMovements Lists the total number of shard moves, and breaks down the total into number shards moved by category,
46+
* like node shutdown
47+
* @param nodeChanges A Map of node name to {@link IndividualNodeRebalancingChangeStats} to describe what each node gained and how much
48+
* work each node performed for the balancing round.
49+
*/
50+
record BalancingRoundSummary(
51+
long eventStartTime,
52+
long computationEndTime,
53+
ClusterRebalancingEvent event,
54+
DesiredBalance.ComputationFinishReason computationFinishReason,
55+
ClusterShardMovements shardMovements,
56+
Map<String, IndividualNodeRebalancingChangeStats> nodeChanges
57+
) {
58+
@Override
59+
public String toString() {
60+
return "BalancingRoundSummary{"
61+
+ "ClusterRebalancingEvent="
62+
+ event
63+
+ ", ClusterShardMovements="
64+
+ shardMovements
65+
+ ", NodeChangeStats={"
66+
+ nodeChanges
67+
+ "}"
68+
+ '}';
69+
}
70+
};
71+
72+
/**
73+
* General cost-benefit information on the node-level. Describes how each node was improved by a balancing round, and how much work that
74+
* node did to achieve the shard rebalancing.
75+
*
76+
* @param nodeWeightBeforeRebalancing
77+
* @param nodeWeightAfterRebalancing
78+
* @param dataMovedToNodeInMB
79+
* @param dataMovedAwayFromNodeInMB
80+
*/
81+
record IndividualNodeRebalancingChangeStats(
82+
long nodeWeightBeforeRebalancing,
83+
long nodeWeightAfterRebalancing,
84+
long dataMovedToNodeInMB,
85+
long dataMovedAwayFromNodeInMB
86+
) {
87+
@Override
88+
public String toString() {
89+
return "IndividualNodeRebalancingChangeStats{"
90+
+ "nodeWeightBeforeRebalancing="
91+
+ nodeWeightBeforeRebalancing
92+
+ ", nodeWeightAfterRebalancing="
93+
+ nodeWeightAfterRebalancing
94+
+ ", dataMovedToNodeInMB="
95+
+ dataMovedToNodeInMB
96+
+ ", dataMovedAwayFromNodeInMB="
97+
+ dataMovedAwayFromNodeInMB
98+
+ '}';
99+
}
100+
};
101+
102+
/**
103+
* Tracks and summarizes the more granular reasons why shards are moved between nodes.
104+
*
105+
* @param numShardMoves total number of shard moves
106+
* @param numAllocationDeciderForcedShardMoves total number of shards that must be moved because they violate an AllocationDecider rule
107+
* @param numRebalancingShardMoves total number of shards moved to improve cluster balance and are not otherwise required to move
108+
* @param numShutdownForcedShardMoves total number of shards that must move off of a node because it is shutting down
109+
* @param numStuckShards total number of shards violating an AllocationDecider on their current node and on every other cluster node
110+
*/
111+
record ClusterShardMovements(
112+
long numShardMoves,
113+
long numAllocationDeciderForcedShardMoves,
114+
long numRebalancingShardMoves,
115+
long numShutdownForcedShardMoves,
116+
long numStuckShards
117+
) {
118+
@Override
119+
public String toString() {
120+
return "ClusterShardMovements{"
121+
+ "numShardMoves="
122+
+ numShardMoves
123+
+ ", numAllocationDeciderForcedShardMoves="
124+
+ numAllocationDeciderForcedShardMoves
125+
+ ", numRebalancingShardMoves="
126+
+ numRebalancingShardMoves
127+
+ ", numShutdownForcedShardMoves="
128+
+ numShutdownForcedShardMoves
129+
+ ", numStuckShards="
130+
+ numStuckShards
131+
+ '}';
132+
}
133+
};
134+
135+
/**
136+
* The cluster event that initiated a rebalancing round. This will tell us what initiated the balancer doing some amount of rebalancing
137+
* work.
138+
*/
139+
enum ClusterRebalancingEvent {
140+
// TODO (Dianna): go through the reroute methods and identify the causes -- many reroute methods accept a 'reason' string -- and
141+
// replace them with this enum to be saved later in a balancing summary.
142+
RerouteCommand,
143+
IndexCreation,
144+
IndexDeletion,
145+
NodeShutdownAndRemoval,
146+
NewNodeAdded
147+
}
148+
149+
}

server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,10 @@ public class DesiredBalanceShardsAllocator implements ShardsAllocator {
8989
private volatile boolean resetCurrentDesiredBalance = false;
9090
private final Set<String> processedNodeShutdowns = new HashSet<>();
9191
private final DesiredBalanceMetrics desiredBalanceMetrics;
92+
/**
93+
* Manages balancer round results in order to report metrics on the balancer activity in a configurable manner.
94+
*/
95+
private final AllocationBalancingRoundSummaryService balancerRoundSummaryService;
9296

9397
// stats
9498
protected final CounterMetric computationsSubmitted = new CounterMetric();
@@ -133,6 +137,7 @@ public DesiredBalanceShardsAllocator(
133137
NodeAllocationStatsAndWeightsCalculator nodeAllocationStatsAndWeightsCalculator
134138
) {
135139
this.desiredBalanceMetrics = new DesiredBalanceMetrics(telemetryProvider.getMeterRegistry());
140+
this.balancerRoundSummaryService = new AllocationBalancingRoundSummaryService();
136141
this.delegateAllocator = delegateAllocator;
137142
this.threadPool = threadPool;
138143
this.reconciler = reconciler;

0 commit comments

Comments
 (0)