
Commit 31f1810

Revert "Add an option to return early from an allocate call (#134786)" (#135476)
This reverts commit 43741c2.
1 parent 7fefa5f commit 31f1810
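
For context, the reverted change added a completeEarlyOnShardAssignmentChange option (enabled only when the desired-balance allocator is configured) that let BalancedShardsAllocator.allocate return as soon as the first phase, allocating unassigned shards, moving shards, or rebalancing, changed any shard assignment. The following is a minimal, self-contained sketch of that control flow, not the actual Elasticsearch code; the Phase interface and the standalone allocate method are hypothetical stand-ins for Balancer.allocateUnassigned(), moveShards(), and balance().

    // Minimal sketch (assumption: simplified stand-in, not Elasticsearch code) of the
    // early-return behavior this commit reverts: when the flag is enabled, allocation
    // stops after the first phase that reports a shard assignment change.
    public class EarlyReturnAllocationSketch {

        // Hypothetical stand-in for the three balancer phases; each returns true
        // if it changed at least one shard assignment.
        interface Phase {
            boolean run();
        }

        static void allocate(boolean completeEarlyOnShardAssignmentChange, Phase allocateUnassigned, Phase moveShards, Phase balance) {
            if (allocateUnassigned.run() && completeEarlyOnShardAssignmentChange) {
                return; // a shard was assigned; skip moving and rebalancing
            }
            if (moveShards.run() && completeEarlyOnShardAssignmentChange) {
                return; // a shard was moved; skip rebalancing
            }
            balance.run(); // final rebalancing pass
        }

        public static void main(String[] args) {
            // With the flag enabled, moveShards and balance are skipped once
            // allocateUnassigned reports a change.
            allocate(
                true,
                () -> { System.out.println("allocateUnassigned: changed"); return true; },
                () -> { System.out.println("moveShards"); return true; },
                () -> { System.out.println("balance"); return false; }
            );
        }
    }

After this revert, all three phases always run to completion regardless of the configured allocator.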

File tree

5 files changed: +18 -256 lines changed

server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java

Lines changed: 16 additions & 107 deletions
@@ -140,83 +140,25 @@ public BalancedShardsAllocator(
 
     @Override
     public void allocate(RoutingAllocation allocation) {
-        assert allocation.isSimulating() == false || balancerSettings.completeEarlyOnShardAssignmentChange()
-            : "inconsistent states: isSimulating ["
-                + allocation.isSimulating()
-                + "] vs completeEarlyOnShardAssignmentChange ["
-                + balancerSettings.completeEarlyOnShardAssignmentChange()
-                + "]";
         if (allocation.metadata().hasAnyIndices()) {
             // must not use licensed features when just starting up
             writeLoadForecaster.refreshLicense();
         }
 
         assert allocation.ignoreDisable() == false;
-        assert allocation.isSimulating() == false || allocation.routingNodes().hasInactiveShards() == false
-            : "expect no initializing shard, but got " + allocation.routingNodes();
-        // TODO: ES-12943 cannot assert the following because shards moved by commands are not simulated promptly in DesiredBalanceComputer
-        // assert allocation.isSimulating() == false || allocation.routingNodes().getRelocatingShardCount() == 0
-        //     : "expect no relocating shard, but got " + allocation.routingNodes();
 
         if (allocation.routingNodes().size() == 0) {
             failAllocationOfNewPrimaries(allocation);
             return;
         }
         final BalancingWeights balancingWeights = balancingWeightsFactory.create();
-        final Balancer balancer = new Balancer(
-            writeLoadForecaster,
-            allocation,
-            balancerSettings.getThreshold(),
-            balancingWeights,
-            balancerSettings.completeEarlyOnShardAssignmentChange()
-        );
-
-        boolean shardAssigned = false, shardMoved = false, shardBalanced = false;
-        try {
-            shardAssigned = balancer.allocateUnassigned();
-            if (shardAssigned && balancerSettings.completeEarlyOnShardAssignmentChange()) {
-                return;
-            }
-
-            shardMoved = balancer.moveShards();
-            if (shardMoved && balancerSettings.completeEarlyOnShardAssignmentChange()) {
-                return;
-            }
-
-            shardBalanced = balancer.balance();
-        } finally {
-            if (logger.isDebugEnabled()) {
-                logger.debug(
-                    "shards assigned: {}, shards moved: {}, shards balanced: {}, "
-                        + "routingNodes hasInactiveShards [{}], relocation count [{}]",
-                    shardAssigned,
-                    shardMoved,
-                    shardBalanced,
-                    allocation.routingNodes().hasInactiveShards(),
-                    allocation.routingNodes().getRelocatingShardCount()
-                );
-            }
-            assert assertShardAssignmentChanges(allocation, shardAssigned, shardMoved, shardBalanced);
-            // Node weights are calculated after each internal balancing round and saved to the RoutingNodes copy.
-            collectAndRecordNodeWeightStats(balancer, balancingWeights, allocation);
-        }
-    }
+        final Balancer balancer = new Balancer(writeLoadForecaster, allocation, balancerSettings.getThreshold(), balancingWeights);
+        balancer.allocateUnassigned();
+        balancer.moveShards();
+        balancer.balance();
 
-    private boolean assertShardAssignmentChanges(
-        RoutingAllocation allocation,
-        boolean shardAssigned,
-        boolean shardMoved,
-        boolean shardBalanced
-    ) {
-        if (allocation.isSimulating() == false) {
-            return true;
-        }
-        assert shardAssigned == false || allocation.routingNodes().hasInactiveShards()
-            : "expect initializing shard, but got " + allocation.routingNodes();
-
-        assert (shardMoved == false && shardBalanced == false) || allocation.routingNodes().getRelocatingShardCount() > 0
-            : "expect relocating shard, but got " + allocation.routingNodes();
-        return true;
+        // Node weights are calculated after each internal balancing round and saved to the RoutingNodes copy.
+        collectAndRecordNodeWeightStats(balancer, balancingWeights, allocation);
     }
 
     private void collectAndRecordNodeWeightStats(Balancer balancer, BalancingWeights balancingWeights, RoutingAllocation allocation) {
@@ -246,8 +188,7 @@ public ShardAllocationDecision decideShardAllocation(final ShardRouting shard, f
             writeLoadForecaster,
             allocation,
             balancerSettings.getThreshold(),
-            balancingWeightsFactory.create(),
-            balancerSettings.completeEarlyOnShardAssignmentChange()
+            balancingWeightsFactory.create()
         );
         AllocateUnassignedDecision allocateUnassignedDecision = AllocateUnassignedDecision.NOT_TAKEN;
         MoveDecision moveDecision = MoveDecision.NOT_TAKEN;
@@ -307,14 +248,12 @@ public static class Balancer {
         private final Map<String, ModelNode> nodes;
         private final BalancingWeights balancingWeights;
         private final NodeSorters nodeSorters;
-        private final boolean completeEarlyOnShardAssignmentChange;
 
         private Balancer(
             WriteLoadForecaster writeLoadForecaster,
             RoutingAllocation allocation,
             float threshold,
-            BalancingWeights balancingWeights,
-            boolean completeEarlyOnShardAssignmentChange
+            BalancingWeights balancingWeights
         ) {
             this.writeLoadForecaster = writeLoadForecaster;
             this.allocation = allocation;
@@ -327,7 +266,6 @@ private Balancer(
             nodes = Collections.unmodifiableMap(buildModelFromAssigned());
             this.nodeSorters = balancingWeights.createNodeSorters(nodesArray(), this);
             this.balancingWeights = balancingWeights;
-            this.completeEarlyOnShardAssignmentChange = completeEarlyOnShardAssignmentChange;
         }
 
         private static long getShardDiskUsageInBytes(ShardRouting shardRouting, IndexMetadata indexMetadata, ClusterInfo clusterInfo) {
@@ -420,7 +358,7 @@ private IndexMetadata indexMetadata(ProjectIndex index) {
          * Balances the nodes on the cluster model according to the weight function.
          * The actual balancing is delegated to {@link #balanceByWeights(NodeSorter)}
          */
-        private boolean balance() {
+        private void balance() {
            if (logger.isTraceEnabled()) {
                logger.trace("Start balancing cluster");
            }
@@ -433,27 +371,21 @@ private boolean balance() {
             * Therefore we only do a rebalance if we have fetched all information.
             */
            logger.debug("skipping rebalance due to in-flight shard/store fetches");
-            return false;
+            return;
        }
        if (allocation.deciders().canRebalance(allocation).type() != Type.YES) {
            logger.trace("skipping rebalance as it is disabled");
-            return false;
+            return;
        }
 
-        boolean shardBalanced = false;
        // Balance each partition
        for (NodeSorter nodeSorter : nodeSorters) {
            if (nodeSorter.modelNodes.length < 2) { /* skip if we only have one node */
                logger.trace("skipping rebalance as the partition has single node only");
                continue;
            }
-            shardBalanced |= balanceByWeights(nodeSorter);
-            // TODO: We could choose to account shardBalanced separately for each partition since they do not overlap.
-            if (shardBalanced && completeEarlyOnShardAssignmentChange) {
-                return true;
-            }
+            balanceByWeights(nodeSorter);
        }
-        return shardBalanced;
    }
 
    /**
@@ -599,8 +531,7 @@ private MoveDecision decideRebalance(final ProjectIndex index, final ShardRoutin
         * only, or in other words relocations that move the weight delta closer
         * to {@code 0.0}
         */
-        private boolean balanceByWeights(NodeSorter sorter) {
-            boolean shardBalanced = false;
+        private void balanceByWeights(NodeSorter sorter) {
            final AllocationDeciders deciders = allocation.deciders();
            final ModelNode[] modelNodes = sorter.modelNodes;
            final float[] weights = sorter.weights;
@@ -699,15 +630,6 @@ private boolean balanceByWeights(NodeSorter sorter) {
                    sorter.sort(0, relevantNodes);
                    lowIdx = 0;
                    highIdx = relevantNodes - 1;
-
-                    shardBalanced = true;
-                    if (completeEarlyOnShardAssignmentChange && routingNodes.getRelocatingShardCount() > 0) {
-                        // ES-12955: Check routingNodes.getRelocatingShardCount() > 0 in case the first relocation is a THROTTLE.
-                        // It should not happen in production, i.e, throttling should not happen unless there is a prior shard
-                        // that is already relocating. But in tests, we have decider like RandomAllocationDecider that can
-                        // randomly return THROTTLE when there is no existing relocation.
-                        return true;
-                    }
                    continue;
                }
            }
@@ -729,7 +651,6 @@ private boolean balanceByWeights(NodeSorter sorter) {
                }
            }
        }
-        return shardBalanced;
    }
 
    /**
@@ -800,8 +721,7 @@ protected int comparePivot(int j) {
         * shard is created with an incremented version in the state
         * {@link ShardRoutingState#INITIALIZING}.
         */
-        public boolean moveShards() {
-            boolean shardMoved = false;
+        public void moveShards() {
            // Iterate over the started shards interleaving between nodes, and check if they can remain. In the presence of throttling
            // shard movements, the goal of this iteration order is to achieve a fairer movement of shards from the nodes that are
            // offloading the shards.
@@ -825,15 +745,10 @@ public boolean moveShards() {
                    if (logger.isTraceEnabled()) {
                        logger.trace("Moved shard [{}] to node [{}]", shardRouting, targetNode.getRoutingNode());
                    }
-                    shardMoved = true;
-                    if (completeEarlyOnShardAssignmentChange) {
-                        return true;
-                    }
                } else if (moveDecision.isDecisionTaken() && moveDecision.canRemain() == false) {
                    logger.trace("[{}][{}] can't move", shardRouting.index(), shardRouting.id());
                }
            }
-            return shardMoved;
        }
 
        /**
@@ -973,14 +888,14 @@ private Map<String, ModelNode> buildModelFromAssigned() {
         * Allocates all given shards on the minimal eligible node for the shards index
         * with respect to the weight function. All given shards must be unassigned.
         */
-        private boolean allocateUnassigned() {
+        private void allocateUnassigned() {
            RoutingNodes.UnassignedShards unassigned = routingNodes.unassigned();
            assert nodes.isEmpty() == false;
            if (logger.isTraceEnabled()) {
                logger.trace("Start allocating unassigned shards");
            }
            if (unassigned.isEmpty()) {
-                return false;
+                return;
            }
 
            /*
@@ -1017,7 +932,6 @@ private boolean allocateUnassigned() {
            int secondaryLength = 0;
            int primaryLength = primary.length;
            ArrayUtil.timSort(primary, comparator);
-            boolean shardAssignmentChanged = false;
            do {
                for (int i = 0; i < primaryLength; i++) {
                    ShardRouting shard = primary[i];
@@ -1035,7 +949,6 @@ private boolean allocateUnassigned() {
 
                        final long shardSize = getExpectedShardSize(shard, ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE, allocation);
                        shard = routingNodes.initializeShard(shard, minNode.getNodeId(), null, shardSize, allocation.changes());
-                        shardAssignmentChanged = true;
                        minNode.addShard(index, shard);
                        if (shard.primary() == false) {
                            // copy over the same replica shards to the secondary array so they will get allocated
@@ -1059,9 +972,6 @@ private boolean allocateUnassigned() {
                        assert allocationDecision.getAllocationStatus() == AllocationStatus.DECIDERS_THROTTLED;
                        final long shardSize = getExpectedShardSize(shard, ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE, allocation);
                        minNode.addShard(projectIndex(shard), shard.initialize(minNode.getNodeId(), null, shardSize));
-                        // If we see a throttle decision in simulation, there must be other shards that got assigned before it.
-                        assert allocation.isSimulating() == false || shardAssignmentChanged
-                            : "shard " + shard + " was throttled but no other shards were assigned";
                    } else {
                        if (logger.isTraceEnabled()) {
                            logger.trace("No Node found to assign shard [{}]", shard);
@@ -1084,7 +994,6 @@ private boolean allocateUnassigned() {
                secondaryLength = 0;
            } while (primaryLength > 0);
            // clear everything we have either added it or moved to ignoreUnassigned
-            return shardAssignmentChanged;
        }
 
        private ProjectIndex projectIndex(ShardRouting shardRouting) {

server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancerSettings.java

Lines changed: 0 additions & 9 deletions
@@ -9,7 +9,6 @@
 
 package org.elasticsearch.cluster.routing.allocation.allocator;
 
-import org.elasticsearch.cluster.ClusterModule;
 import org.elasticsearch.common.settings.ClusterSettings;
 import org.elasticsearch.common.settings.Settings;
 
@@ -27,7 +26,6 @@ public class BalancerSettings {
     private volatile float writeLoadBalanceFactor;
     private volatile float diskUsageBalanceFactor;
     private volatile float threshold;
-    private final boolean completeEarlyOnShardAssignmentChange;
 
     public BalancerSettings(Settings settings) {
         this(ClusterSettings.createBuiltInClusterSettings(settings));
@@ -39,9 +37,6 @@ public BalancerSettings(ClusterSettings clusterSettings) {
        clusterSettings.initializeAndWatch(WRITE_LOAD_BALANCE_FACTOR_SETTING, value -> this.writeLoadBalanceFactor = value);
        clusterSettings.initializeAndWatch(DISK_USAGE_BALANCE_FACTOR_SETTING, value -> this.diskUsageBalanceFactor = value);
        clusterSettings.initializeAndWatch(THRESHOLD_SETTING, value -> this.threshold = value);
-        this.completeEarlyOnShardAssignmentChange = ClusterModule.DESIRED_BALANCE_ALLOCATOR.equals(
-            clusterSettings.get(ClusterModule.SHARDS_ALLOCATOR_TYPE_SETTING)
-        );
    }
 
    /**
@@ -72,8 +67,4 @@ public float getDiskUsageBalanceFactor() {
    public float getThreshold() {
        return threshold;
    }
-
-    public boolean completeEarlyOnShardAssignmentChange() {
-        return completeEarlyOnShardAssignmentChange;
-    }
 }

0 commit comments