Skip to content

Commit 30d4573

Browse files
authored
Handle NOT_PREFERRED when allocating unassigned shards generally (elastic#136733)
1 parent cf60614 commit 30d4573

File tree

4 files changed

+209
-4
lines changed

4 files changed

+209
-4
lines changed

server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocateUnassignedDecision.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,10 @@ public static AllocateUnassignedDecision fromDecision(
187187
@Nullable List<NodeAllocationResult> nodeDecisions
188188
) {
189189
final Type decisionType = decision.type();
190-
AllocationStatus allocationStatus = decisionType != Type.YES ? AllocationStatus.fromDecision(decisionType) : null;
190+
AllocationStatus allocationStatus = switch (decisionType) {
191+
case YES, NOT_PREFERRED -> null;
192+
default -> AllocationStatus.fromDecision(decisionType);
193+
};
191194
return new AllocateUnassignedDecision(assignedNode, nodeDecisions, allocationStatus, null, false, 0L, 0L);
192195
}
193196

server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import org.elasticsearch.common.settings.Settings;
4141
import org.elasticsearch.common.util.Maps;
4242
import org.elasticsearch.common.util.set.Sets;
43+
import org.elasticsearch.core.Nullable;
4344
import org.elasticsearch.core.Tuple;
4445
import org.elasticsearch.gateway.PriorityComparator;
4546
import org.elasticsearch.index.shard.ShardId;
@@ -1346,7 +1347,9 @@ private AllocateUnassignedDecision decideAllocateUnassigned(final ProjectIndex i
13461347
nodeExplanationMap.put(node.getNodeId(), new NodeAllocationResult(node.getRoutingNode().node(), currentDecision, 0));
13471348
nodeWeights.add(Tuple.tuple(node.getNodeId(), currentWeight));
13481349
}
1349-
if (currentDecision.type() == Type.YES || currentDecision.type() == Type.THROTTLE) {
1350+
if (currentDecision.type() == Type.YES
1351+
|| currentDecision.type() == Type.THROTTLE
1352+
|| currentDecision.type() == Type.NOT_PREFERRED) {
13501353
final boolean updateMinNode;
13511354
if (currentWeight == minWeight) {
13521355
/* we have an equal weight tie breaking:
@@ -1367,10 +1370,11 @@ private AllocateUnassignedDecision decideAllocateUnassigned(final ProjectIndex i
13671370
updateMinNode = ((((nodeHigh > repId && minNodeHigh > repId) || (nodeHigh < repId && minNodeHigh < repId))
13681371
&& (nodeHigh < minNodeHigh)) || (nodeHigh > repId && minNodeHigh < repId));
13691372
} else {
1370-
updateMinNode = currentDecision.type() == Type.YES;
1373+
// always prefer a YES, prefer anything over a NOT_PREFERRED
1374+
updateMinNode = currentDecision.type() == Type.YES || decision.type() == Type.NOT_PREFERRED;
13711375
}
13721376
} else {
1373-
updateMinNode = currentWeight < minWeight;
1377+
updateMinNode = preferNewDecisionOverExisting(currentDecision, currentWeight, decision, minWeight);
13741378
}
13751379
if (updateMinNode) {
13761380
minNode = node;
@@ -1397,6 +1401,41 @@ private AllocateUnassignedDecision decideAllocateUnassigned(final ProjectIndex i
13971401
return AllocateUnassignedDecision.fromDecision(decision, minNode != null ? minNode.routingNode.node() : null, nodeDecisions);
13981402
}
13991403

1404+
/**
1405+
* Decide whether to take a new allocation decision/weight over the existing allocation decision/weight
1406+
* <p>
1407+
* We take the lowest weight decision, but we always prefer {@code YES} or {@code THROTTLE} decisions over {@code NOT_PREFERRED}
1408+
*
1409+
* @param newDecision The new decision
1410+
* @param newWeight The new weight
1411+
* @param existingDecision The existing decision, or null if there is no existing decision
1412+
* @param existingWeight The existing weight, or {@link Float#POSITIVE_INFINITY} if there is no existing weight
1413+
* @return true to take the new decision/weight, false to keep the existing decision/weight
1414+
*/
1415+
private static boolean preferNewDecisionOverExisting(
1416+
Decision newDecision,
1417+
float newWeight,
1418+
@Nullable Decision existingDecision,
1419+
float existingWeight
1420+
) {
1421+
assert newDecision != null : "newDecision should never be null";
1422+
assert newDecision.type() == Type.YES || newDecision.type() == Type.NOT_PREFERRED || newDecision.type() == Type.THROTTLE
1423+
: "unsupported decision type: " + newDecision.type();
1424+
assert newWeight != existingWeight : "Equal weights should be handled elsewhere";
1425+
if (existingDecision == null) {
1426+
// This is the first YES/NOT_PREFERRED/THROTTLE decision we've seen, take it
1427+
return true;
1428+
} else if (existingDecision.type() == newDecision.type()) {
1429+
// Decision types are the same, take the lower weight
1430+
return newWeight < existingWeight;
1431+
} else {
1432+
// Decision types are different, take the lower weight unless it's NOT_PREFERRED
1433+
float adjustedNewWeight = newDecision.type() == Type.NOT_PREFERRED ? Float.POSITIVE_INFINITY : newWeight;
1434+
float adjustedExistingWeight = existingDecision.type() == Type.NOT_PREFERRED ? Float.POSITIVE_INFINITY : existingWeight;
1435+
return adjustedNewWeight < adjustedExistingWeight;
1436+
}
1437+
}
1438+
14001439
private static final Comparator<ShardRouting> BY_DESCENDING_SHARD_ID = (s1, s2) -> Integer.compare(s2.id(), s1.id());
14011440

14021441
/**

server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocatorTests.java

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import org.elasticsearch.cluster.routing.ShardRouting;
3636
import org.elasticsearch.cluster.routing.ShardRoutingState;
3737
import org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision;
38+
import org.elasticsearch.cluster.routing.allocation.AllocationService;
3839
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
3940
import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator.Balancer.PrioritiseByShardWriteLoadComparator;
4041
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDecider;
@@ -48,6 +49,7 @@
4849
import org.elasticsearch.common.settings.Settings;
4950
import org.elasticsearch.common.unit.ByteSizeUnit;
5051
import org.elasticsearch.common.unit.ByteSizeValue;
52+
import org.elasticsearch.core.Nullable;
5153
import org.elasticsearch.core.Strings;
5254
import org.elasticsearch.core.Tuple;
5355
import org.elasticsearch.index.IndexVersion;
@@ -1104,6 +1106,161 @@ public void testShardMovementPriorityComparator() {
11041106
}
11051107
}
11061108

1109+
public void testAssigmentPreferenceForUnassignedShards() {
1110+
final var notPreferredDecider = new AllocationDecider() {
1111+
@Override
1112+
public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
1113+
final var nodeId = node.node().getId();
1114+
if (nodeId.startsWith("not-preferred")) {
1115+
return Decision.NOT_PREFERRED;
1116+
} else if (nodeId.startsWith("yes")) {
1117+
return Decision.YES;
1118+
} else if (nodeId.startsWith("no")) {
1119+
return Decision.NO;
1120+
} else if (nodeId.startsWith("throttle")) {
1121+
return Decision.THROTTLE;
1122+
} else {
1123+
throw new AssertionError("unexpected node name: " + node.node().getName());
1124+
}
1125+
}
1126+
};
1127+
1128+
final var allocationService = new MockAllocationService(
1129+
new AllocationDeciders(List.of(notPreferredDecider)),
1130+
new TestGatewayAllocator(),
1131+
new BalancedShardsAllocator(BalancerSettings.DEFAULT, TEST_WRITE_LOAD_FORECASTER, new NodeNameDrivenBalancingWeightsFactory()),
1132+
() -> ClusterInfo.EMPTY,
1133+
SNAPSHOT_INFO_SERVICE_WITH_NO_SHARD_SIZES
1134+
);
1135+
1136+
// No allocation when NO
1137+
assertUnassigned(allocationService, shuffledList("no"));
1138+
// No allocation when THROTTLE
1139+
assertUnassigned(allocationService, shuffledList("throttle"));
1140+
// NOT_PREFERRED when no other choice
1141+
assertAssignedTo(allocationService, "not-preferred", shuffledList("not-preferred"));
1142+
// NOT_PREFERRED over NO
1143+
assertAssignedTo(allocationService, "not-preferred", shuffledList("not-preferred", "no"));
1144+
// THROTTLE (No allocation) over NOT_PREFERRED/NO
1145+
assertUnassigned(allocationService, shuffledList("throttle", "not-preferred", "no"));
1146+
// THROTTLE (No allocation) over NOT_PREFERRED
1147+
assertUnassigned(allocationService, shuffledList("throttle", "not-preferred"));
1148+
// YES over THROTTLE/NO/NOT_PREFERRED
1149+
assertAssignedTo(allocationService, "yes", shuffledList("not-preferred", "yes", "throttle", "no"));
1150+
// prioritize YES/THROTTLE by weight
1151+
assertUnassigned(allocationService, shuffledList("throttle-low", "yes-high", "yes"));
1152+
assertAssignedTo(allocationService, "yes-low", shuffledList("yes-low", "throttle", "throttle-high"));
1153+
// prioritize YES over THROTTLE when weights equal
1154+
assertAssignedTo(allocationService, "yes-low", shuffledList("yes-low", "throttle-low"));
1155+
// prioritize YES by weight
1156+
assertAssignedTo(allocationService, "yes-low", shuffledList("yes-low", "yes", "yes-high"));
1157+
// prioritize NOT_PREFERRED by weight
1158+
assertAssignedTo(allocationService, "not-preferred-low", shuffledList("not-preferred-low", "not-preferred", "not-preferred-high"));
1159+
}
1160+
1161+
private void assertUnassigned(AllocationService allocationService, List<String> allNodeIds) {
1162+
assertAssignedTo(allocationService, null, allNodeIds);
1163+
}
1164+
1165+
private void assertAssignedTo(AllocationService allocationService, @Nullable String expectedNodeId, List<String> allNodeIds) {
1166+
final var discoveryNodesBuilder = DiscoveryNodes.builder();
1167+
for (String nodeName : allNodeIds) {
1168+
discoveryNodesBuilder.add(newNode(nodeName));
1169+
}
1170+
final var projectMetadataBuilder = ProjectMetadata.builder(ProjectId.DEFAULT);
1171+
final var routingTableBuilder = RoutingTable.builder(TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY);
1172+
1173+
final var indexMetadata = anIndex("index", indexSettings(IndexVersion.current(), 1, 0)).build();
1174+
projectMetadataBuilder.put(indexMetadata, false);
1175+
routingTableBuilder.addAsNew(indexMetadata);
1176+
1177+
var clusterState = ClusterState.builder(ClusterName.DEFAULT)
1178+
.nodes(discoveryNodesBuilder)
1179+
.putProjectMetadata(projectMetadataBuilder)
1180+
.putRoutingTable(ProjectId.DEFAULT, routingTableBuilder.build())
1181+
.build();
1182+
1183+
clusterState = startInitializingShardsAndReroute(allocationService, clusterState);
1184+
1185+
final RoutingTable routingTable = clusterState.routingTable(ProjectId.DEFAULT);
1186+
final ShardRouting primaryShard = routingTable.shardRoutingTable(indexMetadata.getIndex().getName(), 0).primaryShard();
1187+
assertThat(primaryShard.currentNodeId(), equalTo(expectedNodeId));
1188+
}
1189+
1190+
/**
1191+
* Returns specific values for {@link WeightFunction#calculateNodeWeightWithIndex} depending on the
1192+
* suffix of the node name.
1193+
*/
1194+
private static class NodeNameDrivenBalancingWeightsFactory implements BalancingWeightsFactory {
1195+
1196+
private static class NodeNameDrivenWeightFunction extends WeightFunction {
1197+
1198+
NodeNameDrivenWeightFunction() {
1199+
super(1.0f, 1.0f, 1.0f, 1.0f);
1200+
}
1201+
1202+
@Override
1203+
float calculateNodeWeightWithIndex(
1204+
BalancedShardsAllocator.Balancer balancer,
1205+
BalancedShardsAllocator.ModelNode node,
1206+
BalancedShardsAllocator.ProjectIndex index
1207+
) {
1208+
final var nodeId = node.getNodeId();
1209+
if (nodeId.endsWith("-high")) {
1210+
return 10.0f;
1211+
} else if (nodeId.endsWith("-low")) {
1212+
return 0.0f;
1213+
} else {
1214+
return 5.0f;
1215+
}
1216+
}
1217+
}
1218+
1219+
private static class NodeNameDrivenBalancingWeights implements BalancingWeights {
1220+
1221+
private final NodeNameDrivenWeightFunction weightFunction = new NodeNameDrivenWeightFunction();
1222+
1223+
@Override
1224+
public WeightFunction weightFunctionForShard(ShardRouting shard) {
1225+
return weightFunction;
1226+
}
1227+
1228+
@Override
1229+
public WeightFunction weightFunctionForNode(RoutingNode node) {
1230+
return weightFunction;
1231+
}
1232+
1233+
@Override
1234+
public NodeSorters createNodeSorters(
1235+
BalancedShardsAllocator.ModelNode[] modelNodes,
1236+
BalancedShardsAllocator.Balancer balancer
1237+
) {
1238+
final var nodeSorter = new BalancedShardsAllocator.NodeSorter(modelNodes, weightFunction, balancer);
1239+
return new NodeSorters() {
1240+
@Override
1241+
public BalancedShardsAllocator.NodeSorter sorterForShard(ShardRouting shard) {
1242+
return nodeSorter;
1243+
}
1244+
1245+
@Override
1246+
public Iterator<BalancedShardsAllocator.NodeSorter> iterator() {
1247+
return List.of(nodeSorter).iterator();
1248+
}
1249+
};
1250+
}
1251+
1252+
@Override
1253+
public boolean diskUsageIgnored() {
1254+
return true;
1255+
}
1256+
}
1257+
1258+
@Override
1259+
public BalancingWeights create() {
1260+
return new NodeNameDrivenBalancingWeights();
1261+
}
1262+
}
1263+
11071264
/**
11081265
* Randomly select a shard and add a random write-load for it
11091266
*

test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1814,6 +1814,12 @@ public static <T> List<T> randomSubsetOf(int size, Collection<T> collection) {
18141814
return tempList.subList(0, size);
18151815
}
18161816

1817+
@SafeVarargs
1818+
@SuppressWarnings("varargs")
1819+
public static <T> List<T> shuffledList(T... values) {
1820+
return shuffledList(Arrays.asList(values));
1821+
}
1822+
18171823
public static <T> List<T> shuffledList(List<T> list) {
18181824
return randomSubsetOf(list.size(), list);
18191825
}

0 commit comments

Comments
 (0)