Skip to content

Commit bd0d1ac

Browse files
authored
WriteLoadConstraintDecider: Always allow allocation of unassigned shards (#135245)
1 parent 219337d commit bd0d1ac

File tree

2 files changed

+62
-16
lines changed

2 files changed

+62
-16
lines changed

server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDecider.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing
4343
return Decision.single(Decision.Type.YES, NAME, "Decider is disabled");
4444
}
4545

46+
// Never reject allocation of an unassigned shard
47+
if (shardRouting.assignedToNode() == false) {
48+
return Decision.single(Decision.Type.YES, NAME, "Shard is unassigned. Decider takes no action.");
49+
}
50+
4651
// Check whether the shard being relocated has any write load estimate. If it does not, then this decider has no opinion.
4752
var allShardWriteLoads = allocation.clusterInfo().getShardWriteLoads();
4853
var shardWriteLoad = allShardWriteLoads.get(shardRouting.shardId());

server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java

Lines changed: 57 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
import org.elasticsearch.cluster.routing.TestShardRouting;
2626
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
2727
import org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintSettings;
28+
import org.elasticsearch.common.Strings;
29+
import org.elasticsearch.common.regex.Regex;
2830
import org.elasticsearch.common.settings.Settings;
2931
import org.elasticsearch.index.Index;
3032
import org.elasticsearch.index.shard.ShardId;
@@ -107,38 +109,66 @@ public void testWriteLoadDeciderCanAllocate() {
107109
)
108110
.build()
109111
);
110-
assertEquals(
112+
assertDecisionMatches(
111113
"Assigning a new shard to a node that is above the threshold should fail",
112-
Decision.Type.NOT_PREFERRED,
113114
writeLoadDecider.canAllocate(
114115
testHarness.shardRouting2,
115116
testHarness.exceedingThresholdRoutingNode,
116117
testHarness.routingAllocation
117-
).type()
118+
),
119+
Decision.Type.NOT_PREFERRED,
120+
"Node [*] with write thread pool utilization [0.99] already exceeds the high utilization threshold of [0.900000]. "
121+
+ "Cannot allocate shard [[test-index][1]] to node without risking increased write latencies."
118122
);
119-
assertEquals(
123+
assertDecisionMatches(
124+
"Unassigned shard should always be accepted",
125+
writeLoadDecider.canAllocate(
126+
testHarness.unassignedShardRouting,
127+
randomFrom(testHarness.exceedingThresholdRoutingNode, testHarness.belowThresholdRoutingNode),
128+
testHarness.routingAllocation
129+
),
130+
Decision.Type.YES,
131+
"Shard is unassigned. Decider takes no action."
132+
);
133+
assertDecisionMatches(
120134
"Assigning a new shard to a node that has capacity should succeed",
135+
writeLoadDecider.canAllocate(testHarness.shardRouting1, testHarness.belowThresholdRoutingNode, testHarness.routingAllocation),
121136
Decision.Type.YES,
122-
writeLoadDecider.canAllocate(testHarness.shardRouting1, testHarness.belowThresholdRoutingNode, testHarness.routingAllocation)
123-
.type()
137+
null
124138
);
125-
assertEquals(
139+
assertDecisionMatches(
126140
"Assigning a new shard without a write load estimate should _not_ be blocked by lack of capacity",
127-
Decision.Type.YES,
128141
writeLoadDecider.canAllocate(
129142
testHarness.thirdRoutingNoWriteLoad,
130143
testHarness.exceedingThresholdRoutingNode,
131144
testHarness.routingAllocation
132-
).type()
145+
),
146+
Decision.Type.YES,
147+
"Shard has no estimated write load. Decider takes no action."
133148
);
134-
assertEquals(
149+
assertDecisionMatches(
135150
"Assigning a new shard that would cause the node to exceed capacity should fail",
151+
writeLoadDecider.canAllocate(testHarness.shardRouting1, testHarness.nearThresholdRoutingNode, testHarness.routingAllocation),
136152
Decision.Type.NOT_PREFERRED,
137-
writeLoadDecider.canAllocate(testHarness.shardRouting1, testHarness.nearThresholdRoutingNode, testHarness.routingAllocation)
138-
.type()
153+
"The high utilization threshold of [0.900000] would be exceeded on node [*] with utilization [0.89] "
154+
+ "if shard [[test-index][0]] with estimated additional utilisation [0.06250] (write load [0.50000] / threads [8]) were "
155+
+ "assigned to it. Cannot allocate shard to node without risking increased write latencies."
156+
139157
);
140158
}
141159

160+
/**
 * Test helper that checks both the type and the explanation of a {@link Decision} in one call.
 *
 * @param description        failure message passed to the type assertion
 * @param decision           the decision under test
 * @param type               the decision type that {@code decision.type()} must equal
 * @param explanationPattern pattern handed to {@code Regex.simpleMatch} together with the decision's explanation,
 *                           or {@code null} to assert that the decision carries no explanation at all.
 *                           NOTE(review): callers in this test write {@code [*]} where the node identity varies,
 *                           so this presumably relies on simpleMatch treating '*' as a wildcard; confirm against Regex.
 */
private void assertDecisionMatches(String description, Decision decision, Decision.Type type, String explanationPattern) {
161+
// The type is checked first, so a wrong decision type is reported with the caller's description.
assertEquals(description, type, decision.type());
162+
if (explanationPattern == null) {
163+
// A null pattern means "expect no explanation", not "skip the explanation check".
assertNull(decision.getExplanation());
164+
} else {
165+
assertTrue(
166+
// Failure message shows the expected pattern next to the actual explanation.
Strings.format("Expected: \"%s\", got \"%s\"", explanationPattern, decision.getExplanation()),
167+
Regex.simpleMatch(explanationPattern, decision.getExplanation())
168+
);
169+
}
170+
}
171+
142172
/**
143173
* Carries all the cluster state objects needed for testing after {@link #createClusterStateAndRoutingAllocation} sets them up.
144174
*/
@@ -150,7 +180,8 @@ private record TestHarness(
150180
RoutingNode nearThresholdRoutingNode,
151181
ShardRouting shardRouting1,
152182
ShardRouting shardRouting2,
153-
ShardRouting thirdRoutingNoWriteLoad
183+
ShardRouting thirdRoutingNoWriteLoad,
184+
ShardRouting unassignedShardRouting
154185
) {}
155186

156187
/**
@@ -188,6 +219,7 @@ private TestHarness createClusterStateAndRoutingAllocation(String indexName) {
188219
ShardId testShardId1 = new ShardId(testIndex, 0);
189220
ShardId testShardId2 = new ShardId(testIndex, 1);
190221
ShardId testShardId3NoWriteLoad = new ShardId(testIndex, 2);
222+
ShardId testShardId4Unassigned = new ShardId(testIndex, 3);
191223

192224
/**
193225
* Create a ClusterInfo that includes the node and shard level write load estimates for a variety of node capacity situations.
@@ -213,6 +245,9 @@ private TestHarness createClusterStateAndRoutingAllocation(String indexName) {
213245
shardIdToWriteLoadEstimate.put(testShardId1, 0.5);
214246
shardIdToWriteLoadEstimate.put(testShardId2, 0.5);
215247
shardIdToWriteLoadEstimate.put(testShardId3NoWriteLoad, 0d);
248+
if (randomBoolean()) {
249+
shardIdToWriteLoadEstimate.put(testShardId4Unassigned, randomDoubleBetween(0.0, 2.0, true));
250+
}
216251

217252
ClusterInfo clusterInfo = ClusterInfo.builder()
218253
.nodeUsageStatsForThreadPools(nodeIdToNodeUsageStatsForThreadPools)
@@ -253,6 +288,12 @@ private TestHarness createClusterStateAndRoutingAllocation(String indexName) {
253288
true,
254289
ShardRoutingState.STARTED
255290
);
291+
ShardRouting unassignedShardRouting = TestShardRouting.newShardRouting(
292+
testShardId4Unassigned,
293+
null,
294+
true,
295+
ShardRoutingState.UNASSIGNED
296+
);
256297

257298
RoutingNode exceedingThresholdRoutingNode = RoutingNodesHelper.routingNode(
258299
exceedingThresholdDiscoveryNode.getId(),
@@ -266,8 +307,7 @@ private TestHarness createClusterStateAndRoutingAllocation(String indexName) {
266307
);
267308
RoutingNode nearThresholdRoutingNode = RoutingNodesHelper.routingNode(
268309
nearThresholdDiscoveryNode3.getId(),
269-
nearThresholdDiscoveryNode3,
270-
new ShardRouting[] {}
310+
nearThresholdDiscoveryNode3
271311
);
272312

273313
return new TestHarness(
@@ -278,7 +318,8 @@ private TestHarness createClusterStateAndRoutingAllocation(String indexName) {
278318
nearThresholdRoutingNode,
279319
shardRouting1,
280320
shardRouting2,
281-
thirdRoutingNoWriteLoad
321+
thirdRoutingNoWriteLoad,
322+
unassignedShardRouting
282323
);
283324
}
284325

0 commit comments

Comments
 (0)