|
11 | 11 |
|
12 | 12 | import org.elasticsearch.action.support.replication.ClusterStateCreationUtils; |
13 | 13 | import org.elasticsearch.cluster.ClusterInfo; |
| 14 | +import org.elasticsearch.cluster.ClusterInfoSimulator; |
14 | 15 | import org.elasticsearch.cluster.ClusterState; |
15 | 16 | import org.elasticsearch.cluster.ESAllocationTestCase; |
16 | 17 | import org.elasticsearch.cluster.NodeUsageStatsForThreadPools; |
17 | 18 | import org.elasticsearch.cluster.NodeUsageStatsForThreadPools.ThreadPoolUsageStats; |
18 | 19 | import org.elasticsearch.cluster.metadata.IndexMetadata; |
| 20 | +import org.elasticsearch.cluster.metadata.ProjectId; |
19 | 21 | import org.elasticsearch.cluster.node.DiscoveryNode; |
| 22 | +import org.elasticsearch.cluster.routing.RoutingChangesObserver; |
20 | 23 | import org.elasticsearch.cluster.routing.RoutingNode; |
21 | 24 | import org.elasticsearch.cluster.routing.RoutingNodes; |
22 | 25 | import org.elasticsearch.cluster.routing.RoutingNodesHelper; |
|
25 | 28 | import org.elasticsearch.cluster.routing.TestShardRouting; |
26 | 29 | import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; |
27 | 30 | import org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintSettings; |
| 31 | +import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; |
28 | 32 | import org.elasticsearch.common.Strings; |
29 | 33 | import org.elasticsearch.common.regex.Regex; |
30 | 34 | import org.elasticsearch.common.settings.ClusterSettings; |
31 | 35 | import org.elasticsearch.common.settings.Settings; |
| 36 | +import org.elasticsearch.core.TimeValue; |
32 | 37 | import org.elasticsearch.index.Index; |
33 | 38 | import org.elasticsearch.index.shard.ShardId; |
| 39 | +import org.elasticsearch.snapshots.SnapshotShardSizeInfo; |
34 | 40 | import org.elasticsearch.threadpool.ThreadPool; |
35 | 41 |
|
36 | 42 | import java.util.HashMap; |
| 43 | +import java.util.HashSet; |
| 44 | +import java.util.List; |
| 45 | +import java.util.Map; |
| 46 | +import java.util.stream.Collectors; |
37 | 47 |
|
38 | 48 | import static org.elasticsearch.common.settings.ClusterSettings.createBuiltInClusterSettings; |
39 | 49 | import static org.mockito.ArgumentMatchers.any; |
@@ -155,14 +165,25 @@ public void testWriteLoadDeciderCanAllocate() { |
155 | 165 | "Shard [[test-index][0]] in index [[test-index]] can be assigned to node [*]. The node's utilization would become [*]" |
156 | 166 | ); |
157 | 167 | assertDecisionMatches( |
158 | | - "Assigning a new shard without a write load estimate should _not_ be blocked by lack of capacity", |
| 168 | + "Assigning a new shard without a write load estimate to an over-threshold node should be blocked", |
159 | 169 | writeLoadDecider.canAllocate( |
160 | 170 | testHarness.shardRoutingNoWriteLoad, |
161 | 171 | testHarness.exceedingThresholdRoutingNode, |
162 | 172 | testHarness.routingAllocation |
163 | 173 | ), |
| 174 | + Decision.Type.NOT_PREFERRED, |
| 175 | + "Node [*] with write thread pool utilization [0.99] already exceeds the high utilization threshold of " |
| 176 | + + "[0.900000]. Cannot allocate shard [[test-index][2]] to node without risking increased write latencies." |
| 177 | + ); |
| 178 | + assertDecisionMatches( |
| 179 | + "Assigning a new shard without a write load estimate to an under-threshold node should be allowed", |
| 180 | + writeLoadDecider.canAllocate( |
| 181 | + testHarness.shardRoutingNoWriteLoad, |
| 182 | + testHarness.belowThresholdRoutingNode, |
| 183 | + testHarness.routingAllocation |
| 184 | + ), |
164 | 185 | Decision.Type.YES, |
165 | | - "Shard has no estimated write load. Decider takes no action." |
| 186 | + "Shard [[test-index][2]] in index [[test-index]] can be assigned to node [*]. The node's utilization would become [*]" |
166 | 187 | ); |
167 | 188 | assertDecisionMatches( |
168 | 189 | "Assigning a new shard that would cause the node to exceed capacity should fail", |
@@ -247,6 +268,109 @@ public void testWriteLoadDeciderCanRemain() { |
247 | 268 | ); |
248 | 269 | } |
249 | 270 |
|
    /**
     * Verifies that after the balancer moves a shard away from a write-hot-spotted node,
     * re-running the balancer against the post-move (simulated) cluster info does not simply
     * move a shard back — i.e. the {@code WriteLoadConstraintDecider} keeps the relocation stable
     * even when all shard write loads are zero or missing.
     */
    public void testWriteLoadDeciderShouldPreventBalancerMovingShardsBack() {
        final var indexName = randomIdentifier();
        final int numThreads = randomIntBetween(1, 10);
        // Randomized decider thresholds; the per-node thread pool stats created below are derived
        // from these so that one node is always over threshold and the other always comfortably under.
        final float highUtilizationThreshold = randomFloatBetween(0.5f, 0.9f, true);
        final long highLatencyThreshold = randomLongBetween(1000, 10000);
        final var settings = Settings.builder()
            .put(
                WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey(),
                WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED
            )
            .put(WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_HIGH_UTILIZATION_THRESHOLD_SETTING.getKey(), highUtilizationThreshold)
            .put(
                WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_QUEUE_LATENCY_THRESHOLD_SETTING.getKey(),
                TimeValue.timeValueMillis(highLatencyThreshold)
            )
            .build();

        // Two nodes, one index with four shards; one randomly chosen node is made the hot-spot.
        final var state = ClusterStateCreationUtils.state(2, new String[] { indexName }, 4);
        final var balancedShardsAllocator = new BalancedShardsAllocator(settings);
        final var overloadedNode = randomFrom(state.nodes().getAllNodes());
        final var otherNode = state.nodes().stream().filter(node -> node != overloadedNode).findFirst().orElseThrow();
        final var clusterInfo = ClusterInfo.builder()
            .nodeUsageStatsForThreadPools(
                Map.of(
                    overloadedNode.getId(),
                    new NodeUsageStatsForThreadPools(
                        overloadedNode.getId(),
                        Map.of(
                            ThreadPool.Names.WRITE,
                            // Utilization and queue latency both exceed the configured thresholds.
                            new ThreadPoolUsageStats(
                                numThreads,
                                randomFloatBetween(highUtilizationThreshold, 1.1f, false),
                                randomLongBetween(highLatencyThreshold, highLatencyThreshold * 2)
                            )
                        )
                    ),
                    otherNode.getId(),
                    new NodeUsageStatsForThreadPools(
                        otherNode.getId(),
                        Map.of(
                            ThreadPool.Names.WRITE,
                            // Utilization and queue latency both stay well below the thresholds.
                            new ThreadPoolUsageStats(
                                numThreads,
                                randomFloatBetween(0.0f, highUtilizationThreshold / 2, true),
                                randomLongBetween(0, highLatencyThreshold / 2)
                            )
                        )
                    )
                )
            )
            // simulate all zero or missing shard write loads
            .shardWriteLoads(
                state.routingTable(ProjectId.DEFAULT)
                    .allShards()
                    .filter(ignored -> randomBoolean()) // some write-loads are missing altogether
                    .collect(Collectors.toMap(ShardRouting::shardId, ignored -> 0.0d)) // the rest are zero
            )
            .build();

        final var clusterSettings = createBuiltInClusterSettings(settings);
        final var writeLoadConstraintDecider = new WriteLoadConstraintDecider(clusterSettings);
        final var routingAllocation = new RoutingAllocation(
            new AllocationDeciders(List.of(writeLoadConstraintDecider)),
            state.getRoutingNodes().mutableCopy(),
            state,
            clusterInfo,
            SnapshotShardSizeInfo.EMPTY,
            randomLong()
        );

        // This should move a shard in an attempt to resolve the hot-spot
        balancedShardsAllocator.allocate(routingAllocation);

        // Expect a 1/3 split: one shard was moved off the overloaded node.
        assertEquals(1, routingAllocation.routingNodes().node(overloadedNode.getId()).numberOfOwningShards());
        assertEquals(3, routingAllocation.routingNodes().node(otherNode.getId()).numberOfOwningShards());

        // Start every INITIALIZING (i.e. relocating) shard and feed each start into the simulator
        // so the second balancing round sees cluster info reflecting the completed moves.
        final var clusterInfoSimulator = new ClusterInfoSimulator(routingAllocation);
        final var movedShards = new HashSet<ShardRouting>();
        for (RoutingNode routingNode : routingAllocation.routingNodes()) {
            movedShards.addAll(routingNode.shardsWithState(ShardRoutingState.INITIALIZING).collect(Collectors.toSet()));
        }
        movedShards.forEach(shardRouting -> {
            // No-op observer: this test only cares about the resulting routing table, not change events.
            routingAllocation.routingNodes().startShard(shardRouting, new RoutingChangesObserver() {
            }, randomNonNegativeLong());
            clusterInfoSimulator.simulateShardStarted(shardRouting);
        });

        // This should run through the balancer without moving any shards back
        ClusterInfo simulatedClusterInfo = clusterInfoSimulator.getClusterInfo();
        balancedShardsAllocator.allocate(
            new RoutingAllocation(
                routingAllocation.deciders(),
                routingAllocation.routingNodes(),
                routingAllocation.getClusterState(),
                simulatedClusterInfo,
                routingAllocation.snapshotShardSizeInfo(),
                randomLong()
            )
        );
        // The 1/3 split must be unchanged: the decider prevented the balancer undoing the move.
        assertEquals(1, routingAllocation.routingNodes().node(overloadedNode.getId()).numberOfOwningShards());
        assertEquals(3, routingAllocation.routingNodes().node(otherNode.getId()).numberOfOwningShards());
    }
| 373 | + |
250 | 374 | private void assertDecisionMatches(String description, Decision decision, Decision.Type type, String explanationPattern) { |
251 | 375 | assertEquals(description, type, decision.type()); |
252 | 376 | if (explanationPattern == null) { |
|
0 commit comments