diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index 865acc26dfe2d..d8ba3db3baf2a 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -704,12 +704,18 @@ private boolean balanceByWeights(NodeSorter sorter) { lowIdx = 0; highIdx = relevantNodes - 1; + assert allocation.isSimulating() == false || routingNodes.getRelocatingShardCount() == 1 + : "unexpected relocation shard count [" + + routingNodes.getRelocatingShardCount() + + "] when balancing index [" + + index + + "], isSimulating=[" + + allocation.isSimulating() + + "], earlyReturn=[" + + completeEarlyOnShardAssignmentChange + + "]"; + if (routingNodes.getRelocatingShardCount() > 0) { - // ES-12955: Check routingNodes.getRelocatingShardCount() > 0 in case the first relocation is a THROTTLE. - // This should rarely happen since in most cases, we don't throttle unless there is an existing relocation. - // But it can happen in production for frozen indices when the cache is still being prepared. It can also - // happen in tests because we have decider like RandomAllocationDecider that can randomly return THROTTLE - // when there is no existing relocation. shardBalanced = true; } if (completeEarlyOnShardAssignmentChange && shardBalanced) { @@ -821,6 +827,20 @@ public boolean moveShards() { shardRouting, bestNonPreferredShardMovementsTracker::shardIsBetterThanCurrent ); + // A THROTTLE allocation decision can happen when not simulating + assert moveDecision.isDecisionTaken() == false + || allocation.isSimulating() == false + || moveDecision.getAllocationDecision() != AllocationDecision.THROTTLED + : "unexpected allocation decision [" + + moveDecision.getAllocationDecision() + + "] (isSimulating=" + + allocation.isSimulating() + + ") with " + + (shardMoved ? "" : "no ") + + "prior shard movements when moving shard [" + + shardRouting + + "]"; + if (moveDecision.isDecisionTaken() && moveDecision.cannotRemainAndCanMove()) { // Defer moving of not-preferred until we've moved the NOs if (moveDecision.getCanRemainDecision().type() == Type.NOT_PREFERRED) { @@ -1233,6 +1253,21 @@ private boolean allocateUnassigned() { ShardRouting shard = primary[i]; final ProjectIndex index = projectIndex(shard); final AllocateUnassignedDecision allocationDecision = decideAllocateUnassigned(index, shard); + + assert allocationDecision.isDecisionTaken() : "decision not taken for unassigned shard [" + shard + "]"; + + // If we see a THROTTLE decision, it's either: + // 1. Not simulating + // 2. Or, there is shard assigned before this one + assert allocation.isSimulating() == false + || allocationDecision.getAllocationStatus() != AllocationStatus.DECIDERS_THROTTLED + || shardAssignmentChanged + : "unexpected THROTTLE decision (isSimulating=" + + allocation.isSimulating() + + ") with no prior assignment when allocating unassigned shard [" + + shard + + "]"; + final String assignedNodeId = allocationDecision.getTargetNode() != null ? allocationDecision.getTargetNode().getId() : null; @@ -1269,9 +1304,6 @@ private boolean allocateUnassigned() { assert allocationDecision.getAllocationStatus() == AllocationStatus.DECIDERS_THROTTLED; final long shardSize = getExpectedShardSize(shard, ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE, allocation); minNode.addShard(projectIndex(shard), shard.initialize(minNode.getNodeId(), null, shardSize)); - // If we see a throttle decision in simulation, there must be other shards that got assigned before it. - assert allocation.isSimulating() == false || shardAssignmentChanged - : "shard " + shard + " was throttled but no other shards were assigned"; } else { if (logger.isTraceEnabled()) { logger.trace("No Node found to assign shard [{}]", shard); diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputer.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputer.java index c27d629bd41ea..274bd3d818261 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputer.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputer.java @@ -486,6 +486,13 @@ public DesiredBalance compute( || info.lastAllocationStatus() == UnassignedInfo.AllocationStatus.DECIDERS_THROTTLED) : "Unexpected stats in: " + info; if (hasChanges == false && info.lastAllocationStatus() == UnassignedInfo.AllocationStatus.DECIDERS_THROTTLED) { + // Unassigned ignored shards must be based on the provided set of ignoredShards + assert ignoredShards.contains(discardAllocationStatus(shard)) + || ignoredShards.stream().filter(ShardRouting::primary).anyMatch(primary -> primary.shardId().equals(shard.shardId())) + : "ignored shard " + + shard + + " unexpectedly has THROTTLE status and no counterpart in the provided ignoredShards set " + + ignoredShards; // Simulation could not progress due to missing information in any of the deciders. // Currently, this could happen if `HasFrozenCacheAllocationDecider` is still fetching the data. // Progress would be made after the followup reroute call. diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ConcurrentRebalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ConcurrentRebalanceAllocationDecider.java index 1d1e37f115d86..4f3fa8a82072f 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ConcurrentRebalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ConcurrentRebalanceAllocationDecider.java @@ -144,6 +144,8 @@ public Decision canRebalance(RoutingAllocation allocation) { int relocatingFrozenShards = allocation.routingNodes().getRelocatingFrozenShardCount(); int relocatingShards = allocation.routingNodes().getRelocatingShardCount(); if (allocation.isSimulating() && relocatingShards >= 2) { + // This branch should no longer run after https://github.com/elastic/elasticsearch/pull/134786 + assert false : "allocation simulation should have returned earlier and not hit throttling"; // BalancedShardAllocator is prone to perform unnecessary moves when cluster_concurrent_rebalance is set to high values (>2). // (See https://github.com/elastic/elasticsearch/issues/87279) // Above allocator is used in DesiredBalanceComputer. Since we do not move actual shard data during calculation diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/RandomAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/RandomAllocationDeciderTests.java index 169cae3b815c8..c0a76195fde67 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/RandomAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/RandomAllocationDeciderTests.java @@ -206,16 +206,16 @@ public RandomAllocationDecider(Random random) { @Override public Decision canRebalance(ShardRouting shardRouting, RoutingAllocation allocation) { - return getRandomDecision(); + return getRandomDecision(allocation.isSimulating() == false || allocation.routingNodes().getRelocatingShardCount() > 0); } - private Decision getRandomDecision() { + private Decision getRandomDecision(boolean canThrottle) { if (alwaysSayYes) { return Decision.YES; } return switch (random.nextInt(10)) { case 9, 8, 7, 6, 5 -> Decision.NO; - case 4 -> Decision.THROTTLE; + case 4 -> canThrottle ? Decision.THROTTLE : Decision.YES; case 3, 2, 1 -> Decision.YES; default -> Decision.ALWAYS; }; @@ -223,12 +223,16 @@ private Decision getRandomDecision() { @Override public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - return getRandomDecision(); + return getRandomDecision( + allocation.isSimulating() == false + || allocation.routingNodes().getIncomingRecoveries(node.nodeId()) > 0 + || allocation.routingNodes().getOutgoingRecoveries(node.nodeId()) > 0 + ); } @Override public Decision canRemain(IndexMetadata indexMetadata, ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - return getRandomDecision(); + return getRandomDecision(false); // throttle does not make sense for canRemain } } diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/allocation/decider/HasFrozenCacheAllocationDecider.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/allocation/decider/HasFrozenCacheAllocationDecider.java index 92c2863b21f21..9c72b08286675 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/allocation/decider/HasFrozenCacheAllocationDecider.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/allocation/decider/HasFrozenCacheAllocationDecider.java @@ -28,6 +28,12 @@ public class HasFrozenCacheAllocationDecider extends AllocationDecider { "value of [" + SHARED_CACHE_SIZE_SETTING.getKey() + "] on this node is not known yet" ); + private static final Decision NO_STILL_FETCHING = Decision.single( + Decision.Type.NO, + NAME, + "Shard movement is not allowed in simulation when value of [" + SHARED_CACHE_SIZE_SETTING.getKey() + "] on this node is not known" + ); + private static final Decision HAS_FROZEN_CACHE = Decision.single( Decision.Type.YES, NAME, @@ -62,26 +68,26 @@ public HasFrozenCacheAllocationDecider(FrozenCacheInfoService frozenCacheService @Override public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - return canAllocateToNode(allocation.metadata().indexMetadata(shardRouting.index()), node.node()); + return canAllocateToNode(allocation.metadata().indexMetadata(shardRouting.index()), node.node(), allocation); } @Override public Decision canRemain(IndexMetadata indexMetadata, ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - return canAllocateToNode(indexMetadata, node.node()); + return canAllocateToNode(indexMetadata, node.node(), allocation); } @Override public Decision canAllocate(IndexMetadata indexMetadata, RoutingNode node, RoutingAllocation allocation) { - return canAllocateToNode(indexMetadata, node.node()); + return canAllocateToNode(indexMetadata, node.node(), allocation); } @Override public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNode node, RoutingAllocation allocation) { - return canAllocateToNode(indexMetadata, node); + return canAllocateToNode(indexMetadata, node, allocation); } // Package private for tests - Decision canAllocateToNode(IndexMetadata indexMetadata, DiscoveryNode discoveryNode) { + Decision canAllocateToNode(IndexMetadata indexMetadata, DiscoveryNode discoveryNode, RoutingAllocation allocation) { if (indexMetadata.isPartialSearchableSnapshot() == false) { return Decision.ALWAYS; } @@ -90,7 +96,8 @@ Decision canAllocateToNode(IndexMetadata indexMetadata, DiscoveryNode discoveryN case HAS_CACHE -> HAS_FROZEN_CACHE; case NO_CACHE -> NO_FROZEN_CACHE; case FAILED -> UNKNOWN_FROZEN_CACHE; - case FETCHING -> STILL_FETCHING; + // TODO: considering returning NO as well for non-simulation ES-13378 + case FETCHING -> allocation.isSimulating() ? NO_STILL_FETCHING : STILL_FETCHING; case UNKNOWN -> NO_UNKNOWN_NODE; }; } diff --git a/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/allocation/decider/HasFrozenCacheAllocationDeciderTests.java b/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/allocation/decider/HasFrozenCacheAllocationDeciderTests.java index e2bcb53115079..28674bdf11109 100644 --- a/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/allocation/decider/HasFrozenCacheAllocationDeciderTests.java +++ b/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/allocation/decider/HasFrozenCacheAllocationDeciderTests.java @@ -9,6 +9,7 @@ import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.cluster.routing.allocation.decider.Decision; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.test.ESTestCase; @@ -27,6 +28,7 @@ public class HasFrozenCacheAllocationDeciderTests extends ESTestCase { public void testCanAllocateToNode() { final var frozenCacheService = mock(FrozenCacheInfoService.class); + final var allocation = mock(RoutingAllocation.class); final var decider = new HasFrozenCacheAllocationDecider(frozenCacheService); final var indexMetadata = IndexMetadata.builder(randomIdentifier()) @@ -35,7 +37,8 @@ public void testCanAllocateToNode() { for (var nodeState : NodeState.values()) { when(frozenCacheService.getNodeState(any(DiscoveryNode.class))).thenReturn(nodeState); - assertThat(decider.canAllocateToNode(indexMetadata, mock(DiscoveryNode.class)), equalTo(Decision.ALWAYS)); + when(allocation.isSimulating()).thenReturn(randomBoolean()); + assertThat(decider.canAllocateToNode(indexMetadata, mock(DiscoveryNode.class), allocation), equalTo(Decision.ALWAYS)); } final var partialSearchableSnapshotIndexMetadata = IndexMetadata.builder(randomIdentifier()) @@ -47,12 +50,18 @@ public void testCanAllocateToNode() { for (var nodeState : NodeState.values()) { when(frozenCacheService.getNodeState(any(DiscoveryNode.class))).thenReturn(nodeState); - final Decision decision = decider.canAllocateToNode(partialSearchableSnapshotIndexMetadata, mock(DiscoveryNode.class)); + final boolean isSimulating = randomBoolean(); + when(allocation.isSimulating()).thenReturn(isSimulating); + final Decision decision = decider.canAllocateToNode( + partialSearchableSnapshotIndexMetadata, + mock(DiscoveryNode.class), + allocation + ); final Decision.Type expectedType; if (nodeState == NodeState.HAS_CACHE) { expectedType = Decision.Type.YES; } else if (nodeState == NodeState.FETCHING) { - expectedType = Decision.Type.THROTTLE; + expectedType = isSimulating ? Decision.Type.NO : Decision.Type.THROTTLE; } else { expectedType = Decision.Type.NO; }