-
Notifications
You must be signed in to change notification settings - Fork 25.7k
Tighten on when THROTTLE decision can be returned #136794
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9c56ded
e039dc1
4024c40
3a8664d
4af235f
7d19eac
548ea9d
c7f50d8
1f04641
4025f16
d36a3d9
69227ae
f7eba77
b905995
c62be8e
3750f3d
44c9c66
6cdfdc1
1726061
228c35d
d0883de
2c44ae1
abfe345
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -704,12 +704,18 @@ private boolean balanceByWeights(NodeSorter sorter) { | |
| lowIdx = 0; | ||
| highIdx = relevantNodes - 1; | ||
|
|
||
| assert allocation.isSimulating() == false || routingNodes.getRelocatingShardCount() == 1 | ||
| : "unexpected relocation shard count [" | ||
| + routingNodes.getRelocatingShardCount() | ||
| + "] when balancing index [" | ||
| + index | ||
| + "], isSimulating=[" | ||
| + allocation.isSimulating() | ||
| + "], earlyReturn=[" | ||
| + completeEarlyOnShardAssignmentChange | ||
| + "]"; | ||
|
|
||
| if (routingNodes.getRelocatingShardCount() > 0) { | ||
| // ES-12955: Check routingNodes.getRelocatingShardCount() > 0 in case the first relocation is a THROTTLE. | ||
| // This should rarely happen since in most cases, we don't throttle unless there is an existing relocation. | ||
| // But it can happen in production for frozen indices when the cache is still being prepared. It can also | ||
| // happen in tests because we have decider like RandomAllocationDecider that can randomly return THROTTLE | ||
| // when there is no existing relocation. | ||
| shardBalanced = true; | ||
| } | ||
| if (completeEarlyOnShardAssignmentChange && shardBalanced) { | ||
|
|
@@ -821,6 +827,20 @@ public boolean moveShards() { | |
| shardRouting, | ||
| bestNonPreferredShardMovementsTracker::shardIsBetterThanCurrent | ||
| ); | ||
| // A THROTTLE allocation decision can happen when not simulating | ||
| assert moveDecision.isDecisionTaken() == false | ||
| || allocation.isSimulating() == false | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. opt nit: might flip
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure see 1726061 |
||
| || moveDecision.getAllocationDecision() != AllocationDecision.THROTTLED | ||
| : "unexpected allocation decision [" | ||
| + moveDecision.getAllocationDecision() | ||
| + "] (isSimulating=" | ||
| + allocation.isSimulating() | ||
| + ") with " | ||
| + (shardMoved ? "" : "no ") | ||
| + "prior shard movements when moving shard [" | ||
| + shardRouting | ||
| + "]"; | ||
|
|
||
| if (moveDecision.isDecisionTaken() && moveDecision.cannotRemainAndCanMove()) { | ||
| // Defer moving of not-preferred until we've moved the NOs | ||
| if (moveDecision.getCanRemainDecision().type() == Type.NOT_PREFERRED) { | ||
|
|
@@ -1233,6 +1253,21 @@ private boolean allocateUnassigned() { | |
| ShardRouting shard = primary[i]; | ||
| final ProjectIndex index = projectIndex(shard); | ||
| final AllocateUnassignedDecision allocationDecision = decideAllocateUnassigned(index, shard); | ||
|
|
||
| assert allocationDecision.isDecisionTaken() : "decision not taken for unassigned shard [" + shard + "]"; | ||
|
|
||
| // If we see a THROTTLE decision, it's either: | ||
| // 1. Not simulating | ||
| // 2. Or, there is shard assigned before this one | ||
| assert allocation.isSimulating() == false | ||
| || allocationDecision.getAllocationStatus() != AllocationStatus.DECIDERS_THROTTLED | ||
| || shardAssignmentChanged | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unassigned replica shards can returning THROTTLE because only the primary shard path is THROTTLE free during simulation. At least until I get ES-12942 complete. I take it that's the problem here? Since we have more or less ignored the allocateUnassigned code path for not-preferred, feel free to skip this assert: I'm not sure what benefit it adds right now. Or you can add a TODO referencing ES-12942 and I expect I should be able to tighten this up. We'll revisit allocateUnassigned for not-preferred in ES-13279, too.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes In summary, let me raise a follow-up to fix |
||
| : "unexpected THROTTLE decision (isSimulating=" | ||
| + allocation.isSimulating() | ||
| + ") with no prior assignment when allocating unassigned shard [" | ||
| + shard | ||
| + "]"; | ||
|
|
||
| final String assignedNodeId = allocationDecision.getTargetNode() != null | ||
| ? allocationDecision.getTargetNode().getId() | ||
| : null; | ||
|
|
@@ -1269,9 +1304,6 @@ private boolean allocateUnassigned() { | |
| assert allocationDecision.getAllocationStatus() == AllocationStatus.DECIDERS_THROTTLED; | ||
| final long shardSize = getExpectedShardSize(shard, ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE, allocation); | ||
| minNode.addShard(projectIndex(shard), shard.initialize(minNode.getNodeId(), null, shardSize)); | ||
| // If we see a throttle decision in simulation, there must be other shards that got assigned before it. | ||
| assert allocation.isSimulating() == false || shardAssignmentChanged | ||
| : "shard " + shard + " was throttled but no other shards were assigned"; | ||
| } else { | ||
| if (logger.isTraceEnabled()) { | ||
| logger.trace("No Node found to assign shard [{}]", shard); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -486,6 +486,13 @@ public DesiredBalance compute( | |
| || info.lastAllocationStatus() == UnassignedInfo.AllocationStatus.DECIDERS_THROTTLED) : "Unexpected stats in: " + info; | ||
|
|
||
| if (hasChanges == false && info.lastAllocationStatus() == UnassignedInfo.AllocationStatus.DECIDERS_THROTTLED) { | ||
| // Unassigned ignored shards must be based on the provided set of ignoredShards | ||
| assert ignoredShards.contains(discardAllocationStatus(shard)) | ||
| || ignoredShards.stream().filter(ShardRouting::primary).anyMatch(primary -> primary.shardId().equals(shard.shardId())) | ||
| : "ignored shard " | ||
| + shard | ||
| + " unexpectedly has THROTTLE status and no counterpart in the provided ignoredShards set " | ||
| + ignoredShards; | ||
| // Simulation could not progress due to missing information in any of the deciders. | ||
| // Currently, this could happen if `HasFrozenCacheAllocationDecider` is still fetching the data. | ||
| // Progress would be made after the followup reroute call. | ||
|
Comment on lines
488
to
498
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment about |
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.