Skip to content

Commit c9dc3e8

Browse files
authored
Simulate shards moved by explicit commands (#136066)
In DesiredBalancerComputer, shard movements should be simulated by starting any shards that are initializatng. Previously the explicitly moved shards are not covered. This PR fixes it. Resolves: ES-12943
1 parent 8c4eee6 commit c9dc3e8

File tree

4 files changed

+130
-10
lines changed

4 files changed

+130
-10
lines changed

docs/changelog/136066.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 136066
2+
summary: Simulate shards moved by explicit commands
3+
area: Allocation
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -156,9 +156,8 @@ public void allocate(RoutingAllocation allocation) {
156156
assert allocation.ignoreDisable() == false;
157157
assert allocation.isSimulating() == false || allocation.routingNodes().hasInactiveShards() == false
158158
: "expect no initializing shard, but got " + allocation.routingNodes();
159-
// TODO: ES-12943 cannot assert the following because shards moved by commands are not simulated promptly in DesiredBalanceComputer
160-
// assert allocation.isSimulating() == false || allocation.routingNodes().getRelocatingShardCount() == 0
161-
// : "expect no relocating shard, but got " + allocation.routingNodes();
159+
assert allocation.isSimulating() == false || allocation.routingNodes().getRelocatingShardCount() == 0
160+
: "expect no relocating shard, but got " + allocation.routingNodes();
162161

163162
if (allocation.routingNodes().size() == 0) {
164163
failAllocationOfNewPrimaries(allocation);

server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputer.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,25 @@ public DesiredBalance compute(
291291
while ((commands = pendingDesiredBalanceMoves.poll()) != null) {
292292
for (MoveAllocationCommand command : commands) {
293293
try {
294-
command.execute(routingAllocation, false);
294+
final var rerouteExplanation = command.execute(routingAllocation, false);
295+
assert rerouteExplanation.decisions().type() != Decision.Type.NO : "should have thrown for NO decision";
296+
if (rerouteExplanation.decisions().type() != Decision.Type.NO) {
297+
final ShardRouting[] initializingShards = routingNodes.node(
298+
routingAllocation.nodes().resolveNode(command.toNode()).getId()
299+
).initializing();
300+
assert initializingShards.length == 1
301+
: "expect exactly one relocating shard, but got: " + List.of(initializingShards);
302+
final var initializingShard = initializingShards[0];
303+
assert routingAllocation.nodes()
304+
.resolveNode(command.fromNode())
305+
.getId()
306+
.equals(initializingShard.relocatingNodeId())
307+
: initializingShard
308+
+ " has unexpected relocation source node, expect node "
309+
+ routingAllocation.nodes().resolveNode(command.fromNode());
310+
clusterInfoSimulator.simulateShardStarted(initializingShard);
311+
routingNodes.startShard(initializingShard, changes, 0L);
312+
}
295313
} catch (RuntimeException e) {
296314
logger.debug(
297315
() -> "move shard ["

server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java

Lines changed: 104 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
3636
import org.elasticsearch.cluster.routing.RecoverySource;
3737
import org.elasticsearch.cluster.routing.RoutingChangesObserver;
38+
import org.elasticsearch.cluster.routing.RoutingNode;
3839
import org.elasticsearch.cluster.routing.RoutingNodes;
3940
import org.elasticsearch.cluster.routing.RoutingTable;
4041
import org.elasticsearch.cluster.routing.ShardRouting;
@@ -44,7 +45,9 @@
4445
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
4546
import org.elasticsearch.cluster.routing.allocation.ShardAllocationDecision;
4647
import org.elasticsearch.cluster.routing.allocation.command.MoveAllocationCommand;
48+
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDecider;
4749
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders;
50+
import org.elasticsearch.cluster.routing.allocation.decider.Decision;
4851
import org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider;
4952
import org.elasticsearch.common.Randomness;
5053
import org.elasticsearch.common.UUIDs;
@@ -54,6 +57,7 @@
5457
import org.elasticsearch.common.time.TimeProviderUtils;
5558
import org.elasticsearch.common.unit.ByteSizeValue;
5659
import org.elasticsearch.common.util.Maps;
60+
import org.elasticsearch.common.util.set.Sets;
5761
import org.elasticsearch.core.Strings;
5862
import org.elasticsearch.core.TimeValue;
5963
import org.elasticsearch.core.Tuple;
@@ -99,6 +103,7 @@
99103
import static org.hamcrest.Matchers.aMapWithSize;
100104
import static org.hamcrest.Matchers.allOf;
101105
import static org.hamcrest.Matchers.anyOf;
106+
import static org.hamcrest.Matchers.arrayWithSize;
102107
import static org.hamcrest.Matchers.equalTo;
103108
import static org.hamcrest.Matchers.everyItem;
104109
import static org.hamcrest.Matchers.hasEntry;
@@ -566,7 +571,24 @@ public void testNoDataNodes() {
566571
}
567572

568573
public void testAppliesMoveCommands() {
569-
var desiredBalanceComputer = createDesiredBalanceComputer();
574+
var desiredBalanceComputer = createDesiredBalanceComputer(new ShardsAllocator() {
575+
@Override
576+
public void allocate(RoutingAllocation allocation) {
577+
// This runs after the move commands have been applied, we assert that the relocating shards caused by the move
578+
// commands are all started by the simulation.
579+
assertThat(
580+
"unexpected relocating shards: " + allocation.routingNodes(),
581+
allocation.routingNodes().getRelocatingShardCount(),
582+
equalTo(0)
583+
);
584+
assertThat(allocation.routingNodes().node("node-2").started(), arrayWithSize(2));
585+
}
586+
587+
@Override
588+
public ShardAllocationDecision decideShardAllocation(ShardRouting shard, RoutingAllocation allocation) {
589+
throw new AssertionError("only used for allocation explain");
590+
}
591+
});
570592
var clusterState = createInitialClusterState(3);
571593
var index = clusterState.metadata().getProject().index(TEST_INDEX).getIndex();
572594

@@ -578,23 +600,99 @@ public void testAppliesMoveCommands() {
578600
}
579601
clusterState = rebuildRoutingTable(clusterState, routingNodes);
580602

603+
final var dataNodeIds = clusterState.nodes().getDataNodes().keySet();
604+
for (var nodeId : List.of("node-0", "node-1")) {
605+
final var desiredBalanceInput = DesiredBalanceInput.create(
606+
randomInt(),
607+
new RoutingAllocation(new AllocationDeciders(List.of(new AllocationDecider() {
608+
@Override
609+
public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
610+
// Move command works every decision except NO
611+
return randomFrom(Decision.YES, Decision.THROTTLE, Decision.NOT_PREFERRED);
612+
}
613+
})), clusterState, ClusterInfo.EMPTY, SnapshotShardSizeInfo.EMPTY, 0L)
614+
);
615+
var desiredBalance = desiredBalanceComputer.compute(
616+
DesiredBalance.BECOME_MASTER_INITIAL,
617+
desiredBalanceInput,
618+
queue(
619+
new MoveAllocationCommand(index.getName(), 0, nodeId, "node-2"),
620+
new MoveAllocationCommand(index.getName(), 1, nodeId, "node-2")
621+
),
622+
input -> true
623+
);
624+
625+
final Set<String> expectedNodeIds = Sets.difference(dataNodeIds, Set.of(nodeId));
626+
assertDesiredAssignments(
627+
desiredBalance,
628+
Map.of(
629+
new ShardId(index, 0),
630+
new ShardAssignment(expectedNodeIds, 2, 0, 0),
631+
new ShardId(index, 1),
632+
new ShardAssignment(expectedNodeIds, 2, 0, 0)
633+
)
634+
);
635+
}
636+
}
637+
638+
public void testCannotApplyMoveCommand() {
639+
var desiredBalanceComputer = createDesiredBalanceComputer(new ShardsAllocator() {
640+
@Override
641+
public void allocate(RoutingAllocation allocation) {
642+
// This runs after the move commands have been executed and failed, we assert that no movement should be seen
643+
// in the routing nodes.
644+
assertThat(
645+
"unexpected relocating shards: " + allocation.routingNodes(),
646+
allocation.routingNodes().getRelocatingShardCount(),
647+
equalTo(0)
648+
);
649+
assertThat(allocation.routingNodes().node("node-2").isEmpty(), equalTo(true));
650+
}
651+
652+
@Override
653+
public ShardAllocationDecision decideShardAllocation(ShardRouting shard, RoutingAllocation allocation) {
654+
throw new AssertionError("only used for allocation explain");
655+
}
656+
});
657+
var clusterState = createInitialClusterState(3);
658+
var index = clusterState.metadata().getProject().index(TEST_INDEX).getIndex();
659+
660+
var changes = new RoutingChangesObserver.DelegatingRoutingChangesObserver();
661+
var routingNodes = clusterState.mutableRoutingNodes();
662+
for (var iterator = routingNodes.unassigned().iterator(); iterator.hasNext();) {
663+
var shardRouting = iterator.next();
664+
routingNodes.startShard(iterator.initialize(shardRouting.primary() ? "node-0" : "node-1", null, 0L, changes), changes, 0L);
665+
}
666+
clusterState = rebuildRoutingTable(clusterState, routingNodes);
667+
668+
final var desiredBalanceInput = DesiredBalanceInput.create(
669+
randomInt(),
670+
new RoutingAllocation(new AllocationDeciders(List.of(new AllocationDecider() {
671+
@Override
672+
public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
673+
// Always return NO so that AllocationCommands will silently fail.
674+
return Decision.NO;
675+
}
676+
})), clusterState, ClusterInfo.EMPTY, SnapshotShardSizeInfo.EMPTY, 0L)
677+
);
581678
var desiredBalance = desiredBalanceComputer.compute(
582679
DesiredBalance.BECOME_MASTER_INITIAL,
583-
createInput(clusterState),
680+
desiredBalanceInput,
584681
queue(
585-
new MoveAllocationCommand(index.getName(), 0, "node-1", "node-2"),
586-
new MoveAllocationCommand(index.getName(), 1, "node-1", "node-2")
682+
new MoveAllocationCommand(index.getName(), 0, randomFrom("node-0", "node-1"), "node-2"),
683+
new MoveAllocationCommand(index.getName(), 1, randomFrom("node-0", "node-1"), "node-2")
587684
),
588685
input -> true
589686
);
590687

688+
final Set<String> expectedNodeIds = Set.of("node-0", "node-1");
591689
assertDesiredAssignments(
592690
desiredBalance,
593691
Map.of(
594692
new ShardId(index, 0),
595-
new ShardAssignment(Set.of("node-0", "node-2"), 2, 0, 0),
693+
new ShardAssignment(expectedNodeIds, 2, 0, 0),
596694
new ShardId(index, 1),
597-
new ShardAssignment(Set.of("node-0", "node-2"), 2, 0, 0)
695+
new ShardAssignment(expectedNodeIds, 2, 0, 0)
598696
)
599697
);
600698
}

0 commit comments

Comments
 (0)